mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 08:17:03 +00:00
add "stop" command (#6739)
This commit is contained in:
@@ -360,7 +360,6 @@ func (s *Scheduler) processCompleted(ctx context.Context) {
|
||||
slog.Debug("runner expired event received", "modelPath", runner.modelPath)
|
||||
runner.refMu.Lock()
|
||||
if runner.refCount > 0 {
|
||||
// Shouldn't happen, but safeguard to ensure no leaked runners
|
||||
slog.Debug("expired event with positive ref count, retrying", "modelPath", runner.modelPath, "refCount", runner.refCount)
|
||||
go func(runner *runnerRef) {
|
||||
// We can't unload yet, but want to as soon as the current request completes
|
||||
@@ -802,6 +801,25 @@ func (s *Scheduler) unloadAllRunners() {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *Scheduler) expireRunner(model *Model) {
|
||||
s.loadedMu.Lock()
|
||||
defer s.loadedMu.Unlock()
|
||||
runner, ok := s.loaded[model.ModelPath]
|
||||
if ok {
|
||||
runner.refMu.Lock()
|
||||
runner.expiresAt = time.Now()
|
||||
if runner.expireTimer != nil {
|
||||
runner.expireTimer.Stop()
|
||||
runner.expireTimer = nil
|
||||
}
|
||||
runner.sessionDuration = 0
|
||||
if runner.refCount <= 0 {
|
||||
s.expiredCh <- runner
|
||||
}
|
||||
runner.refMu.Unlock()
|
||||
}
|
||||
}
|
||||
|
||||
// If other runners are loaded, make sure the pending request will fit in system memory
|
||||
// If not, pick a runner to unload, else return nil and the request can be loaded
|
||||
func (s *Scheduler) maybeFindCPURunnerToUnload(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList) *runnerRef {
|
||||
|
||||
Reference in New Issue
Block a user