diff --git a/server/sched.go b/server/sched.go index 3fc54e55..612e4702 100644 --- a/server/sched.go +++ b/server/sched.go @@ -387,6 +387,17 @@ func (s *Scheduler) processCompleted(ctx context.Context) { s.loadedMu.Unlock() runner.refMu.Unlock() slog.Debug("duplicate expired event, ignoring", "runner", runner) + } else if runner.pid != runnerToUnload.pid { + // If the pids do not match, we likely had multiple load + // failures for the same model in quick succession due to + // request context canceled and are draining the queue of + // events. Ensure the orphaned runner is properly shut down, but + // do not delete the mismatched loaded runner, or wait for VRAM + // convergence. + slog.Debug("orphaned runner shutting down", "orphan", runner, "loaded", runnerToUnload) + runner.unload() + s.loadedMu.Unlock() + runner.refMu.Unlock() } else { slog.Debug("starting background wait for VRAM recovery", "runner", runner) finished := runner.waitForVRAMRecovery()