From d950ff12c09c07a1cda7242373071fb9e7af9ddc Mon Sep 17 00:00:00 2001 From: Daniel Hiltgen Date: Thu, 22 May 2025 14:31:36 -0700 Subject: [PATCH] sched: fix runner leak during reloading unload (#10819) When the same model is being reloaded rapidly with client connections being canceled before the model finishes loading, the queued unload event could cause a leak of runners by deleting a different runner from the loaded list. --- server/sched.go | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/server/sched.go b/server/sched.go index 3fc54e55..612e4702 100644 --- a/server/sched.go +++ b/server/sched.go @@ -387,6 +387,17 @@ func (s *Scheduler) processCompleted(ctx context.Context) { s.loadedMu.Unlock() runner.refMu.Unlock() slog.Debug("duplicate expired event, ignoring", "runner", runner) + } else if runner.pid != runnerToUnload.pid { + // If the pids do not match, we likely had multiple load + // failures for the same model in quick succession because the + // request contexts were canceled, and we are now draining the + // queue of events. Ensure the orphaned runner is properly shut down, but + // do not delete the mismatched loaded runner, or wait for VRAM + // convergence. + slog.Debug("orphaned runner shutting down", "orphan", runner, "loaded", runnerToUnload) + runner.unload() + s.loadedMu.Unlock() + runner.refMu.Unlock() } else { slog.Debug("starting background wait for VRAM recovery", "runner", runner) finished := runner.waitForVRAMRecovery()