sched: only error when over-allocating system memory (#5626)

This commit is contained in:
Jeffrey Morgan
2024-07-11 00:53:12 -07:00
committed by GitHub
parent efbf41ed81
commit 791650ddef
2 changed files with 9 additions and 37 deletions

View File

@@ -122,6 +122,15 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
}
}
// On Linux, over-allocating system (CPU) memory will almost always result in an error, so reject the request up front
if runtime.GOOS == "linux" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
if systemMemoryRequired > systemTotalMemory {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "system", format.HumanBytes2(systemTotalMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(systemTotalMemory))
}
}
estimate.log()
// Loop through potential servers