llm: looser checks for minimum memory (#5677)

This commit is contained in:
Jeffrey Morgan
2024-07-13 09:20:05 -07:00
committed by GitHub
parent 02fea420e5
commit ef98803d63

View File

@@ -127,7 +127,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
// On linux, over-allocating CPU memory will almost always result in an error // On linux, over-allocating CPU memory will almost always result in an error
if runtime.GOOS == "linux" { if runtime.GOOS == "linux" {
systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize systemMemoryRequired := estimate.TotalSize - estimate.VRAMSize
available := min(systemTotalMemory, systemFreeMemory+systemSwapFreeMemory) available := systemFreeMemory + systemSwapFreeMemory
if systemMemoryRequired > available { if systemMemoryRequired > available {
slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory)) slog.Warn("model request too large for system", "requested", format.HumanBytes2(systemMemoryRequired), "available", available, "total", format.HumanBytes2(systemTotalMemory), "free", format.HumanBytes2(systemFreeMemory), "swap", format.HumanBytes2(systemSwapFreeMemory))
return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available)) return nil, fmt.Errorf("model requires more system memory (%s) than is available (%s)", format.HumanBytes2(systemMemoryRequired), format.HumanBytes2(available))