Fix the CPU estimatedTotal memory and get the expiry time for loading models (#4461)

2025-12-10 15:57:04 +00:00 · 2024-05-15 15:43:16 -07:00
parent 5fa36a0833
commit d1692fd3e0
2 changed files with 9 additions and 0 deletions
--- a/llm/server.go
+++ b/llm/server.go
@@ -89,6 +89,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr

 		cpuRunner = serverForCpu()
 		gpuCount = 0
+		_, _, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
 	} else {
 		if gpus[0].Library == "metal" {
 			memInfo, err := gpu.GetCPUMem()