Centralize server config handling

This moves all the env var reading into one central module and logs the loaded config once at startup which should help in troubleshooting user server logs
2025-12-11 00:07:07 +00:00 · 2024-05-04 11:46:01 -07:00
parent 6707768ebd
commit f56aa20014
12 changed files with 235 additions and 162 deletions
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -3,12 +3,11 @@ package llm
 import (
 	"fmt"
 	"log/slog"
-	"os"
-	"strconv"

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/format"
 	"github.com/ollama/ollama/gpu"
+	"github.com/ollama/ollama/server/envconfig"
 )

 // This algorithm looks for a complete fit to determine if we need to unload other models
@@ -50,15 +49,8 @@ func EstimateGPULayers(gpus []gpu.GpuInfo, ggml *GGML, projectors []string, opts
 	for _, info := range gpus {
 		memoryAvailable += info.FreeMemory
 	}
-	userLimit := os.Getenv("OLLAMA_MAX_VRAM")
-	if userLimit != "" {
-		avail, err := strconv.ParseUint(userLimit, 10, 64)
-		if err != nil {
-			slog.Error("invalid setting, ignoring", "OLLAMA_MAX_VRAM", userLimit, "error", err)
-		} else {
-			slog.Info("user override memory limit", "OLLAMA_MAX_VRAM", avail, "actual", memoryAvailable)
-			memoryAvailable = avail
-		}
+	if envconfig.MaxVRAM > 0 {
+		memoryAvailable = envconfig.MaxVRAM
 	}

 	slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", format.HumanBytes2(memoryAvailable))