mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
Introduce GPU Overhead env var (#5922)
Provide a mechanism for users to set aside an amount of VRAM on each GPU to make room for other applications they want to start after Ollama, or workaround memory prediction bugs
This commit is contained in:
@@ -231,6 +231,25 @@ var (
|
||||
MaxVRAM = Uint("OLLAMA_MAX_VRAM", 0)
|
||||
)
|
||||
|
||||
func Uint64(key string, defaultValue uint64) func() uint64 {
|
||||
return func() uint64 {
|
||||
if s := Var(key); s != "" {
|
||||
if n, err := strconv.ParseUint(s, 10, 64); err != nil {
|
||||
slog.Warn("invalid environment variable, using default", "key", key, "value", s, "default", defaultValue)
|
||||
} else {
|
||||
return n
|
||||
}
|
||||
}
|
||||
|
||||
return defaultValue
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// Set aside VRAM per GPU
|
||||
GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
|
||||
)
|
||||
|
||||
type EnvVar struct {
|
||||
Name string
|
||||
Value any
|
||||
@@ -241,6 +260,7 @@ func AsMap() map[string]EnvVar {
|
||||
ret := map[string]EnvVar{
|
||||
"OLLAMA_DEBUG": {"OLLAMA_DEBUG", Debug(), "Show additional debug information (e.g. OLLAMA_DEBUG=1)"},
|
||||
"OLLAMA_FLASH_ATTENTION": {"OLLAMA_FLASH_ATTENTION", FlashAttention(), "Enabled flash attention"},
|
||||
"OLLAMA_GPU_OVERHEAD": {"OLLAMA_GPU_OVERHEAD", GpuOverhead(), "Reserve a portion of VRAM per GPU (bytes)"},
|
||||
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
||||
|
||||
Reference in New Issue
Block a user