mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 00:07:07 +00:00
llm: make load time stall duration configurable via OLLAMA_LOAD_TIMEOUT
With the new very large parameter models, some users are willing to wait for a very long time for models to load.
This commit is contained in:
@@ -112,6 +112,26 @@ func KeepAlive() (keepAlive time.Duration) {
|
||||
return keepAlive
|
||||
}
|
||||
|
||||
// LoadTimeout returns the duration for stall detection during model loads. LoadTimeout can be configured via the OLLAMA_LOAD_TIMEOUT environment variable.
|
||||
// Zero or Negative values are treated as infinite.
|
||||
// Default is 5 minutes.
|
||||
func LoadTimeout() (loadTimeout time.Duration) {
|
||||
loadTimeout = 5 * time.Minute
|
||||
if s := Var("OLLAMA_LOAD_TIMEOUT"); s != "" {
|
||||
if d, err := time.ParseDuration(s); err == nil {
|
||||
loadTimeout = d
|
||||
} else if n, err := strconv.ParseInt(s, 10, 64); err == nil {
|
||||
loadTimeout = time.Duration(n) * time.Second
|
||||
}
|
||||
}
|
||||
|
||||
if loadTimeout <= 0 {
|
||||
return time.Duration(math.MaxInt64)
|
||||
}
|
||||
|
||||
return loadTimeout
|
||||
}
|
||||
|
||||
func Bool(k string) func() bool {
|
||||
return func() bool {
|
||||
if s := Var(k); s != "" {
|
||||
@@ -245,10 +265,8 @@ func Uint64(key string, defaultValue uint64) func() uint64 {
|
||||
}
|
||||
}
|
||||
|
||||
var (
|
||||
// Set aside VRAM per GPU
|
||||
GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
|
||||
)
|
||||
// Set aside VRAM per GPU
|
||||
var GpuOverhead = Uint64("OLLAMA_GPU_OVERHEAD", 0)
|
||||
|
||||
type EnvVar struct {
|
||||
Name string
|
||||
@@ -264,6 +282,7 @@ func AsMap() map[string]EnvVar {
|
||||
"OLLAMA_HOST": {"OLLAMA_HOST", Host(), "IP Address for the ollama server (default 127.0.0.1:11434)"},
|
||||
"OLLAMA_KEEP_ALIVE": {"OLLAMA_KEEP_ALIVE", KeepAlive(), "The duration that models stay loaded in memory (default \"5m\")"},
|
||||
"OLLAMA_LLM_LIBRARY": {"OLLAMA_LLM_LIBRARY", LLMLibrary(), "Set LLM library to bypass autodetection"},
|
||||
"OLLAMA_LOAD_TIMEOUT": {"OLLAMA_LOAD_TIMEOUT", LoadTimeout(), "How long to allow model loads to stall before giving up (default \"5m\")"},
|
||||
"OLLAMA_MAX_LOADED_MODELS": {"OLLAMA_MAX_LOADED_MODELS", MaxRunners(), "Maximum number of loaded models per GPU"},
|
||||
"OLLAMA_MAX_QUEUE": {"OLLAMA_MAX_QUEUE", MaxQueue(), "Maximum number of queued requests"},
|
||||
"OLLAMA_MODELS": {"OLLAMA_MODELS", Models(), "The path to the models directory"},
|
||||
|
||||
Reference in New Issue
Block a user