Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-10 15:57:04 +00:00)
Tighten up memory prediction logging
Prior to this change, the memory prediction was logged multiple times as the scheduler iterated to find a suitable configuration, which could be confusing since only the last log before the server starts is actually valid. This now logs once, just before starting the server on the final configuration. It also reports which library is being used instead of always saying "offloading to gpu", even when running on the CPU.
@@ -116,6 +116,8 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	}
 	}
 
+	estimate.log()
+
 	// Loop through potential servers
 	finalErr := errors.New("no suitable llama servers found")
 
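For context, here is a minimal sketch of what the estimate.log() call in the diff might do, assuming a MemoryEstimate struct with inferenceLibrary, layers, and totalSize fields; these names are illustrative guesses, not taken from the actual ollama37 source.

package llm

import "log/slog"

// MemoryEstimate is a hypothetical stand-in for the scheduler's memory
// prediction; the real struct in ollama37 may differ.
type MemoryEstimate struct {
	inferenceLibrary string // e.g. "cpu", "cuda", "metal" (assumed field)
	layers           int    // layers selected for offload (assumed field)
	totalSize        uint64 // estimated memory required, in bytes (assumed field)
}

// log emits the prediction exactly once, naming the library that will
// actually run the model, so a CPU-only run no longer reads as
// "offloading to gpu".
func (m MemoryEstimate) log() {
	slog.Info(
		"offload to "+m.inferenceLibrary,
		"layers", m.layers,
		"total size (bytes)", m.totalSize,
	)
}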