Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-11 00:07:07 +00:00)
scale graph based on gpu count
@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Options
 		graphFullOffload = graphPartialOffload
 	}
 
+	graphFullOffload *= uint64(info.DeviceCount)
+	graphPartialOffload *= uint64(info.DeviceCount)
+
 	// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
 	memoryRequiredTotal := memoryMinimum + graphFullOffload
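A minimal standalone sketch of what the hunk does: the compute-graph allocation is assumed to happen once per GPU, so the single-device graph estimate is multiplied by the device count before being added to the minimum VRAM overhead. The helper name, parameters, and example numbers below are illustrative, not the actual Ollama API; the condition guarding the graphFullOffload fallback is not visible in the hunk, so a zero value is treated as "unknown" here.

package main

import "fmt"

// estimateGraphMemory is a hypothetical helper mirroring the hunk above.
func estimateGraphMemory(graphFullOffload, graphPartialOffload, memoryMinimum, deviceCount uint64) uint64 {
	// Fall back to the partial-offload graph size when no full-offload
	// estimate is available (assumption: zero means "not reported").
	if graphFullOffload == 0 {
		graphFullOffload = graphPartialOffload
	}

	// The added lines: scale both graph estimates by the GPU count.
	graphFullOffload *= deviceCount
	graphPartialOffload *= deviceCount
	_ = graphPartialOffload // the partial-offload path is not shown in this hunk

	// memoryRequiredTotal represents the memory required for full GPU
	// offloading (all layers).
	memoryRequiredTotal := memoryMinimum + graphFullOffload
	return memoryRequiredTotal
}

func main() {
	// Illustrative numbers only: 512 MiB minimum, 256 MiB graph, 2 GPUs.
	total := estimateGraphMemory(256<<20, 128<<20, 512<<20, 2)
	fmt.Printf("memoryRequiredTotal: %d MiB\n", total>>20)
}

With two GPUs the 256 MiB graph estimate doubles to 512 MiB, giving a 1024 MiB total; before this change the per-device graph size was counted only once regardless of GPU count.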