Scale graph memory estimates by GPU device count

This commit is contained in:
Michael Yang
2024-04-16 14:44:13 -07:00
parent 7c9792a6e0
commit 26df674785
2 changed files with 4 additions and 1 deletions

View File

@@ -79,6 +79,9 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
graphFullOffload = graphPartialOffload
}
graphFullOffload *= uint64(info.DeviceCount)
graphPartialOffload *= uint64(info.DeviceCount)
// memoryRequiredTotal represents the memory required for full GPU offloading (all layers)
memoryRequiredTotal := memoryMinimum + graphFullOffload