mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 00:07:07 +00:00
ggml: fix crash for array head counts
If it's an array, it uses the max value in the array If array values for head counts becomes more popular, we can consider a more invasive change like #10225 to calculate more accurate estimates. Fixes: #9984
This commit is contained in:
@@ -149,7 +149,12 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
|
||||
}
|
||||
|
||||
if graphPartialOffload == 0 {
|
||||
graphPartialOffload = f.KV().GQA() * kvTotal / 6
|
||||
headsKV := f.KV().HeadCountKVMin()
|
||||
if headsKV == 0 {
|
||||
headsKV = 1
|
||||
}
|
||||
gqa := f.KV().HeadCountMax() / headsKV
|
||||
graphPartialOffload = gqa * kvTotal / 6
|
||||
}
|
||||
if graphFullOffload == 0 {
|
||||
graphFullOffload = graphPartialOffload
|
||||
|
||||
Reference in New Issue
Block a user