gptoss: fix memory calc (#11700)

This commit is contained in:
Michael Yang
2025-08-05 15:56:12 -07:00
committed by GitHub
parent ee92ca3e1d
commit fcec04bf42

View File

@@ -676,7 +676,7 @@ func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType stri
 }
 }
 fullOffload = 4 * f.KV().HeadCountMax() / cmp.Or(f.KV().HeadCountKVMin(), 1) * kvTotal / 6
-partialOffload = 2 * fullOffload
+partialOffload = fullOffload
 }
 return