Add support for new models and fix GitHub issues

- Add Gemma3n model support with text generation capabilities - Add new CUDA mean operations for improved performance - Add macOS documentation and performance tests - Update LLAMA patches for ROCm/CUDA compatibility - Fix various model conversion and processing issues - Update CI workflows and build configurations - Add library model tests and Shakespeare test data 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-12 00:37:04 +00:00 · 2025-07-20 00:12:36 +08:00
parent 1fa71c2670
commit cbcbc9ae07
81 changed files with 132316 additions and 747 deletions
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -151,7 +151,12 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin
 	}

 	if graphPartialOffload == 0 {
-		graphPartialOffload = f.KV().GQA() * kvTotal / 6
+		headsKV := f.KV().HeadCountKVMin()
+		if headsKV == 0 {
+			headsKV = 1
+		}
+		gqa := f.KV().HeadCountMax() / headsKV
+		graphPartialOffload = gqa * kvTotal / 6
 	}
 	if graphFullOffload == 0 {
 		graphFullOffload = graphPartialOffload