gemma3 quantization (#9776)

2025-12-16 10:47:01 +00:00 · 2025-03-14 17:41:07 -07:00
parent 2d2247e59e
commit ef378ad673
5 changed files with 149 additions and 0 deletions
--- a/llama/llama.cpp/src/llama-quant.cpp
+++ b/llama/llama.cpp/src/llama-quant.cpp
@@ -737,6 +737,15 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
        // This used to be a regex, but <regex> has an extreme cost to compile times.
        bool quantize = name.rfind("weight") == name.size() - 6; // ends with 'weight'?

+        // don't quantize vision tensors (v.* blocks, embeddings and post-layernorm, plus the mm.* projection/norm weights)
+        quantize &= name.find("v.blk.") == std::string::npos;
+
+        quantize &= name.find("mm.mm_input_projection.weight") == std::string::npos;
+        quantize &= name.find("mm.mm_soft_emb_norm.weight") == std::string::npos;
+        quantize &= name.find("v.patch_embedding.weight") == std::string::npos;
+        quantize &= name.find("v.position_embedding.weight") == std::string::npos;
+        quantize &= name.find("v.post_layernorm.weight") == std::string::npos;
+
        // quantize only 2D and 3D tensors (experts)
        quantize &= (ggml_n_dims(tensor) >= 2);