chore: update mllama to use ollama engine (#10637)

2025-12-16 10:47:01 +00:00 · 2025-05-13 17:36:02 -07:00
parent 0478d440f0
commit 23125648b8
67 changed files with 785 additions and 4354 deletions
--- a/llama/llama.cpp/src/llama-quant.cpp
+++ b/llama/llama.cpp/src/llama-quant.cpp
@@ -639,9 +639,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
        if (llama_model_has_encoder(&model)) {
            n_attn_layer *= 3;
        }
-        if (qs.n_attention_wv != n_attn_layer) {
-            LLAMA_LOG_WARN("%s: n_attention_wv is unexpected, expected: %d, found: %d\n", __func__, n_attn_layer, qs.n_attention_wv);
-        }
+        GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
    }

    size_t total_size_org = 0;