Mirror of https://github.com/dogkeeper886/ollama37.git
chore: update mllama to use ollama engine (#10637)
llama/llama.cpp/src/llama-quant.cpp (vendored): 4 changed lines
@@ -639,9 +639,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
         if (llama_model_has_encoder(&model)) {
             n_attn_layer *= 3;
         }
-        if (qs.n_attention_wv != n_attn_layer) {
-            LLAMA_LOG_WARN("%s: n_attention_wv is unexpected, expected: %d, found: %d\n", __func__, n_attn_layer, qs.n_attention_wv);
-        }
+        GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected");
     }
 
     size_t total_size_org = 0;
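For context, the assertion this hunk restores checks that the number of attn_v.weight tensors encountered while quantizing matches the number of attention layers implied by the model's hyperparameters (tripled for encoder-decoder models). Below is a minimal standalone sketch of that check, not the llama.cpp API; the struct and function names (hparams_sketch, count_attn_layers) are hypothetical stand-ins for illustration.

// Standalone sketch of the sanity check behind the restored GGML_ASSERT.
// All names here are hypothetical; only the counting logic mirrors the diff.
#include <cassert>
#include <cstdint>
#include <vector>

// Hypothetical per-model parameters: one n_head_kv entry per layer.
struct hparams_sketch {
    std::vector<int32_t> n_head_kv_arr; // 0 means the layer carries no KV attention
    bool has_encoder = false;           // encoder-decoder models expect 3x the count
};

// Count layers that actually carry attention: layers with a non-zero number of KV heads.
static int32_t count_attn_layers(const hparams_sketch & hp) {
    int32_t n = 0;
    for (int32_t n_head_kv : hp.n_head_kv_arr) {
        if (n_head_kv != 0) {
            n++;
        }
    }
    if (hp.has_encoder) {
        n *= 3; // encoder self-attention, decoder self-attention, cross-attention
    }
    return n;
}

int main() {
    hparams_sketch hp;
    hp.n_head_kv_arr = {8, 8, 0, 8}; // 3 attention layers, 1 layer without KV heads
    int32_t n_attention_wv = 3;      // attn_v.weight tensors seen during quantization

    // The check the diff restores: every attention layer must contribute exactly one
    // attn_v.weight tensor, otherwise the model metadata is inconsistent and the
    // quantizer aborts instead of merely logging a warning.
    assert(n_attention_wv == count_attn_layers(hp) && "n_attention_wv is unexpected");
    return 0;
}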