chore: update mllama to use ollama engine (#10637)

2025-12-15 02:07:03 +00:00 · 2025-05-13 17:36:02 -07:00
parent 0478d440f0
commit 23125648b8
67 changed files with 785 additions and 4354 deletions
--- a/llama/llama.cpp/src/llama-model.h
+++ b/llama/llama.cpp/src/llama-model.h
@@ -11,7 +11,6 @@
 #include <string>
 #include <unordered_map>
 #include <vector>
-#include <stdexcept>

 struct llama_cparams;
 struct llama_ubatch;
@@ -75,7 +74,6 @@ enum llm_type {
    LLM_TYPE_40B,
    LLM_TYPE_65B,
    LLM_TYPE_70B,
-    LLM_TYPE_90B,
    LLM_TYPE_236B,
    LLM_TYPE_290B,
    LLM_TYPE_314B,
@@ -320,16 +318,6 @@ struct llama_layer {

    struct ggml_tensor * bskcn_tv = nullptr;

-    // cross attention
-    struct ggml_tensor * cross_attn_k_norm = nullptr;
-    struct ggml_tensor * cross_attn_k_proj = nullptr;
-    struct ggml_tensor * cross_attn_o_proj = nullptr;
-    struct ggml_tensor * cross_attn_q_norm = nullptr;
-    struct ggml_tensor * cross_attn_q_proj = nullptr;
-    struct ggml_tensor * cross_attn_v_proj = nullptr;
-    struct ggml_tensor * cross_attn_attn_gate = nullptr;
-    struct ggml_tensor * cross_attn_mlp_gate = nullptr;
-
    struct llama_layer_posnet posnet;

    struct llama_layer_convnext convnext;