Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-10 15:57:04 +00:00)

llama: update vendor code to commit ba1cb19c (#8101)

1 file changed, 253 insertions(+), 14 deletions(-)

@@ -15,10 +15,10 @@ in general, the values are (bskcn_tv, 1 - bskcn_tv)
diff --git a/src/llama.cpp b/src/llama.cpp
index d1791af0..b01770d0 100644
index 9e292c4f..26be6254 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -195,6 +195,7 @@ enum llm_arch {
@@ -196,6 +196,7 @@ enum llm_arch {
LLM_ARCH_GRANITE,
LLM_ARCH_GRANITE_MOE,
LLM_ARCH_CHAMELEON,
@@ -26,7 +26,7 @@ index d1791af0..b01770d0 100644
LLM_ARCH_UNKNOWN,
};

@@ -249,6 +250,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
@@ -251,6 +252,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_GRANITE, "granite" },
{ LLM_ARCH_GRANITE_MOE, "granitemoe" },
{ LLM_ARCH_CHAMELEON, "chameleon" },
@@ -34,15 +34,15 @@ index d1791af0..b01770d0 100644
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};

@@ -306,6 +308,7 @@ enum llm_kv {
@@ -308,6 +310,7 @@ enum llm_kv {
LLM_KV_ATTENTION_RELATIVE_BUCKETS_COUNT,
LLM_KV_ATTENTION_SLIDING_WINDOW,
LLM_KV_ATTENTION_SCALE,
+ LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION,

LLM_KV_ROPE_DIMENSION_COUNT,
LLM_KV_ROPE_FREQ_BASE,
@@ -408,20 +411,21 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
LLM_KV_ROPE_DIMENSION_SECTIONS,
@@ -411,20 +414,21 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
{ LLM_KV_RESIDUAL_SCALE, "%s.residual_scale" },
{ LLM_KV_EMBEDDING_SCALE, "%s.embedding_scale" },

@@ -77,8 +77,8 @@ index d1791af0..b01770d0 100644
+ { LLM_KV_ATTENTION_BLOCK_SKIP_CONNECTION, "%s.attention.block_skip_connection.%d" },

{ LLM_KV_ROPE_DIMENSION_COUNT, "%s.rope.dimension_count" },
{ LLM_KV_ROPE_FREQ_BASE, "%s.rope.freq_base" },
@@ -603,6 +607,7 @@ enum llm_tensor {
{ LLM_KV_ROPE_DIMENSION_SECTIONS, "%s.rope.dimension_sections" },
@@ -607,6 +611,7 @@ enum llm_tensor {
LLM_TENSOR_ENC_OUTPUT_NORM,
LLM_TENSOR_CLS,
LLM_TENSOR_CLS_OUT,
@@ -86,7 +86,7 @@ index d1791af0..b01770d0 100644
};

static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
@@ -1541,6 +1546,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
@@ -1564,6 +1569,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
},
},
@@ -111,7 +111,7 @@ index d1791af0..b01770d0 100644
{
LLM_ARCH_UNKNOWN,
{
@@ -2401,6 +2424,7 @@ enum e_model {
@@ -2425,6 +2448,7 @@ enum e_model {
MODEL_15B,
MODEL_16B,
MODEL_20B,
@@ -119,7 +119,7 @@ index d1791af0..b01770d0 100644
MODEL_30B,
MODEL_32B,
MODEL_34B,
@@ -2451,6 +2475,8 @@ struct llama_hparams {
@@ -2475,6 +2499,8 @@ struct llama_hparams {
std::array<uint32_t, LLAMA_MAX_LAYERS> n_head_kv_arr;
std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;

@@ -128,7 +128,7 @@ index d1791af0..b01770d0 100644
uint32_t n_layer_dense_lead = 0;
uint32_t n_lora_q = 0;
uint32_t n_lora_kv = 0;
@@ -2521,6 +2547,7 @@ struct llama_hparams {
@@ -2546,6 +2572,7 @@ struct llama_hparams {
if (this->n_head_arr != other.n_head_arr) return true;
if (this->n_head_kv_arr != other.n_head_kv_arr) return true;
if (this->n_ff_arr != other.n_ff_arr) return true;
@@ -136,7 +136,7 @@ index d1791af0..b01770d0 100644

if (this->n_rel_attn_bkts != other.n_rel_attn_bkts) return true;
if (this->n_layer_dense_lead != other.n_layer_dense_lead) return true;
@@ -2630,6 +2657,14 @@ struct llama_hparams {
@@ -2658,6 +2685,14 @@ struct llama_hparams {
return ssm_d_state * ssm_d_inner;
}
}
@@ -151,7 +151,7 @@ index d1791af0..b01770d0 100644
};

static_assert(std::is_trivially_copyable<llama_hparams>::value, "llama_hparams must be trivially copyable");
@@ -2816,6 +2851,8 @@ struct llama_layer {
@@ -2844,6 +2879,8 @@ struct llama_layer {
struct ggml_tensor * ffn_gate_scale;
struct ggml_tensor * ffn_up_scale;
struct ggml_tensor * ffn_down_scale;
@@ -160,7 +160,7 @@ index d1791af0..b01770d0 100644
};

// very similar to llama_batch,
@@ -6209,6 +6246,21 @@ static void llm_load_hparams(
@@ -6247,6 +6284,21 @@ static void llm_load_hparams(
default: model.type = e_model::MODEL_UNKNOWN;
}
} break;
@@ -182,7 +182,7 @@ index d1791af0..b01770d0 100644
default: (void)0;
}

@@ -7198,6 +7250,7 @@ static const std::map<llm_tensor, llm_tensor_info> llm_tensor_info_mapping = {
@@ -7239,6 +7291,7 @@ static const std::map<llm_tensor, llm_tensor_info> llm_tensor_info_mapping = {
{LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
// this tensor is loaded for T5, but never used
{LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
@@ -190,7 +190,7 @@ index d1791af0..b01770d0 100644
};

// checks if the weight tensor can be used with the specified buffer type and device
@@ -9205,6 +9258,35 @@ static bool llm_load_tensors(
@@ -9253,6 +9306,35 @@ static bool llm_load_tensors(

layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);

@@ -226,7 +226,7 @@ index d1791af0..b01770d0 100644
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0);
layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0);
@@ -16652,6 +16734,158 @@ struct llm_build_context {
@@ -16671,6 +16753,158 @@ struct llm_build_context {

return gf;
}
@@ -385,7 +385,7 @@ index d1791af0..b01770d0 100644
};

static struct ggml_cgraph * llama_build_graph_defrag(llama_context & lctx, const std::vector<uint32_t> & ids) {
@@ -16921,6 +17155,10 @@ static struct ggml_cgraph * llama_build_graph(
@@ -16942,6 +17176,10 @@ static struct ggml_cgraph * llama_build_graph(
{
result = llm.build_chameleon();
} break;
@@ -396,7 +396,7 @@ index d1791af0..b01770d0 100644
default:
GGML_ABORT("fatal error");
}
@@ -20132,6 +20370,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
@@ -20137,6 +20375,7 @@ enum llama_rope_type llama_rope_type(const struct llama_model * model) {
case LLM_ARCH_GRANITE:
case LLM_ARCH_GRANITE_MOE:
case LLM_ARCH_CHAMELEON:
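
As an aside on the metadata key introduced above: the LLM_KV_NAMES entry "%s.attention.block_skip_connection.%d" is a printf-style template, so it expands into one concrete GGUF key per index. The snippet below only illustrates that expansion; the architecture prefix and the index range are placeholders, not values taken from this commit.

#include <cstdio>

int main() {
    // Placeholder architecture prefix; the real prefix is whatever architecture
    // name the patch registers, which this diff excerpt does not show.
    const char * arch = "example_arch";
    char key[128];

    for (int idx = 0; idx < 2; ++idx) {
        // Same format string as the LLM_KV_NAMES entry added by the patch.
        std::snprintf(key, sizeof(key), "%s.attention.block_skip_connection.%d", arch, idx);
        std::printf("%s\n", key); // e.g. example_arch.attention.block_skip_connection.0
    }
    return 0;
}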
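
The context of the first hunk quotes a comment from the patch, "in general, the values are (bskcn_tv, 1 - bskcn_tv)", i.e. the block skip connection mixes two activations with complementary weights. The sketch below shows that arithmetic on plain floats only, under the assumption that bskcn_tv scales the current activation and (1 - bskcn_tv) scales the saved skip activation; the patch itself presumably expresses the equivalent computation as ggml graph code inside llm_build_context (the large hunk above).

#include <cstddef>
#include <vector>

// Illustrative helper, not code from the patch: blend the current hidden state
// with a previously saved skip activation using complementary weights.
static void blend_block_skip(std::vector<float> & cur,
                             const std::vector<float> & skip,
                             float bskcn_tv) {
    // in general, the values are (bskcn_tv, 1 - bskcn_tv)
    const std::size_t n = cur.size() < skip.size() ? cur.size() : skip.size();
    for (std::size_t i = 0; i < n; ++i) {
        cur[i] = bskcn_tv * cur[i] + (1.0f - bskcn_tv) * skip[i];
    }
}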