llama: Decouple patching script from submodule (#7139)
* Refine llama.cpp vendoring workflow tools

Switch from the sync.sh script to make-based tooling

* Run the new make sync and patch flow
@@ -1,3 +1,14 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Thu, 6 Jun 2024 23:55:47 -0700
+Subject: [PATCH] cuda
+
+---
+ ggml/include/ggml-cuda.h | 2 ++
+ ggml/src/ggml-backend.c  | 5 +++++
+ ggml/src/ggml-cuda.cu    | 6 ++++--
+ 3 files changed, 11 insertions(+), 2 deletions(-)
+
 diff --git a/ggml/include/ggml-cuda.h b/ggml/include/ggml-cuda.h
 index 71bb6dcf..08be0895 100644
 --- a/ggml/include/ggml-cuda.h
@@ -1,3 +1,12 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Mon, 16 Sep 2024 15:53:13 -0700
+Subject: [PATCH] pretokenizer
+
+---
+ src/llama.cpp | 14 +++-----------
+ 1 file changed, 3 insertions(+), 11 deletions(-)
+
 diff --git a/src/llama.cpp b/src/llama.cpp
 index 4c0a1bb6..800dfb95 100644
 --- a/src/llama.cpp
@@ -1,3 +1,12 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Mon, 16 Sep 2024 15:53:12 -0700
+Subject: [PATCH] metal
+
+---
+ ggml/src/ggml-metal.m | 30 +++++++++++++-----------------
+ 1 file changed, 13 insertions(+), 17 deletions(-)
+
 diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
 index 9da08fe2..3a433703 100644
 --- a/ggml/src/ggml-metal.m
@@ -1,3 +1,12 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: jmorganca <jmorganca@gmail.com>
+Date: Wed, 12 Jun 2024 12:18:40 -0700
+Subject: [PATCH] ggml-metal
+
+---
+ ggml/src/ggml-metal.m | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
 diff --git a/ggml/src/ggml-metal.m b/ggml/src/ggml-metal.m
 index 3a433703..829c5e39 100644
 --- a/ggml/src/ggml-metal.m
@@ -1,28 +1,36 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Mon, 16 Sep 2024 15:53:14 -0700
+Subject: [PATCH] embeddings
+
+---
+ src/llama.cpp | 15 +++++++++------
+ 1 file changed, 9 insertions(+), 6 deletions(-)
+
 diff --git a/src/llama.cpp b/src/llama.cpp
-index 4c0a1bb6..17e5bc2a 100644
+index 800dfb95..a639522d 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
-@@ -16928,7 +16928,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
+@@ -16920,7 +16920,7 @@ static size_t llama_output_reserve(llama_context & lctx, size_t n_outputs) {
      const auto n_embd = hparams.n_embd;
 
      // TODO: use a per-batch flag for logits presence instead
 -    const bool has_logits = !cparams.embeddings;
 +    const bool has_logits =  cparams.causal_attn;
      const bool has_embd = cparams.embeddings && (cparams.pooling_type == LLAMA_POOLING_TYPE_NONE);
 
      const size_t logits_size = has_logits ? n_vocab*n_outputs_max : 0;
-@@ -17200,20 +17200,23 @@ static int llama_decode_internal(
+@@ -17192,20 +17192,23 @@ static int llama_decode_internal(
          // no output
          res  = nullptr;
          embd = nullptr;
 -    } else if (cparams.embeddings) {
 -        res  = nullptr; // do not extract logits for embedding case
 -        embd = nullptr;
 -        for (int i = ggml_graph_n_nodes(gf) - 1; i >= 0; --i) {
 +    }
 +
 +    if (cparams.embeddings) {
 +        for (int i = ggml_graph_n_nodes(gf) - 1; i >= 0; --i) {
          for (int i = ggml_graph_n_nodes(gf) - 1; i >= 0; --i) {
 +            embd = ggml_graph_node(gf, i);
          if (strcmp(ggml_graph_node(gf, i)->name, "result_embd_pooled") == 0) {
 -            embd = ggml_graph_node(gf, i);
@@ -39,5 +47,5 @@ index 4c0a1bb6..17e5bc2a 100644
 +        res = nullptr; // do not extract logits when not needed
 +    }
      // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs);
 
      ggml_backend_sched_alloc_graph(lctx.sched, gf);
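Taken together, the two hunks above decouple logit extraction from embedding extraction in llama.cpp: logit buffers are reserved whenever the model runs with causal attention, and pooled embeddings are located independently by scanning the graph. A minimal sketch of the resulting control flow, condensed from the hunks above (the guard on the final branch is assumed to mirror the has_logits condition, which is not fully visible in this excerpt):

    // Sketch of the post-patch extraction logic in llama_decode_internal.
    // Logits and embeddings are now independent: either, both, or neither
    // can be extracted from the same decode call.
    if (cparams.embeddings) {
        // walk the graph backwards to find the pooled-embeddings output
        for (int i = ggml_graph_n_nodes(gf) - 1; i >= 0; --i) {
            embd = ggml_graph_node(gf, i);
            if (strcmp(embd->name, "result_embd_pooled") == 0) {
                break;
            }
        }
    } else {
        embd = nullptr; // do not extract embeddings when not needed
    }

    if (!cparams.causal_attn) { // assumed guard, matching has_logits above
        res = nullptr; // do not extract logits when not needed
    }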
@@ -1,3 +1,12 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Mon, 16 Sep 2024 15:53:15 -0700
+Subject: [PATCH] clip-unicode
+
+---
+ examples/llava/clip.cpp | 40 +++++++++++++++++++++++++++++++++++++++-
+ 1 file changed, 39 insertions(+), 1 deletion(-)
+
 diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
 index 14e02c8d..6e849d8e 100644
 --- a/examples/llava/clip.cpp
@@ -1,5 +1,21 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Michael Yang <mxyng@pm.me>
+Date: Mon, 16 Sep 2024 15:53:16 -0700
+Subject: [PATCH] solar-pro
+
+solar-pro introduces block skip connections where blocks are connected
+to other, non-sequential blocks with a scale multiple
+
+this change adds 4 new keys to store the skip connections and one new
+tensor to store the scalar. the scalar is implemented as a 1-dimensional
+tensor with 2 elements derived from the model's bskcn_tv configuration.
+in general, the values are (bskcn_tv, 1 - bskcn_tv)
+---
+ src/llama.cpp | 269 +++++++++++++++++++++++++++++++++++++++++++++++---
+ 1 file changed, 255 insertions(+), 14 deletions(-)
+
 diff --git a/src/llama.cpp b/src/llama.cpp
-index bdad28b3..1fe6189a 100644
+index a639522d..83b80b59 100644
 --- a/src/llama.cpp
 +++ b/src/llama.cpp
 @@ -217,6 +217,7 @@ enum llm_arch {
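The description above implies each skip connection mixes a saved block output into the current hidden state using the two-element scale tensor. A hedged sketch of that blend in ggml terms, assuming the scale tensor holds { bskcn_tv, 1 - bskcn_tv } (the variable names are illustrative, since the patch body is truncated here):

    // cur:   current hidden state
    // skip:  saved output of an earlier, non-sequential block
    // scale: 1-D tensor with 2 elements { bskcn_tv, 1 - bskcn_tv }
    struct ggml_tensor * s0 = ggml_view_1d(ctx0, scale, 1, 0);
    struct ggml_tensor * s1 = ggml_view_1d(ctx0, scale, 1, ggml_element_size(scale));
    cur = ggml_add(ctx0,
            ggml_mul(ctx0, skip, s0),   // scaled skip input
            ggml_mul(ctx0, cur,  s1));  // scaled current state

ggml_mul broadcasts the single-element views across the hidden state, so the blend needs no explicit repeat.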
@@ -1,8 +1,17 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Daniel Hiltgen <daniel@ollama.com>
+Date: Wed, 9 Oct 2024 17:26:23 -0700
+Subject: [PATCH] conditional-fattn
+
+---
+ ggml/src/ggml-cuda.cu | 2 ++
+ 1 file changed, 2 insertions(+)
+
 diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
-index 8a844b02..61d61542 100644
+index 809d6ab1..fe77b81c 100644
 --- a/ggml/src/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda.cu
-@@ -2310,9 +2310,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
+@@ -2347,9 +2347,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
          case GGML_OP_ARGSORT:
              ggml_cuda_op_argsort(ctx, dst);
              break;
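The diffstat shows exactly two inserted lines in ggml_cuda_compute_forward, consistent with the subject conditional-fattn: wrapping a single dispatch case in a preprocessor guard. A hedged sketch of the likely shape (the guarded case and the macro name are assumptions; GGML_CUDA_FLASH_ATTN is illustrative):

    case GGML_OP_ARGSORT:
        ggml_cuda_op_argsort(ctx, dst);
        break;
    #ifdef GGML_CUDA_FLASH_ATTN  // hypothetical flag; makes flash attention opt-in at build time
    case GGML_OP_FLASH_ATTN_EXT:
        ggml_cuda_flash_attn_ext(ctx, dst);
        break;
    #endif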
@@ -1,3 +1,12 @@
+From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
+From: Jesse Gross <jesse@ollama.com>
+Date: Mon, 30 Sep 2024 16:31:04 -0700
+Subject: [PATCH] blas
+
+---
+ ggml/src/ggml-blas.cpp | 4 ++++
+ 1 file changed, 4 insertions(+)
+
 diff --git a/ggml/src/ggml-blas.cpp b/ggml/src/ggml-blas.cpp
 index 6d99c6be..8e1ab99d 100644
 --- a/ggml/src/ggml-blas.cpp