llama: update to commit e1e8e099 (#10513)
@@ -22,10 +22,10 @@ multiple batches of processing until everything is complete.
  4 files changed, 51 insertions(+), 106 deletions(-)
 
 diff --git a/src/llama-context.cpp b/src/llama-context.cpp
-index 0343ba8a..4b3e6a83 100644
+index cd06ad91..77177c5e 100644
 --- a/src/llama-context.cpp
 +++ b/src/llama-context.cpp
-@@ -594,13 +594,12 @@ llm_graph_result_ptr llama_context::build_kv_self_shift(
+@@ -583,13 +583,12 @@ llm_graph_result_ptr llama_context::build_kv_self_shift(
 
  llm_graph_result_ptr llama_context::build_kv_self_defrag(
          ggml_context * ctx0,
@@ -41,7 +41,7 @@ index 0343ba8a..4b3e6a83 100644
  #if 0
          // CPU defrag
          //
-@@ -672,32 +671,20 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
+@@ -661,32 +660,20 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
          ggml_backend_tensor_set(v_l[il], buf_v.data(), 0, buf_v.size());
      }
  #else
@@ -79,7 +79,7 @@ index 0343ba8a..4b3e6a83 100644
 
          ggml_tensor * view_v_src;
          ggml_tensor * view_v_dst;
-@@ -705,34 +692,30 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
+@@ -694,34 +681,30 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
          if (cparams.flash_attn) {
              // NOTE: the V cache is not transposed when using flash attention
              view_v_src = ggml_view_2d(ctx0, kv_self->v_l[il],
@@ -122,7 +122,7 @@ index 0343ba8a..4b3e6a83 100644
  #endif
 
      return res;
-@@ -741,8 +724,6 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
+@@ -730,8 +713,6 @@ llm_graph_result_ptr llama_context::build_kv_self_defrag(
  void llama_context::kv_self_update() {
      auto & kv = kv_self;
 
@@ -131,7 +131,7 @@ index 0343ba8a..4b3e6a83 100644
      if (kv->has_shift) {
          if (!kv->get_can_shift()) {
              GGML_ABORT("The current context does not support K-shift");
-@@ -763,8 +744,6 @@ void llama_context::kv_self_update() {
+@@ -752,8 +733,6 @@ void llama_context::kv_self_update() {
          res->set_inputs(nullptr);
 
          graph_compute(gf, false);
@@ -140,7 +140,7 @@ index 0343ba8a..4b3e6a83 100644
      }
 
      {
-@@ -779,49 +758,28 @@ void llama_context::kv_self_update() {
+@@ -768,49 +747,28 @@ void llama_context::kv_self_update() {
      // defragment the KV cache if needed
      if (kv->do_defrag) {
          LLAMA_LOG_DEBUG("%s: defragmenting KV cache\n", __func__);
@@ -202,7 +202,7 @@ index 0343ba8a..4b3e6a83 100644
  }
 
  enum llama_pooling_type llama_context::pooling_type() const {
-@@ -1305,9 +1263,12 @@ int llama_context::decode(llama_batch & inp_batch) {
+@@ -1294,9 +1252,12 @@ int llama_context::decode(llama_batch & inp_batch) {
      // find KV slot
      {
          if (!kv_self->find_slot(ubatch)) {
@@ -219,7 +219,7 @@ index 0343ba8a..4b3e6a83 100644
 
      if (!kv_self->recurrent) {
  diff --git a/src/llama-context.h b/src/llama-context.h
-index baa03276..a59ff8fd 100644
+index a50c4afa..30f84bfd 100644
 --- a/src/llama-context.h
 +++ b/src/llama-context.h
  @@ -5,6 +5,7 @@
@@ -230,7 +230,7 @@ index baa03276..a59ff8fd 100644
 
  #include "ggml-cpp.h"
 
-@@ -180,7 +181,8 @@ private:
+@@ -179,7 +180,8 @@ private:
 
      llm_graph_result_ptr build_kv_self_defrag(
              ggml_context * ctx0,
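The hunks above all touch llama.cpp's KV-cache maintenance: build_kv_self_defrag records copy operations into a compute graph that moves cache cells, and kv_self_update later executes that graph. As a rough illustration of the view-and-copy pattern visible in the diff (the view_v_src/view_v_dst pair), here is a minimal standalone sketch. The tensor shape, cell counts, and move offsets are hypothetical, the real code iterates per layer over both the K and V caches, and the header providing ggml_graph_compute_with_ctx varies with the ggml version.

#include "ggml.h"
#include "ggml-cpu.h"   // ggml_graph_compute_with_ctx lives here in recent ggml

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx0 = ggml_init(params);

    // Hypothetical per-layer K cache: n_embd values per cell, kv_size cells.
    const int64_t n_embd  = 64;
    const int64_t kv_size = 128;
    struct ggml_tensor * k_l = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, kv_size);

    // Move nm consecutive cells from cell i_src to cell i_dst: take two 2D
    // views over the same tensor and record a copy, as with the
    // view_v_src/view_v_dst pair in the diff above.
    const int64_t i_src = 96, i_dst = 0, nm = 16;
    const size_t  row   = ggml_row_size(k_l->type, n_embd);  // bytes per cell

    struct ggml_tensor * view_src = ggml_view_2d(ctx0, k_l, n_embd, nm, row, row*i_src);
    struct ggml_tensor * view_dst = ggml_view_2d(ctx0, k_l, n_embd, nm, row, row*i_dst);

    struct ggml_cgraph * gf = ggml_new_graph(ctx0);
    ggml_build_forward_expand(gf, ggml_cpy(ctx0, view_src, view_dst));

    // Running the graph performs the actual copy (plain CPU backend here).
    ggml_graph_compute_with_ctx(ctx0, gf, /*n_threads=*/1);

    ggml_free(ctx0);
    return 0;
}

Expressing the moves as graph ops rather than host-side memcpy lets them run on whichever backend holds the cache; the #if 0 block in the patch, with its buf_v/ggml_backend_tensor_set round trip, appears to be the older CPU-side fallback for the same job.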