update llama.cpp submodule to ceca1ae (#3064)

2025-12-12 08:47:01 +00:00 · 2024-03-11 12:57:48 -07:00
parent f878e91070
commit 369eda65f5
6 changed files with 36 additions and 61 deletions
--- a/llm/patches/01-cache.diff
+++ b/llm/patches/01-cache.diff
@@ -1,19 +1,21 @@
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index f255ad76..914ecfdd 100644
+index 8fe5e0b1..3e82acb9 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
-@@ -1101,12 +1101,13 @@ struct server_context {
+@@ -997,13 +997,15 @@ struct llama_server_context
+                 slot.n_sent_text += result.text_to_send.size();
                 // add the token to slot queue and cache
             }
- 
 -            slot.add_token_string(result);
-             if (slot.params.stream) {
+
+             if (slot.params.stream)
+             {
                 send_partial_response(slot, result);
             }
         }
 
 +        slot.add_token_string(result);
 +
-         if (incomplete) {
+         if (incomplete)
+         {
             slot.has_next_token = true;
-         }