update llama.cpp submodule to f364eb6 (#4060)

2025-12-11 00:07:07 +00:00 · 2024-04-30 17:25:39 -04:00
parent 8488388cbd
commit 18d9a7e1f1
2 changed files with 6 additions and 3 deletions
--- a/llm/ext_server/server.cpp
+++ b/llm/ext_server/server.cpp
@@ -1032,7 +1032,7 @@ struct llama_server_context
            slot.has_next_token = false;
        }
-        if (!slot.cache_tokens.empty() && result.tok == llama_token_eos(model))
+        if (llama_token_is_eog(model, result.tok))
        {
            slot.stopped_eos = true;
            slot.has_next_token = false;
@@ -1144,12 +1144,15 @@ struct llama_server_context
        res.result_json = json
        {
            {"content",    tkn.text_to_send},
            {"stop",       false},
            {"slot_id",    slot.id},
            {"multimodal", multimodal}
        };
+        if (!llama_token_is_eog(model, tkn.tok)) {
+            res.result_json["content"] = tkn.text_to_send;
+        }
        if (slot.sparams.n_probs > 0)
        {
            std::vector<completion_token_output> probs_output = {};
--- a/llm/llama.cpp
+++ b/llm/llama.cpp