llm: update llama.cpp commit to 7c26775 (#4896)

* llm: update llama.cpp submodule to `7c26775`

* disable `LLAMA_BLAS` for now

* `-DLLAMA_OPENMP=off`
commit 152fc202f5 (parent 4ad0d4d6d3)
Author: Jeffrey Morgan
Date:   2024-06-17 15:56:16 -04:00

6 changed files with 31 additions and 29 deletions


@@ -1,8 +1,8 @@
 diff --git a/common/common.cpp b/common/common.cpp
-index ba1ecf0e..cead57cc 100644
+index 73ff0e85..6adb1a92 100644
 --- a/common/common.cpp
 +++ b/common/common.cpp
-@@ -1836,6 +1836,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
+@@ -2447,6 +2447,8 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
      mparams.use_mmap = params.use_mmap;
      mparams.use_mlock = params.use_mlock;
      mparams.check_tensors = params.check_tensors;
@@ -12,20 +12,20 @@ index ba1ecf0e..cead57cc 100644
          mparams.kv_overrides = NULL;
      } else {
  diff --git a/common/common.h b/common/common.h
-index d80344f2..71e84834 100644
+index 58ed72f4..0bb2605e 100644
 --- a/common/common.h
 +++ b/common/common.h
-@@ -174,6 +174,13 @@ struct gpt_params {
-     // multimodal models (see examples/llava)
+@@ -180,6 +180,13 @@ struct gpt_params {
      std::string mmproj = ""; // path to multimodal projector
      std::vector<std::string> image; // path to image file(s)
-+
 +    // Called with a progress value between 0.0 and 1.0. Pass NULL to disable.
 +    // If the provided progress_callback returns true, model loading continues.
 +    // If it returns false, model loading is immediately aborted.
 +    llama_progress_callback progress_callback = NULL;
 +    // context pointer passed to the progress callback
 +    void * progress_callback_user_data;
- };
- void gpt_params_handle_model_default(gpt_params & params);
++
+     // server params
+     int32_t port = 8080; // server listens on this network port
+     int32_t timeout_read = 600; // http read timeout in seconds
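
The patch above threads a load-progress callback through gpt_params into llama_model_params. As a rough illustration (not part of this commit), a caller could wire those two fields up as below; on_load_progress, the model path, and the cancel flag are invented for this sketch, while llama_progress_callback and llama_model_params_from_gpt_params are existing llama.cpp/common declarations.

// Illustrative sketch only: shows a caller using the fields this patch adds.
#include <cstdio>

#include "common.h"
#include "llama.h"

// Matches llama.cpp's typedef: bool (*)(float progress, void * user_data).
static bool on_load_progress(float progress, void * user_data) {
    const bool * cancel = static_cast<const bool *>(user_data);
    fprintf(stderr, "\rloading model: %3.0f%%", progress * 100.0f);
    return !*cancel; // returning false aborts the load, per the comment in the patch
}

int main() {
    bool cancel = false;

    gpt_params params;
    params.model = "model.gguf";                  // hypothetical model path
    params.progress_callback = on_load_progress;  // field added by the patch above
    params.progress_callback_user_data = &cancel; // passed back as the callback's user_data

    // The patched llama_model_params_from_gpt_params() copies both fields into
    // llama_model_params, so llama.cpp reports progress while the model loads.
    llama_model_params mparams = llama_model_params_from_gpt_params(params);
    (void) mparams;
    return 0;
}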


@@ -1,8 +1,8 @@
 diff --git a/llama.cpp b/llama.cpp
-index 40d2ec2c..74f3ee9c 100644
+index 61948751..4b72a293 100644
 --- a/llama.cpp
 +++ b/llama.cpp
-@@ -4642,16 +4642,7 @@ static void llm_load_vocab(
+@@ -4824,16 +4824,7 @@ static void llm_load_vocab(
      // for now, only BPE models have pre-tokenizers
      if (vocab.type == LLAMA_VOCAB_TYPE_BPE) {
@@ -15,14 +15,14 @@ index 40d2ec2c..74f3ee9c 100644
 -            LLAMA_LOG_WARN("%s: ************************************ \n", __func__);
 -            LLAMA_LOG_WARN("%s: \n", __func__);
 -            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
--        } else if (
-+        if (
-            tokenizer_pre == "default") {
+-        } else if (tokenizer_pre == "default") {
++        if (tokenizer_pre == "default") {
              vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_DEFAULT;
          } else if (
+                tokenizer_pre == "llama3" ||
+@@ -4888,7 +4879,8 @@ static void llm_load_vocab(
+            tokenizer_pre == "poro-chat") {
+            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_PORO;
-@@ -4703,7 +4694,8 @@ static void llm_load_vocab(
-            tokenizer_pre == "smaug-bpe") {
-            vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_SMAUG;
          } else {
 -            throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
 +            LLAMA_LOG_WARN("%s: missing or unrecognized pre-tokenizer type, using: 'default'\n", __func__);
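
For readability, here is a simplified, self-contained sketch of the behavior this patch keeps applying on top of llm_load_vocab: an unrecognized tokenizer_pre value now falls back to the default pre-tokenizer with a warning instead of throwing. The pick_pre_tokenizer helper and the enum are illustrative only; the string values and warning text come from the diff above.

// Illustrative sketch only: mirrors the if/else chain's fallback, not llama.cpp code.
#include <cstdio>
#include <string>

enum pre_type { PRE_DEFAULT, PRE_LLAMA3, PRE_SMAUG, PRE_PORO };

static pre_type pick_pre_tokenizer(const std::string & tokenizer_pre) {
    if (tokenizer_pre == "default") {
        return PRE_DEFAULT;
    } else if (tokenizer_pre == "llama3") {
        return PRE_LLAMA3;
    } else if (tokenizer_pre == "smaug-bpe") {
        return PRE_SMAUG;
    } else if (tokenizer_pre == "poro-chat") {
        return PRE_PORO;
    } else {
        // Before the patch this branch threw std::runtime_error and aborted the
        // load; after it, the loader warns and falls back to the default type.
        fprintf(stderr, "missing or unrecognized pre-tokenizer type, using: 'default'\n");
        return PRE_DEFAULT;
    }
}

int main() {
    // An unknown (or empty) tokenizer_pre no longer stops the model from loading.
    printf("%d\n", pick_pre_tokenizer("some-new-pretokenizer")); // warns, returns PRE_DEFAULT
    printf("%d\n", pick_pre_tokenizer("poro-chat"));             // returns PRE_PORO
    return 0;
}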