llm: update llama.cpp commit to 8962422 (#6618)

Jeffrey Morgan
2024-09-03 21:12:39 -04:00
committed by GitHub
parent f29b167e1a
commit 5e2653f9fe
7 changed files with 32 additions and 422 deletions


@@ -425,7 +425,7 @@ struct llama_server_context
n_ctx = llama_n_ctx(ctx);
-add_bos_token = llama_should_add_bos_token(model);
+add_bos_token = llama_add_bos_token(model);
return true;
}
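Context for the hunk above: upstream llama.cpp renamed llama_should_add_bos_token to llama_add_bos_token. A minimal compile-only sketch of the call-site migration; the stub declaration below is an assumption standing in for the real llama.h header:

#include <cstdio>

// Stand-in for the real llama.h declaration (assumed shape, for illustration only).
struct llama_model;
static bool llama_add_bos_token(const llama_model * /*model*/) { return true; }

int main() {
    const llama_model *model = nullptr; // a loaded model in the real server
    // old call, removed upstream: llama_should_add_bos_token(model)
    bool add_bos_token = llama_add_bos_token(model);
    printf("add_bos_token = %s\n", add_bos_token ? "true" : "false");
    return 0;
}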
@@ -1031,7 +1031,7 @@ struct llama_server_context
continue;
}
-if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
+if (!llava_image_embed_make_with_clip_img(clp_ctx, params.cpuparams.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG_TEE("Error processing the given image");
return false;
}
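The rest of the diff follows one pattern: upstream moved the per-role thread counts out of flat gpt_params fields (n_threads, n_threads_batch) into nested cpu_params structs. A sketch of what that layout implies for call sites; the struct shape here is an assumption reconstructed from the fields this diff touches, and the real cpu_params carries more members:

// Assumed shape of the upstream refactor, for illustration only.
struct cpu_params {
    int n_threads = -1; // -1 means "resolve later"; see the postprocess sketch below
};

struct gpt_params {
    cpu_params cpuparams;             // generation threads (was params.n_threads)
    cpu_params cpuparams_batch;       // batch/prompt threads (was params.n_threads_batch)
    cpu_params draft_cpuparams;       // draft model for speculative decoding
    cpu_params draft_cpuparams_batch; // draft model, batch phase
};

int main() {
    gpt_params params;
    params.cpuparams.n_threads = 8;        // previously: params.n_threads = 8
    params.cpuparams_batch.n_threads = 16; // previously: params.n_threads_batch = 16
    return 0;
}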
@@ -2014,7 +2014,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
printf("options:\n");
printf(" -h, --help show this help message and exit\n");
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.cpuparams.n_threads);
printf(" -tb N, --threads-batch N number of threads to use during batch and prompt processing (default: same as --threads)\n");
printf(" --threads-http N number of threads in the http server pool to process requests (default: max(hardware concurrency - 1, --parallel N + 2))\n");
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
@@ -2287,7 +2287,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
invalid_param = true;
break;
}
-params.n_threads = std::stoi(argv[i]);
+params.cpuparams.n_threads = std::stoi(argv[i]);
}
else if (arg == "--grp-attn-n" || arg == "-gan")
{
@@ -2315,7 +2315,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
invalid_param = true;
break;
}
-params.n_threads_batch = std::stoi(argv[i]);
+params.cpuparams_batch.n_threads = std::stoi(argv[i]);
}
else if (arg == "--threads-http")
{
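Both thread flags now funnel into those nested fields through the parser's usual idiom: advance to the flag's value, set invalid_param if it is missing, otherwise std::stoi into the struct. A simplified, compilable sketch of that loop shape (the real parser handles many more flags):

#include <cstdio>
#include <string>

struct cpu_params { int n_threads = -1; };
struct gpt_params {
    cpu_params cpuparams;
    cpu_params cpuparams_batch;
};

int main(int argc, char **argv) {
    gpt_params params;
    bool invalid_param = false;
    std::string arg;
    for (int i = 1; i < argc; i++) {
        arg = argv[i];
        if (arg == "-t" || arg == "--threads") {
            if (++i >= argc) { invalid_param = true; break; }
            params.cpuparams.n_threads = std::stoi(argv[i]);
        } else if (arg == "-tb" || arg == "--threads-batch") {
            if (++i >= argc) { invalid_param = true; break; }
            params.cpuparams_batch.n_threads = std::stoi(argv[i]);
        }
    }
    if (invalid_param) {
        fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
        return 1;
    }
    printf("threads=%d, threads_batch=%d\n",
           params.cpuparams.n_threads, params.cpuparams_batch.n_threads);
    return 0;
}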
@@ -2626,6 +2626,11 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, g
params.kv_overrides.back().key[0] = 0;
}
+postprocess_cpu_params(params.cpuparams, nullptr);
+postprocess_cpu_params(params.cpuparams_batch, &params.cpuparams);
+postprocess_cpu_params(params.draft_cpuparams, &params.cpuparams);
+postprocess_cpu_params(params.draft_cpuparams_batch, &params.cpuparams_batch);
if (invalid_param)
{
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
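The four postprocess_cpu_params calls added above resolve thread counts that were never set on the command line: the batch params fall back to the generation params, and the draft variants fall back to their non-draft counterparts. The function body below is an assumption illustrating that inheritance; upstream's real implementation also post-processes other CPU scheduling options:

#include <thread>

struct cpu_params { int n_threads = -1; };

// Assumed fallback logic: an unset (-1) thread count inherits from the
// parent config, or from hardware concurrency at the root of the chain.
void postprocess_cpu_params(cpu_params &cpuparams, const cpu_params *parent = nullptr) {
    if (cpuparams.n_threads < 0) {
        cpuparams.n_threads = parent ? parent->n_threads
                                     : (int) std::thread::hardware_concurrency();
    }
}

int main() {
    cpu_params gen, batch;
    gen.n_threads = 8;                   // as if -t 8 was passed
    postprocess_cpu_params(gen);         // root config: already set, unchanged
    postprocess_cpu_params(batch, &gen); // -tb not given: inherits 8 from gen
    return 0;
}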
@@ -2775,8 +2780,8 @@ int main(int argc, char **argv) {
{"commit", LLAMA_COMMIT}});
LOG_INFO("system info", {
{"n_threads", params.n_threads},
{"n_threads_batch", params.n_threads_batch},
{"n_threads", params.cpuparams.n_threads},
{"n_threads_batch", params.cpuparams_batch.n_threads},
{"total_threads", std::thread::hardware_concurrency()},
{"system_info", llama_print_system_info()},
});