Remove CPU build, fixup linux build script

2025-12-11 16:26:59 +00:00 · 2023-12-21 16:54:54 -08:00
parent 325d74985b
commit fa24e73b82
6 changed files with 21 additions and 58 deletions
--- a/llm/llama.cpp/gen_common.sh
+++ b/llm/llama.cpp/gen_common.sh
@@ -3,7 +3,7 @@
 init_vars() {
    LLAMACPP_DIR=gguf
    PATCHES="0001-Expose-callable-API-for-server.patch"
-    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
+    CMAKE_DEFS="-DLLAMA_ACCELERATE=on -DLLAMA_SERVER_VERBOSE=off"
    # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
    if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
--- a/llm/llama.cpp/gen_linux.sh
+++ b/llm/llama.cpp/gen_linux.sh
@@ -22,13 +22,14 @@ if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
 fi
 COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_ACCELERATE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
 OLLAMA_DYN_LIB_DIR="gguf/build/lib"
-mkdir -p ${OLLAMA_DYN_LIB_DIR}
-touch ${OLLAMA_DYN_LIB_DIR}/.generated
 source $(dirname $0)/gen_common.sh
 init_vars
 git_module_setup
 apply_patches

+mkdir -p ${OLLAMA_DYN_LIB_DIR}
+touch ${OLLAMA_DYN_LIB_DIR}/.generated
+
 #
 # CPU first for the default library
 #
--- a/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
+++ b/llm/llama.cpp/patches/0001-Expose-callable-API-for-server.patch
@@ -1,4 +1,4 @@
-From 7184ae16e8fd0e9e91cac4c81daa323057fa992b Mon Sep 17 00:00:00 2001
+From 4c72576c5f6c2217b1ecf7fd8523616acc5526ae Mon Sep 17 00:00:00 2001
 From: Daniel Hiltgen <daniel@ollama.com>
 Date: Mon, 13 Nov 2023 12:25:58 -0800
 Subject: [PATCH] Expose callable API for server
@@ -6,10 +6,10 @@ Subject: [PATCH] Expose callable API for server
 This adds an extern "C" interface within the example server
 ---
 examples/server/CMakeLists.txt |  24 +++
- examples/server/server.cpp     | 276 +++++++++++++++++++++++++++++++++
+ examples/server/server.cpp     | 279 +++++++++++++++++++++++++++++++++
 examples/server/server.h       |  89 +++++++++++
 ggml-cuda.cu                   |   1 +
- 4 files changed, 390 insertions(+)
+ 4 files changed, 393 insertions(+)
 create mode 100644 examples/server/server.h

 diff --git a/examples/server/CMakeLists.txt b/examples/server/CMakeLists.txt
@@ -46,7 +46,7 @@ index 859cd12..4ea47a7 100644
 +endif()
 \ No newline at end of file
 diff --git a/examples/server/server.cpp b/examples/server/server.cpp
-index 0403853..065420c 100644
+index 0403853..5e78e4d 100644
 --- a/examples/server/server.cpp
 +++ b/examples/server/server.cpp
@@ -5,6 +5,9 @@
@@ -67,7 +67,7 @@ index 0403853..065420c 100644
 int main(int argc, char **argv)
 {
 #if SERVER_VERBOSE != 1
-@@ -3123,3 +3127,275 @@ int main(int argc, char **argv)
+@@ -3123,3 +3127,278 @@ int main(int argc, char **argv)
     llama_backend_free();
     return 0;
 }
@@ -80,6 +80,9 @@ index 0403853..065420c 100644
 +
 +void llama_server_init(ext_server_params *sparams, ext_server_resp_t *err)
 +{
++#if SERVER_VERBOSE != 1
++    log_disable();
++#endif
 +    assert(err != NULL && sparams != NULL);
 +    err->id = 0;
 +    err->msg[0] = '\0';