Add cgo implementation for llama.cpp

Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions.
2025-12-12 00:37:04 +00:00 · 2023-11-13 17:20:34 -08:00
parent 5e7fd6906f
commit d4cd695759
27 changed files with 1189 additions and 765 deletions
--- a/llm/llama.cpp/gen_common.sh
+++ b/llm/llama.cpp/gen_common.sh
@@ -0,0 +1,34 @@
+# common logic across linux and darwin
+
+init_vars() {
+    # Sets PATCHES, CMAKE_TARGETS and CMAKE_DEFS; a -g* flag in CGO_CFLAGS selects the debug-info/gprof build.
+    PATCHES="0001-Expose-callable-API-for-server.patch"
+    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server"
+    # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
+    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
+    # Match only flags that START with -g (-g, -g3, -ggdb); a "-g" inside a path (-I/opt/my-gcc/include) must not count.
+    case " ${CGO_CFLAGS}" in  # the leading space lets the very first flag satisfy the whitespace-anchored pattern
+        *[[:space:]]-g*) CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on ${CMAKE_DEFS}" ;;
+        *) CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release ${CMAKE_DEFS}" ;;  # TODO - add additional optimization flags...
+    esac
+}
+
+git_module_setup() {
+    # Runs `git submodule init`, then force-updates the gguf submodule (--force may discard local edits there).
+    # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
+    git submodule init
+    git submodule update --force gguf
+}
+
+apply_patches() {  # applies every patch named in PATCHES (space-separated) to the gguf checkout
+    # Workaround git apply not handling creation well for iteration (presumably the patch creates server.h)
+    rm -f gguf/examples/server/server.h
+    for patch in ${PATCHES} ; do
+        git -C gguf apply "../patches/${patch}" || return 1  # stop at the first failure; never build a half-patched tree
+    done
+}
+
+build() {  # configure BUILD_DIR from gguf, then build; CMAKE_DEFS/CMAKE_TARGETS stay unquoted so they word-split
+    cmake -S gguf -B "${BUILD_DIR}" ${CMAKE_DEFS} &&  # skip the build step when configuration fails
+        cmake --build "${BUILD_DIR}" ${CMAKE_TARGETS} -j8
+}