Add cgo implementation for llama.cpp

Run the server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions.
2025-12-12 00:37:04 +00:00 · 2023-11-13 17:20:34 -08:00
parent 5e7fd6906f
commit d4cd695759
27 changed files with 1189 additions and 765 deletions
--- a/llm/llama.cpp/gen_darwin.sh
+++ b/llm/llama.cpp/gen_darwin.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# This script is intended to run inside the go generate
+# working directory must be ../llm/llama.cpp
+
+# TODO - add hardening to detect missing tools (cmake, etc.)
+
+set -ex
+set -o pipefail
+echo "Starting darwin generate script"
+source $(dirname $0)/gen_common.sh
+init_vars
+CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 ${CMAKE_DEFS}"
+case "${GOARCH}" in
+    "amd64")
+        CMAKE_DEFS="-DLLAMA_METAL=off -DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 ${CMAKE_DEFS}"
+        BUILD_DIR="gguf/build/cpu"
+        ;;
+     "arm64")
+        CMAKE_DEFS="-DLLAMA_METAL=on -DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 ${CMAKE_DEFS}"
+        BUILD_DIR="gguf/build/metal"
+        ;;
+    *)
+        echo "GOARCH must be set"
+        echo "this script is meant to be run from within go generate"
+        exit 1
+        ;;
+esac
+
+git_module_setup
+apply_patches
+build
+
+# Enable local debug/run usecase
+if [ -e "gguf/ggml-metal.metal" ]; then
+    cp gguf/ggml-metal.metal ../../
+fi