Revamp the dynamic library shim

This switches the default llama.cpp to be CPU-based and builds the GPU variants as dynamically loaded libraries that we can select at runtime. It also bumps the ROCm library to version 6, because 5.7 builds don't work with the latest ROCm library that just shipped.
2025-12-11 16:26:59 +00:00 · 2023-12-20 10:36:01 -08:00
parent 1d1eb1688c
commit 7555ea44f8
14 changed files with 272 additions and 280 deletions
--- a/llm/llama.cpp/gen_common.sh
+++ b/llm/llama.cpp/gen_common.sh
@@ -6,7 +6,7 @@ init_vars() {
    CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
    # TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
-    if echo "${CGO_CFLAGS}" | grep -- '-g' > /dev/null ; then
+    if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on ${CMAKE_DEFS}"
    else
        # TODO - add additional optimization flags...
@@ -15,7 +15,7 @@ init_vars() {
 }

 git_module_setup() {
-    if [ -n "${OLLAMA_SKIP_PATCHING}" ] ; then
+    if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
        echo "Skipping submodule initialization"
        return
    fi
@@ -25,13 +25,13 @@ git_module_setup() {
 }

 apply_patches() {
-    if [ -n "${OLLAMA_SKIP_PATCHING}" ] ; then
+    if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
        echo "Skipping submodule patching"
        return
    fi
    # Workaround git apply not handling creation well for iteration
    rm -f gguf/examples/server/server.h
-    for patch in ${PATCHES} ; do
+    for patch in ${PATCHES}; do
        git -C gguf apply ../patches/${patch}
    done
 }
@@ -39,4 +39,4 @@ apply_patches() {
 build() {
    cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
    cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
-}
+}