Code shuffle to clean up the llm dir

2025-12-12 00:37:04 +00:00 · 2024-01-04 09:40:15 -08:00
parent b5939008a1
commit 77d96da94b
19 changed files with 54 additions and 47 deletions
--- a/llm/generate/gen_common.sh
+++ b/llm/generate/gen_common.sh
@@ -0,0 +1,53 @@
+# common logic across linux and darwin
+
+init_vars() {
+    LLAMACPP_DIR=../llama.cpp
+    PATCHES="0001-Expose-callable-API-for-server.patch"
+    CMAKE_DEFS=""
+    CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
+    if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
+        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on"
+    else
+        # TODO - add additional optimization flags...
+        CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off"
+    fi
+}
+
+# Initialise the git submodules and force-sync the llama.cpp checkout.
+# Only prints a notice and returns when OLLAMA_SKIP_PATCHING is non-empty.
+# Globals read: OLLAMA_SKIP_PATCHING, LLAMACPP_DIR
+git_module_setup() {
+    if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
+        echo "Skipping submodule initialization"
+        return
+    fi
+    git submodule init
+    # Quoted so a submodule path containing spaces stays a single argument
+    git submodule update --force "${LLAMACPP_DIR}"
+}
+
+apply_patches() {
+    # Wire up our CMakefile
+    if ! grep ollama ${LLAMACPP_DIR}/examples/server/CMakeLists.txt; then
+        echo 'include (../../../ext_server/CMakeLists.txt) # ollama' >>${LLAMACPP_DIR}/examples/server/CMakeLists.txt
+    fi
+    # Avoid duplicate main symbols when we link into the cgo binary
+    sed -e 's/int main(/int __main(/g' <${LLAMACPP_DIR}/examples/server/server.cpp >${LLAMACPP_DIR}/examples/server/server.cpp.tmp &&
+        mv ${LLAMACPP_DIR}/examples/server/server.cpp.tmp ${LLAMACPP_DIR}/examples/server/server.cpp
+}
+
+# Configure and compile llama.cpp with cmake.
+# Reads LLAMACPP_DIR, BUILD_DIR, CMAKE_DEFS and CMAKE_TARGETS; the calling
+# script sets BUILD_DIR and extends CMAKE_DEFS before invoking this.
+build() {
+    # The variables are expanded unquoted, so each space-separated flag held
+    # in CMAKE_DEFS reaches cmake as its own argument
+    cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
+    # Build only the targets listed in CMAKE_TARGETS, with 8 parallel jobs
+    cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
+}
+
+install() {
+    rm -rf ${BUILD_DIR}/lib
+    mkdir -p ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib
+}
+
+# Restore the two llama.cpp files that the patch step edits, so the local
+# tree is left clean once the build is done
+cleanup() {
+    # git -C runs the checkout from the server directory without a subshell
+    git -C ${LLAMACPP_DIR}/examples/server/ checkout CMakeLists.txt server.cpp
+}
--- a/llm/generate/gen_darwin.sh
+++ b/llm/generate/gen_darwin.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# This script is intended to run inside the go generate
+# working directory must be ./llm/generate/
+
+# TODO - add hardening to detect missing tools (cmake, etc.)
+
+# -e: stop at the first failing command; -x: trace each command into the log
+set -ex
+set -o pipefail
+echo "Starting darwin generate script"
+# Quoted so a script location containing spaces still resolves
+source "$(dirname "$0")/gen_common.sh"
+init_vars
+# Prepend the macOS/Metal settings to the defaults chosen by init_vars
+CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on -DLLAMA_ACCELERATE=on ${CMAKE_DEFS}"
+BUILD_DIR="${LLAMACPP_DIR}/build/darwin/metal"
+# Add the architecture-specific flags; any GOARCH other than amd64/arm64
+# (including unset) is rejected
+case "${GOARCH}" in
+"amd64")
+    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
+    ;;
+"arm64")
+    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=arm64 -DCMAKE_OSX_ARCHITECTURES=arm64 ${CMAKE_DEFS}"
+    ;;
+*)
+    echo "GOARCH must be set"
+    echo "this script is meant to be run from within go generate"
+    exit 1
+    ;;
+esac
+
+git_module_setup
+apply_patches
+build
+install
+cleanup
--- a/llm/generate/gen_linux.sh
+++ b/llm/generate/gen_linux.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+# This script is intended to run inside the go generate
+# working directory must be llm/generate/
+
+# First we build our default built-in library which will be linked into the CGO
+# binary as a normal dependency. This default build is CPU based.
+#
+# Then we build a CUDA dynamic library (although statically linked with the CUDA
+# library dependencies for maximum portability)
+#
+# Then if we detect ROCm, we build a dynamically loaded ROCm lib.  ROCm is particularly
+# important to be a dynamic lib even if it's the only GPU library detected because
+# we can't redistribute the objectfiles but must rely on dynamic libraries at
+# runtime, which could lead the server not to start if not present.
+
+set -ex
+set -o pipefail
+
+# Print the AMD GPU architectures to build for as one single-quoted,
+# semicolon-separated string on stdout.
+# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
+amdGPUs() {
+    GPU_LIST=(
+        "gfx803"
+        "gfx900"
+        "gfx906:xnack-"
+        "gfx908:xnack-"
+        "gfx90a:xnack+"
+        "gfx90a:xnack-"
+        "gfx1010"
+        "gfx1012"
+        "gfx1030"
+        "gfx1100"
+        "gfx1101"
+        "gfx1102"
+    )
+    # The IFS override only lives inside the command substitution's subshell,
+    # so the caller's word splitting is left untouched
+    local joined
+    joined=$(
+        IFS=';'
+        printf '%s' "${GPU_LIST[*]}"
+    )
+    echo "'${joined}'"
+}
+
+echo "Starting linux generate script"
+# Default CUDACXX to the stock nvcc when the caller did not choose a compiler.
+# Two separate tests replace the obsolescent "-a" operator of [ ].
+if [ -z "${CUDACXX}" ] && [ -x /usr/local/cuda/bin/nvcc ]; then
+    export CUDACXX=/usr/local/cuda/bin/nvcc
+fi
+# Flags shared by every linux variant built below
+COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
+# Quoted so a script location containing spaces still resolves
+source "$(dirname "$0")/gen_common.sh"
+init_vars
+git_module_setup
+apply_patches
+
+#
+# CPU first for the default library
+#
+CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
+BUILD_DIR="${LLAMACPP_DIR}/build/linux/cpu"
+
+build
+install
+
+# Placeholder to keep go embed happy until we start building dynamic CPU lib variants
+touch "${BUILD_DIR}/lib/dummy.so"
+
+# Build the CUDA variant only when the toolkit's lib64 directory is present
+if [ -d /usr/local/cuda/lib64/ ]; then
+    echo "CUDA libraries detected - building dynamic CUDA library"
+    # Start again from the default settings before adding the CUDA flags
+    init_vars
+    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/cuda"
+    CUDA_LIB_DIR=/usr/local/cuda/lib64
+    build
+    install
+    # Wrap the static archives into libext_server.so. Our own archives are
+    # pulled in whole; the CUDA pieces come from the static archives in
+    # ${CUDA_LIB_DIR}, so only system libraries are linked dynamically.
+    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
+        -Wl,--whole-archive \
+        ${BUILD_DIR}/lib/libext_server.a \
+        ${BUILD_DIR}/lib/libcommon.a \
+        ${BUILD_DIR}/lib/libllama.a \
+        -Wl,--no-whole-archive \
+        ${CUDA_LIB_DIR}/libcudart_static.a \
+        ${CUDA_LIB_DIR}/libcublas_static.a \
+        ${CUDA_LIB_DIR}/libcublasLt_static.a \
+        ${CUDA_LIB_DIR}/libcudadevrt.a \
+        ${CUDA_LIB_DIR}/libculibos.a \
+        -lrt -lpthread -ldl -lstdc++ -lm
+fi
+
+# Fall back to the default ROCm location when ROCM_PATH is unset or empty
+: "${ROCM_PATH:=/opt/rocm}"
+
+# Export the default CLBlast cmake directory when CLBlast_DIR is unset or
+# empty and that directory exists
+if [ -z "${CLBlast_DIR}" ] && [ -d /usr/lib/cmake/CLBlast ]; then
+    export CLBlast_DIR=/usr/lib/cmake/CLBlast
+fi
+
+# Build the ROCm variant only when the ROCm directory is present
+if [ -d "${ROCM_PATH}" ]; then
+    echo "ROCm libraries detected - building dynamic ROCm library"
+    # Start again from the default settings before adding the ROCm flags
+    init_vars
+    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
+    BUILD_DIR="${LLAMACPP_DIR}/build/linux/rocm"
+    build
+    install
+    # Link and set the rpath against the same ROCm install the compilers were
+    # taken from, so a non-default ROCM_PATH does not silently link against
+    # (or fail to find) /opt/rocm/lib. With the default ROCM_PATH the command
+    # line is unchanged.
+    gcc -fPIC -g -shared -o "${BUILD_DIR}/lib/libext_server.so" \
+        -Wl,--whole-archive \
+        "${BUILD_DIR}/lib/libext_server.a" \
+        "${BUILD_DIR}/lib/libcommon.a" \
+        "${BUILD_DIR}/lib/libllama.a" \
+        -Wl,--no-whole-archive \
+        -lrt -lpthread -ldl -lstdc++ -lm \
+        -L"${ROCM_PATH}/lib" -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
+        "-Wl,-rpath,${ROCM_PATH}/lib,-rpath,/opt/amdgpu/lib/x86_64-linux-gnu/" \
+        -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu
+fi
+
+cleanup
--- a/llm/generate/gen_windows.ps1
+++ b/llm/generate/gen_windows.ps1
@@ -0,0 +1,88 @@
+#!powershell
+
+$ErrorActionPreference = "Stop"
+
+# Reset the script-scoped build settings to their defaults.
+# Reads $env:CGO_CFLAGS; sets $script:llamacppDir, $script:patches,
+# $script:cmakeDefs, $script:cmakeTargets and $script:config.
+function init_vars {
+    $script:llamacppDir = "../llama.cpp"
+    $script:patches = @("0001-Expose-callable-API-for-server.patch")
+    $script:cmakeDefs = @("-DBUILD_SHARED_LIBS=on", "-DLLAMA_NATIVE=off", "-DLLAMA_F16C=off", "-DLLAMA_FMA=off", "-DLLAMA_AVX512=off", "-DLLAMA_AVX2=off", "-DLLAMA_AVX=on", "-A","x64")
+    $script:cmakeTargets = @("ggml", "ggml_static", "llama", "build_info", "common", "ext_server_shared", "llava_static")
+    # -contains tests collection membership, so on the CGO_CFLAGS string it was
+    # only true when the whole value was exactly "-g". Match any flag that
+    # starts with -g instead (case-sensitively, so -G is not a debug request).
+    if ($env:CGO_CFLAGS -cmatch '(^|\s)-g') {
+        $script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on")
+        $script:config = "RelWithDebInfo"
+    } else {
+        $script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off")
+        $script:config = "Release"
+    }
+}
+
+# Initialise the git submodules, then force-update the llama.cpp checkout.
+# Exits the script with git's exit code as soon as either command fails.
+function git_module_setup {
+    # TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
+    $gitCommands = @(
+        @("submodule", "init"),
+        @("submodule", "update", "--force", "${script:llamacppDir}")
+    )
+    foreach ($gitArgs in $gitCommands) {
+        & git $gitArgs
+        if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE) }
+    }
+}
+
+# Patch the llama.cpp server example in place: hook in our CMake include and
+# rename the server's main() so it can be linked into the cgo binary.
+function apply_patches {
+    $serverDir = "${script:llamacppDir}/examples/server"
+    $cmakeLists = "$serverDir/CMakeLists.txt"
+    $serverCpp = "$serverDir/server.cpp"
+
+    # Wire up our CMakefile (skipped when the file already mentions ollama)
+    if (-not (Select-String -Path $cmakeLists -Pattern 'ollama')) {
+        Add-Content -Path $cmakeLists -Value 'include (../../../ext_server/CMakeLists.txt) # ollama'
+    }
+
+    # Avoid duplicate main symbols when we link into the cgo binary
+    $patched = (Get-Content -Path $serverCpp) -replace 'int main\(', 'int __main('
+    Set-Content -Path $serverCpp -Value $patched
+}
+
+# Configure and compile llama.cpp with cmake, exiting with cmake's exit code
+# if either step fails.
+# Reads $script:llamacppDir, $script:buildDir, $script:cmakeDefs,
+# $script:cmakeTargets and $script:config.
+function build {
+    write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
+    & cmake --version
+    & cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+    # Expand the target list once so the logged command is exactly what runs;
+    # the old message embedded quotes inside the string and printed the
+    # pipeline text instead of the targets.
+    $targetArgs = @($script:cmakeTargets | ForEach-Object { "--target", $_ })
+    write-host "building with: cmake --build $script:buildDir --config $script:config $targetArgs"
+    & cmake --build $script:buildDir --config $script:config $targetArgs
+    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+}
+
+# Recreate <buildDir>/lib, copy the built DLLs into it and list the DLL
+# dependencies of ext_server_shared.dll in the build log.
+function install {
+    $libDir = "${script:buildDir}/lib"
+    $binDir = "${script:buildDir}/bin/${script:config}"
+
+    # Errors are suppressed on both calls, e.g. when the directory is absent
+    # or already present
+    Remove-Item -ErrorAction SilentlyContinue -Recurse -Force -Path $libDir
+    md $libDir -ErrorAction SilentlyContinue > $null
+
+    foreach ($dll in "ext_server_shared.dll", "llama.dll") {
+        Copy-Item "$binDir/$dll" $libDir
+    }
+
+    # Display the dll dependencies in the build log
+    dumpbin /dependents "$binDir/ext_server_shared.dll" | select-string ".dll"
+}
+
+# Restore the two llama.cpp files that the patch step edits.
+# The checkout runs from the server directory, and the previous location is
+# restored afterwards so the rest of the script keeps its working directory.
+function cleanup {
+    Push-Location "${script:llamacppDir}/examples/server"
+    try {
+        git checkout CMakeLists.txt server.cpp
+    } finally {
+        Pop-Location
+    }
+}
+
+# Prepare the tree once: default settings, submodule sync, in-place patches
+init_vars
+git_module_setup
+apply_patches
+
+# first build CPU based
+$script:buildDir="${script:llamacppDir}/build/windows/cpu"
+
+build
+install
+
+# Then build cuda as a dynamically loaded library
+# (init_vars resets cmakeDefs to the defaults before the CUDA flag is added)
+init_vars
+$script:buildDir="${script:llamacppDir}/build/windows/cuda"
+$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
+build
+install
+
+# TODO - actually implement ROCm support on windows
+$script:buildDir="${script:llamacppDir}/build/windows/rocm"
+
+# No ROCm build happens here: only recreate the lib directory and drop a
+# .generated marker file into it
+rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
+md "${script:buildDir}/lib" -ea 0 > $null
+echo $null >> "${script:buildDir}/lib/.generated"
+
+# Revert the in-place llama.cpp patches
+cleanup
+write-host "`ngo generate completed"
--- a/llm/generate/generate_darwin.go
+++ b/llm/generate/generate_darwin.go
@@ -0,0 +1,3 @@
+package generate
+
+//go:generate sh ./gen_darwin.sh
--- a/llm/generate/generate_linux.go
+++ b/llm/generate/generate_linux.go
@@ -0,0 +1,3 @@
+package generate
+
+//go:generate bash ./gen_linux.sh
--- a/llm/generate/generate_windows.go
+++ b/llm/generate/generate_windows.go
@@ -0,0 +1,3 @@
+package generate
+
+//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1