Switch Windows build to fully dynamic loading

Refactor where build outputs are stored, and support a fully dynamic loading model on Windows so that the base executable has no special dependencies and therefore doesn't require a special PATH.
2025-12-11 16:26:59 +00:00 · 2023-12-23 11:35:44 -08:00
parent 9a70aecccb
commit d966b730ac
17 changed files with 379 additions and 228 deletions
--- a/llm/llama.cpp/gen_common.sh
+++ b/llm/llama.cpp/gen_common.sh
@@ -39,6 +39,15 @@ build() {
    cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
 }

+install() {
+    rm -rf ${BUILD_DIR}/lib
+    mkdir -p ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/examples/server/libext_server.a ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/common/libcommon.a ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/libllama.a ${BUILD_DIR}/lib
+    cp ${BUILD_DIR}/libggml_static.a ${BUILD_DIR}/lib
+}
+
 # Keep the local tree clean after we're done with the build
 cleanup() {
    (cd gguf/examples/server/ && git checkout CMakeLists.txt server.cpp)
--- a/llm/llama.cpp/gen_darwin.sh
+++ b/llm/llama.cpp/gen_darwin.sh
@@ -10,7 +10,7 @@ echo "Starting darwin generate script"
 source $(dirname $0)/gen_common.sh
 init_vars
 CMAKE_DEFS="-DCMAKE_OSX_DEPLOYMENT_TARGET=11.0 -DLLAMA_METAL=on ${CMAKE_DEFS}"
-BUILD_DIR="gguf/build/metal"
+BUILD_DIR="gguf/build/darwin/metal"
 case "${GOARCH}" in
 "amd64")
    CMAKE_DEFS="-DCMAKE_SYSTEM_PROCESSOR=x86_64 -DCMAKE_OSX_ARCHITECTURES=x86_64 ${CMAKE_DEFS}"
@@ -28,4 +28,5 @@ esac
 git_module_setup
 apply_patches
 build
+install
 cleanup
--- a/llm/llama.cpp/gen_linux.sh
+++ b/llm/llama.cpp/gen_linux.sh
@@ -21,34 +21,33 @@ if [ -z "${CUDACXX}" -a -x /usr/local/cuda/bin/nvcc ]; then
    export CUDACXX=/usr/local/cuda/bin/nvcc
 fi
 COMMON_CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_ACCELERATE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off"
-OLLAMA_DYN_LIB_DIR="gguf/build/lib"
 source $(dirname $0)/gen_common.sh
 init_vars
 git_module_setup
 apply_patches

-mkdir -p ${OLLAMA_DYN_LIB_DIR}
-touch ${OLLAMA_DYN_LIB_DIR}/.generated
-
 #
 # CPU first for the default library
 #
 CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-BUILD_DIR="gguf/build/cpu"
+BUILD_DIR="gguf/build/linux/cpu"
+
 build
+install

 if [ -d /usr/local/cuda/lib64/ ]; then
    echo "CUDA libraries detected - building dynamic CUDA library"
    init_vars
    CMAKE_DEFS="-DLLAMA_CUBLAS=on ${COMMON_CMAKE_DEFS} ${CMAKE_DEFS}"
-    BUILD_DIR="gguf/build/cuda"
+    BUILD_DIR="gguf/build/linux/cuda"
    CUDA_LIB_DIR=/usr/local/cuda/lib64
    build
-    gcc -fPIC -g -shared -o ${OLLAMA_DYN_LIB_DIR}/libcuda_server.so \
+    install
+    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
        -Wl,--whole-archive \
-        ${BUILD_DIR}/examples/server/libext_server.a \
-        ${BUILD_DIR}/common/libcommon.a \
-        ${BUILD_DIR}/libllama.a \
+        ${BUILD_DIR}/lib/libext_server.a \
+        ${BUILD_DIR}/lib/libcommon.a \
+        ${BUILD_DIR}/lib/libllama.a \
        -Wl,--no-whole-archive \
        ${CUDA_LIB_DIR}/libcudart_static.a \
        ${CUDA_LIB_DIR}/libcublas_static.a \
@@ -74,13 +73,14 @@ if [ -d "${ROCM_PATH}" ]; then
    echo "ROCm libraries detected - building dynamic ROCm library"
    init_vars
    CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DLLAMA_HIPBLAS=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS='gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102' -DGPU_TARGETS='gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102'"
-    BUILD_DIR="gguf/build/rocm"
+    BUILD_DIR="gguf/build/linux/rocm"
    build
-    gcc -fPIC -g -shared -o ${OLLAMA_DYN_LIB_DIR}/librocm_server.so \
+    install
+    gcc -fPIC -g -shared -o ${BUILD_DIR}/lib/libext_server.so \
        -Wl,--whole-archive \
-        ${BUILD_DIR}/examples/server/libext_server.a \
-        ${BUILD_DIR}/common/libcommon.a \
-        ${BUILD_DIR}/libllama.a \
+        ${BUILD_DIR}/lib/libext_server.a \
+        ${BUILD_DIR}/lib/libcommon.a \
+        ${BUILD_DIR}/lib/libllama.a \
        -Wl,--no-whole-archive \
        -lrt -lpthread -ldl -lstdc++ -lm \
        -L/opt/rocm/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ \
--- a/llm/llama.cpp/gen_windows.ps1
+++ b/llm/llama.cpp/gen_windows.ps1
@@ -44,6 +44,13 @@ function build {
    if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
 }

+function install {
+    rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
+    md "${script:buildDir}/lib" -ea 0 > $null
+    cp "${script:buildDir}/bin/${script:config}/ext_server_shared.dll" "${script:buildDir}/lib"
+    cp "${script:buildDir}/bin/${script:config}/llama.dll" "${script:buildDir}/lib"
+}
+
 function cleanup {
    Set-Location "gguf/examples/server"
    git checkout CMakeLists.txt server.cpp
@@ -54,42 +61,24 @@ git_module_setup
 apply_patches

 # first build CPU based
-$script:buildDir="gguf/build/wincpu"
+$script:buildDir="gguf/build/windows/cpu"

 build
-# install
-
-md gguf/build/lib -ea 0
-md gguf/build/wincpu/dist/lib -ea 0
-cp -force gguf/build/wincpu/bin/$script:config/ext_server_shared.dll gguf/build/lib/ext_server_shared.dll
-cp -force gguf/build/wincpu/bin/$script:config/llama.dll gguf/build/lib/llama.dll
-
-# Nope, this barfs on lots of symbol problems
-#mv gguf/build/wincpu/examples/server/$script:config/ext_server_shared.dll gguf/build/wincpu/dist/lib/cpu_server.lib
-# Nope: this needs lots of include paths to pull in things like msvcprt.lib and other deps
-# & cl.exe `
-#     gguf/build/wincpu/examples/server/$script:config/ext_server.lib `
-#     gguf/build/wincpu/common/$script:config/common.lib `
-#     gguf/build/wincpu/$script:config/llama.lib `
-#     gguf/build/wincpu/$script:config/ggml_static.lib `
-#     /link /DLL /DEF:cpu_server.def /NOENTRY /MACHINE:X64  /OUT:gguf/build/wincpu/dist/lib/cpu_server.dll
-# if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
+install

 # Then build cuda as a dynamically loaded library
 init_vars
-$script:buildDir="gguf/build/wincuda"
-$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON", "-DBUILD_SHARED_LIBS=on")
+$script:buildDir="gguf/build/windows/cuda"
+$script:cmakeDefs += @("-DLLAMA_CUBLAS=ON")
 build
-# install
-cp -force gguf/build/wincuda/bin/$script:config/ext_server_shared.dll gguf/build/lib/cuda_server.dll
+install

-# TODO - more to do here to create a usable dll
+# TODO - actually implement ROCm support on windows
+$script:buildDir="gguf/build/windows/rocm"

-
-# TODO - implement ROCm support on windows
-md gguf/build/winrocm/lib -ea 0
-echo $null >> gguf/build/winrocm/lib/.generated
+rm -ea 0 -recurse -force -path "${script:buildDir}/lib"
+md "${script:buildDir}/lib" -ea 0 > $null
+echo $null >> "${script:buildDir}/lib/.generated"

 cleanup
-
-write-host "go generate completed"
+write-host "`ngo generate completed"