Refine build to support CPU only

If someone checks out the ollama repo without installing the CUDA
libraries, this ensures they can still build a CPU-only version.
This commit is contained in:
Daniel Hiltgen
2023-12-13 17:26:47 -08:00
parent 51082535e1
commit 1b991d0ba9
9 changed files with 152 additions and 98 deletions

View File

@@ -21,17 +21,7 @@ package llm
#cgo linux CFLAGS: -D_GNU_SOURCE
#cgo linux windows CFLAGS: -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -DGGML_USE_CUBLAS
#cgo linux LDFLAGS: -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib/stubs
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cuda/examples/server/libext_server.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cuda/common/libcommon.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cuda/libllama.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cuda/libggml_static.a
// Note: the following requires cuda library presence on linux to build, even if you only have rocm or CPU only
#cgo linux LDFLAGS: /usr/local/cuda/lib64/libcudart_static.a
#cgo linux LDFLAGS: /usr/local/cuda/lib64/libcublas_static.a
#cgo linux LDFLAGS: /usr/local/cuda/lib64/libcublasLt_static.a
#cgo linux LDFLAGS: /usr/local/cuda/lib64/libcudadevrt.a
#cgo linux LDFLAGS: /usr/local/cuda/lib64/libculibos.a
#cgo linux LDFLAGS: ${SRCDIR}/llama.cpp/gguf/build/cuda/libollama.a
#cgo linux LDFLAGS: -lrt -lpthread -ldl -lstdc++ -lm
#cgo windows LDFLAGS: -L${SRCDIR}/llama.cpp/gguf/build/wincuda/dist/bin
#cgo windows LDFLAGS: -lext_server_shared -lpthread

View File

@@ -13,28 +13,43 @@ source $(dirname $0)/gen_common.sh
# Prepare the build: these three helpers are presumably defined in the sourced
# gen_common.sh (not visible here) — confirm their exact behavior there.
init_vars
git_module_setup
apply_patches
# NOTE(review): the unconditional assignment below and the if/else that follows
# both prepend the same flag set to CMAKE_DEFS. This looks like the pre-change
# and post-change versions of one statement shown together in a diff view —
# confirm that only one of them exists in the real script.
CMAKE_DEFS="-DLLAMA_CUBLAS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
# Pass -DLLAMA_CUBLAS=on only when the CUDA lib64 directory exists; otherwise
# use the same flags without it. Any CMAKE_DEFS already set (for example by the
# caller's environment) is kept and appended after these flags.
if [ -d /usr/local/cuda/lib64/ ] ; then
CMAKE_DEFS="-DLLAMA_CUBLAS=on -DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
else
CMAKE_DEFS="-DCMAKE_POSITION_INDEPENDENT_CODE=on -DLLAMA_NATIVE=off -DLLAMA_AVX=on -DLLAMA_AVX2=off -DLLAMA_AVX512=off -DLLAMA_FMA=off -DLLAMA_F16C=off ${CMAKE_DEFS}"
fi
# The build tree stays under "cuda" in both branches above, including the one
# without -DLLAMA_CUBLAS=on.
BUILD_DIR="gguf/build/cuda"
LIB_DIR="${BUILD_DIR}/lib"
# Ensure the dist directory two levels up exists before building.
mkdir -p ../../dist/
# Presumably runs the CMake configure/compile step using CMAKE_DEFS and
# BUILD_DIR — defined outside this view, verify in gen_common.sh.
build
# TODO - explore mechanism to soften the hard cuda dependency on linux
# by conditionally building some archive here that aggregates the cuda libs if present
# so that the cgo flags link this intermediate archive instead of the underlying cuda libs
#
# gcc -fPIC -g -shared -o ${LIB_DIR}/libcuda_server.so \
# -Wl,--whole-archive \
# ${BUILD_DIR}/examples/server/CMakeFiles/ext_server.dir/server.cpp.o \
# ${BUILD_DIR}/common/libcommon.a \
# ${BUILD_DIR}/libllama.a \
# ${BUILD_DIR}/examples/llava/libllava_static.a \
# -Wl,--no-whole-archive \
# -lrt -lpthread -ldl -lstdc++ -lm \
# /usr/local/cuda/lib64/libcudart_static.a \
# /usr/local/cuda/lib64/libcublas_static.a \
# /usr/local/cuda/lib64/libcublasLt_static.a \
# /usr/local/cuda/lib64/libcudadevrt.a \
# /usr/local/cuda/lib64/libculibos.a
# Merge the static libraries produced by the build into one archive,
# ${BUILD_DIR}/libollama.a, by feeding an MRI script to `ar -M`.
# When the CUDA lib64 directory exists, the CUDA static libraries are added to
# the archive as well; otherwise only the four build outputs are bundled.
if [ -d /usr/local/cuda/lib64/ ] ; then
    pwd
    # Extra MRI commands for the CUDA case. The continuation lines start at
    # column 0 and the value ends with a newline so the generated script is
    # line-for-line the same as a hand-written one.
    cuda_addlibs="addlib /usr/local/cuda/lib64/libcudart_static.a
addlib /usr/local/cuda/lib64/libcublas_static.a
addlib /usr/local/cuda/lib64/libcublasLt_static.a
addlib /usr/local/cuda/lib64/libcudadevrt.a
addlib /usr/local/cuda/lib64/libculibos.a
"
else
    cuda_addlibs=""
fi
# One invocation serves both cases: ${cuda_addlibs} expands to the CUDA
# addlib lines, or to nothing, immediately before "save".
ar -M <<EOF
create ${BUILD_DIR}/libollama.a
addlib ${BUILD_DIR}/examples/server/libext_server.a
addlib ${BUILD_DIR}/common/libcommon.a
addlib ${BUILD_DIR}/libllama.a
addlib ${BUILD_DIR}/libggml_static.a
${cuda_addlibs}save
end
EOF
if [ -z "${ROCM_PATH}" ] ; then
# Try the default location in case it exists