Revamp the dynamic library shim

This switches the default llama.cpp to be CPU based, and builds the GPU variants
as dynamically loaded libraries which we can select at runtime.

This also bumps the ROCm library to version 6 given 5.7 builds don't work
on the latest ROCm library that just shipped.
This commit is contained in:
Daniel Hiltgen
2023-12-20 10:36:01 -08:00
parent 1d1eb1688c
commit 7555ea44f8
14 changed files with 272 additions and 280 deletions

View File

@@ -6,7 +6,7 @@ init_vars() {
CMAKE_DEFS="-DLLAMA_ACCELERATE=on"
# TODO - LLAMA_K_QUANTS is stale and needs to be mapped to newer cmake settings
CMAKE_TARGETS="--target ggml --target ggml_static --target llama --target build_info --target common --target ext_server --target llava_static"
if echo "${CGO_CFLAGS}" | grep -- '-g' > /dev/null ; then
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on ${CMAKE_DEFS}"
else
# TODO - add additional optimization flags...
@@ -15,7 +15,7 @@ init_vars() {
}
git_module_setup() {
if [ -n "${OLLAMA_SKIP_PATCHING}" ] ; then
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule initialization"
return
fi
@@ -25,13 +25,13 @@ git_module_setup() {
}
apply_patches() {
if [ -n "${OLLAMA_SKIP_PATCHING}" ] ; then
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule patching"
return
fi
# Workaround git apply not handling creation well for iteration
rm -f gguf/examples/server/server.h
for patch in ${PATCHES} ; do
for patch in ${PATCHES}; do
git -C gguf apply ../patches/${patch}
done
}
@@ -39,4 +39,4 @@ apply_patches() {
build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
}
}