Refactor how we augment llama.cpp

This changes how llama.cpp is included: instead of applying a patch, we now
keep the C++ code directly in the ollama tree, which should make it easier to
refine and update over time.
This commit is contained in:
Daniel Hiltgen
2023-12-22 09:51:53 -08:00
parent 22cd5eaab6
commit 9a70aecccb
10 changed files with 457 additions and 508 deletions

View File

@@ -25,18 +25,21 @@ git_module_setup() {
}
# apply_patches: edits files under gguf/examples/server/ in place (appends an
# include line to CMakeLists.txt, applies patches, renames main() in server.cpp).
#
# NOTE(review): this listing looks like a rendered diff whose +/- markers were
# lost. There are two `if` openers below but only one `fi`, so removed and added
# lines are probably interleaved and the text is not valid shell as shown.
# Confirm against the real script before relying on any single line.
apply_patches() {
# Early exit when OLLAMA_SKIP_PATCHING is set to a non-empty value.
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule patching"
return
# Wire up our CMakefile
# The include line is appended only when "ollama.txt" is not already mentioned
# in the server CMakeLists.txt. grep is run without -q, so any matching line is
# also printed to stdout.
if ! grep ollama.txt gguf/examples/server/CMakeLists.txt; then
echo 'include (../../../ollama.txt)' >>gguf/examples/server/CMakeLists.txt
fi
# Workaround git apply not handling creation well for iteration
rm -f gguf/examples/server/server.h
# PATCHES is expanded unquoted, so it is split on whitespace into one entry per
# iteration. Because of `git -C gguf`, the ../patches/ path is resolved from
# inside the gguf directory.
for patch in ${PATCHES}; do
git -C gguf apply ../patches/${patch}
done
# Avoid duplicate main symbols when we link into the cgo binary
# sed writes the rewritten source to a temporary file; the `&&` means the
# original server.cpp is only replaced when sed exits successfully.
sed -e 's/int main(/int __main(/g' <./gguf/examples/server/server.cpp >./gguf/examples/server/server.cpp.tmp &&
mv ./gguf/examples/server/server.cpp.tmp ./gguf/examples/server/server.cpp
}
# Configure and then build with CMake.
#
# Reads these variables from the calling environment:
#   LLAMACPP_DIR   - directory passed to `cmake -S`
#   BUILD_DIR      - directory passed to `cmake -B` and `cmake --build`
#   CMAKE_DEFS     - extra configure arguments, split on whitespace
#   CMAKE_TARGETS  - extra `cmake --build` arguments, split on whitespace
#
# Returns the exit status of the configure step if it fails (the build step is
# then skipped); otherwise the exit status of the build step.
build() {
    # The two directory paths are quoted so that a path containing whitespace
    # stays a single argument. CMAKE_DEFS and CMAKE_TARGETS are deliberately
    # left unquoted: each holds several options that must split into words.
    cmake -S "${LLAMACPP_DIR}" -B "${BUILD_DIR}" ${CMAKE_DEFS} || return
    cmake --build "${BUILD_DIR}" ${CMAKE_TARGETS} -j8
}
# Discard local modifications to CMakeLists.txt and server.cpp under
# gguf/examples/server/ once the build is finished.
cleanup() {
    # Runs in a subshell so the caller's working directory is untouched.
    (
        # Bail out with cd's own status if the directory is missing.
        cd gguf/examples/server/ || exit
        git checkout CMakeLists.txt server.cpp
    )
}