llama: update vendored code to commit 40c6d79f (#7875)
@@ -51,7 +51,7 @@ GPU_DIST_LIB_DEPS= $(sort $(addprefix $(ROCM_DIST_DEPS_DIR)/,$(notdir $(GPU_LIBS
 ROCBLAS_DIST_DEP_MANIFEST = $(ROCM_DIST_DEPS_DIR)/rocblas/library/TensileManifest.txt
 
 ifeq ($(OS),linux)
-GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++11
+GPU_COMPILER_FPIC := -fPIC -Wno-unused-function -std=gnu++17
 else ifeq ($(OS),windows)
 GPU_COMPILER_FPIC := -Xclang --dependent-lib=msvcrt
 endif
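The only change in this hunk is the C++ language standard for the ROCm build: the newer vendored llama.cpp/ggml sources use C++17 features, so -std=gnu++11 no longer compiles them. A quick way to sanity-check a local HIP toolchain is a throwaway probe target; this is a sketch, not part of the Makefile, and it assumes GPU_COMPILER names the hipcc/clang++ driver used elsewhere in this file.

# Hypothetical probe target (not in the original Makefile): check that the
# ROCm compiler accepts gnu++17 before starting a full build.
# 'if constexpr' is C++17-only, so a gnu++11 toolchain rejects it.
check-gnu17:
	@echo 'int main() { if constexpr (sizeof(int) > 1) return 0; return 1; }' > /tmp/std_probe.cpp
	@$(GPU_COMPILER) $(GPU_COMPILER_FPIC) -c /tmp/std_probe.cpp -o /tmp/std_probe.o && echo "gnu++17 OK"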
@@ -69,11 +69,13 @@ GPU_COMPILER_CUFLAGS = \
 	-O3 \
 	-DGGML_USE_CUDA \
 	-DGGML_BUILD=1 \
+	-DGGML_BACKEND_BUILD=1 \
 	-DGGML_SHARED=1 \
+	-DGGML_BACKEND_SHARED=1 \
 	-DGGML_CUDA_DMMV_X=32 \
 	-DGGML_CUDA_MMV_Y=1 \
 	-DGGML_SCHED_MAX_COPIES=4 \
-	-DGGML_USE_HIPBLAS \
+	-DGGML_USE_HIP \
 	-DGGML_USE_LLAMAFILE \
 	-DHIP_FAST_MATH \
 	-D__HIP_PLATFORM_AMD__=1 \
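Two things happen in this hunk: GGML_USE_HIPBLAS becomes GGML_USE_HIP, tracking the upstream rename of the HIP backend macro, and the GGML_BACKEND_BUILD/GGML_BACKEND_SHARED pair joins the existing GGML_BUILD/GGML_SHARED pair because upstream now splits ggml into a core library plus per-backend libraries, each with its own build/export defines. When auditing flag changes like this, a generic print target is a common Make idiom (a sketch, not in the original file):

# Generic variable inspector, e.g.:  make print-GPU_COMPILER_CUFLAGS
print-%:
	@echo '$*=$($*)'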
@@ -86,13 +86,14 @@ LLAMACPP_FILES=\
 	src/llama-sampling.cpp \
 	src/llama-sampling.h \
 	include/llama.h \
-	ggml/src/llamafile/sgemm.cpp \
-	ggml/src/llamafile/sgemm.h
+	ggml/include/ggml-cpu.h \
+	ggml/src/ggml-cpu/llamafile/sgemm.cpp \
+	ggml/src/ggml-cpu/llamafile/sgemm.h
 $(foreach name,$(LLAMACPP_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
 
 # llama.cpp files -> llama/llamafile
 LLAMAFILE_FILES= \
-	ggml/src/llamafile/sgemm.h
+	ggml/src/ggml-cpu/llamafile/sgemm.h
 $(foreach name,$(LLAMAFILE_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR)llamafile/)))
 
 # ggml files -> llama/
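Here the llamafile sgemm sources follow their upstream move into ggml/src/ggml-cpu/, and the new ggml-cpu.h public header is picked up. Both lists feed the same vendor_file helper through $(foreach ...)/$(eval $(call ...)). Its definition is not part of this diff; as a rough sketch of the shape such a helper typically has (assumed, not the actual definition — the real one evidently also prepends a license comment block, judging by the 'echo "/**"' in the rename rule further down), it generates one copy rule per vendored path and accumulates the outputs:

# Assumed shape of vendor_file (the real definition lives elsewhere in this
# Makefile): $(1) is a path inside $(LLAMACPP_REPO), $(2) the destination dir.
define vendor_file
$(2)$(notdir $(1)) : $(LLAMACPP_REPO)$(1)
	@echo "vendoring $(1)"
	@mkdir -p $$(dir $$@) && cp $$< $$@
VENDORED_FILES += $(2)$(notdir $(1))
endef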
@@ -101,26 +102,53 @@ GGML_FILES= \
 	ggml/include/ggml.h \
 	ggml/src/ggml-quants.c \
 	ggml/src/ggml-quants.h \
-	ggml/src/ggml-metal.metal \
+	ggml/src/ggml-metal/ggml-metal.metal \
 	ggml/include/ggml-metal.h \
 	ggml/src/ggml-impl.h \
+	ggml/src/ggml-threading.h \
 	ggml/include/ggml-cuda.h \
-	ggml/src/ggml-cuda.cu \
+	ggml/src/ggml-backend-reg.cpp \
+	ggml/src/ggml-metal/ggml-metal-impl.h \
 	ggml/src/ggml-common.h \
 	ggml/include/ggml-backend.h \
-	ggml/src/ggml-backend.c \
+	ggml/src/ggml-backend.cpp \
 	ggml/src/ggml-backend-impl.h \
 	ggml/include/ggml-alloc.h \
 	ggml/src/ggml-alloc.c \
 	ggml/src/ggml-aarch64.h \
 	ggml/src/ggml-aarch64.c \
-	ggml/src/ggml-cpu-impl.h \
 	ggml/include/ggml-blas.h \
-	ggml/src/ggml-blas.cpp
+	ggml/include/ggml-cpp.h \
+	ggml/src/ggml-threading.cpp \
+	ggml/src/ggml-blas/ggml-blas.cpp \
+	ggml/src/ggml-cpu/ggml-cpu.c \
+	ggml/src/ggml-cpu/ggml-cpu-aarch64.c \
+	ggml/src/ggml-cpu/ggml-cpu.cpp \
+	ggml/src/ggml-cpu/ggml-cpu-aarch64.h \
+	ggml/src/ggml-cpu/ggml-cpu-quants.h \
+	ggml/src/ggml-cpu/ggml-cpu-quants.c \
+	ggml/src/ggml-cpu/ggml-cpu-impl.h \
+	ggml/src/ggml-cpu/amx/amx.h \
+	ggml/src/ggml-cpu/amx/amx.cpp \
+	ggml/src/ggml-cpu/amx/mmq.cpp \
+	ggml/src/ggml-cpu/amx/mmq.h
 $(foreach name,$(GGML_FILES),$(eval $(call vendor_file,$(name),$(DEST_DIR))))
 
-$(DEST_DIR)ggml-metal-embed.metal: $(DEST_DIR)ggml-common.h
+$(DEST_DIR)ggml-metal-embed.metal: $(DEST_DIR)ggml-common.h $(DEST_DIR)ggml-metal-impl.h
 	@sed -e '/__embed_ggml-common.h__/r $(DEST_DIR)/ggml-common.h' \
 		-e '/__embed_ggml-common.h__/d' \
 		< $(DEST_DIR)/ggml-metal.metal \
-		> $(DEST_DIR)/ggml-metal-embed.metal
+		> $(DEST_DIR)/ggml-metal-embed.metal.tmp
+	@sed -e '/#include "ggml-metal-impl.h"/r $(DEST_DIR)/ggml-metal-impl.h' \
+		-e '/#include "ggml-metal-impl.h"/d' \
+		< $(DEST_DIR)/ggml-metal-embed.metal.tmp \
+		> $(DEST_DIR)/ggml-metal-embed.metal
+	@rm $(DEST_DIR)/ggml-metal-embed.metal.tmp
 
 VENDORED_FILES += $(DEST_DIR)ggml-metal-embed.metal
 
 # TODO generalize renaming pattern if we have more of these
-$(DEST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal.m
+$(DEST_DIR)ggml-metal_darwin_arm64.m : $(LLAMACPP_REPO)ggml/src/ggml-metal/ggml-metal.m
 	@echo "vendoring $(subst $(LLAMACPP_REPO),,$<)"; \
 	mkdir -p $(dir $@) && \
 	echo "/**" > $@ && \
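This hunk does the bulk of the update: the GGML_FILES list follows the upstream backend split (metal, blas, and the CPU backend move into ggml-metal/, ggml-blas/, and ggml-cpu/ subdirectories, with the new backend registry, threading, and AMX sources added), and the ggml-metal-embed.metal rule grows a second stage because the metal backend now also ships a private header, ggml-metal-impl.h, that has to be spliced into the shader alongside ggml-common.h. The sed idiom is worth spelling out: '/PAT/r FILE' appends FILE's contents after every line matching PAT, and '/PAT/d' deletes the matching line itself, so the pair replaces a marker line with a whole file. A self-contained toy rule (made-up file names) showing the same trick:

# Toy demonstration of the embed idiom used above, with hypothetical files.
demo-embed:
	@printf '// __embed_demo__\nkernel void k() {}\n' > shader.metal
	@echo 'typedef int demo_t;' > demo.h
	@sed -e '/__embed_demo__/r demo.h' \
		-e '/__embed_demo__/d' \
		< shader.metal > shader-embed.metal
	@cat shader-embed.metal   # prints demo.h contents, then the kernel line

The rename rule at the end only re-points its prerequisite at the header's new home, ggml/src/ggml-metal/ggml-metal.m; the darwin_arm64 output name is unchanged.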
@@ -41,7 +41,9 @@ GPU_COMPILER_CUFLAGS = \
 	-DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 \
 	-DGGML_USE_CUDA=1 \
 	-DGGML_SHARED=1 \
+	-DGGML_BACKEND_SHARED=1 \
 	-DGGML_BUILD=1 \
+	-DGGML_BACKEND_BUILD=1 \
 	-DGGML_USE_LLAMAFILE \
 	-DK_QUANTS_PER_ITERATION=2 \
 	-DNDEBUG \
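This is the CUDA counterpart of the ROCm flag change above: the same GGML_BACKEND_SHARED=1 / GGML_BACKEND_BUILD=1 pair is added so the vendored backend code is compiled for the shared backend library here too. Since the pair now has to stay identical across the CUDA and ROCm flag blocks, one way to avoid drift would be a shared fragment (a sketch of a possible refactor, not how these Makefiles are actually factored):

# Sketch: hoist the shared-library defines into one variable that both GPU
# flag blocks could include (hypothetical, not in the original Makefiles).
GGML_BACKEND_DEFS := \
	-DGGML_SHARED=1 \
	-DGGML_BACKEND_SHARED=1 \
	-DGGML_BUILD=1 \
	-DGGML_BACKEND_BUILD=1
GPU_COMPILER_CUFLAGS += $(GGML_BACKEND_DEFS)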
@@ -15,10 +15,9 @@ DIST_GPU_RUNNER_DEPS_DIR = $(DIST_LIB_DIR)
 GPU_RUNNER_LIBS = $(wildcard $(addsuffix .$(SHARED_EXT).*,$(addprefix $(GPU_LIB_DIR)/$(SHARED_PREFIX),$(GPU_RUNNER_LIBS_SHORT))))
-
 GPU_RUNNER_SRCS := \
 	llama/ggml-cuda.cu \
 	$(filter-out $(wildcard llama/ggml-cuda/fattn*.cu),$(wildcard llama/ggml-cuda/*.cu)) \
 	$(wildcard llama/ggml-cuda/template-instances/mmq*.cu) \
-	llama/ggml.c llama/ggml-backend.c llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c
+	llama/ggml.c llama/ggml-backend.cpp llama/ggml-alloc.c llama/ggml-quants.c llama/sgemm.cpp llama/ggml-aarch64.c llama/ggml-threading.cpp
 GPU_RUNNER_HDRS := \
 	$(wildcard llama/ggml-cuda/*.cuh)
 
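The GPU runner's source list tracks two of the vendoring changes: ggml-backend moved from C to C++ upstream (ggml-backend.c becomes ggml-backend.cpp), and the new ggml-threading.cpp has to be compiled in. A stale sync makes these rules fail with confusing "no rule to make target" errors, so a small guard target can report missing vendored sources up front (hypothetical, not in the original):

# Hypothetical guard: list GPU runner sources that have not been vendored yet.
# $(wildcard) keeps only paths that exist on disk, so filtering those out of
# the full list leaves exactly the missing files.
check-vendored:
	@missing='$(filter-out $(wildcard $(GPU_RUNNER_SRCS)),$(GPU_RUNNER_SRCS))'; \
	if [ -n "$$missing" ]; then echo "missing vendored sources: $$missing"; exit 1; fi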