mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
next build (#8539)
* add build to .dockerignore * test: only build one arch * add build to .gitignore * fix ccache path * filter amdgpu targets * only filter if autodetecting * Don't clobber gpu list for default runner This ensures the GPU specific environment variables are set properly * explicitly set CXX compiler for HIP * Update build_windows.ps1 This isn't complete, but is close. Dependencies are missing, and it only builds the "default" preset. * build: add ollama subdir * add .git to .dockerignore * docs: update development.md * update build_darwin.sh * remove unused scripts * llm: add cwd and build/lib/ollama to library paths * default DYLD_LIBRARY_PATH to LD_LIBRARY_PATH in runner on macOS * add additional cmake output vars for msvc * interim edits to make server detection logic work with dll directories like lib/ollama/cuda_v12 * remove unnecessary filepath.Dir, cleanup * add hardware-specific directory to path * use absolute server path * build: linux arm * cmake install targets * remove unused files * ml: visit each library path once * build: skip cpu variants on arm * build: install cpu targets * build: fix workflow * shorter names * fix rocblas install * docs: clean up development.md * consistent build dir removal in development.md * silence -Wimplicit-function-declaration build warnings in ggml-cpu * update readme * update development readme * llm: update library lookup logic now that there is one runner (#8587) * tweak development.md * update docs * add windows cuda/rocm tests --------- Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Daniel Hiltgen <daniel@ollama.com>
This commit is contained in:
@@ -4,39 +4,44 @@ Date: Thu, 6 Jun 2024 23:55:47 -0700
|
||||
Subject: [PATCH] cuda
|
||||
|
||||
---
|
||||
ggml/src/ggml-backend.cpp | 5 +++++
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 4 ++++
|
||||
2 files changed, 9 insertions(+)
|
||||
ggml/src/ggml-backend.cpp | 1 -
|
||||
ggml/src/ggml-cuda/ggml-cuda.cu | 1 +
|
||||
ggml/src/ggml-metal/ggml-metal.m | 1 +
|
||||
3 files changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp
|
||||
index e2d6c405..1b62c056 100644
|
||||
index e2d6c405..a12172dc 100644
|
||||
--- a/ggml/src/ggml-backend.cpp
|
||||
+++ b/ggml/src/ggml-backend.cpp
|
||||
@@ -106,7 +106,12 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
||||
@@ -106,7 +106,6 @@ void ggml_backend_buffer_free(ggml_backend_buffer_t buffer) {
|
||||
if (buffer->iface.free_buffer != NULL) {
|
||||
buffer->iface.free_buffer(buffer);
|
||||
}
|
||||
+
|
||||
+// TODO: this needs to be freed in cuda and hip backends because
|
||||
+// the cuda backend implementation compiled with msvc
|
||||
+#if !defined(GGML_USE_CUDA) && !defined(GGML_USE_HIP)
|
||||
delete buffer;
|
||||
+#endif
|
||||
- delete buffer;
|
||||
}
|
||||
|
||||
size_t ggml_backend_buffer_get_size(ggml_backend_buffer_t buffer) {
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index 0b06be72..0a6ae325 100644
|
||||
index 0b06be72..be29e979 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -424,6 +424,10 @@ struct ggml_backend_cuda_buffer_context {
|
||||
@@ -424,6 +424,7 @@ struct ggml_backend_cuda_buffer_context {
|
||||
static void ggml_backend_cuda_buffer_free_buffer(ggml_backend_buffer_t buffer) {
|
||||
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
||||
delete ctx;
|
||||
+
|
||||
+ // TODO: this needs to be freed in cuda and hipblas backends because
|
||||
+ // the cuda backend implementation compiled with msvc
|
||||
+ free(buffer);
|
||||
+ delete buffer;
|
||||
}
|
||||
|
||||
static bool ggml_backend_buffer_is_cuda(ggml_backend_buffer_t buffer) {
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
|
||||
index a85502ee..cd8ef741 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.m
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.m
|
||||
@@ -4187,6 +4187,7 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer)
|
||||
}
|
||||
|
||||
free(ctx);
|
||||
+ free(buffer);
|
||||
}
|
||||
|
||||
static void * ggml_backend_metal_buffer_get_base(ggml_backend_buffer_t buffer) {
|
||||
|
||||
@@ -8,10 +8,10 @@ Subject: [PATCH] conditional-fattn
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index 0a6ae325..bb425ee8 100644
|
||||
index be29e979..aaa79ea4 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -2162,9 +2162,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
||||
@@ -2159,9 +2159,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
||||
case GGML_OP_ARGSORT:
|
||||
ggml_cuda_op_argsort(ctx, dst);
|
||||
break;
|
||||
|
||||
@@ -1,26 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Jesse Gross <jesse@ollama.com>
|
||||
Date: Mon, 30 Sep 2024 16:31:04 -0700
|
||||
Subject: [PATCH] blas
|
||||
|
||||
---
|
||||
ggml/src/ggml-blas/ggml-blas.cpp | 4 ++++
|
||||
1 file changed, 4 insertions(+)
|
||||
|
||||
diff --git a/ggml/src/ggml-blas/ggml-blas.cpp b/ggml/src/ggml-blas/ggml-blas.cpp
|
||||
index ec158dfa..b3ac1fa4 100644
|
||||
--- a/ggml/src/ggml-blas/ggml-blas.cpp
|
||||
+++ b/ggml/src/ggml-blas/ggml-blas.cpp
|
||||
@@ -1,3 +1,5 @@
|
||||
+#ifdef GGML_USE_BLAS
|
||||
+
|
||||
#include "ggml-impl.h"
|
||||
#include "ggml-blas.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
@@ -515,3 +517,5 @@ ggml_backend_reg_t ggml_backend_blas_reg(void) {
|
||||
}
|
||||
|
||||
GGML_BACKEND_DL_IMPL(ggml_backend_blas_reg)
|
||||
+
|
||||
+#endif // GGML_USE_BLAS
|
||||
\ No newline at end of file
|
||||
@@ -126,10 +126,10 @@ index b7fefb9d..b307d554 100644
|
||||
case GGML_OP_TIMESTEP_EMBEDDING:
|
||||
case GGML_OP_ARGSORT:
|
||||
diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
index bb425ee8..1e7c2a22 100644
|
||||
index aaa79ea4..9286f866 100644
|
||||
--- a/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
+++ b/ggml/src/ggml-cuda/ggml-cuda.cu
|
||||
@@ -2085,6 +2085,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
||||
@@ -2082,6 +2082,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
||||
case GGML_OP_PAD:
|
||||
ggml_cuda_op_pad(ctx, dst);
|
||||
break;
|
||||
@@ -139,7 +139,7 @@ index bb425ee8..1e7c2a22 100644
|
||||
case GGML_OP_ARANGE:
|
||||
ggml_cuda_op_arange(ctx, dst);
|
||||
break;
|
||||
@@ -3013,6 +3016,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
@@ -3010,6 +3013,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
case GGML_OP_GROUP_NORM:
|
||||
case GGML_OP_UPSCALE:
|
||||
case GGML_OP_PAD:
|
||||
@@ -211,10 +211,10 @@ index 8fd386b0..e2ededc3 100644
|
||||
void ggml_cuda_op_pad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
+void ggml_cuda_op_unpad(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||
diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
|
||||
index a85502ee..84e027eb 100644
|
||||
index cd8ef741..318addec 100644
|
||||
--- a/ggml/src/ggml-metal/ggml-metal.m
|
||||
+++ b/ggml/src/ggml-metal/ggml-metal.m
|
||||
@@ -311,6 +311,7 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte
|
||||
@@ -311,6 +311,7 @@ enum ggml_metal_kernel_type {
|
||||
GGML_METAL_KERNEL_TYPE_UPSCALE_F32,
|
||||
GGML_METAL_KERNEL_TYPE_PAD_F32,
|
||||
GGML_METAL_KERNEL_TYPE_PAD_REFLECT_1D_F32,
|
||||
@@ -222,7 +222,7 @@ index a85502ee..84e027eb 100644
|
||||
GGML_METAL_KERNEL_TYPE_ARANGE_F32,
|
||||
GGML_METAL_KERNEL_TYPE_TIMESTEP_EMBEDDING_F32,
|
||||
GGML_METAL_KERNEL_TYPE_ARGSORT_F32_I32_ASC,
|
||||
@@ -910,6 +911,7 @@ @implementation GGMLMetalClass
|
||||
@@ -910,6 +911,7 @@ static struct ggml_backend_metal_context * ggml_metal_init(ggml_backend_dev_t de
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_UPSCALE_F32, upscale_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_F32, pad_f32, true);
|
||||
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_PAD_REFLECT_1D_F32, pad_reflect_1d_f32, true);
|
||||
@@ -1,51 +0,0 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: jmorganca <jmorganca@gmail.com>
|
||||
Date: Tue, 3 Dec 2024 21:30:51 -0800
|
||||
Subject: [PATCH] relative include paths
|
||||
|
||||
---
|
||||
ggml/src/ggml-cpu/ggml-cpu.c | 2 +-
|
||||
ggml/src/ggml-cpu/ggml-cpu.cpp | 3 +--
|
||||
ggml/src/ggml-quants.c | 2 +-
|
||||
3 files changed, 3 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
|
||||
index b307d554..4eb39c52 100644
|
||||
--- a/ggml/src/ggml-cpu/ggml-cpu.c
|
||||
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
|
||||
@@ -10,7 +10,7 @@
|
||||
#include "ggml-quants.h"
|
||||
#include "ggml-cpu-quants.h"
|
||||
#include "ggml-threading.h"
|
||||
-#include "amx/amx.h"
|
||||
+#include "amx.h"
|
||||
#include "ggml.h"
|
||||
|
||||
#if defined(_MSC_VER) || defined(__MINGW32__)
|
||||
diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp
|
||||
index f11399cc..2a8b40ce 100644
|
||||
--- a/ggml/src/ggml-cpu/ggml-cpu.cpp
|
||||
+++ b/ggml/src/ggml-cpu/ggml-cpu.cpp
|
||||
@@ -4,8 +4,7 @@
|
||||
#include "ggml-cpu-aarch64.h"
|
||||
#include "ggml-cpu-traits.h"
|
||||
#include "ggml-impl.h"
|
||||
-#include "amx/amx.h"
|
||||
-
|
||||
+#include "amx.h"
|
||||
#include <cctype>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
|
||||
index 7918388a..e2ed84e4 100644
|
||||
--- a/ggml/src/ggml-quants.c
|
||||
+++ b/ggml/src/ggml-quants.c
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include "ggml-quants.h"
|
||||
#include "ggml-impl.h"
|
||||
-#include "ggml-cpu/ggml-cpu-impl.h"
|
||||
+#include "ggml-cpu-impl.h"
|
||||
#include "ggml-cpu.h"
|
||||
|
||||
#include <math.h>
|
||||
82
llama/patches/0014-sort-devices-by-score.patch
Normal file
82
llama/patches/0014-sort-devices-by-score.patch
Normal file
@@ -0,0 +1,82 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <mxyng@pm.me>
|
||||
Date: Tue, 14 Jan 2025 12:01:24 -0800
|
||||
Subject: [PATCH] sort devices by score
|
||||
|
||||
---
|
||||
ggml/src/ggml-backend-reg.cpp | 21 +++++++++++++--------
|
||||
1 file changed, 13 insertions(+), 8 deletions(-)
|
||||
|
||||
diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp
|
||||
index 899d16f2..ac5cda07 100644
|
||||
--- a/ggml/src/ggml-backend-reg.cpp
|
||||
+++ b/ggml/src/ggml-backend-reg.cpp
|
||||
@@ -150,7 +150,7 @@ struct ggml_backend_reg_entry {
|
||||
|
||||
struct ggml_backend_registry {
|
||||
std::vector<ggml_backend_reg_entry> backends;
|
||||
- std::vector<ggml_backend_dev_t> devices;
|
||||
+ std::vector<std::pair<ggml_backend_dev_t, int>> devices;
|
||||
|
||||
ggml_backend_registry() {
|
||||
#ifdef GGML_USE_CUDA
|
||||
@@ -195,7 +195,7 @@ struct ggml_backend_registry {
|
||||
}
|
||||
}
|
||||
|
||||
- void register_backend(ggml_backend_reg_t reg, dl_handle_ptr handle = nullptr) {
|
||||
+ void register_backend(ggml_backend_reg_t reg, int score = -1, dl_handle_ptr handle = nullptr) {
|
||||
if (!reg) {
|
||||
return;
|
||||
}
|
||||
@@ -206,15 +206,15 @@ struct ggml_backend_registry {
|
||||
#endif
|
||||
backends.push_back({ reg, std::move(handle) });
|
||||
for (size_t i = 0; i < ggml_backend_reg_dev_count(reg); i++) {
|
||||
- register_device(ggml_backend_reg_dev_get(reg, i));
|
||||
+ register_device(ggml_backend_reg_dev_get(reg, i), score);
|
||||
}
|
||||
}
|
||||
|
||||
- void register_device(ggml_backend_dev_t device) {
|
||||
+ void register_device(ggml_backend_dev_t device, int score = -1) {
|
||||
#ifndef NDEBUG
|
||||
GGML_LOG_DEBUG("%s: registered device %s (%s)\n", __func__, ggml_backend_dev_name(device), ggml_backend_dev_description(device));
|
||||
#endif
|
||||
- devices.push_back(device);
|
||||
+ devices.push_back({device, score});
|
||||
}
|
||||
|
||||
ggml_backend_reg_t load_backend(const std::wstring & path, bool silent) {
|
||||
@@ -257,7 +257,7 @@ struct ggml_backend_registry {
|
||||
|
||||
GGML_LOG_INFO("%s: loaded %s backend from %s\n", __func__, ggml_backend_reg_name(reg), utf16_to_utf8(path).c_str());
|
||||
|
||||
- register_backend(reg, std::move(handle));
|
||||
+ register_backend(reg, score_fn ? score_fn() : -1, std::move(handle));
|
||||
|
||||
return reg;
|
||||
}
|
||||
@@ -280,7 +280,7 @@ struct ggml_backend_registry {
|
||||
// remove devices
|
||||
devices.erase(
|
||||
std::remove_if(devices.begin(), devices.end(),
|
||||
- [reg](ggml_backend_dev_t dev) { return ggml_backend_dev_backend_reg(dev) == reg; }),
|
||||
+ [reg](std::pair<ggml_backend_dev_t, int> dev) { return ggml_backend_dev_backend_reg(dev.first) == reg; }),
|
||||
devices.end());
|
||||
|
||||
// remove backend
|
||||
@@ -338,7 +338,12 @@ size_t ggml_backend_dev_count() {
|
||||
|
||||
ggml_backend_dev_t ggml_backend_dev_get(size_t index) {
|
||||
GGML_ASSERT(index < ggml_backend_dev_count());
|
||||
- return get_reg().devices[index];
|
||||
+ auto devices = get_reg().devices;
|
||||
+ if (!std::is_heap(devices.begin(), devices.end())) {
|
||||
+ std::make_heap(devices.begin(), devices.end(), [](const auto & a, const auto & b) { return a.second < b.second; });
|
||||
+ }
|
||||
+
|
||||
+ return devices[index].first;
|
||||
}
|
||||
|
||||
ggml_backend_dev_t ggml_backend_dev_by_name(const char * name) {
|
||||
@@ -0,0 +1,29 @@
|
||||
From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
|
||||
From: Michael Yang <mxyng@pm.me>
|
||||
Date: Tue, 14 Jan 2025 15:59:04 -0800
|
||||
Subject: [PATCH] add phony target ggml-cpu for all cpu variants
|
||||
|
||||
---
|
||||
ggml/src/CMakeLists.txt | 2 ++
|
||||
1 file changed, 2 insertions(+)
|
||||
|
||||
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
|
||||
index 84101c32..72b488dd 100644
|
||||
--- a/ggml/src/CMakeLists.txt
|
||||
+++ b/ggml/src/CMakeLists.txt
|
||||
@@ -278,6 +278,7 @@ function(ggml_add_cpu_backend_variant tag_name)
|
||||
endforeach()
|
||||
|
||||
ggml_add_cpu_backend_variant_impl(${tag_name})
|
||||
+ add_dependencies(ggml-cpu ggml-cpu-${tag_name})
|
||||
endfunction()
|
||||
|
||||
ggml_add_backend(CPU)
|
||||
@@ -286,6 +287,7 @@ if (GGML_CPU_ALL_VARIANTS)
|
||||
if (NOT GGML_BACKEND_DL)
|
||||
message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS requires GGML_BACKEND_DL")
|
||||
endif()
|
||||
+ add_custom_target(ggml-cpu)
|
||||
ggml_add_cpu_backend_variant(sandybridge AVX)
|
||||
ggml_add_cpu_backend_variant(haswell AVX F16C AVX2 FMA)
|
||||
ggml_add_cpu_backend_variant(skylakex AVX F16C AVX2 FMA AVX512)
|
||||
Reference in New Issue
Block a user