Add support for new models and fix GitHub issues

- Add Gemma3n model support with text generation capabilities - Add new CUDA mean operations for improved performance - Add macOS documentation and performance tests - Update LLAMA patches for ROCm/CUDA compatibility - Fix various model conversion and processing issues - Update CI workflows and build configurations - Add library model tests and Shakespeare test data 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
2025-12-12 16:57:04 +00:00 · 2025-07-20 00:12:36 +08:00
parent 1fa71c2670
commit cbcbc9ae07
81 changed files with 132316 additions and 747 deletions
--- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -7,31 +7,31 @@ This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
 ggml/include/ggml-backend.h      |  1 +
- ggml/src/ggml-cuda/ggml-cuda.cu  | 33 ++++++++++++++++++++++++++++++++
+ ggml/src/ggml-cuda/ggml-cuda.cu  | 39 ++++++++++++++++++++++++++++++++
 ggml/src/ggml-metal/ggml-metal.m |  1 +
- 3 files changed, 35 insertions(+)
+ 3 files changed, 41 insertions(+)

 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
-index 74e46716..a880df33 100644
+index 74e46716..48839339 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
     struct ggml_backend_dev_props {
         const char * name;
         const char * description;
-+        const char * uuid;
+        const char * id;
         size_t memory_free;
         size_t memory_total;
         enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
-index cb0d8528..4c829153 100644
+index cb0d8528..d6960174 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
-+    std::string uuid;
+    std::string id;
 };
 
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -39,9 +39,9 @@ index cb0d8528..4c829153 100644
     return ctx->description.c_str();
 }
 
-+static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
+static const char * ggml_backend_cuda_device_get_id(ggml_backend_dev_t dev) {
 +    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
-+    return ctx->uuid.c_str();
+    return ctx->id.c_str();
 +}
 +
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
@@ -51,17 +51,17 @@ index cb0d8528..4c829153 100644
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
-+    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
+    props->id          = ggml_backend_cuda_device_get_id(dev);
     props->type        = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
 
-@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
+@@ -3458,6 +3465,38 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
 
 +                #if !defined(GGML_USE_HIP)
-+                char uuid[64];
-+                snprintf(uuid, sizeof(uuid),
+                char id[64];
+                snprintf(id, sizeof(id),
 +                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 +                    (unsigned char)prop.uuid.bytes[0],
 +                    (unsigned char)prop.uuid.bytes[1],
@@ -80,23 +80,29 @@ index cb0d8528..4c829153 100644
 +                    (unsigned char)prop.uuid.bytes[14],
 +                    (unsigned char)prop.uuid.bytes[15]
 +                  );
-+                dev_ctx->uuid = uuid;
+                dev_ctx->id = id;
 +                #else
-+                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #ifdef _WIN32
+                char id[16];
+                snprintf(id, sizeof(id), "%d", i);
+                dev_ctx->id = id;
+                #else
+                dev_ctx->id = "GPU-" + std::string(prop.uuid.bytes, 16);
+                #endif
 +                #endif
 +
                 ggml_backend_dev_t dev = new ggml_backend_device {
                     /* .iface   = */ ggml_backend_cuda_device_interface,
                     /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
-index 1b56f858..ee4f2dcb 100644
+index 1b56f858..a9eeebc6 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
-+    props->uuid        = "0";
+    props->id          = "0";
     props->type        = ggml_backend_metal_device_get_type(dev);
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = (struct ggml_backend_dev_caps) {