Revert "Revert "ggml: Export GPU UUIDs" (#11115)" (#11117)

Reverts PR #11115. The original change was mistakenly reverted instead of #10822.
2025-12-10 07:46:59 +00:00 · 2025-06-18 07:30:49 -07:00
parent ed567ef43b
commit 6baf1e31e2
6 changed files with 151 additions and 0 deletions
--- a/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
+++ b/llama/patches/0017-ggml-Export-GPU-UUIDs.patch
@@ -0,0 +1,102 @@
 From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001
 From: Jesse Gross <jesse@ollama.com>
 Date: Thu, 24 Apr 2025 14:48:51 -0700
 Subject: [PATCH] ggml: Export GPU UUIDs
 This enables matching up devices and information reported by the backend
 with tools (e.g. nvidia-smi) and system management libraries (e.g. nvml).
 ---
 ggml/include/ggml-backend.h      |  1 +
 ggml/src/ggml-cuda/ggml-cuda.cu  | 33 ++++++++++++++++++++++++++++++++
 ggml/src/ggml-metal/ggml-metal.m |  1 +
 3 files changed, 35 insertions(+)
 diff --git a/ggml/include/ggml-backend.h b/ggml/include/ggml-backend.h
 index 74e46716..a880df33 100644
 --- a/ggml/include/ggml-backend.h
 +++ b/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
     struct ggml_backend_dev_props {
         const char * name;
         const char * description;
 +        const char * uuid;
         size_t memory_free;
         size_t memory_total;
         enum ggml_backend_dev_type type;
 diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu
 index cb0d8528..4c829153 100644
 --- a/ggml/src/ggml-cuda/ggml-cuda.cu
 +++ b/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
     int device;
     std::string name;
     std::string description;
 +    std::string uuid;
 };
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
     return ctx->description.c_str();
 }
 +static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
 +    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
 +    return ctx->uuid.c_str();
 +}
 +
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
     ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
     ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
     props->name        = ggml_backend_cuda_device_get_name(dev);
     props->description = ggml_backend_cuda_device_get_description(dev);
 +    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
     props->type        = ggml_backend_cuda_device_get_type(dev);
     ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                 CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                 dev_ctx->description = prop.name;
 +                #if !defined(GGML_USE_HIP)
 +                char uuid[64];
 +                snprintf(uuid, sizeof(uuid),
 +                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
 +                    (unsigned char)prop.uuid.bytes[0],
 +                    (unsigned char)prop.uuid.bytes[1],
 +                    (unsigned char)prop.uuid.bytes[2],
 +                    (unsigned char)prop.uuid.bytes[3],
 +                    (unsigned char)prop.uuid.bytes[4],
 +                    (unsigned char)prop.uuid.bytes[5],
 +                    (unsigned char)prop.uuid.bytes[6],
 +                    (unsigned char)prop.uuid.bytes[7],
 +                    (unsigned char)prop.uuid.bytes[8],
 +                    (unsigned char)prop.uuid.bytes[9],
 +                    (unsigned char)prop.uuid.bytes[10],
 +                    (unsigned char)prop.uuid.bytes[11],
 +                    (unsigned char)prop.uuid.bytes[12],
 +                    (unsigned char)prop.uuid.bytes[13],
 +                    (unsigned char)prop.uuid.bytes[14],
 +                    (unsigned char)prop.uuid.bytes[15]
 +                  );
 +                dev_ctx->uuid = uuid;
 +                #else
 +                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
 +                #endif
 +
                 ggml_backend_dev_t dev = new ggml_backend_device {
                     /* .iface   = */ ggml_backend_cuda_device_interface,
                     /* .reg     = */ &reg,
 diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m
 index 1b56f858..ee4f2dcb 100644
 --- a/ggml/src/ggml-metal/ggml-metal.m
 +++ b/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
     props->name        = ggml_backend_metal_device_get_name(dev);
     props->description = ggml_backend_metal_device_get_description(dev);
 +    props->uuid        = "0";
     props->type        = ggml_backend_metal_device_get_type(dev);
     ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
     props->caps = (struct ggml_backend_dev_caps) {
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -124,6 +124,10 @@ type DeviceMemory struct {
 	// may not be persistent across instances of the runner.
 	Name string
 	// UUID is a unique persistent identifier for the device for matching
 	// with system management libraries
 	UUID string
 	// Weights is the per-layer memory needed for the model weights.
 	Weights []Memory
@@ -152,6 +156,10 @@ func (m DeviceMemory) LogValue() slog.Value {
 		attrs = append(attrs, slog.Any("Graph", m.Graph))
 	}
 	if len(attrs) > 0 && m.UUID != "" {
 		attrs = append([]slog.Attr{slog.String("UUID", m.UUID)}, attrs...)
 	}
 	return slog.GroupValue(attrs...)
 }
--- a/ml/backend/ggml/ggml.go
+++ b/ml/backend/ggml/ggml.go
@@ -136,6 +136,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 	}
 	requiredMemory.CPU.Name = C.GoString(C.ggml_backend_dev_name(cpuDeviceBufferType.d))
 	var props C.struct_ggml_backend_dev_props
 	C.ggml_backend_dev_get_props(cpuDeviceBufferType.d, &props)
 	requiredMemory.CPU.UUID = C.GoString(props.uuid)
 	requiredMemory.CPU.Weights = make([]ml.Memory, blocks+1)
 	requiredMemory.CPU.Cache = make([]ml.Memory, blocks+1)
@@ -150,6 +153,9 @@ func New(modelPath string, params ml.BackendParams) (ml.Backend, error) {
 		})
 		btDeviceMemory[bt] = &requiredMemory.GPUs[i]
 		requiredMemory.GPUs[i].Name = C.GoString(C.ggml_backend_dev_name(d))
 		var props C.struct_ggml_backend_dev_props
 		C.ggml_backend_dev_get_props(d, &props)
 		requiredMemory.GPUs[i].UUID = C.GoString(props.uuid)
 		requiredMemory.GPUs[i].Weights = make([]ml.Memory, blocks+1)
 		requiredMemory.GPUs[i].Cache = make([]ml.Memory, blocks+1)
 	}
--- a/ml/backend/ggml/ggml/include/ggml-backend.h
+++ b/ml/backend/ggml/ggml/include/ggml-backend.h
@@ -152,6 +152,7 @@ extern "C" {
    struct ggml_backend_dev_props {
        const char * name;
        const char * description;
        const char * uuid;
        size_t memory_free;
        size_t memory_total;
        enum ggml_backend_dev_type type;
--- a/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
+++ b/ml/backend/ggml/ggml/src/ggml-cuda/ggml-cuda.cu
@@ -2884,6 +2884,7 @@ struct ggml_backend_cuda_device_context {
    int device;
    std::string name;
    std::string description;
    std::string uuid;
 };
 static const char * ggml_backend_cuda_device_get_name(ggml_backend_dev_t dev) {
@@ -2896,6 +2897,11 @@ static const char * ggml_backend_cuda_device_get_description(ggml_backend_dev_t
    return ctx->description.c_str();
 }
 static const char * ggml_backend_cuda_device_get_uuid(ggml_backend_dev_t dev) {
    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
    return ctx->uuid.c_str();
 }
 static void ggml_backend_cuda_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) {
    ggml_backend_cuda_device_context * ctx = (ggml_backend_cuda_device_context *)dev->context;
    ggml_cuda_set_device(ctx->device);
@@ -2910,6 +2916,7 @@ static enum ggml_backend_dev_type ggml_backend_cuda_device_get_type(ggml_backend
 static void ggml_backend_cuda_device_get_props(ggml_backend_dev_t dev, ggml_backend_dev_props * props) {
    props->name        = ggml_backend_cuda_device_get_name(dev);
    props->description = ggml_backend_cuda_device_get_description(dev);
    props->uuid        = ggml_backend_cuda_device_get_uuid(dev);
    props->type        = ggml_backend_cuda_device_get_type(dev);
    ggml_backend_cuda_device_get_memory(dev, &props->memory_free, &props->memory_total);
@@ -3458,6 +3465,32 @@ ggml_backend_reg_t ggml_backend_cuda_reg() {
                CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
                dev_ctx->description = prop.name;
                #if !defined(GGML_USE_HIP)
                char uuid[64];
                snprintf(uuid, sizeof(uuid),
                    "GPU-%02x%02x%02x%02x-%02x%02x-%02x%02x-%02x%02x-%02x%02x%02x%02x%02x%02x",
                    (unsigned char)prop.uuid.bytes[0],
                    (unsigned char)prop.uuid.bytes[1],
                    (unsigned char)prop.uuid.bytes[2],
                    (unsigned char)prop.uuid.bytes[3],
                    (unsigned char)prop.uuid.bytes[4],
                    (unsigned char)prop.uuid.bytes[5],
                    (unsigned char)prop.uuid.bytes[6],
                    (unsigned char)prop.uuid.bytes[7],
                    (unsigned char)prop.uuid.bytes[8],
                    (unsigned char)prop.uuid.bytes[9],
                    (unsigned char)prop.uuid.bytes[10],
                    (unsigned char)prop.uuid.bytes[11],
                    (unsigned char)prop.uuid.bytes[12],
                    (unsigned char)prop.uuid.bytes[13],
                    (unsigned char)prop.uuid.bytes[14],
                    (unsigned char)prop.uuid.bytes[15]
                  );
                dev_ctx->uuid = uuid;
                #else
                dev_ctx->uuid = "GPU-" + std::string(prop.uuid.bytes, 16);
                #endif
                ggml_backend_dev_t dev = new ggml_backend_device {
                    /* .iface   = */ ggml_backend_cuda_device_interface,
                    /* .reg     = */ &reg,
--- a/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
+++ b/ml/backend/ggml/ggml/src/ggml-metal/ggml-metal.m
@@ -5703,6 +5703,7 @@ static enum ggml_backend_dev_type ggml_backend_metal_device_get_type(ggml_backen
 static void ggml_backend_metal_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) {
    props->name        = ggml_backend_metal_device_get_name(dev);
    props->description = ggml_backend_metal_device_get_description(dev);
    props->uuid        = "0";
    props->type        = ggml_backend_metal_device_get_type(dev);
    ggml_backend_metal_device_get_memory(dev, &props->memory_free, &props->memory_total);
    props->caps = (struct ggml_backend_dev_caps) {