image processing for llama3.2 (#6963)

Co-authored-by: jmorganca <jmorganca@gmail.com> Co-authored-by: Michael Yang <mxyng@pm.me> Co-authored-by: Jesse Gross <jesse@ollama.com>
2025-12-11 08:17:03 +00:00 · 2024-10-18 16:12:35 -07:00
parent bf4018b9ec
commit c7cb0f0602
35 changed files with 3851 additions and 203 deletions
--- a/llm/memory.go
+++ b/llm/memory.go
@@ -3,6 +3,7 @@ package llm
 import (
 	"fmt"
 	"log/slog"
+	"os"
 	"strconv"
 	"strings"

@@ -63,6 +64,8 @@ type MemoryEstimate struct {
 	memoryLayerOutput   uint64
 	graphFullOffload    uint64
 	graphPartialOffload uint64
+
+	projectorWeights, projectorGraph uint64
 }

 // Given a model and one or more GPU targets, predict how many layers and bytes we can load, and the total size
@@ -78,7 +81,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 	var graphOffload uint64

 	// Projectors loaded into GPU0 only
-	var projectorSize uint64
+	var projectorWeights uint64
+	var projectorGraph uint64

 	// Conditional output size on GPU 0
 	var memoryLayerOutput uint64
@@ -103,7 +107,9 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 	slog.Debug("evaluating", "library", gpus[0].Library, "gpu_count", len(gpus), "available", availableList)

 	for _, projector := range projectors {
-		projectorSize += projectorMemoryRequirements(projector)
+		weight, graph := projectorMemoryRequirements(projector)
+		projectorWeights += weight
+		projectorGraph += graph

 		// multimodal models require at least 2048 context
 		opts.NumCtx = max(opts.NumCtx, 2048)
@@ -149,7 +155,7 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 	}

 	// Output layer handled at the end if we have space
-	gpuZeroOverhead := projectorSize
+	gpuZeroOverhead := projectorWeights + projectorGraph

 	// Reduce set of GPUs to only those that have sufficient space to fit overhead and at least one layer
 	var layerCount int
@@ -303,6 +309,8 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,
 		memoryLayerOutput:   memoryLayerOutput,
 		graphFullOffload:    graphFullOffload,
 		graphPartialOffload: graphPartialOffload,
+		projectorWeights:    projectorWeights,
+		projectorGraph:      projectorGraph,
 	}

 	if gpus[0].Library == "cpu" {
@@ -323,7 +331,19 @@ func EstimateGPULayers(gpus []discover.GpuInfo, ggml *GGML, projectors []string,

 func (m MemoryEstimate) log() {
 	overhead := envconfig.GpuOverhead()
-	slog.Info(
+
+	log := slog.With()
+	if m.projectorWeights > 0 {
+		log = log.With(
+			slog.Group(
+				"projector",
+				"weights", format.HumanBytes2(m.projectorWeights),
+				"graph", format.HumanBytes2(m.projectorGraph),
+			),
+		)
+	}
+
+	log.Info(
 		"offload to "+m.inferenceLibrary,
 		slog.Group(
 			"layers",
@@ -371,3 +391,52 @@ func (m MemoryEstimate) log() {
 		),
 	)
 }
+
+// projectorMemoryRequirements reads the GGML projector file at filename and
+// returns the summed size of its tensor layers (weights) and, for "mllama",
+// an estimated graph size. It returns 0, 0 if the file can't be opened or decoded.
+func projectorMemoryRequirements(filename string) (weights, graphSize uint64) {
+	file, err := os.Open(filename)
+	if err != nil {
+		return 0, 0
+	}
+	defer file.Close()
+
+	ggml, _, err := DecodeGGML(file, 0)
+	if err != nil {
+		return 0, 0
+	}
+
+	for _, layer := range ggml.Tensors().Layers() {
+		weights += layer.size()
+	}
+
+	switch arch := ggml.KV().Architecture(); arch {
+	case "mllama":
+		kv := func(n string) uint64 { // yields 0 if the key is absent or not a uint32
+			v, _ := ggml.KV()[arch+".vision."+n].(uint32)
+			return uint64(v)
+		}
+
+		imageSize, patchSize := kv("image_size"), kv("patch_size")
+		if patchSize == 0 {
+			return weights, 0 // no usable patch size: skip the graph estimate, don't divide by zero
+		}
+		maxNumTiles, embeddingLength := kv("max_num_tiles"), kv("embedding_length")
+
+		numPatches := (imageSize / patchSize) * (imageSize / patchSize)
+		if _, ok := ggml.Tensors().Layers()["v"]["class_embd"]; ok {
+			numPatches++
+		}
+		// round up to a multiple of 8, adding nothing when already aligned
+		numPaddedPatches := numPatches + (8-numPatches%8)%8
+
+		graphSize = 4 * (8 +
+			imageSize*imageSize*kv("num_channels")*maxNumTiles +
+			embeddingLength*numPatches*maxNumTiles +
+			9*embeddingLength*numPaddedPatches*maxNumTiles +
+			numPaddedPatches*maxNumTiles*numPaddedPatches*maxNumTiles*kv("attention.head_count"))
+	}
+
+	return weights, graphSize
+}