From bd68d3ae50c67ba46ee94a584fa6d0386e4b8522 Mon Sep 17 00:00:00 2001
From: Bruce MacDonald
Date: Wed, 14 May 2025 16:42:30 -0700
Subject: [PATCH] ggml: update qwen25vl vision size estimate (#10711)

---
 fs/ggml/ggml.go | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

diff --git a/fs/ggml/ggml.go b/fs/ggml/ggml.go
index 514b6011..8c0a2ae5 100644
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -6,7 +6,6 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
-	"math"
 	"slices"
 	"strings"
 
@@ -653,24 +652,15 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			numPatches*numPatches*headCount)
 	case "qwen25vl":
 		maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
-		mergeSize := uint64(llm.KV().Uint("vision.spatial_merge_size", 2))
-		temporalPatchSize := uint64(2)
 
-		// Calculate max possible patches based on max_pixels
-		maxHeight := uint64(math.Sqrt(float64(maxPixels)))
-		maxWidth := maxPixels / maxHeight
-		maxGridHeight := maxHeight / patchSize
-		maxGridWidth := maxWidth / patchSize
-		// Account for merged patches (2x2 grid)
-		numPatches := (maxGridHeight * maxGridWidth) / (mergeSize * mergeSize)
+		numPatches := maxPixels / (patchSize * patchSize)
 
-		// Calculate graph size based on typical operations in ProcessImage and createPatches
 		graphSize = 4 * (maxPixels*numChannels + // Original image storage
 			// Normalized pixels
 			maxPixels*numChannels +
-			// Patches storage (numPatches * channels * temporalPatchSize * patchSize^2)
-			numPatches*numChannels*temporalPatchSize*patchSize*patchSize +
-			// Self-attention calculations (similar to other architectures)
+			// Patches storage (numPatches * channels * patchSize^2)
+			numPatches*numChannels*patchSize*patchSize +
+			// Self-attention calculations
 			numPatches*numPatches*headCount +
 			// Additional buffer for processing
 			embeddingLength*numPatches)