imageproc mllama refactor (#7537)

Refactor the mllama image processing code, and add image processing for pixtral and qwen2vl.
2025-12-11 16:26:59 +00:00 · 2024-12-14 19:50:15 -08:00
parent b75ccfc5ec
commit 8c9fb8eb73
10 changed files with 828 additions and 125 deletions
--- a/server/prompt.go
+++ b/server/prompt.go
@@ -11,7 +11,7 @@ import (

 	"github.com/ollama/ollama/api"
 	"github.com/ollama/ollama/llm"
-	"github.com/ollama/ollama/server/imageproc"
+	"github.com/ollama/ollama/model/mllama"
 	"github.com/ollama/ollama/template"
 )

@@ -92,7 +92,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 			var imgData llm.ImageData

 			if isMllama {
-				data, aspectRatioID, err := imageproc.Preprocess(i)
+				data, opts, err := mllama.Preprocess(bytes.NewReader(i))
 				if err != nil {
 					return "", nil, err
 				}
@@ -103,10 +103,15 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
 					return "", nil, err
 				}

+				ar, ok := opts["aspectRatioIndex"].(int)
+				if !ok {
+					return "", nil, fmt.Errorf("missing aspect ratio for image")
+				}
+
 				imgData = llm.ImageData{
 					ID:            len(images),
 					Data:          buf.Bytes(),
-					AspectRatioID: aspectRatioID,
+					AspectRatioID: ar,
 				}
 				imgPrompt = "<|image|>"
 			} else {