imageproc mllama refactor (#7537)
Refactor mllama image processing code, and add pixtral and qwen2vl
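The hunks below move image preprocessing off the generic server/imageproc package and onto the per-model mllama package, which hands back an options map instead of a bare aspect-ratio value. As a rough sketch of the shape implied by the call sites in this diff (the type and helper names below are illustrative, not taken from the ollama packages; the []float32 pixel type and map[string]any options type are assumptions):

// Sketch only: inferred from the call sites in this diff, not copied from
// the mllama/pixtral/qwen2vl packages. Assumes pixel data is []float32 and
// the options map is map[string]any.
package imagesketch

import (
	"bytes"
	"encoding/binary"
	"io"
)

// PreprocessFunc mirrors the inferred shape of mllama.Preprocess: read an
// encoded image and return flattened pixel values plus model-specific
// options (mllama reports an "aspectRatioIndex" int through the map).
type PreprocessFunc func(r io.Reader) ([]float32, map[string]any, error)

// PackPixels serializes float32 pixel data into the little-endian byte
// layout the handler stores in llm.ImageData.Data.
func PackPixels(data []float32) ([]byte, error) {
	buf := new(bytes.Buffer)
	if err := binary.Write(buf, binary.LittleEndian, data); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

With pixtral and qwen2vl added alongside mllama, a shape like this lets each model package expose its own Preprocess and report model-specific metadata through the options map rather than through a fixed extra return value.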
@@ -31,10 +31,10 @@ import (
 	"github.com/ollama/ollama/discover"
 	"github.com/ollama/ollama/envconfig"
 	"github.com/ollama/ollama/llm"
+	"github.com/ollama/ollama/model/mllama"
 	"github.com/ollama/ollama/openai"
 	"github.com/ollama/ollama/parser"
 	"github.com/ollama/ollama/runners"
-	"github.com/ollama/ollama/server/imageproc"
 	"github.com/ollama/ollama/template"
 	"github.com/ollama/ollama/types/errtypes"
 	"github.com/ollama/ollama/types/model"
@@ -205,12 +205,18 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 	images := make([]llm.ImageData, len(req.Images))
 	for i := range req.Images {
 		if isMllama {
-			data, aspectRatioID, err := imageproc.Preprocess(req.Images[i])
+			data, opts, err := mllama.Preprocess(bytes.NewReader(req.Images[i]))
 			if err != nil {
 				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
 				return
 			}
+
+			ar, ok := opts["aspectRatioIndex"].(int)
+			if !ok {
+				c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": "error processing image"})
+				return
+			}
 
 			buf := new(bytes.Buffer)
 			err = binary.Write(buf, binary.LittleEndian, data)
 			if err != nil {
@@ -218,7 +224,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
 				return
 			}
 
-			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: aspectRatioID}
+			images[i] = llm.ImageData{ID: i, Data: buf.Bytes(), AspectRatioID: ar}
 		} else {
 			images[i] = llm.ImageData{ID: i, Data: req.Images[i]}
 		}
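Pulled out of the handler, the new mllama path amounts to the helper sketched below. It is a minimal sketch assuming only what the hunks above show: mllama.Preprocess takes an io.Reader and returns pixel data, an options map carrying "aspectRatioIndex" as an int, and an error. The helper name is hypothetical and the snippet assumes the same imports as routes.go (bytes, encoding/binary, fmt, llm, mllama).

// mllamaImageData is a hypothetical stand-alone version of the mllama branch
// in GenerateHandler above; it is not part of the ollama codebase.
func mllamaImageData(id int, raw []byte) (llm.ImageData, error) {
	data, opts, err := mllama.Preprocess(bytes.NewReader(raw))
	if err != nil {
		return llm.ImageData{}, fmt.Errorf("preprocess image: %w", err)
	}

	// mllama reports which supported aspect ratio the image was fitted to;
	// the runner needs that index alongside the pixel data.
	ar, ok := opts["aspectRatioIndex"].(int)
	if !ok {
		return llm.ImageData{}, fmt.Errorf("missing aspectRatioIndex in preprocess options")
	}

	// Serialize the pixel values little-endian, matching what the handler
	// places in ImageData.Data.
	buf := new(bytes.Buffer)
	if err := binary.Write(buf, binary.LittleEndian, data); err != nil {
		return llm.ImageData{}, fmt.Errorf("encode image data: %w", err)
	}

	return llm.ImageData{ID: id, Data: buf.Bytes(), AspectRatioID: ar}, nil
}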