imageproc mllama refactor (#7537)

Refactor the mllama image processing code and add image processing for pixtral and qwen2vl.
This commit is contained in:
Patrick Devine
2024-12-14 19:50:15 -08:00
committed by GitHub
parent b75ccfc5ec
commit 8c9fb8eb73
10 changed files with 828 additions and 125 deletions

View File

@@ -11,7 +11,7 @@ import (
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/llm"
"github.com/ollama/ollama/server/imageproc"
"github.com/ollama/ollama/model/mllama"
"github.com/ollama/ollama/template"
)
@@ -92,7 +92,7 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
var imgData llm.ImageData
if isMllama {
data, aspectRatioID, err := imageproc.Preprocess(i)
data, opts, err := mllama.Preprocess(bytes.NewReader(i))
if err != nil {
return "", nil, err
}
@@ -103,10 +103,15 @@ func chatPrompt(ctx context.Context, m *Model, tokenize tokenizeFunc, opts *api.
return "", nil, err
}
ar, ok := opts["aspectRatioIndex"].(int)
if !ok {
return "", nil, fmt.Errorf("missing aspect ratio for image")
}
imgData = llm.ImageData{
ID: len(images),
Data: buf.Bytes(),
AspectRatioID: aspectRatioID,
AspectRatioID: ar,
}
imgPrompt = "<|image|>"
} else {