mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
imageproc mllama refactor (#7537)
Refactor mllama image processing code, and add pixtral and qwen2vl
This commit is contained in:
111
model/imageproc/images.go
Normal file
111
model/imageproc/images.go
Normal file
@@ -0,0 +1,111 @@
|
||||
package imageproc
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
|
||||
"golang.org/x/image/draw"
|
||||
)
|
||||
|
||||
// Per-channel mean and standard deviation presets that can be passed to
// Normalize. Each array holds one entry per color channel in R, G, B order.
var (
	// ImageNet "default" preset.
	ImageNetDefaultMean = [3]float32{0.485, 0.456, 0.406}
	ImageNetDefaultSTD  = [3]float32{0.229, 0.224, 0.225}

	// ImageNet "standard" preset: 0.5 for every channel.
	ImageNetStandardMean = [3]float32{0.5, 0.5, 0.5}
	ImageNetStandardSTD  = [3]float32{0.5, 0.5, 0.5}

	// CLIP preset.
	ClipDefaultMean = [3]float32{0.48145466, 0.4578275, 0.40821073}
	ClipDefaultSTD  = [3]float32{0.26862954, 0.26130258, 0.27577711}
)
|
||||
|
||||
// Interpolation methods accepted by Resize.
const (
	// ResizeBilinear selects draw.BiLinear.
	ResizeBilinear = iota
	// ResizeNearestNeighbor selects draw.NearestNeighbor.
	ResizeNearestNeighbor
	// ResizeApproxBilinear selects draw.ApproxBiLinear.
	ResizeApproxBilinear
	// ResizeCatmullrom selects draw.CatmullRom.
	ResizeCatmullrom
)
|
||||
|
||||
// Composite returns an image with the alpha channel removed by drawing over a white background.
|
||||
func Composite(img image.Image) image.Image {
|
||||
dst := image.NewRGBA(img.Bounds())
|
||||
|
||||
white := color.RGBA{255, 255, 255, 255}
|
||||
draw.Draw(dst, dst.Bounds(), &image.Uniform{white}, image.Point{}, draw.Src)
|
||||
draw.Draw(dst, dst.Bounds(), img, img.Bounds().Min, draw.Over)
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// Resize returns an image which has been scaled to a new size.
|
||||
func Resize(img image.Image, newSize image.Point, method int) image.Image {
|
||||
dst := image.NewRGBA(image.Rect(0, 0, newSize.X, newSize.Y))
|
||||
|
||||
kernels := map[int]draw.Interpolator{
|
||||
ResizeBilinear: draw.BiLinear,
|
||||
ResizeNearestNeighbor: draw.NearestNeighbor,
|
||||
ResizeApproxBilinear: draw.ApproxBiLinear,
|
||||
ResizeCatmullrom: draw.CatmullRom,
|
||||
}
|
||||
|
||||
kernel, ok := kernels[method]
|
||||
if !ok {
|
||||
panic("no resizing method found")
|
||||
}
|
||||
|
||||
kernel.Scale(dst, dst.Rect, img, img.Bounds(), draw.Over, nil)
|
||||
|
||||
return dst
|
||||
}
|
||||
|
||||
// Normalize converts img into a flat slice of float32 channel values, each
// computed as (value - mean[c]) / std[c] with c in R, G, B order. The alpha
// channel is discarded.
//
// When rescale is true, each channel is first reduced to 8 bits and divided
// by 255, so it lies in [0, 1] before mean and std are applied.
//
// NOTE(review): when rescale is false the pixel data is not used at all.
// Every channel starts from 0, so each output value is (0 - mean[c]) / std[c]
// regardless of the image contents. This looks unintended (presumably the
// unscaled 0-255 values were meant), but the behavior is kept unchanged
// because existing callers and tests may depend on it; confirm before fixing.
//
// When channelFirst is true the result is planar: every R value in row-major
// pixel order, followed by every G value, followed by every B value.
// Otherwise the result is interleaved as R, G, B per pixel in row-major order.
//
// Normalize returns nil for an image whose bounds are empty.
func Normalize(img image.Image, mean, std [3]float32, rescale bool, channelFirst bool) []float32 {
	bounds := img.Bounds()
	if bounds.Empty() {
		return nil
	}

	// The output length is known up front, so allocate it once rather than
	// growing several slices with append.
	numPixels := bounds.Dx() * bounds.Dy()
	pixelVals := make([]float32, 3*numPixels)

	idx := 0 // row-major index of the pixel being processed
	for y := bounds.Min.Y; y < bounds.Max.Y; y++ {
		for x := bounds.Min.X; x < bounds.Max.X; x++ {
			r, g, b, _ := img.At(x, y).RGBA()

			var vals [3]float32
			if rescale {
				// RGBA yields 16-bit channels; keep the high 8 bits.
				vals[0] = float32(r>>8) / 255.0
				vals[1] = float32(g>>8) / 255.0
				vals[2] = float32(b>>8) / 255.0
			}

			for c := range vals {
				v := (vals[c] - mean[c]) / std[c]
				if channelFirst {
					// Planar layout: channel c owns its own numPixels-long run.
					pixelVals[c*numPixels+idx] = v
				} else {
					// Interleaved layout: three consecutive values per pixel.
					pixelVals[3*idx+c] = v
				}
			}
			idx++
		}
	}

	return pixelVals
}
|
||||
177
model/imageproc/images_test.go
Normal file
177
model/imageproc/images_test.go
Normal file
@@ -0,0 +1,177 @@
|
||||
package imageproc
|
||||
|
||||
import (
|
||||
"image"
|
||||
"image/color"
|
||||
"image/draw"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// createImage returns a width x height RGBA image anchored at the origin with
// every pixel set to fillCol.
func createImage(width, height int, fillCol color.RGBA) image.Image {
	canvas := image.NewRGBA(image.Rect(0, 0, width, height))
	fill := image.NewUniform(fillCol)
	draw.Draw(canvas, canvas.Rect, fill, image.Point{}, draw.Src)
	return canvas
}
|
||||
|
||||
func TestComposite(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
img image.Image
|
||||
expectedRGBA color.RGBA
|
||||
}{
|
||||
{
|
||||
name: "Transparent image",
|
||||
img: createImage(5, 5, color.RGBA{0, 0, 0, 0}),
|
||||
expectedRGBA: color.RGBA{255, 255, 255, 255},
|
||||
},
|
||||
{
|
||||
name: "Solid red image",
|
||||
img: createImage(5, 5, color.RGBA{255, 0, 0, 255}),
|
||||
expectedRGBA: color.RGBA{255, 0, 0, 255},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
resultImg := Composite(tt.img)
|
||||
|
||||
// Check the pixel values in the resulting image
|
||||
for x := range resultImg.Bounds().Dx() {
|
||||
for y := range resultImg.Bounds().Dy() {
|
||||
r, g, b, a := resultImg.At(x, y).RGBA()
|
||||
expectedR, expectedG, expectedB, expectedA := tt.expectedRGBA.RGBA()
|
||||
|
||||
if r != expectedR || g != expectedG || b != expectedB || a != expectedA {
|
||||
t.Errorf("Pixel mismatch at (%d, %d): got (%d, %d, %d, %d), want (%d, %d, %d, %d)",
|
||||
x, y, r, g, b, a, expectedR, expectedG, expectedB, expectedA)
|
||||
}
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResize(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
img image.Image
|
||||
newSize image.Point
|
||||
method int
|
||||
expected image.Point
|
||||
}{
|
||||
{
|
||||
name: "Resize with bilinear interpolation",
|
||||
img: createImage(5, 5, color.RGBA{255, 0, 0, 255}),
|
||||
newSize: image.Point{10, 10},
|
||||
method: ResizeBilinear,
|
||||
expected: image.Point{10, 10},
|
||||
},
|
||||
{
|
||||
name: "Resize with nearest neighbor",
|
||||
img: createImage(10, 10, color.RGBA{0, 255, 0, 255}),
|
||||
newSize: image.Point{5, 5},
|
||||
method: ResizeNearestNeighbor,
|
||||
expected: image.Point{5, 5},
|
||||
},
|
||||
{
|
||||
name: "Resize with catmullrom",
|
||||
img: createImage(1024, 1024, color.RGBA{0, 0, 255, 255}),
|
||||
newSize: image.Point{10, 10},
|
||||
method: ResizeCatmullrom,
|
||||
expected: image.Point{10, 10},
|
||||
},
|
||||
{
|
||||
name: "Resize with approx bilinear",
|
||||
img: createImage(1024, 768, color.RGBA{100, 100, 100, 255}),
|
||||
newSize: image.Point{4, 3},
|
||||
method: ResizeApproxBilinear,
|
||||
expected: image.Point{4, 3},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
resizedImg := Resize(tt.img, tt.newSize, tt.method)
|
||||
|
||||
if resizedImg.Bounds().Dx() != tt.expected.X || resizedImg.Bounds().Dy() != tt.expected.Y {
|
||||
t.Errorf("Unexpected size for resized image: got (%d, %d), want (%d, %d)",
|
||||
resizedImg.Bounds().Dx(), resizedImg.Bounds().Dy(), tt.expected.X, tt.expected.Y)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestResizeInvalidMethod(t *testing.T) {
|
||||
defer func() {
|
||||
if r := recover(); r == nil {
|
||||
t.Errorf("Expected panic for invalid resizing method, but did not panic")
|
||||
}
|
||||
}()
|
||||
|
||||
img := createImage(10, 10, color.RGBA{0, 0, 0, 255})
|
||||
Resize(img, image.Point{5, 5}, -1)
|
||||
}
|
||||
|
||||
func TestNormalize(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
img image.Image
|
||||
mean [3]float32
|
||||
std [3]float32
|
||||
rescale bool
|
||||
channelFirst bool
|
||||
expected []float32
|
||||
}{
|
||||
{
|
||||
name: "Rescale with channel first",
|
||||
img: createImage(2, 2, color.RGBA{128, 128, 128, 255}),
|
||||
mean: ImageNetStandardMean,
|
||||
std: ImageNetStandardSTD,
|
||||
rescale: true,
|
||||
channelFirst: true,
|
||||
expected: []float32{
|
||||
0.003921628, 0.003921628, 0.003921628, 0.003921628, // R values
|
||||
0.003921628, 0.003921628, 0.003921628, 0.003921628, // G values
|
||||
0.003921628, 0.003921628, 0.003921628, 0.003921628, // B values
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "Rescale without channel first",
|
||||
img: createImage(2, 2, color.RGBA{255, 0, 0, 255}),
|
||||
mean: [3]float32{0.0, 0.0, 0.0},
|
||||
std: [3]float32{1.0, 1.0, 1.0},
|
||||
rescale: true,
|
||||
channelFirst: false,
|
||||
expected: []float32{
|
||||
1.0, 0.0, 0.0,
|
||||
1.0, 0.0, 0.0,
|
||||
1.0, 0.0, 0.0,
|
||||
1.0, 0.0, 0.0,
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "No rescale with mean/std adjustment",
|
||||
img: createImage(2, 2, color.RGBA{100, 150, 200, 255}),
|
||||
mean: ClipDefaultMean,
|
||||
std: ClipDefaultSTD,
|
||||
rescale: false,
|
||||
channelFirst: false,
|
||||
expected: []float32{
|
||||
-1.7922626, -1.7520971, -1.4802198,
|
||||
-1.7922626, -1.7520971, -1.4802198,
|
||||
-1.7922626, -1.7520971, -1.4802198,
|
||||
-1.7922626, -1.7520971, -1.4802198,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := Normalize(tt.img, tt.mean, tt.std, tt.rescale, tt.channelFirst)
|
||||
|
||||
if !reflect.DeepEqual(result, tt.expected) {
|
||||
t.Errorf("Test %s failed: got %v, want %v", tt.name, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user