mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-12 08:47:01 +00:00
runner.go: Better abstract vision model integration
- Update mllama to take the cross attention state as embeddings in a batch, more similar to how Llava handles it. This improves integration with the input cache.
- Pass locations in a prompt for embeddings using tags similar to Llava.
- Abstract the interface to vision models so the main runner accesses Clip and Mllama similarly.

Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
@@ -2,7 +2,6 @@ package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"hash/maphash"
|
||||
"log/slog"
|
||||
"reflect"
|
||||
"time"
|
||||
@@ -20,10 +19,6 @@ type InputCache struct {
|
||||
// optimize cache eviction for multiple users
|
||||
multiUserCache bool
|
||||
|
||||
// cache of images to embeddings
|
||||
images []imageCache
|
||||
imageHash maphash.Hash
|
||||
|
||||
lc *llama.Context
|
||||
}
|
||||
|
||||
@@ -41,7 +36,6 @@ func NewInputCache(lc *llama.Context, kvSize int, numSlots int, multiUserCache b
|
||||
numCtx: kvSize / numSlots,
|
||||
slots: slots,
|
||||
multiUserCache: multiUserCache,
|
||||
images: make([]imageCache, numSlots),
|
||||
lc: lc,
|
||||
}
|
||||
}
|
||||
@@ -211,55 +205,3 @@ func (c *InputCache) ShiftCacheSlot(slot *InputCacheSlot, numKeep int, numDiscar
|
||||
}
|
||||
slot.Inputs = slot.Inputs[:len(slot.Inputs)-numDiscard]
|
||||
}
|
||||
|
||||
// Locking: Lookup and store operations on imageCache require a lock
|
||||
// to be held that serializes these with each other. Hash does not
|
||||
// require a lock, nor do they need to be serialized with InputCacheSlot.
|
||||
|
||||
// imageCache is one entry of the image embedding cache: the embeddings
// computed for an image, stored under that image's hash.
type imageCache struct {
	// key is the image hash the entry is stored under.
	key uint64

	// val holds the cached embeddings for the image.
	val [][]float32

	// lastUsed is refreshed whenever the entry is stored or looked up; the
	// entry with the oldest lastUsed is the one replaced when a new image
	// is added.
	lastUsed time.Time
}
|
||||
|
||||
func (c *InputCache) HashImage(image []byte) uint64 {
|
||||
c.imageHash.Reset()
|
||||
_, _ = c.imageHash.Write(image)
|
||||
return c.imageHash.Sum64()
|
||||
}
|
||||
|
||||
var ErrImageNotFound = errors.New("image not found in cache")
|
||||
|
||||
func (c *InputCache) FindImage(hash uint64) ([][]float32, error) {
|
||||
for i := range c.images {
|
||||
if c.images[i].key == hash {
|
||||
slog.Debug("loading image embeddings from cache", "entry", i)
|
||||
c.images[i].lastUsed = time.Now()
|
||||
return c.images[i].val, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, ErrImageNotFound
|
||||
}
|
||||
|
||||
func (c *InputCache) AddImage(hash uint64, embed [][]float32) {
|
||||
best := time.Now()
|
||||
var bestImage int
|
||||
|
||||
for i := range c.images {
|
||||
if c.images[i].key == hash {
|
||||
bestImage = i
|
||||
break
|
||||
}
|
||||
|
||||
if c.images[i].lastUsed.Compare(best) < 0 {
|
||||
best = c.images[i].lastUsed
|
||||
bestImage = i
|
||||
}
|
||||
}
|
||||
|
||||
slog.Debug("storing image embeddings in cache", "entry", bestImage, "used", c.images[bestImage].lastUsed)
|
||||
c.images[bestImage].key = hash
|
||||
c.images[bestImage].val = embed
|
||||
c.images[bestImage].lastUsed = time.Now()
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user