mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
ollamarunner: Use a separate context per multimodal input
Currently there is a single context per sequence, shared all by all multimodal inputs. Since we build a vision encoder graph per image, with a large number of inputs we can eventually hit the maximum number of graph nodes per context. This changes to use a separate context for each image, ensuring that available resource limits are consistent.
This commit is contained in:
@@ -34,10 +34,14 @@ import (
|
||||
_ "github.com/ollama/ollama/model/models"
|
||||
)
|
||||
|
||||
type contextList struct {
|
||||
list []ml.Context
|
||||
}
|
||||
|
||||
type Sequence struct {
|
||||
// ctx for allocating tensors that last the lifetime of the sequence, such as
|
||||
// ctxs are used for allocating tensors that last the lifetime of the sequence, such as
|
||||
// multimodal embeddings
|
||||
ctx ml.Context
|
||||
ctxs *contextList
|
||||
|
||||
// batch index
|
||||
iBatch int
|
||||
@@ -99,9 +103,8 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
|
||||
s.ready.Wait()
|
||||
|
||||
startTime := time.Now()
|
||||
ctx := s.model.Backend().NewContext()
|
||||
|
||||
inputs, err := s.inputs(ctx, prompt, images)
|
||||
inputs, ctxs, err := s.inputs(prompt, images)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to process inputs: %w", err)
|
||||
} else if len(inputs) == 0 {
|
||||
@@ -127,7 +130,7 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
|
||||
// TODO(jessegross): Ingest cached history for grammar
|
||||
|
||||
return &Sequence{
|
||||
ctx: ctx,
|
||||
ctxs: ctxs,
|
||||
inputs: inputs,
|
||||
numPromptInputs: len(inputs),
|
||||
startProcessingTime: startTime,
|
||||
@@ -146,7 +149,7 @@ func (s *Server) NewSequence(prompt string, images []llm.ImageData, params NewSe
|
||||
// inputs processes the prompt and images into a list of inputs
|
||||
// by splitting the prompt on [img-<n>] tags, tokenizing text and
|
||||
// decoding images
|
||||
func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) ([]input.Input, error) {
|
||||
func (s *Server) inputs(prompt string, images []llm.ImageData) ([]input.Input, *contextList, error) {
|
||||
var inputs []input.Input
|
||||
var parts []string
|
||||
var matches [][]string
|
||||
@@ -161,12 +164,19 @@ func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) (
|
||||
parts = []string{prompt}
|
||||
}
|
||||
|
||||
var contexts contextList
|
||||
runtime.AddCleanup(&contexts, func(ctxs []ml.Context) {
|
||||
for _, ctx := range ctxs {
|
||||
ctx.Close()
|
||||
}
|
||||
}, contexts.list)
|
||||
|
||||
postTokenize := false
|
||||
for i, part := range parts {
|
||||
// text - tokenize
|
||||
tokens, err := s.model.(model.TextProcessor).Encode(part, i == 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
for _, t := range tokens {
|
||||
@@ -186,12 +196,14 @@ func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) (
|
||||
}
|
||||
|
||||
if imageIndex < 0 {
|
||||
return nil, fmt.Errorf("invalid image index: %d", n)
|
||||
return nil, nil, fmt.Errorf("invalid image index: %d", n)
|
||||
}
|
||||
|
||||
ctx := s.model.Backend().NewContext()
|
||||
contexts.list = append(contexts.list, ctx)
|
||||
imageEmbeddings, err := multimodalProcessor.EncodeMultimodal(ctx, images[imageIndex].Data)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
|
||||
s.multimodalHash.Reset()
|
||||
@@ -205,13 +217,13 @@ func (s *Server) inputs(ctx ml.Context, prompt string, images []llm.ImageData) (
|
||||
|
||||
if visionModel && postTokenize {
|
||||
var err error
|
||||
inputs, err = multimodalProcessor.PostTokenize(ctx, inputs)
|
||||
inputs, err = multimodalProcessor.PostTokenize(inputs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, nil, err
|
||||
}
|
||||
}
|
||||
|
||||
return inputs, nil
|
||||
return inputs, &contexts, nil
|
||||
}
|
||||
|
||||
type Server struct {
|
||||
@@ -306,7 +318,6 @@ func (s *Server) removeSequence(seqIndex int, reason string) {
|
||||
close(seq.responses)
|
||||
close(seq.embedding)
|
||||
seq.cache.InUse = false
|
||||
seq.ctx.Close()
|
||||
s.seqs[seqIndex] = nil
|
||||
s.seqsSem.Release(1)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user