ollamarunner: Use a separate context per multimodal input

Currently there is a single context per sequence, shared by all
multimodal inputs. Since we build a vision encoder graph per
image, with a large number of inputs we can eventually hit the
maximum number of graph nodes per context.

This changes to use a separate context for each image, ensuring
that available resource limits stay consistent regardless of how
many images a sequence contains.
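
To illustrate the pattern the commit moves to, here is a minimal, self-contained Go sketch. The types and names (Backend, NewContext, Close, the node counting) are hypothetical stand-ins inferred from the commit message, not Ollama's actual ml package API:

	package main

	import "fmt"

	// Hypothetical stand-ins for the runner's ml types; the names and
	// shapes are assumptions for illustration, not Ollama's actual API.
	type Context struct{ nodes int }

	func (c *Context) Close() { c.nodes = 0 }

	type Backend struct{}

	func (b *Backend) NewContext() *Context { return &Context{} }

	// encodeImage stands in for the vision encoder: building the
	// encoder graph for one image consumes graph nodes in its context.
	func encodeImage(ctx *Context, img []byte) ([]float32, error) {
		ctx.nodes += 2048 // one full vision encoder graph per image
		return make([]float32, 4), nil
	}

	func main() {
		backend := &Backend{}
		images := [][]byte{{1}, {2}, {3}}

		// Before this commit: one context shared by the whole sequence,
		// so nodes accumulated across images and could exceed the
		// per-context maximum. After: a fresh context per image keeps
		// node usage bounded regardless of image count.
		for i, img := range images {
			ctx := backend.NewContext()
			if _, err := encodeImage(ctx, img); err != nil {
				ctx.Close()
				panic(err)
			}
			fmt.Printf("image %d: %d graph nodes in its own context\n", i, ctx.nodes)
			ctx.Close() // release before encoding the next image
		}
	}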
Author: Jesse Gross
Date: 2025-03-13 20:32:50 -07:00
Committed by: Jesse Gross
Parent: 9679f40146
Commit: 282bfaaa95
4 changed files with 33 additions and 19 deletions

@@ -111,7 +111,7 @@ func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error) {
 	return visionOutputs, nil
 }
 
-func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Input, error) {
+func (m *Model) PostTokenize(inputs []input.Input) ([]input.Input, error) {
 	var result []input.Input
 
 	for _, inp := range inputs {
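
As a follow-up to the hunk above, a small hypothetical sketch of what the signature change means for callers: PostTokenize now receives only the inputs slice, presumably because rearranging inputs no longer requires a graph-building context. The Input type and function body below are simplified stand-ins, not the real implementation:

	package main

	import "fmt"

	// Simplified stand-in for input.Input; the real type lives in
	// Ollama's input package.
	type Input struct {
		Token      int32
		Multimodal any
	}

	// PostTokenize mirrors the new signature from the diff: no
	// ml.Context parameter, just input rearrangement. The body is an
	// assumption for illustration.
	func PostTokenize(inputs []Input) ([]Input, error) {
		result := make([]Input, 0, len(inputs))
		result = append(result, inputs...)
		return result, nil
	}

	func main() {
		// Call sites drop the context argument:
		// was: model.PostTokenize(ctx, inputs)
		out, err := PostTokenize([]Input{{Token: 7}, {Multimodal: "image"}})
		if err != nil {
			panic(err)
		}
		fmt.Printf("%d inputs after post-tokenize\n", len(out))
	}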