Allow models to force a new batch

This is useful for a few things:
 - Work around bugs, such as having 2 images in one batch
 - Keep the image in a single batch for fully connected attention
 - Improve performance by not evaluating embeddings multiple times
This commit is contained in:
Jesse Gross
2025-03-10 20:03:29 -07:00
committed by Michael Yang
parent a8e83a7654
commit 06007c0a18
4 changed files with 10 additions and 14 deletions

View File

@@ -112,8 +112,8 @@ func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Inpu
result = append(result, inp)
} else {
imageInputs := []input.Input{
{Token: 108}, // "\n\n"
{Token: 255999}, // "<start_of_image>"
{Token: 108}, // "\n\n"
{Token: 255999, BatchBreak: true}, // "<start_of_image>"
}
result = append(result, imageInputs...)