Allow models to force a new batch

This is useful for a few things:
- Work around bugs, such as having 2 images in one batch
- Keep the image in a single batch for fully connected attention
- Improve performance by not evaluating embeddings multiple times
2025-12-10 15:57:04 +00:00 · 2025-03-10 20:03:29 -07:00
parent a8e83a7654
commit 06007c0a18
4 changed files with 10 additions and 14 deletions
--- a/model/models/gemma3/model.go
+++ b/model/models/gemma3/model.go
@@ -112,8 +112,8 @@ func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Inpu
 			result = append(result, inp)
 		} else {
 			imageInputs := []input.Input{
-				{Token: 108},    // "\n\n"
-				{Token: 255999}, // "<start_of_image>""
+				{Token: 108},                      // "\n\n"
+				{Token: 255999, BatchBreak: true}, // "<start_of_image>""
 			}
 			result = append(result, imageInputs...)