fix(mllama): sync backend between batches

2025-12-11 16:26:59 +00:00 · 2024-11-13 14:12:30 -08:00
parent c2e8cbaa14
commit 5b3393b6a2
2 changed files with 11 additions and 0 deletions
--- a/llama/runner/runner.go
+++ b/llama/runner/runner.go
@@ -427,6 +427,13 @@ func (s *Server) processBatch(tokenBatch *llama.Batch, embedBatch *llama.Batch)
 		return
 	}

+	if crossAttention {
+		// Synchronize state to ensure the cross attention batch is complete.
+		// This is needed specifically for multi-GPU systems; otherwise an
+		// in-flight task may be incorrectly invalidated, causing a crash.
+		s.lc.Synchronize()
+	}
+
 	for i, seq := range s.seqs {
 		if seq == nil {
 			continue