chunked attention

2025-12-09 23:37:06 +00:00 · 2025-04-10 18:00:43 -07:00
parent 470af8ab89
commit 8bf11b84c1
4 changed files with 84 additions and 4 deletions
--- a/model/models/llama4/model.go
+++ b/model/models/llama4/model.go
@@ -52,8 +52,7 @@ func New(c fs.Config) (model.Model, error) {
 	}

 	m.Cache = kvcache.NewWrapperCache(
-		// TODO: pretend this is chunked attention for now
-		kvcache.NewSWACache(8192, m.Shift),
+		kvcache.NewChunkedAttentionCache(int32(c.Uint("attention.chunk_size")), m.Shift),
 		kvcache.NewCausalCache(m.Shift),
 	)