chunked attention

This commit is contained in:
Michael Yang
2025-04-10 18:00:43 -07:00
committed by Michael Yang
parent 470af8ab89
commit 8bf11b84c1
4 changed files with 84 additions and 4 deletions

View File

@@ -52,8 +52,7 @@ func New(c fs.Config) (model.Model, error) {
}
m.Cache = kvcache.NewWrapperCache(
-// TODO: pretend this is chunked attention for now
-kvcache.NewSWACache(8192, m.Shift),
+kvcache.NewChunkedAttentionCache(int32(c.Uint("attention.chunk_size")), m.Shift),
kvcache.NewCausalCache(m.Shift),
)