mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-09 23:37:06 +00:00
chunked attention
This commit is contained in:
committed by
Michael Yang
parent
470af8ab89
commit
8bf11b84c1
@@ -52,8 +52,7 @@ func New(c fs.Config) (model.Model, error) {
 	}
 
 	m.Cache = kvcache.NewWrapperCache(
-		// TODO: pretend this is chunked attention for now
-		kvcache.NewSWACache(8192, m.Shift),
+		kvcache.NewChunkedAttentionCache(int32(c.Uint("attention.chunk_size")), m.Shift),
 		kvcache.NewCausalCache(m.Shift),
 	)
 
Reference in New Issue
Block a user