mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
kvcache: create cache ctx per layer
Each cache layer creates and maintains its own context instead of using one large shared context for all layers.
This commit is contained in:
@@ -99,7 +99,7 @@ type Context interface {
|
||||
|
||||
Forward(...Tensor) Context
|
||||
Compute(...Tensor)
|
||||
MaxTensors() int
|
||||
MaxGraphNodes() int
|
||||
Close()
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user