kvcache: create cache ctx per layer

Each cache layer creates and maintains its own context instead of using
a single large context shared by all layers.
This commit is contained in:
Michael Yang
2025-02-25 12:57:49 -08:00
parent bfce55db3d
commit 764e199d67
4 changed files with 68 additions and 46 deletions

View File

@@ -99,7 +99,7 @@ type Context interface {
Forward(...Tensor) Context
Compute(...Tensor)
MaxTensors() int
MaxGraphNodes() int
Close()
}