kvcache: create cache ctx per layer

Each cache layer creates and maintains its own context instead of sharing one large context across all layers.
2025-12-10 07:46:59 +00:00 · 2025-02-25 12:57:49 -08:00
parent bfce55db3d
commit 764e199d67
4 changed files with 68 additions and 46 deletions
--- a/ml/backend.go
+++ b/ml/backend.go
@@ -99,7 +99,7 @@ type Context interface {

 	Forward(...Tensor) Context
 	Compute(...Tensor)
-	MaxTensors() int
+	MaxGraphNodes() int
 	Close()
 }