ollamarunner: Memory usage reporting

This provides granular information about the backend memory allocations required by the runner: - Per backend - Per layer - Weights, cache and graph - Allocation status This can be used for debugging and validating memory estimates.
2025-12-11 16:26:59 +00:00 · 2025-04-17 11:00:25 -07:00
parent 6db8a3771c
commit 73d6a82cce
5 changed files with 224 additions and 78 deletions
--- a/runner/ollamarunner/multimodal.go
+++ b/runner/ollamarunner/multimodal.go
@@ -95,10 +95,7 @@ func (m multimodalStore) getTensor(backend ml.Backend, ctx ml.Context, in ml.Ten
 				}
 			}
 		} else {
-			err := computeCtx.Reserve()
-			if err != nil {
-				return nil, err
-			}
+			computeCtx.Reserve()
 		}
 	}