ollamarunner: Memory usage reporting

This provides granular information about the backend memory allocations
required by the runner:
 - Per backend
 - Per layer
 - Weights, cache and graph
 - Allocation status

This can be used for debugging and validating memory estimates.
This commit is contained in:
Jesse Gross
2025-04-17 11:00:25 -07:00
committed by Jesse Gross
parent 6db8a3771c
commit 73d6a82cce
5 changed files with 224 additions and 78 deletions

View File

@@ -95,10 +95,7 @@ func (m multimodalStore) getTensor(backend ml.Backend, ctx ml.Context, in ml.Ten
}
}
} else {
-err := computeCtx.Reserve()
-if err != nil {
-return nil, err
-}
+computeCtx.Reserve()
}
}