update memory calculations

count each layer independently when deciding GPU offloading
This commit is contained in:
Michael Yang
2024-03-18 10:45:22 +01:00
parent d338d70492
commit 91b3e4d282
7 changed files with 125 additions and 89 deletions

View File

@@ -5,6 +5,7 @@ import (
"errors"
"fmt"
"io"
"strings"
)
type GGML struct {
@@ -12,6 +13,16 @@ type GGML struct {
model
}
// LayerSize reports the combined size, in bytes, of every tensor whose
// name begins with prefix — presumably the tensors making up a single
// model layer (TODO confirm against callers).
func (ggml *GGML) LayerSize(prefix string) int64 {
	var total int64
	for _, tensor := range ggml.Tensors() {
		if !strings.HasPrefix(tensor.Name, prefix) {
			continue
		}
		total += int64(tensor.size())
	}
	return total
}
const (
fileTypeF32 uint32 = iota
fileTypeF16