update memory calculations

count each layer independently when deciding gpu offloading
2025-12-11 00:07:07 +00:00 · 2024-03-18 10:45:22 +01:00
parent d338d70492
commit 91b3e4d282
7 changed files with 125 additions and 89 deletions
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -5,6 +5,7 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"strings"
 )

 type GGML struct {
@@ -12,6 +13,16 @@ type GGML struct {
 	model
 }

+func (ggml *GGML) LayerSize(prefix string) (n int64) {
+	for _, t := range ggml.Tensors() {
+		if strings.HasPrefix(t.Name, prefix) {
+			n += int64(t.size())
+		}
+	}
+
+	return
+}
+
 const (
 	fileTypeF32 uint32 = iota
 	fileTypeF16