mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 00:07:07 +00:00
update memory calcualtions
count each layer independently when deciding gpu offloading
This commit is contained in:
11
llm/ggml.go
11
llm/ggml.go
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type GGML struct {
|
||||
@@ -12,6 +13,16 @@ type GGML struct {
|
||||
model
|
||||
}
|
||||
|
||||
func (ggml *GGML) LayerSize(prefix string) (n int64) {
|
||||
for _, t := range ggml.Tensors() {
|
||||
if strings.HasPrefix(t.Name, prefix) {
|
||||
n += int64(t.size())
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
const (
|
||||
fileTypeF32 uint32 = iota
|
||||
fileTypeF16
|
||||
|
||||
Reference in New Issue
Block a user