diff --git a/llm/memory.go b/llm/memory.go index e9ed1738..05b3b2fd 100644 --- a/llm/memory.go +++ b/llm/memory.go @@ -1,12 +1,9 @@ package llm import ( - "cmp" "fmt" "log/slog" - "maps" "os" - "slices" "strconv" "strings" @@ -125,10 +122,12 @@ func EstimateGPULayers(gpus []discover.GpuInfo, f *ggml.GGML, projectors []strin } layers := f.Tensors().GroupLayers() - // add one layer (chosing the max layer) worth of memory as a buffer - layerSize = slices.MaxFunc(slices.Collect(maps.Values(layers)), func(a, b ggml.Layer) int { - return cmp.Compare(a.Size(), b.Size()) - }).Size() + // add one layer worth of memory as a buffer + if blk0, ok := layers["blk.0"]; ok { + layerSize = blk0.Size() + } else { + slog.Warn("model missing blk.0 layer size") + } var kvct string if envconfig.FlashAttention() &&