Offload layers to GPU based on new model size estimates (#1850)

* select layers based on estimated model memory usage

* always account for scratch vram

* dont load +1 layers

* better estmation for graph alloc

* Update gpu/gpu_darwin.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update llm/llm.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* Update llm/llm.go

* add overhead for cuda memory

* Update llm/llm.go

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>

* fix build error on linux

* address comments

---------

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
This commit is contained in:
Jeffrey Morgan
2024-01-08 16:42:00 -05:00
committed by GitHub
parent 7e8f7c8358
commit 08f1e18965
10 changed files with 161 additions and 154 deletions

View File

@@ -6,18 +6,31 @@ import "C"
import (
"runtime"
"github.com/jmorganca/ollama/api"
"github.com/pbnjay/memory"
)
// CheckVRAM returns the free VRAM in bytes on Linux machines with NVIDIA GPUs
func CheckVRAM() (int64, error) {
// TODO - assume metal, and return free memory?
return 0, nil
if runtime.GOARCH == "amd64" {
// gpu not supported, this may not be metal
return 0, nil
}
// on macOS, there's already buffer for available vram (see below) so just return the total
systemMemory := int64(memory.TotalMemory())
// macOS limits how much memory is available to the GPU based on the amount of system memory
// TODO: handle case where iogpu.wired_limit_mb is set to a higher value
if systemMemory <= 36*1024*1024*1024 {
systemMemory = systemMemory * 2 / 3
} else {
systemMemory = systemMemory * 3 / 4
}
return systemMemory, nil
}
func GetGPUInfo() GpuInfo {
// TODO - Metal vs. x86 macs...
mem, _ := getCPUMem()
return GpuInfo{
Library: "default",
@@ -32,19 +45,6 @@ func getCPUMem() (memInfo, error) {
}, nil
}
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
if opts.NumGPU != -1 {
return opts.NumGPU
}
// metal only supported on arm64
if runtime.GOARCH == "arm64" {
return 1
}
return 0
}
func nativeInit() error {
return nil
}