mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-13 01:07:12 +00:00
Offload layers to GPU based on new model size estimates (#1850)
* select layers based on estimated model memory usage * always account for scratch vram * dont load +1 layers * better estmation for graph alloc * Update gpu/gpu_darwin.go Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com> * Update llm/llm.go Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com> * Update llm/llm.go * add overhead for cuda memory * Update llm/llm.go Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com> * fix build error on linux * address comments --------- Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
This commit is contained in:
@@ -16,7 +16,7 @@ import (
|
||||
//go:embed llama.cpp/ggml-metal.metal
|
||||
var libEmbed embed.FS
|
||||
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, numLayers int64, opts api.Options) (extServer, error) {
|
||||
func newDynamicShimExtServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
|
||||
// should never happen...
|
||||
return nil, fmt.Errorf("Dynamic library loading not supported on Mac")
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user