mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-12 00:37:04 +00:00
unbound max num gpu layers (#591)
---------
Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
@@ -91,9 +91,9 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
|
||||
switch ggml.Name() {
|
||||
case "gguf":
|
||||
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
|
||||
- return newLlama(model, adapters, chooseRunners(workDir, "gguf"), opts)
|
||||
+ return newLlama(model, adapters, chooseRunners(workDir, "gguf"), ggml.NumLayers(), opts)
|
||||
case "ggml", "ggmf", "ggjt", "ggla":
|
||||
- return newLlama(model, adapters, chooseRunners(workDir, "ggml"), opts)
|
||||
+ return newLlama(model, adapters, chooseRunners(workDir, "ggml"), ggml.NumLayers(), opts)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user