deprecate ggml

- remove ggml runner
- automatically pull gguf models when ggml detected
- tell users to update to gguf in case the automatic pull fails

Co-Authored-By: Jeffrey Morgan <jmorganca@gmail.com>
committed by Daniel Hiltgen · parent ed195f3562 · commit 811b1f03c8
llm/llama.go · 23 lines changed
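The automatic pull described in the message is implemented outside this file; the hunks below only remove the runner-side ggml plumbing. As a rough sketch of the behavior the message describes (the helpers detectModelFormat and pullGGUFVariant are hypothetical stand-ins, not Ollama's real API):

package main

import (
	"errors"
	"fmt"
	"log"
)

var errGGMLDeprecated = errors.New("ggml models are deprecated; update this model to the gguf format")

// detectModelFormat reports "gguf" or "ggml". Stubbed here; the real server
// would inspect the model file itself.
func detectModelFormat(name string) (string, error) {
	return "ggml", nil // pretend a legacy model was found
}

// pullGGUFVariant re-downloads the model in gguf form. Stubbed to fail so
// the error path below is exercised.
func pullGGUFVariant(name string) error {
	return fmt.Errorf("no gguf build published for %q", name)
}

func loadModel(name string) error {
	format, err := detectModelFormat(name)
	if err != nil {
		return err
	}
	if format == "ggml" {
		// ggml detected: automatically pull the gguf variant of the model
		if err := pullGGUFVariant(name); err != nil {
			// automatic pull failed: tell the user to update to gguf
			return fmt.Errorf("%w: %v", errGGMLDeprecated, err)
		}
	}
	return nil
}

func main() {
	if err := loadModel("example/legacy-model"); err != nil {
		log.Println(err)
	}
}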
--- a/llm/llama.go
+++ b/llm/llama.go
@@ -59,13 +59,12 @@ ws ::= ([ \t\n] ws)?
 var llamaCppEmbed embed.FS
 
 type ModelRunner struct {
-	Type        string // "gguf" or "ggml"
 	Path        string // path to the model runner executable
 	Accelerated bool
 }
 
-func chooseRunners(workDir, runnerType string) []ModelRunner {
-	buildPath := path.Join("llama.cpp", runnerType, "build")
+func chooseRunners(workDir string) []ModelRunner {
+	buildPath := path.Join("llama.cpp", "gguf", "build")
 	var runners []ModelRunner
 
 	// set the runners based on the OS
@@ -73,25 +72,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	switch runtime.GOOS {
 	case "darwin":
 		if runtime.GOARCH == "arm64" {
-			runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
+			runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
 		} else {
-			runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
+			runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
 		}
 	case "linux":
 		runners = []ModelRunner{
-			{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
-			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
+			{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	case "windows":
 		// TODO: select windows GPU runner here when available
 		runners = []ModelRunner{
-			{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
-			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
+			{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
+			{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
 		}
 	default:
 		log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
 		runners = []ModelRunner{
-			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	}
 
@@ -141,7 +140,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 		}
 	}
 	if !runnerAvailable {
-		log.Fatalf("%s runner not found", runnerType)
+		log.Fatalf("gguf runner not found")
 	}
 
 	// return the runners to try in priority order
@@ -149,7 +148,6 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	for _, r := range runners {
 		// clean the ModelRunner paths so that they match the OS we are running on
 		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
-			Type:        r.Type,
 			Path:        filepath.Clean(path.Join(workDir, r.Path)),
 			Accelerated: r.Accelerated,
 		})
@@ -350,6 +348,7 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
 		"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
 		"--embedding",
+		"--parallel", "2",
 	}
 
 	if opts.MainGPU > 0 {
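Taken together, the hunks leave a gguf-only selection path. A condensed, runnable rendition of the post-change logic, under the same directory layout ("llama.cpp/gguf/build/<variant>/bin/ollama-runner"); it keeps the priority order (accelerated runner first, then CPU fallback) but omits the windows case and the on-disk availability check that the real function performs:

package main

import (
	"fmt"
	"path"
	"runtime"
)

// ModelRunner matches the post-change struct above: no Type field, since
// every runner is now a gguf runner.
type ModelRunner struct {
	Path        string
	Accelerated bool
}

// chooseRunners mirrors the simplified selection: the build path is pinned
// to the gguf tree and callers no longer pass a runner type.
func chooseRunners() []ModelRunner {
	buildPath := path.Join("llama.cpp", "gguf", "build")
	switch runtime.GOOS {
	case "darwin":
		if runtime.GOARCH == "arm64" {
			return []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
		}
		return []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
	case "linux":
		// CUDA runner first, CPU runner as fallback: priority order
		return []ModelRunner{
			{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
		}
	default:
		return []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
	}
}

func main() {
	for i, r := range chooseRunners() {
		fmt.Printf("%d: %s (accelerated=%v)\n", i, r.Path, r.Accelerated)
	}
}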