deprecate ggml

- remove the ggml runner
- automatically pull the gguf version of a model when a ggml model is detected (see the sketch below)
- tell users to update to gguf when the automatic pull fails
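The detect-and-fallback flow described above is not part of the hunks shown below; a minimal sketch of the idea in Go follows. The helper names (detectFormat, ensureGGUF) and the pull callback are hypothetical, not taken from this commit; only the magic-number values follow the llama.cpp file headers.

    package model

    import (
        "encoding/binary"
        "fmt"
        "os"
    )

    // detectFormat reads the four-byte magic at the start of a model blob.
    // GGUF files begin with the bytes "GGUF" (0x46554747 when read as a
    // little-endian uint32); unversioned ggml files carry the legacy
    // llama.cpp magic 0x67676d6c.
    func detectFormat(path string) (string, error) {
        f, err := os.Open(path)
        if err != nil {
            return "", err
        }
        defer f.Close()

        var magic uint32
        if err := binary.Read(f, binary.LittleEndian, &magic); err != nil {
            return "", err
        }
        switch magic {
        case 0x46554747:
            return "gguf", nil
        case 0x67676d6c:
            return "ggml", nil
        default:
            return "", fmt.Errorf("unknown model format %#x", magic)
        }
    }

    // ensureGGUF re-pulls a model whose blob is still in the deprecated
    // ggml format, and tells the user to update the model when the
    // automatic pull fails.
    func ensureGGUF(name, blobPath string, pull func(string) error) error {
        format, err := detectFormat(blobPath)
        if err != nil {
            return err
        }
        if format == "gguf" {
            return nil // already up to date
        }
        if err := pull(name); err != nil {
            return fmt.Errorf("%s uses the deprecated ggml format and could not be re-pulled automatically: %w; please update the model to gguf", name, err)
        }
        return nil
    }

In the real server the pull callback would be the existing registry pull path; the sketch only shows where the format check happens and where the deprecation message is surfaced.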

Co-Authored-By: Jeffrey Morgan <jmorganca@gmail.com>
commit 811b1f03c8
parent ed195f3562
Author: Bruce MacDonald
Date:   2023-11-24 13:58:09 -05:00
Committed by: Daniel Hiltgen

19 changed files with 74 additions and 393 deletions


@@ -59,13 +59,12 @@ ws ::= ([ \t\n] ws)?
 var llamaCppEmbed embed.FS
 
 type ModelRunner struct {
-	Type        string // "gguf" or "ggml"
 	Path        string // path to the model runner executable
 	Accelerated bool
 }
 
-func chooseRunners(workDir, runnerType string) []ModelRunner {
-	buildPath := path.Join("llama.cpp", runnerType, "build")
+func chooseRunners(workDir string) []ModelRunner {
+	buildPath := path.Join("llama.cpp", "gguf", "build")
 	var runners []ModelRunner
 
 	// set the runners based on the OS
@@ -73,25 +72,25 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	switch runtime.GOOS {
 	case "darwin":
 		if runtime.GOARCH == "arm64" {
-			runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
+			runners = []ModelRunner{{Path: path.Join(buildPath, "metal", "bin", "ollama-runner")}}
 		} else {
-			runners = []ModelRunner{{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
+			runners = []ModelRunner{{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")}}
 		}
 	case "linux":
 		runners = []ModelRunner{
-			{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
-			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
+			{Path: path.Join(buildPath, "cuda", "bin", "ollama-runner"), Accelerated: true},
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
		}
 	case "windows":
 		// TODO: select windows GPU runner here when available
 		runners = []ModelRunner{
-			{Type: runnerType, Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
-			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
+			{Path: path.Join(buildPath, "cuda", "bin", "Release", "ollama-runner.exe"), Accelerated: true},
+			{Path: path.Join(buildPath, "cpu", "bin", "Release", "ollama-runner.exe")},
 		}
 	default:
 		log.Printf("unknown OS, running on CPU: %s", runtime.GOOS)
 		runners = []ModelRunner{
-			{Type: runnerType, Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
+			{Path: path.Join(buildPath, "cpu", "bin", "ollama-runner")},
 		}
 	}
@@ -141,7 +140,7 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 		}
 	}
 	if !runnerAvailable {
-		log.Fatalf("%s runner not found", runnerType)
+		log.Fatalf("gguf runner not found")
 	}
 
 	// return the runners to try in priority order
@@ -149,7 +148,6 @@ func chooseRunners(workDir, runnerType string) []ModelRunner {
 	for _, r := range runners {
 		// clean the ModelRunner paths so that they match the OS we are running on
 		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{
-			Type:        r.Type,
 			Path:        filepath.Clean(path.Join(workDir, r.Path)),
 			Accelerated: r.Accelerated,
 		})
@@ -350,6 +348,7 @@ func newLlama(model string, adapters, projectors []string, runners []ModelRunner
 		"--batch-size", fmt.Sprintf("%d", opts.NumBatch),
 		"--n-gpu-layers", fmt.Sprintf("%d", numGPU),
 		"--embedding",
+		"--parallel", "2",
 	}
 
 	if opts.MainGPU > 0 {