GGUF support (#441)

This commit is contained in:
Bruce MacDonald
2023-09-07 13:55:37 -04:00
committed by GitHub
parent 07b4074e7b
commit 09dd2aeff9
10 changed files with 543 additions and 139 deletions

View File

@@ -32,15 +32,22 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
}
defer f.Close()
ggml, err := DecodeGGML(f, ModelFamilyLlama)
ggml, err := DecodeGGML(f)
if err != nil {
return nil, err
}
switch ggml.FileType().String() {
case "F32", "Q5_0", "Q5_1", "Q8_0":
case "Q8_0":
if ggml.Name() != "gguf" && opts.NumGPU != 0 {
// GGML Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
}
case "F32", "Q5_0", "Q5_1":
if opts.NumGPU != 0 {
// F32, F16, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// F32, Q5_0, Q5_1, and Q8_0 do not support Metal API and will
// cause the runner to segmentation fault so disable GPU
log.Printf("WARNING: GPU disabled for F32, Q5_0, Q5_1, and Q8_0")
opts.NumGPU = 0
@@ -75,8 +82,11 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
}
}
switch ggml.ModelFamily() {
case ModelFamilyLlama:
switch ggml.Name() {
case "gguf":
opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
return newLlama(model, adapters, ggufRunner(), opts)
case "ggml", "ggmf", "ggjt", "ggla":
return newLlama(model, adapters, ggmlRunner(), opts)
default:
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())