Support packaging multiple CUDA runners (#509)

* enable packaging multiple CUDA versions
* use the CUDA version reported by nvcc, if available

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
Bruce MacDonald
2023-09-14 15:08:13 -04:00
committed by GitHub
parent 83ffb154bc
commit 2540c9181c
5 changed files with 96 additions and 38 deletions

View File

@@ -6,7 +6,6 @@ import (
"errors"
"fmt"
"io"
"path"
"sync"
)
@@ -370,11 +369,6 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
return
}
// ggufGPU and ggufCPU are slash-separated relative directories,
// "llama.cpp/gguf/build/gpu/bin" and "llama.cpp/gguf/build/cpu/bin",
// built with path.Join. ggufRunner passes both to chooseRunner.
// NOTE(review): this view also shows a chooseRunner("gguf") call that takes
// no directory arguments; these variables look like the pre-change form — confirm.
var (
ggufGPU = path.Join("llama.cpp", "gguf", "build", "gpu", "bin")
ggufCPU = path.Join("llama.cpp", "gguf", "build", "cpu", "bin")
)
var (
ggufInit sync.Once
ggufRunnerPath string
@@ -382,7 +376,7 @@ var (
func ggufRunner() ModelRunner {
ggufInit.Do(func() {
ggufRunnerPath = chooseRunner(ggufGPU, ggufCPU)
ggufRunnerPath = chooseRunner("gguf")
})
return ModelRunner{Path: ggufRunnerPath}