support for packaging in multiple cuda runners (#509)

* enable packaging multiple cuda versions
* use nvcc cuda version if available

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
Bruce MacDonald
2023-09-14 15:08:13 -04:00
committed by GitHub
parent 83ffb154bc
commit 2540c9181c
5 changed files with 96 additions and 38 deletions

View File

@@ -4,7 +4,6 @@ import (
"encoding/binary"
"errors"
"io"
"path"
"sync"
)
@@ -166,11 +165,6 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) {
return nil, nil
}
var (
	// Slash-separated (not OS-specific) locations of the prebuilt llama.cpp
	// runner binaries; built with the path package rather than filepath —
	// presumably these address an embedded FS or are normalized later.
	// NOTE(review): this commit's diff removes these in favor of a string
	// key passed to chooseRunner — confirm no remaining references.
	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
)
var (
ggmlInit sync.Once
ggmlRunnerPath string
@@ -178,7 +172,7 @@ var (
// ggmlRunner lazily resolves the path to the ggml runner binary exactly
// once (guarded by ggmlInit) and returns a ModelRunner pointing at it.
// Subsequent calls reuse the cached ggmlRunnerPath.
func ggmlRunner() ModelRunner {
	ggmlInit.Do(func() {
		// The pre-change call chooseRunner(ggmlGPU, ggmlCPU) was a dead
		// store immediately overwritten below (and its arguments are
		// deleted in this change) — only the "ggml" variant survives.
		ggmlRunnerPath = chooseRunner("ggml")
	})
	return ModelRunner{Path: ggmlRunnerPath}
}