support for packaging in multiple cuda runners (#509)
* enable packaging multiple cuda versions
* use nvcc cuda version if available

---------

Co-authored-by: Michael Yang <mxyng@pm.me>
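The second bullet refers to probing the local CUDA toolkit when packaging. That detection logic is not part of the hunks shown below, so the following is a rough illustration only: the function name and the version regex are assumptions, not code from this commit. Querying nvcc for its release in Go might look like:

package main

import (
	"fmt"
	"os/exec"
	"regexp"
)

// nvccCudaVersion shells out to `nvcc --version` and extracts the toolkit
// release, e.g. "11.8" from "Cuda compilation tools, release 11.8, V11.8.89".
// It returns "" when nvcc is absent or the output is unrecognized, letting
// the caller fall back to a default CUDA version.
func nvccCudaVersion() string {
	out, err := exec.Command("nvcc", "--version").Output()
	if err != nil {
		return "" // nvcc not installed or not on PATH
	}
	m := regexp.MustCompile(`release (\d+\.\d+)`).FindSubmatch(out)
	if m == nil {
		return ""
	}
	return string(m[1])
}

func main() {
	if v := nvccCudaVersion(); v != "" {
		fmt.Println("packaging against CUDA", v)
	} else {
		fmt.Println("nvcc not found; using default CUDA version")
	}
}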
@@ -4,7 +4,6 @@ import (
 	"encoding/binary"
 	"errors"
 	"io"
-	"path"
 	"sync"
 )
 
@@ -166,11 +165,6 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	return nil, nil
 }
 
-var (
-	ggmlGPU = path.Join("llama.cpp", "ggml", "build", "gpu", "bin")
-	ggmlCPU = path.Join("llama.cpp", "ggml", "build", "cpu", "bin")
-)
-
 var (
 	ggmlInit       sync.Once
 	ggmlRunnerPath string
@@ -178,7 +172,7 @@ var (
 
 func ggmlRunner() ModelRunner {
 	ggmlInit.Do(func() {
-		ggmlRunnerPath = chooseRunner(ggmlGPU, ggmlCPU)
+		ggmlRunnerPath = chooseRunner("ggml")
 	})
 	return ModelRunner{Path: ggmlRunnerPath}
 }
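The call site now passes a runner family name ("ggml") instead of two precomputed build paths, pushing path resolution into chooseRunner so that more variants (for example, per-CUDA-version builds) can be added without touching callers. The body of chooseRunner is not in this diff; what follows is a minimal sketch under the assumption that its candidate layout mirrors the deleted ggmlGPU/ggmlCPU constants.

package llm

import (
	"os"
	"path"
)

// chooseRunner picks a runner binary directory for the given model family
// (e.g. "ggml"), preferring the GPU build and falling back to the CPU build.
// The candidate paths below are an assumption modeled on the ggmlGPU/ggmlCPU
// constants removed in this commit, not the real implementation.
func chooseRunner(family string) string {
	candidates := []string{
		path.Join("llama.cpp", family, "build", "gpu", "bin"),
		path.Join("llama.cpp", family, "build", "cpu", "bin"),
	}
	for _, p := range candidates {
		if _, err := os.Stat(p); err == nil {
			return p
		}
	}
	// Nothing found on disk; return the CPU path and let the caller fail
	// with a clear "no such file" error when it tries to execute it.
	return candidates[len(candidates)-1]
}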