Refine handling of shim presence

This allows CPU-only builds to work on systems with Radeon cards.
This commit is contained in:
Daniel Hiltgen
2023-12-15 14:27:27 -08:00
parent 1b991d0ba9
commit 3269535a4c
2 changed files with 8 additions and 7 deletions

View File

@@ -22,6 +22,9 @@ type LLM interface {
Close()
}
// ShimPresent records whether the shim was loaded. It defaults to false and is
// meant to be set to true on linux/windows once the shim loads successfully
// (the code that sets it is not in this part of the file). New only selects
// the ROCm shim server when this is true.
var ShimPresent = false
func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
if _, err := os.Stat(model); err != nil {
return nil, err
@@ -79,11 +82,10 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0
gpuInfo := gpu.GetGPUInfo()
switch gpuInfo.Driver {
case "ROCM":
if gpuInfo.Driver == "ROCM" && ShimPresent {
return newRocmShimExtServer(model, adapters, projectors, ggml.NumLayers(), opts)
default:
// Rely on the built-in CUDA based server which will fall back to CPU
} else {
// Rely on the built-in CUDA/Metal based server which will fall back to CPU
return newLlamaExtServer(model, adapters, projectors, ggml.NumLayers(), opts)
}
}