Refine handling of shim presence

This allows CPU-only builds to work on systems with Radeon cards.
2025-12-13 01:07:12 +00:00 · 2023-12-15 14:27:27 -08:00
parent 1b991d0ba9
commit 3269535a4c
2 changed files with 8 additions and 7 deletions
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -22,6 +22,9 @@ type LLM interface {
 	Close()
 }

+// Set to true on linux/windows if we are able to load the shim
+var ShimPresent = false
+
 func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
@@ -79,11 +82,10 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
 	gpuInfo := gpu.GetGPUInfo()
-	switch gpuInfo.Driver {
-	case "ROCM":
+	if gpuInfo.Driver == "ROCM" && ShimPresent {
 		return newRocmShimExtServer(model, adapters, projectors, ggml.NumLayers(), opts)
-	default:
-		// Rely on the built-in CUDA based server which will fall back to CPU
+	} else {
+		// Rely on the built-in CUDA/Metal based server which will fall back to CPU
 		return newLlamaExtServer(model, adapters, projectors, ggml.NumLayers(), opts)
 	}
 }