Add cgo implementation for llama.cpp

Run server.cpp directly inside the Go runtime via cgo
while retaining the LLM Go abstractions.
Daniel Hiltgen
2023-11-13 17:20:34 -08:00
parent 5e7fd6906f
commit d4cd695759
27 changed files with 1189 additions and 765 deletions
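
Below is a minimal, hypothetical sketch of the pattern this commit adopts: Go driving an embedded C/C++ entry point in-process via cgo, rather than spawning server.cpp as a subprocess. llama_server_start and startServer are illustrative stand-ins, not names taken from this commit; the C stub is defined inline so the sketch compiles on its own, whereas the real commit links against the compiled server.cpp.

package llm

/*
#include <stdlib.h>
// Stand-in for the C entry point that server.cpp would export; defined
// inline here (hypothetically) so this sketch builds without llama.cpp.
static int llama_server_start(const char *model_path) { return 0; }
*/
import "C"

import (
	"fmt"
	"unsafe"
)

// startServer illustrates calling the embedded llama.cpp server through
// cgo: the C++ code runs inside the Go process, so the higher-level LLM
// abstractions in this package can stay unchanged.
func startServer(modelPath string) error {
	cPath := C.CString(modelPath)
	defer C.free(unsafe.Pointer(cPath))
	if rc := C.llama_server_start(cPath); rc != 0 {
		return fmt.Errorf("llama server start failed: %d", rc)
	}
	return nil
}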

llm/gpu_darwin.go (new file, +19)

@@ -0,0 +1,19 @@
//go:build darwin

package llm

import (
	"github.com/jmorganca/ollama/api"
)

// CheckVRAM returns the free VRAM in bytes; not implemented on macOS, so errNvidiaSMI is returned.
func CheckVRAM() (int64, error) {
	// TODO - assume metal, and return free memory?
	return 0, errNvidiaSMI
}

// NumGPU reports how many layers to offload; Metal is assumed on macOS, so this is always 1.
func NumGPU(numLayer, fileSizeBytes int64, opts api.Options) int {
	// default to enable metal on macOS
	return 1
}
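
The TODO above leaves free-memory detection open. One hedged sketch of what it might look like, assuming Apple Silicon's unified memory, where total physical memory (hw.memsize) is a rough ceiling on what Metal can address; a production version would more likely query Metal's recommendedMaxWorkingSetSize. checkVRAMDarwin is a hypothetical helper, not part of this commit.

//go:build darwin

package llm

import "golang.org/x/sys/unix"

// checkVRAMDarwin is a hypothetical take on the TODO above. On Apple
// Silicon the GPU shares unified memory with the CPU, so total physical
// memory is a coarse upper bound on what Metal can use; accurate
// free-memory accounting would need Metal or Mach host statistics.
func checkVRAMDarwin() (int64, error) {
	memSize, err := unix.SysctlUint64("hw.memsize")
	if err != nil {
		return 0, err
	}
	return int64(memSize), nil
}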