Add cgo implementation for llama.cpp

Run llama.cpp's server.cpp directly inside the Go process via cgo, while retaining the Go LLM abstractions.
2025-12-11 16:26:59 +00:00 · 2023-11-13 17:20:34 -08:00
parent 5e7fd6906f
commit d4cd695759
27 changed files with 1189 additions and 765 deletions
--- a/llm/llm.go
+++ b/llm/llm.go
@@ -18,7 +18,6 @@ type LLM interface {
 	Embedding(context.Context, string) ([]float64, error)
 	Encode(context.Context, string) ([]int, error)
 	Decode(context.Context, []int) (string, error)
-	SetOptions(api.Options)
 	Close()
 	Ping(context.Context) error
 }
@@ -79,5 +78,5 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
 	opts.NumGQA = 0
 	opts.RopeFrequencyBase = 0.0
 	opts.RopeFrequencyScale = 0.0
-	return newLlama(model, adapters, projectors, chooseRunners(workDir), ggml.NumLayers(), opts)
+	return newLlamaExtServer(model, adapters, projectors, ggml.NumLayers(), opts)
 }