Add cgo implementation for llama.cpp

Run the server.cpp directly inside the Go runtime via cgo
while retaining the LLM Go abstractions.
This commit is contained in:
Daniel Hiltgen
2023-11-13 17:20:34 -08:00
parent 5e7fd6906f
commit d4cd695759
27 changed files with 1189 additions and 765 deletions

View File

@@ -18,7 +18,6 @@ type LLM interface {
Embedding(context.Context, string) ([]float64, error)
Encode(context.Context, string) ([]int, error)
Decode(context.Context, []int) (string, error)
SetOptions(api.Options)
Close()
Ping(context.Context) error
}
@@ -79,5 +78,5 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts.NumGQA = 0
opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0
return newLlama(model, adapters, projectors, chooseRunners(workDir), ggml.NumLayers(), opts)
return newLlamaExtServer(model, adapters, projectors, ggml.NumLayers(), opts)
}