mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-14 09:47:02 +00:00
subprocess llama.cpp server (#401)
* remove c code * pack llama.cpp * use request context for llama_cpp * let llama_cpp decide the number of threads to use * stop llama runner when app stops * remove sample count and duration metrics * use go generate to get libraries * tmp dir for running llm
This commit is contained in:
12
llm/llm.go
12
llm/llm.go
@@ -1,6 +1,7 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
@@ -11,12 +12,13 @@ import (
|
||||
)
|
||||
|
||||
type LLM interface {
|
||||
Predict([]int, string, func(api.GenerateResponse)) error
|
||||
Embedding(string) ([]float64, error)
|
||||
Encode(string) []int
|
||||
Decode(...int) string
|
||||
Predict(context.Context, []int, string, func(api.GenerateResponse)) error
|
||||
Embedding(context.Context, string) ([]float64, error)
|
||||
Encode(context.Context, string) ([]int, error)
|
||||
Decode(context.Context, []int) (string, error)
|
||||
SetOptions(api.Options)
|
||||
Close()
|
||||
Ping(context.Context) error
|
||||
}
|
||||
|
||||
func New(model string, adapters []string, opts api.Options) (LLM, error) {
|
||||
@@ -75,7 +77,7 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
|
||||
|
||||
switch ggml.ModelFamily() {
|
||||
case ModelFamilyLlama:
|
||||
return newLlama(model, adapters, opts)
|
||||
return newLlama(model, adapters, ggmlRunner(), opts)
|
||||
default:
|
||||
return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user