Add cgo implementation for llama.cpp
Run llama.cpp's server.cpp directly inside the Go runtime via cgo while retaining the LLM Go abstractions.
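
As a minimal illustration of the mechanism described above (the C function below is a hypothetical stand-in, not the actual server.cpp binding added by this commit), a Go program can call C code in-process via cgo like this:

package main

/*
// Hypothetical stand-in for a llama.cpp/server.cpp entry point; the real
// commit compiles the C++ sources into the binary and calls them the same way.
static const char* llama_server_status(void) { return "ok"; }
*/
import "C"

import "fmt"

func main() {
	// The C code runs inside the Go process: no child process and no HTTP
	// hop between the Go server and the llama.cpp runner.
	fmt.Println(C.GoString(C.llama_server_status()))
}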
server/llm_test.go (new file, 103 lines)
@@ -0,0 +1,103 @@
package server

import (
	"context"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"

	"github.com/jmorganca/ollama/api"
)

// TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
// package to avoid circular dependencies

// WARNING - these tests will fail on mac if you don't manually copy ggml-metal.metal to this dir (./server)
//
// TODO - Fix this ^^

var (
	req = [2]api.GenerateRequest{
		{
			Model:   "orca-mini",
			Prompt:  "tell me a short story about agi?",
			Options: map[string]interface{}{},
		}, {
			Model:   "orca-mini",
			Prompt:  "what is the origin of the us thanksgiving holiday?",
			Options: map[string]interface{}{},
		},
	}
	resp = [2]string{
		"once upon a time",
		"fourth thursday",
	}
)

func TestIntegrationSimpleOrcaMini(t *testing.T) {
	SkipIFNoTestData(t)
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
	defer llmRunner.Close()
	response := OneShotPromptResponse(t, ctx, req[0], model, llmRunner)
	assert.Contains(t, strings.ToLower(response), resp[0])
}

// TODO
// The server always loads a new runner and closes the old one, which forces serial execution.
// At present this test case fails with concurrency problems. Eventually we should try to
// get true concurrency working with n_parallel support in the backend.
func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
	SkipIFNoTestData(t)
	t.Skip("concurrent prediction on single runner not currently supported")
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	var wg sync.WaitGroup
	wg.Add(len(req))
	model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
	defer llmRunner.Close()
	for i := 0; i < len(req); i++ {
		go func(i int) {
			defer wg.Done()
			response := OneShotPromptResponse(t, ctx, req[i], model, llmRunner)
			t.Logf("Prompt: %s\nResponse: %s", req[i].Prompt, response)
			assert.Contains(t, strings.ToLower(response), resp[i], "error in thread %d (%s)", i, req[i].Prompt)
		}(i)
	}
	wg.Wait()
}

func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
	SkipIFNoTestData(t)
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	defer cancel()
	opts := api.DefaultOptions()
	opts.Seed = 42
	opts.Temperature = 0.0
	var wg sync.WaitGroup
	wg.Add(len(req))

	for i := 0; i < len(req); i++ {
		go func(i int) {
			defer wg.Done()
			model, llmRunner := PrepareModelForPrompts(t, req[0].Model, opts)
			defer llmRunner.Close()
			response := OneShotPromptResponse(t, ctx, req[i], model, llmRunner)
			t.Logf("Prompt: %s\nResponse: %s", req[i].Prompt, response)
			assert.Contains(t, strings.ToLower(response), resp[i], "error in thread %d (%s)", i, req[i].Prompt)
		}(i)
	}
	wg.Wait()
}

// TODO - create a parallel test with 2 different models once we support concurrency
server/llm_utils_test.go (new file, 76 lines)
@@ -0,0 +1,76 @@
package server

import (
	"context"
	"errors"
	"os"
	"path"
	"runtime"
	"testing"
	"time"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
	"github.com/stretchr/testify/require"
)

func SkipIFNoTestData(t *testing.T) {
	modelDir := getModelDir()
	if _, err := os.Stat(modelDir); errors.Is(err, os.ErrNotExist) {
		t.Skipf("%s does not exist - skipping integration tests", modelDir)
	}
}

func getModelDir() string {
	_, filename, _, _ := runtime.Caller(0)
	return path.Dir(path.Dir(filename) + "/../test_data/models/.")
}

func PrepareModelForPrompts(t *testing.T, modelName string, opts api.Options) (*Model, llm.LLM) {
	modelDir := getModelDir()
	os.Setenv("OLLAMA_MODELS", modelDir)
	model, err := GetModel(modelName)
	require.NoError(t, err, "GetModel ")
	err = opts.FromMap(model.Options)
	require.NoError(t, err, "opts from model ")
	runner, err := llm.New("unused", model.ModelPath, model.AdapterPaths, model.ProjectorPaths, opts)
	require.NoError(t, err, "llm.New failed")
	return model, runner
}

func OneShotPromptResponse(t *testing.T, ctx context.Context, req api.GenerateRequest, model *Model, runner llm.LLM) string {
	checkpointStart := time.Now()
	prompt, err := model.Prompt(PromptVars{
		System: req.System,
		Prompt: req.Prompt,
		First:  len(req.Context) == 0,
	})
	require.NoError(t, err, "prompt generation failed")
	success := make(chan bool, 1)
	response := ""
	cb := func(r llm.PredictResult) {
		if !r.Done {
			response += r.Content
		} else {
			success <- true
		}
	}
	checkpointLoaded := time.Now()
	predictReq := llm.PredictOpts{
		Prompt:           prompt,
		Format:           req.Format,
		CheckpointStart:  checkpointStart,
		CheckpointLoaded: checkpointLoaded,
	}
	err = runner.Predict(ctx, predictReq, cb)
	require.NoError(t, err, "predict call failed")

	select {
	case <-ctx.Done():
		t.Errorf("failed to complete before timeout: \n%s", response)
		return ""
	case <-success:
		return response
	}
}
@@ -126,10 +126,6 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
		loaded.Options = &opts
	}

	// update options for the loaded llm
	// TODO(mxyng): this isn't thread safe, but it should be fine for now
	loaded.runner.SetOptions(opts)

	loaded.expireAt = time.Now().Add(sessionDuration)

	if loaded.expireTimer == nil {
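
The context lines in this hunk come from the runner idle-expiry logic in load(): the session deadline (expireAt) is pushed out on each use and expireTimer is armed when absent, so an unused runner is eventually torn down. A rough standalone sketch of that pattern, with simplified stand-in types rather than the server's actual loaded struct:

package main

import (
	"fmt"
	"sync"
	"time"
)

type loadedRunner struct {
	mu          sync.Mutex
	expireAt    time.Time
	expireTimer *time.Timer
}

// touch marks the runner as recently used and (re)arms the expiry timer.
func (l *loadedRunner) touch(sessionDuration time.Duration) {
	l.mu.Lock()
	defer l.mu.Unlock()
	l.expireAt = time.Now().Add(sessionDuration)
	if l.expireTimer == nil {
		l.expireTimer = time.AfterFunc(sessionDuration, func() {
			fmt.Println("session expired: close the runner here")
		})
		return
	}
	// Later uses just push the deadline out.
	l.expireTimer.Reset(sessionDuration)
}

func main() {
	r := &loadedRunner{}
	r.touch(50 * time.Millisecond)
	time.Sleep(100 * time.Millisecond) // let the timer fire for the demo
}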