Mirror of https://github.com/dogkeeper886/ollama37.git
Adapted ROCm support to cgo-based llama.cpp
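In short: the runner plumbing moves from a spawned llama.cpp subprocess to the cgo bindings. The integration tests below gain an explicit llm.Init(workDir) bootstrap step, the subprocess liveness ping in load is dropped, and VRAM detection moves from the llm package to the new gpu package.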
@@ -2,14 +2,17 @@ package server
 
 import (
 	"context"
+	"os"
 	"strings"
 	"sync"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/llm"
 )
 
 // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
@@ -33,12 +36,16 @@ var (
 	}
 	resp = [2]string{
 		"once upon a time",
-		"fourth thursday",
+		"united states thanksgiving",
 	}
 )
 
 func TestIntegrationSimpleOrcaMini(t *testing.T) {
 	SkipIFNoTestData(t)
+	workDir, err := os.MkdirTemp("", "ollama")
+	require.NoError(t, err)
+	defer os.RemoveAll(workDir)
+	require.NoError(t, llm.Init(workDir))
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
 	defer cancel()
 	opts := api.DefaultOptions()
@@ -56,7 +63,13 @@ func TestIntegrationSimpleOrcaMini(t *testing.T) {
 // get true concurrency working with n_parallel support in the backend
 func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	SkipIFNoTestData(t)
 
+	t.Skip("concurrent prediction on single runner not currently supported")
+
+	workDir, err := os.MkdirTemp("", "ollama")
+	require.NoError(t, err)
+	defer os.RemoveAll(workDir)
+	require.NoError(t, llm.Init(workDir))
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
 	defer cancel()
 	opts := api.DefaultOptions()
@@ -79,6 +92,10 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 
 func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
 	SkipIFNoTestData(t)
+	workDir, err := os.MkdirTemp("", "ollama")
+	require.NoError(t, err)
+	defer os.RemoveAll(workDir)
+	require.NoError(t, llm.Init(workDir))
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
 	defer cancel()
 	opts := api.DefaultOptions()
@@ -87,6 +104,7 @@ func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
 	var wg sync.WaitGroup
 	wg.Add(len(req))
 
+	t.Logf("Running %d concurrently", len(req))
 	for i := 0; i < len(req); i++ {
 		go func(i int) {
 			defer wg.Done()
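Every test above now repeats the same bootstrap: create a scratch directory, extract the cgo-based llm libraries into it, and tear it down afterwards. A minimal sketch of that setup as a shared helper (initTestWorkDir is hypothetical, not part of the commit):

package server

import (
	"os"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/jmorganca/ollama/llm"
)

// initTestWorkDir distills the setup each test above repeats: make a
// throwaway directory, run llm.Init against it, and remove it when the
// test finishes. Hypothetical helper; llm.Init's contract is taken from
// the diff above.
func initTestWorkDir(t *testing.T) string {
	t.Helper()
	workDir, err := os.MkdirTemp("", "ollama")
	require.NoError(t, err)
	t.Cleanup(func() { os.RemoveAll(workDir) })
	require.NoError(t, llm.Init(workDir))
	return workDir
}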
@@ -25,6 +25,7 @@ import (
 	"github.com/gin-gonic/gin"
 
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/gpu"
 	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/parser"
 	"github.com/jmorganca/ollama/version"
@@ -81,20 +82,6 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
 		return nil, err
 	}
 
-	ctx := c.Request.Context()
-
-	// check if the loaded model is still running in a subprocess, in case something unexpected happened
-	if loaded.runner != nil {
-		if err := loaded.runner.Ping(ctx); err != nil {
-			log.Print("loaded llm process not responding, closing now")
-			// the subprocess is no longer running, so close it
-			loaded.runner.Close()
-			loaded.runner = nil
-			loaded.Model = nil
-			loaded.Options = nil
-		}
-	}
-
 	needLoad := loaded.runner == nil || // is there a model loaded?
 		loaded.ModelPath != model.ModelPath || // has the base model changed?
 		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
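With the runner linked in-process via cgo there is no subprocess left to ping, so the liveness check above disappears and only the needLoad comparison remains. A sketch of that remaining decision, with stand-in types (runnerRef here is hypothetical, not the server's real type):

package server

import (
	"io"
	"reflect"
)

// runnerRef stands in for the server's loaded-model state; only the
// fields referenced in the diff above are sketched.
type runnerRef struct {
	runner       io.Closer // the in-process llm runner
	ModelPath    string
	AdapterPaths []string
}

// needsReload reloads only when no runner exists yet or the requested
// model or adapters differ from what is currently loaded.
func needsReload(loaded *runnerRef, modelPath string, adapterPaths []string) bool {
	return loaded.runner == nil ||
		loaded.ModelPath != modelPath ||
		!reflect.DeepEqual(loaded.AdapterPaths, adapterPaths)
}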
@@ -905,9 +892,12 @@ func Serve(ln net.Listener) error {
 		os.Exit(0)
 	}()
 
-	if runtime.GOOS == "linux" {
+	if err := llm.Init(s.WorkDir); err != nil {
+		return fmt.Errorf("unable to initialize llm library %w", err)
+	}
+	if runtime.GOOS == "linux" { // TODO - windows too
 		// check compatibility to log warnings
-		if _, err := llm.CheckVRAM(); err != nil {
+		if _, err := gpu.CheckVRAM(); err != nil {
 			log.Print(err.Error())
 		}
 	}
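Read on its own, the revised startup order in Serve is: extract the llm libraries unconditionally, then probe GPU memory only to log compatibility warnings. A sketch of that sequence (initBackend is a hypothetical extraction, not the literal Serve body):

package server

import (
	"fmt"
	"log"
	"runtime"

	"github.com/jmorganca/ollama/gpu"
	"github.com/jmorganca/ollama/llm"
)

// initBackend mirrors the new startup order shown in the hunk above:
// extract the cgo-based llama.cpp libraries first, then probe VRAM.
// CheckVRAM's first return value is discarded here, as in the diff; a
// probe failure is deliberately non-fatal so CPU-only hosts keep serving.
func initBackend(workDir string) error {
	if err := llm.Init(workDir); err != nil {
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
	if runtime.GOOS == "linux" { // TODO - windows too
		if _, err := gpu.CheckVRAM(); err != nil {
			log.Print(err.Error()) // log the warning and continue
		}
	}
	return nil
}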