Adapted ROCm support to cgo-based llama.cpp

Author: Daniel Hiltgen
Date: 2023-11-29 11:00:37 -08:00
parent f8ef4439e9
commit 35934b2e05
37 changed files with 1688 additions and 658 deletions
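
For orientation: "cgo-based llama.cpp" means the server links llama.cpp into the Go binary through cgo rather than shelling out to a separate llama.cpp server process. The hunks below do not show the binding itself, so what follows is only a minimal sketch of the idea using the public llama.cpp C API of that era; the package layout and the loadModel wrapper are hypothetical, and the real binding in this commit configures far more (GPU offload layers, the ROCm/CUDA/CPU library variants, and so on).

package llm

/*
#cgo CFLAGS: -I${SRCDIR}/llama.cpp
#cgo LDFLAGS: -L${SRCDIR}/build -lllama -lstdc++ -lm
#include <stdlib.h>
#include "llama.h"
*/
import "C"

import (
	"fmt"
	"unsafe"
)

// loadModel loads a GGUF file through llama.cpp's C API. Hypothetical
// wrapper, shown only to illustrate what a cgo binding looks like.
func loadModel(path string) (unsafe.Pointer, error) {
	cpath := C.CString(path)
	defer C.free(unsafe.Pointer(cpath))

	params := C.llama_model_default_params()
	model := C.llama_load_model_from_file(cpath, params)
	if model == nil {
		return nil, fmt.Errorf("failed to load model %s", path)
	}
	return unsafe.Pointer(model), nil
}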

View File

@@ -2,14 +2,17 @@ package server
 import (
 	"context"
+	"os"
 	"strings"
 	"sync"
 	"testing"
 	"time"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 	"github.com/jmorganca/ollama/api"
+	"github.com/jmorganca/ollama/llm"
 )
 // TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
@@ -33,12 +36,16 @@ var (
 	}
 	resp = [2][]string{
 		{"once upon a time"},
 		{"fourth thursday", "united states thanksgiving"},
 	}
 )

 func TestIntegrationSimpleOrcaMini(t *testing.T) {
 	SkipIFNoTestData(t)
+	workDir, err := os.MkdirTemp("", "ollama")
+	require.NoError(t, err)
+	defer os.RemoveAll(workDir)
+	require.NoError(t, llm.Init(workDir))
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
 	defer cancel()
 	opts := api.DefaultOptions()
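
A note on the fixtures above: resp pairs each test prompt with a set of acceptable substrings, and a response evidently passes if it contains any of them. A helper of roughly this shape would capture that check (hypothetical; the actual assertion lies outside the hunks shown):

package server

import (
	"strings"
	"testing"
)

// assertOneOf is a hypothetical helper matching the fixture layout: a
// response is acceptable if it contains any of the expected substrings.
func assertOneOf(t *testing.T, response string, anyOf []string) {
	t.Helper()
	for _, want := range anyOf {
		if strings.Contains(strings.ToLower(response), want) {
			return
		}
	}
	t.Errorf("response %q contained none of %v", response, anyOf)
}

The added lines also repeat the same bootstrap in all three tests: a scratch directory handed to llm.Init (presumably where the native libraries get staged) and a 60-second deadline. A hypothetical consolidation, using t.Cleanup in place of defer:

package server

import (
	"context"
	"os"
	"testing"
	"time"

	"github.com/stretchr/testify/require"

	"github.com/jmorganca/ollama/llm"
)

// integrationSetup is a hypothetical consolidation of the bootstrap each
// test repeats: scratch work dir, llm.Init, and a 60-second deadline.
func integrationSetup(t *testing.T) context.Context {
	t.Helper()
	SkipIFNoTestData(t)
	workDir, err := os.MkdirTemp("", "ollama")
	require.NoError(t, err)
	t.Cleanup(func() { os.RemoveAll(workDir) })
	require.NoError(t, llm.Init(workDir))
	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
	t.Cleanup(cancel)
	return ctx
}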
@@ -56,7 +63,13 @@ func TestIntegrationSimpleOrcaMini(t *testing.T) {
 // get true concurrency working with n_parallel support in the backend
 func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 	SkipIFNoTestData(t)
+	t.Skip("concurrent prediction on single runner not currently supported")
+	workDir, err := os.MkdirTemp("", "ollama")
+	require.NoError(t, err)
+	defer os.RemoveAll(workDir)
+	require.NoError(t, llm.Init(workDir))
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
 	defer cancel()
 	opts := api.DefaultOptions()
@@ -79,6 +92,10 @@ func TestIntegrationConcurrentPredictOrcaMini(t *testing.T) {
 func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
 	SkipIFNoTestData(t)
+	workDir, err := os.MkdirTemp("", "ollama")
+	require.NoError(t, err)
+	defer os.RemoveAll(workDir)
+	require.NoError(t, llm.Init(workDir))
 	ctx, cancel := context.WithTimeout(context.Background(), time.Second*60)
 	defer cancel()
 	opts := api.DefaultOptions()
@@ -87,6 +104,7 @@ func TestIntegrationConcurrentRunnersOrcaMini(t *testing.T) {
 	var wg sync.WaitGroup
 	wg.Add(len(req))
+	t.Logf("Running %d concurrently", len(req))
 	for i := 0; i < len(req); i++ {
 		go func(i int) {
 			defer wg.Done()
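
Worth noting in the hunk above: the loop passes i to the goroutine by value. Before Go 1.22, all iterations share one loop variable, so capturing i directly would let the goroutines race on it and typically observe its final value. The fan-out pattern in isolation:

package server

import "sync"

// runConcurrently fans work out to n goroutines and waits for them all.
// The index is passed by value so each goroutine gets its own copy
// (necessary before Go 1.22's per-iteration loop variables).
func runConcurrently(n int, work func(i int)) {
	var wg sync.WaitGroup
	wg.Add(n)
	for i := 0; i < n; i++ {
		go func(i int) {
			defer wg.Done()
			work(i)
		}(i)
	}
	wg.Wait()
}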

View File

@@ -25,6 +25,7 @@ import (
"github.com/gin-gonic/gin"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/gpu"
"github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/version"
@@ -81,20 +82,6 @@ func load(c *gin.Context, modelName string, reqOpts map[string]interface{}, sess
 		return nil, err
 	}
-	ctx := c.Request.Context()
-	// check if the loaded model is still running in a subprocess, in case something unexpected happened
-	if loaded.runner != nil {
-		if err := loaded.runner.Ping(ctx); err != nil {
-			log.Print("loaded llm process not responding, closing now")
-			// the subprocess is no longer running, so close it
-			loaded.runner.Close()
-			loaded.runner = nil
-			loaded.Model = nil
-			loaded.Options = nil
-		}
-	}
 	needLoad := loaded.runner == nil || // is there a model loaded?
 		loaded.ModelPath != model.ModelPath || // has the base model changed?
 		!reflect.DeepEqual(loaded.AdapterPaths, model.AdapterPaths) || // have the adapters changed?
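
The deleted block above was a liveness probe: before reusing the cached runner, ping the subprocess and, if it has died, tear down the cached state so the needLoad check forces a fresh load. Presumably the in-process cgo runner makes this unnecessary. As a standalone pattern (the pinger interface is assumed for illustration; the diff works directly with loaded.runner):

package server

import (
	"context"
	"log"
)

// pinger abstracts the subprocess runner; assumed for illustration.
type pinger interface {
	Ping(ctx context.Context) error
	Close()
}

// ensureAlive returns the runner if it still responds, or closes it and
// returns nil so the caller's needLoad condition triggers a reload.
func ensureAlive(ctx context.Context, runner pinger) pinger {
	if runner == nil {
		return nil
	}
	if err := runner.Ping(ctx); err != nil {
		log.Print("loaded llm process not responding, closing now")
		runner.Close()
		return nil
	}
	return runner
}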
@@ -905,9 +892,12 @@ func Serve(ln net.Listener) error {
 		os.Exit(0)
 	}()
-	if runtime.GOOS == "linux" {
+	if err := llm.Init(s.WorkDir); err != nil {
+		return fmt.Errorf("unable to initialize llm library %w", err)
+	}
+	if runtime.GOOS == "linux" { // TODO - windows too
 		// check compatibility to log warnings
-		if _, err := llm.CheckVRAM(); err != nil {
+		if _, err := gpu.CheckVRAM(); err != nil {
 			log.Print(err.Error())
 		}
 	}
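
The new startup order matters: llm.Init must succeed before the server runs at all, while a failed GPU probe is only logged, presumably because CPU inference can still proceed. Condensed into one function, assuming the signatures visible in the diff (llm.Init(dir string) error and a two-value gpu.CheckVRAM()):

package server

import (
	"fmt"
	"log"
	"runtime"

	"github.com/jmorganca/ollama/gpu"
	"github.com/jmorganca/ollama/llm"
)

// initBackends condenses the startup flow from Serve: failing to set up
// the llm library is fatal, while a GPU probe failure is only logged.
func initBackends(workDir string) error {
	if err := llm.Init(workDir); err != nil {
		return fmt.Errorf("unable to initialize llm library %w", err)
	}
	if runtime.GOOS == "linux" { // TODO - windows too
		if _, err := gpu.CheckVRAM(); err != nil {
			log.Print(err.Error()) // warn, do not fail startup
		}
	}
	return nil
}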