Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-10 15:57:04 +00:00
remove tmp directories created by previous servers (#559)
* remove tmp directories created by previous servers
* clean up on server stop
* Update routes.go
* Update server/routes.go
* create top-level temp ollama dir
* check file exists before creating

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
Co-authored-by: Michael Yang <mxyng@pm.me>
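The server-side half of this change (server/routes.go) is not shown on this page. Below is a hedged sketch of what the commit message describes: sweep stale temp directories on startup, create one top-level ollama temp dir, and remove it on stop. The helper name (cleanupTmpDirs) and the "ollama*" glob are illustrative assumptions, not the actual routes.go code.

// Hypothetical sketch of the server lifecycle described in the commit
// message; not the actual server/routes.go implementation.
package main

import (
	"log"
	"os"
	"path/filepath"
)

// cleanupTmpDirs removes temp directories left behind by previous servers.
func cleanupTmpDirs() {
	matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
	if err != nil {
		return
	}
	for _, d := range matches {
		if err := os.RemoveAll(d); err != nil {
			log.Printf("could not cleanup tmp dir %s: %v", d, err)
		}
	}
}

func main() {
	cleanupTmpDirs()

	// one top-level temp dir for this server, handed to the llm package
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(workDir) // clean up on server stop

	// ... start the HTTP server, threading workDir into llm.New ...
}

Note that sweeping "ollama*" on startup assumes one server per machine; a concurrently running server's workDir would match the same glob.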
13 llm/ggml.go
@@ -4,7 +4,6 @@ import (
 	"encoding/binary"
 	"errors"
 	"io"
-	"sync"
 )
 
 type GGML struct {
@@ -165,18 +164,6 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	return nil, nil
 }
-
-var (
-	ggmlInit    sync.Once
-	ggmlRunners []ModelRunner // a slice of ModelRunners ordered by priority
-)
-
-func ggmlRunner() []ModelRunner {
-	ggmlInit.Do(func() {
-		ggmlRunners = chooseRunners("ggml")
-	})
-	return ggmlRunners
-}
 
 const (
 	// Magic constant for `ggml` files (unversioned).
 	FILE_MAGIC_GGML = 0x67676d6c
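What the removal fixes: sync.Once memoizes for the life of the process, so the runner paths (and the MkdirTemp directory baked into them) could never change or be cleaned up without a restart, and each server run left its directory behind. A minimal standalone illustration of that pinning, not repo code:

package main

import (
	"fmt"
	"os"
	"sync"
)

var (
	once sync.Once
	dir  string
)

// tmpDir is computed exactly once per process; every later call gets
// the same path, and nothing ever removes the directory it names.
func tmpDir() string {
	once.Do(func() {
		dir, _ = os.MkdirTemp("", "llama-*")
	})
	return dir
}

func main() {
	fmt.Println(tmpDir()) // e.g. /tmp/llama-2701297351
	fmt.Println(tmpDir()) // same path for the life of the process
}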
14 llm/gguf.go
@@ -6,7 +6,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"sync"
 )
 
 type containerGGUF struct {
@@ -368,16 +367,3 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
 
 	return
 }
-
-var (
-	ggufInit    sync.Once
-	ggufRunners []ModelRunner // a slice of ModelRunners ordered by priority
-)
-
-func ggufRunner() []ModelRunner {
-	ggufInit.Do(func() {
-		ggufRunners = chooseRunners("gguf")
-	})
-
-	return ggufRunners
-}
34 llm/llama.go
@@ -32,7 +32,7 @@ type ModelRunner struct {
 	Path string // path to the model runner executable
 }
 
-func chooseRunners(runnerType string) []ModelRunner {
+func chooseRunners(workDir, runnerType string) []ModelRunner {
 	buildPath := path.Join("llama.cpp", runnerType, "build")
 	var runners []string
 
@@ -61,11 +61,6 @@ func chooseRunners(runnerType string) []ModelRunner {
 		}
 	}
 
-	// copy the files locally to run the llama.cpp server
-	tmpDir, err := os.MkdirTemp("", "llama-*")
-	if err != nil {
-		log.Fatalf("load llama runner: failed to create temp dir: %v", err)
-	}
 	runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
 	for _, r := range runners {
 		// find all the files in the runner's bin directory
@@ -85,18 +80,27 @@ func chooseRunners(runnerType string) []ModelRunner {
 			defer srcFile.Close()
 
 			// create the directory in case it does not exist
-			destPath := filepath.Join(tmpDir, filepath.Dir(f))
+			destPath := filepath.Join(workDir, filepath.Dir(f))
 			if err := os.MkdirAll(destPath, 0o755); err != nil {
 				log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
 			}
-			destFile, err := os.OpenFile(filepath.Join(destPath, filepath.Base(f)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-			if err != nil {
-				log.Fatalf("write llama runner %s: %v", f, err)
-			}
-			defer destFile.Close()
 
-			if _, err := io.Copy(destFile, srcFile); err != nil {
-				log.Fatalf("copy llama runner %s: %v", f, err)
+			destFile := filepath.Join(destPath, filepath.Base(f))
+
+			_, err = os.Stat(destFile)
+			switch {
+			case errors.Is(err, os.ErrNotExist):
+				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+				if err != nil {
+					log.Fatalf("write llama runner %s: %v", f, err)
+				}
+				defer destFile.Close()
+
+				if _, err := io.Copy(destFile, srcFile); err != nil {
+					log.Fatalf("copy llama runner %s: %v", f, err)
+				}
+			case err != nil:
+				log.Fatalf("stat llama runner %s: %v", f, err)
 			}
 		}
 	}
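The stat-then-switch added above is what makes extraction idempotent: a runner file already present in workDir, whether from this run or a previous server, is left in place rather than rewritten. Here is the same idiom in isolation, as a hypothetical helper using only the standard library:

package main

import (
	"errors"
	"os"
)

// extractIfMissing writes data to dest only when dest does not already
// exist, so re-running extraction against the same workDir is a no-op
// for files that earlier runs already wrote.
func extractIfMissing(dest string, data []byte) error {
	_, err := os.Stat(dest)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return os.WriteFile(dest, data, 0o755)
	case err != nil:
		return err
	}
	return nil // already present: skip the write
}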
@@ -107,7 +111,7 @@ func chooseRunners(runnerType string) []ModelRunner {
 	// return the runners to try in priority order
 	localRunnersByPriority := []ModelRunner{}
 	for _, r := range runners {
-		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(tmpDir, r)})
+		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(workDir, r)})
 	}
 
 	return localRunnersByPriority
6 llm/llm.go
@@ -21,7 +21,7 @@ type LLM interface {
 	Ping(context.Context) error
 }
 
-func New(model string, adapters []string, opts api.Options) (LLM, error) {
+func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -91,9 +91,9 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	switch ggml.Name() {
 	case "gguf":
 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
-		return newLlama(model, adapters, ggufRunner(), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
-		return newLlama(model, adapters, ggmlRunner(), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
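Taken together, callers of llm.New now own the directory lifecycle. A rough sketch of a call site, under stated assumptions: the module path github.com/jmorganca/ollama and api.DefaultOptions come from the upstream project this repository mirrors, and the real wiring lives in server/routes.go, which this page does not include.

package main

import (
	"log"
	"os"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
)

func main() {
	// The caller owns the directory lifecycle now: create one
	// top-level temp dir, thread it through llm.New, remove it on stop.
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(workDir) // clean up on server stop

	model, err := llm.New(workDir, "/path/to/model.gguf", nil, api.DefaultOptions())
	if err != nil {
		log.Fatal(err)
	}
	_ = model // use the runner from here
}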