Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-10 15:57:04 +00:00
remove tmp directories created by previous servers (#559)
* remove tmp directories created by previous servers
* clean up on server stop
* Update routes.go
* Update server/routes.go
* create top-level temp ollama dir
* check file exists before creating

Co-authored-by: Jeffrey Morgan <jmorganca@gmail.com>
Co-authored-by: Michael Yang <mxyng@pm.me>
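The server-side half of this change (server/routes.go) is not shown on this page. Below is a hedged sketch of what the commit message describes: sweep stale temp directories on startup, create one top-level ollama temp dir, and remove it on stop. The helper name (cleanupTmpDirs) and the "ollama*" glob are illustrative assumptions, not the actual routes.go code.

// Hypothetical sketch of the server lifecycle described in the commit
// message; not the actual server/routes.go implementation.
package main

import (
	"log"
	"os"
	"path/filepath"
)

// cleanupTmpDirs removes temp directories left behind by previous servers.
func cleanupTmpDirs() {
	matches, err := filepath.Glob(filepath.Join(os.TempDir(), "ollama*"))
	if err != nil {
		return
	}
	for _, d := range matches {
		if err := os.RemoveAll(d); err != nil {
			log.Printf("could not cleanup tmp dir %s: %v", d, err)
		}
	}
}

func main() {
	cleanupTmpDirs()

	// one top-level temp dir for this server, handed to the llm package
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(workDir) // clean up on server stop

	// ... start the HTTP server, threading workDir into llm.New ...
}

Note that sweeping "ollama*" on startup assumes one server per machine; a concurrently running server's workDir would match the same glob.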
13 llm/ggml.go
@@ -4,7 +4,6 @@ import (
 	"encoding/binary"
 	"errors"
 	"io"
-	"sync"
 )
 
 type GGML struct {
@@ -165,18 +164,6 @@ func (c *containerLORA) Decode(r io.Reader) (model, error) {
 	return nil, nil
 }
-
-var (
-	ggmlInit    sync.Once
-	ggmlRunners []ModelRunner // a slice of ModelRunners ordered by priority
-)
-
-func ggmlRunner() []ModelRunner {
-	ggmlInit.Do(func() {
-		ggmlRunners = chooseRunners("ggml")
-	})
-	return ggmlRunners
-}
 
 const (
 	// Magic constant for `ggml` files (unversioned).
 	FILE_MAGIC_GGML = 0x67676d6c
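What the removal fixes: sync.Once memoizes for the life of the process, so the runner paths (and the MkdirTemp directory baked into them) could never change or be cleaned up without a restart, and each server run left its directory behind. A minimal standalone illustration of that pinning, not repo code:

package main

import (
	"fmt"
	"os"
	"sync"
)

var (
	once sync.Once
	dir  string
)

// tmpDir is computed exactly once per process; every later call gets
// the same path, and nothing ever removes the directory it names.
func tmpDir() string {
	once.Do(func() {
		dir, _ = os.MkdirTemp("", "llama-*")
	})
	return dir
}

func main() {
	fmt.Println(tmpDir()) // e.g. /tmp/llama-2701297351
	fmt.Println(tmpDir()) // same path for the life of the process
}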
14 llm/gguf.go
@@ -6,7 +6,6 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"sync"
 )
 
 type containerGGUF struct {
@@ -368,16 +367,3 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
 
 	return
 }
-
-var (
-	ggufInit    sync.Once
-	ggufRunners []ModelRunner // a slice of ModelRunners ordered by priority
-)
-
-func ggufRunner() []ModelRunner {
-	ggufInit.Do(func() {
-		ggufRunners = chooseRunners("gguf")
-	})
-
-	return ggufRunners
-}
34 llm/llama.go
@@ -32,7 +32,7 @@ type ModelRunner struct {
 	Path string // path to the model runner executable
 }
 
-func chooseRunners(runnerType string) []ModelRunner {
+func chooseRunners(workDir, runnerType string) []ModelRunner {
 	buildPath := path.Join("llama.cpp", runnerType, "build")
 	var runners []string
 
@@ -61,11 +61,6 @@ func chooseRunners(runnerType string) []ModelRunner {
 		}
 	}
 
-	// copy the files locally to run the llama.cpp server
-	tmpDir, err := os.MkdirTemp("", "llama-*")
-	if err != nil {
-		log.Fatalf("load llama runner: failed to create temp dir: %v", err)
-	}
 	runnerAvailable := false // if no runner files are found in the embed, this flag will cause a fast fail
 	for _, r := range runners {
 		// find all the files in the runner's bin directory
@@ -85,18 +80,27 @@ func chooseRunners(runnerType string) []ModelRunner {
 			defer srcFile.Close()
 
 			// create the directory in case it does not exist
-			destPath := filepath.Join(tmpDir, filepath.Dir(f))
+			destPath := filepath.Join(workDir, filepath.Dir(f))
 			if err := os.MkdirAll(destPath, 0o755); err != nil {
 				log.Fatalf("create runner temp dir %s: %v", filepath.Dir(f), err)
 			}
-			destFile, err := os.OpenFile(filepath.Join(destPath, filepath.Base(f)), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
-			if err != nil {
-				log.Fatalf("write llama runner %s: %v", f, err)
-			}
-			defer destFile.Close()
 
-			if _, err := io.Copy(destFile, srcFile); err != nil {
-				log.Fatalf("copy llama runner %s: %v", f, err)
+			destFile := filepath.Join(destPath, filepath.Base(f))
+
+			_, err = os.Stat(destFile)
+			switch {
+			case errors.Is(err, os.ErrNotExist):
+				destFile, err := os.OpenFile(destFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o755)
+				if err != nil {
+					log.Fatalf("write llama runner %s: %v", f, err)
+				}
+				defer destFile.Close()
+
+				if _, err := io.Copy(destFile, srcFile); err != nil {
+					log.Fatalf("copy llama runner %s: %v", f, err)
+				}
+			case err != nil:
+				log.Fatalf("stat llama runner %s: %v", f, err)
 			}
 		}
 	}
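The stat-then-switch added above is what makes extraction idempotent: a runner file already present in workDir, whether from this run or a previous server, is left in place rather than rewritten. Here is the same idiom in isolation, as a hypothetical helper using only the standard library:

package main

import (
	"errors"
	"os"
)

// extractIfMissing writes data to dest only when dest does not already
// exist, so re-running extraction against the same workDir is a no-op
// for files that earlier runs already wrote.
func extractIfMissing(dest string, data []byte) error {
	_, err := os.Stat(dest)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return os.WriteFile(dest, data, 0o755)
	case err != nil:
		return err
	}
	return nil // already present: skip the write
}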
@@ -107,7 +111,7 @@ func chooseRunners(runnerType string) []ModelRunner {
 	// return the runners to try in priority order
 	localRunnersByPriority := []ModelRunner{}
 	for _, r := range runners {
-		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(tmpDir, r)})
+		localRunnersByPriority = append(localRunnersByPriority, ModelRunner{Path: path.Join(workDir, r)})
 	}
 
 	return localRunnersByPriority
6 llm/llm.go
@@ -21,7 +21,7 @@ type LLM interface {
 	Ping(context.Context) error
 }
 
-func New(model string, adapters []string, opts api.Options) (LLM, error) {
+func New(workDir, model string, adapters []string, opts api.Options) (LLM, error) {
 	if _, err := os.Stat(model); err != nil {
 		return nil, err
 	}
@@ -91,9 +91,9 @@ func New(model string, adapters []string, opts api.Options) (LLM, error) {
 	switch ggml.Name() {
 	case "gguf":
 		opts.NumGQA = 0 // TODO: remove this when llama.cpp runners differ enough to need separate newLlama functions
-		return newLlama(model, adapters, ggufRunner(), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "gguf"), opts)
 	case "ggml", "ggmf", "ggjt", "ggla":
-		return newLlama(model, adapters, ggmlRunner(), opts)
+		return newLlama(model, adapters, chooseRunners(workDir, "ggml"), opts)
 	default:
 		return nil, fmt.Errorf("unknown ggml type: %s", ggml.ModelFamily())
 	}
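Taken together, callers of llm.New now own the directory lifecycle. A rough sketch of a call site, under stated assumptions: the module path github.com/jmorganca/ollama and api.DefaultOptions come from the upstream project this repository mirrors, and the real wiring lives in server/routes.go, which this page does not include.

package main

import (
	"log"
	"os"

	"github.com/jmorganca/ollama/api"
	"github.com/jmorganca/ollama/llm"
)

func main() {
	// The caller owns the directory lifecycle now: create one
	// top-level temp dir, thread it through llm.New, remove it on stop.
	workDir, err := os.MkdirTemp("", "ollama")
	if err != nil {
		log.Fatal(err)
	}
	defer os.RemoveAll(workDir) // clean up on server stop

	model, err := llm.New(workDir, "/path/to/model.gguf", nil, api.DefaultOptions())
	if err != nil {
		log.Fatal(err)
	}
	_ = model // use the runner from here
}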