Mirror of https://github.com/dogkeeper886/ollama37.git (synced 2025-12-11 08:17:03 +00:00)
Terminate subprocess if receiving SIGINT or SIGTERM signals while model is loading (#3653)
* terminate subprocess if receiving `SIGINT` or `SIGTERM` signals while model is loading
* use `unload` in signal handler
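As an illustration of the pattern the commit message describes, here is a minimal, hypothetical Go sketch (not the actual ollama code): start a slow-loading subprocess, then race its completion against `SIGINT`/`SIGTERM`, killing the child if a signal arrives first. The `sleep 600` command is a stand-in for the model-loading server process.

```go
// Hypothetical, self-contained sketch of the pattern described above (not the
// actual ollama code): start a slow-loading subprocess and terminate it if
// SIGINT or SIGTERM arrives before it finishes.
package main

import (
	"log/slog"
	"os"
	"os/exec"
	"os/signal"
	"syscall"
)

func main() {
	// "sleep 600" stands in for the model-loading llama server process.
	cmd := exec.Command("sleep", "600")
	if err := cmd.Start(); err != nil {
		slog.Error("failed to start subprocess", "error", err)
		os.Exit(1)
	}

	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)

	done := make(chan error, 1)
	go func() { done <- cmd.Wait() }()

	select {
	case sig := <-sigs:
		slog.Info("signal received while loading, terminating subprocess", "signal", sig)
		_ = cmd.Process.Kill() // stop the half-loaded child
		<-done                 // reap it so it does not linger as a zombie
	case err := <-done:
		slog.Info("subprocess exited", "error", err)
	}
}
```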
@@ -17,7 +17,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
-	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -36,10 +35,6 @@ type LlamaServer struct {
 	options api.Options
 }
 
-var cpuOnlyFamilies = []string{
-	"mamba",
-}
-
 func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error) {
 	f, err := os.Open(model)
 	if err != nil {
@@ -91,7 +86,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	memoryRequiredPartial := memoryMinimum + graphPartialOffload
 
 	if info.Library != "metal" {
-		if memoryRequiredPartial > memoryAvailable || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture()) {
+		if memoryRequiredPartial > memoryAvailable {
 			info.Library = "cpu"
 		}
 	}
@@ -277,12 +272,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 		_ = s.cmd.Wait()
 	}()
 
-	if err = s.waitUntilRunning(); err != nil {
-		slog.Error("error starting llama server", "server", servers[i], "error", err)
-		s.Close()
-		finalErr = err
-		continue
-	}
 	return s, nil
 }
 
@@ -383,7 +372,7 @@ func (s *LlamaServer) Ping(ctx context.Context) error {
 	return nil
 }
 
-func (s *LlamaServer) waitUntilRunning() error {
+func (s *LlamaServer) WaitUntilRunning() error {
 	start := time.Now()
 	// TODO we need to wire up a better way to detect hangs during model load and startup of the server
 	expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
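Taken together, the last two hunks move the startup wait out of `NewLlamaServer`: the constructor now returns as soon as the subprocess is spawned, and the newly exported `WaitUntilRunning` lets the caller decide how to wait, which is what makes signal-aware loading possible. Below is a hypothetical caller-side sketch; only the method names `WaitUntilRunning` and `Close` come from the diff, while the package, interface, and helper function are assumptions for illustration.

```go
// Package llmload is a hypothetical helper package sketching how a caller
// might use the exported WaitUntilRunning together with SIGINT/SIGTERM.
package llmload

import (
	"errors"
	"log/slog"
	"os"
	"os/signal"
	"syscall"
)

// server abstracts the two methods of the diff's LlamaServer that this
// sketch relies on; the interface itself is an assumption.
type server interface {
	WaitUntilRunning() error
	Close() error
}

// waitOrTerminate races WaitUntilRunning against SIGINT/SIGTERM and closes
// the server (terminating its subprocess) if a signal wins the race.
func waitOrTerminate(s server) error {
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(sigs)

	done := make(chan error, 1)
	go func() { done <- s.WaitUntilRunning() }()

	select {
	case err := <-done:
		return err
	case sig := <-sigs:
		slog.Info("signal received while model is loading, unloading", "signal", sig)
		_ = s.Close() // assumed to kill the llama server subprocess
		return errors.New("interrupted while loading: " + sig.String())
	}
}
```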