Terminate subprocess when receiving SIGINT or SIGTERM signals while the model is loading (#3653)

* terminate the subprocess when receiving `SIGINT` or `SIGTERM` signals while the model is loading

* use `unload` in the signal handler
Author: Jeffrey Morgan
Date: 2024-04-15 12:09:32 -04:00
Committed by: GitHub
Parent commit: 7027f264fb
Commit: a0b8a32eb4
2 changed files with 23 additions and 32 deletions
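
The commit message above describes installing a handler for `SIGINT`/`SIGTERM` that unloads the runner subprocess even if the model is still loading. The block below is only a minimal, self-contained sketch of that pattern, not code from this commit: the `llamaServer` shape, the `llama-runner` command name, and using process kill as the "unload" step are assumptions for illustration.

```go
// Minimal sketch (assumed names, not ollama's real code): start a runner
// subprocess, then install a signal handler that terminates it ("unload")
// if SIGINT or SIGTERM arrives while the model is loading.
package main

import (
	"log/slog"
	"os"
	"os/exec"
	"os/signal"
	"syscall"
)

type llamaServer struct {
	cmd *exec.Cmd
}

// close terminates the runner subprocess if it is still alive.
func (s *llamaServer) close() {
	if s.cmd != nil && s.cmd.Process != nil {
		_ = s.cmd.Process.Kill()
	}
}

func main() {
	s := &llamaServer{cmd: exec.Command("llama-runner")} // hypothetical binary name
	if err := s.cmd.Start(); err != nil {
		slog.Error("failed to start runner", "error", err)
		os.Exit(1)
	}

	// Install the handler before waiting on the (potentially long) model load,
	// so Ctrl+C during loading still tears the subprocess down.
	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	go func() {
		sig := <-sigs
		slog.Info("received signal while loading, unloading runner", "signal", sig)
		s.close() // the "unload" step from the commit message
		os.Exit(0)
	}()

	// In the real code the caller blocks here until the model is loaded
	// (see WaitUntilRunning below); this sketch just waits on the process.
	_ = s.cmd.Wait()
}
```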

@@ -17,7 +17,6 @@ import (
 	"os/exec"
 	"path/filepath"
 	"runtime"
-	"slices"
 	"strconv"
 	"strings"
 	"time"
@@ -36,10 +35,6 @@ type LlamaServer struct {
 	options api.Options
 }
 
-var cpuOnlyFamilies = []string{
-	"mamba",
-}
-
 func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error) {
 	f, err := os.Open(model)
 	if err != nil {
@@ -91,7 +86,7 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 	memoryRequiredPartial := memoryMinimum + graphPartialOffload
 
 	if info.Library != "metal" {
-		if memoryRequiredPartial > memoryAvailable || slices.Contains(cpuOnlyFamilies, ggml.KV().Architecture()) {
+		if memoryRequiredPartial > memoryAvailable {
 			info.Library = "cpu"
 		}
 	}
@@ -277,12 +272,6 @@ func NewLlamaServer(model string, adapters, projectors []string, opts api.Option
 			_ = s.cmd.Wait()
 		}()
 
-		if err = s.waitUntilRunning(); err != nil {
-			slog.Error("error starting llama server", "server", servers[i], "error", err)
-			s.Close()
-			finalErr = err
-			continue
-		}
 		return s, nil
 	}
@@ -383,7 +372,7 @@ func (s *LlamaServer) Ping(ctx context.Context) error {
 	return nil
 }
 
-func (s *LlamaServer) waitUntilRunning() error {
+func (s *LlamaServer) WaitUntilRunning() error {
 	start := time.Now()
 	// TODO we need to wire up a better way to detect hangs during model load and startup of the server
 	expiresAt := time.Now().Add(10 * time.Minute) // be generous with timeout, large models can take a while to load
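
With the blocking call removed from `NewLlamaServer` and `waitUntilRunning` exported as `WaitUntilRunning`, the caller decides when to block on the model load, which is what makes it possible to unload the subprocess on a signal mid-load. Below is only a hedged sketch of such a caller, assuming it lives in the same package as `LlamaServer`; the function name `loadInterruptibly` and the select-on-signal structure are illustrative, not taken from this commit (the removed lines call `Close` on error, so `Close` is assumed here as the unload step).

```go
// Illustrative caller-side sketch (not part of this commit): start the server,
// then wait for it to become ready while also listening for SIGINT/SIGTERM.
package llm // assumes the package that defines LlamaServer

import (
	"errors"
	"os"
	"os/signal"
	"syscall"

	"github.com/ollama/ollama/api" // module path assumed
)

func loadInterruptibly(model string, opts api.Options) (*LlamaServer, error) {
	// NewLlamaServer now returns as soon as the runner subprocess is started;
	// it no longer blocks until the model has finished loading.
	s, err := NewLlamaServer(model, nil, nil, opts)
	if err != nil {
		return nil, err
	}

	done := make(chan error, 1)
	go func() { done <- s.WaitUntilRunning() }() // block on the load in the background

	sigs := make(chan os.Signal, 1)
	signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(sigs)

	select {
	case err := <-done:
		if err != nil {
			s.Close()
			return nil, err
		}
		return s, nil
	case sig := <-sigs:
		// Unload: terminate the still-loading subprocess before giving up.
		s.Close()
		return nil, errors.New("model load interrupted by signal " + sig.String())
	}
}
```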