mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
Optimize container images for startup (#6547)
* Optimize container images for startup

  This change adjusts how runner payloads are handled to support container builds where we keep them extracted in the filesystem. This makes it easier to optimize the cpu/cuda vs cpu/rocm images for size, and should result in faster startup times for container images.
* Refactor payload logic and add buildx support for faster builds
* Move payloads around
* Review comments
* Converge to buildx based helper scripts
* Use docker buildx action for release
This commit is contained in:
@@ -179,53 +179,6 @@ var (
|
||||
HsaOverrideGfxVersion = String("HSA_OVERRIDE_GFX_VERSION")
|
||||
)
|
||||
|
||||
func RunnersDir() (p string) {
|
||||
if p := Var("OLLAMA_RUNNERS_DIR"); p != "" {
|
||||
return p
|
||||
}
|
||||
|
||||
if runtime.GOOS != "windows" {
|
||||
return
|
||||
}
|
||||
|
||||
defer func() {
|
||||
if p == "" {
|
||||
slog.Error("unable to locate llm runner directory. Set OLLAMA_RUNNERS_DIR to the location of 'ollama/runners'")
|
||||
}
|
||||
}()
|
||||
|
||||
// On Windows we do not carry the payloads inside the main executable
|
||||
exe, err := os.Executable()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
cwd, err := os.Getwd()
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
var paths []string
|
||||
for _, root := range []string{filepath.Dir(exe), filepath.Join(filepath.Dir(exe), LibRelativeToExe()), cwd} {
|
||||
paths = append(paths,
|
||||
root,
|
||||
filepath.Join(root, runtime.GOOS+"-"+runtime.GOARCH),
|
||||
filepath.Join(root, "dist", runtime.GOOS+"-"+runtime.GOARCH),
|
||||
)
|
||||
}
|
||||
|
||||
// Try a few variations to improve developer experience when building from source in the local tree
|
||||
for _, path := range paths {
|
||||
candidate := filepath.Join(path, "lib", "ollama", "runners")
|
||||
if _, err := os.Stat(candidate); err == nil {
|
||||
p = candidate
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
return p
|
||||
}
|
||||
|
||||
func Uint(key string, defaultValue uint) func() uint {
|
||||
return func() uint {
|
||||
if s := Var(key); s != "" {
|
||||
@@ -290,7 +243,6 @@ func AsMap() map[string]EnvVar {
|
||||
"OLLAMA_NOPRUNE": {"OLLAMA_NOPRUNE", NoPrune(), "Do not prune model blobs on startup"},
|
||||
"OLLAMA_NUM_PARALLEL": {"OLLAMA_NUM_PARALLEL", NumParallel(), "Maximum number of parallel requests"},
|
||||
"OLLAMA_ORIGINS": {"OLLAMA_ORIGINS", Origins(), "A comma separated list of allowed origins"},
|
||||
"OLLAMA_RUNNERS_DIR": {"OLLAMA_RUNNERS_DIR", RunnersDir(), "Location for runners"},
|
||||
"OLLAMA_SCHED_SPREAD": {"OLLAMA_SCHED_SPREAD", SchedSpread(), "Always schedule model across all GPUs"},
|
||||
"OLLAMA_TMPDIR": {"OLLAMA_TMPDIR", TmpDir(), "Location for temporary files"},
|
||||
|
||||
|
||||
Reference in New Issue
Block a user