Support multiple variants for a given llm lib type

In some cases we may want multiple variants for a given GPU type or CPU.
This adds logic to have an optional Variant which we can use to select
an optimal library, but also allows us to try multiple variants in case
some fail to load.

This can be useful for scenarios such as ROCm v5 vs v6 incompatibility
or potentially CPU features.
This commit is contained in:
Daniel Hiltgen
2024-01-05 12:13:08 -08:00
parent b24e8d17b2
commit 8da7bef05f
16 changed files with 428 additions and 212 deletions

View File

@@ -19,8 +19,6 @@ type LLM interface {
Close()
}
var AvailableShims = map[string]string{}
func New(workDir, model string, adapters, projectors []string, opts api.Options) (LLM, error) {
if _, err := os.Stat(model); err != nil {
return nil, err
@@ -131,7 +129,8 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
opts.NumGQA = 0
opts.RopeFrequencyBase = 0.0
opts.RopeFrequencyScale = 0.0
return newLlmServer(library, model, adapters, projectors, opts)
gpuInfo := gpu.GetGPUInfo()
return newLlmServer(gpuInfo, model, adapters, projectors, opts)
}
// Give any native cgo implementations an opportunity to initialize
@@ -139,15 +138,18 @@ func Init(workdir string) error {
return nativeInit(workdir)
}
func newLlmServer(library, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
if _, libPresent := AvailableShims[library]; libPresent && library != "default" {
srv, err := newDynamicShimExtServer(AvailableShims[library], model, adapters, projectors, opts)
func newLlmServer(gpuInfo gpu.GpuInfo, model string, adapters, projectors []string, opts api.Options) (extServer, error) {
for _, shim := range getShims(gpuInfo) {
if shim == "default" {
break
}
srv, err := newDynamicShimExtServer(shim, model, adapters, projectors, opts)
if err == nil {
return srv, nil
}
log.Printf("Failed to load dynamic library %s - falling back to CPU mode %s", library, err)
// TODO - update some state to indicate we were unable to load the GPU library for future "info" ux
log.Printf("Failed to load dynamic library %s %s", shim, err)
}
return newDefaultExtServer(model, adapters, projectors, opts)
}