mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
ggml: Seperate tensor load from backend creation
Currently, when the backend is created, the tensors are loaded at the same time, which is a slow operation. This separates them to be two steps: - Create backend, including enumerating tensors and memory allocation - Loading tensor data This allows more flexibility in managing model loading.
This commit is contained in:
@@ -6,7 +6,6 @@ import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
"slices"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -15,6 +14,7 @@ import (
|
||||
)
|
||||
|
||||
type Backend interface {
|
||||
Load(ctx context.Context, progress func(float32)) error
|
||||
Config() fs.Config
|
||||
Get(name string) Tensor
|
||||
NewContext() Context
|
||||
@@ -52,10 +52,6 @@ type CacheConfig struct {
|
||||
|
||||
// BackendParams controls how the backend loads and executes models
|
||||
type BackendParams struct {
|
||||
// Progress is a callback function that allows reporting percentage completion
|
||||
// of model loading
|
||||
Progress func(float32)
|
||||
|
||||
// NumThreads sets the number of threads to use if running on the CPU
|
||||
NumThreads int
|
||||
|
||||
@@ -72,9 +68,9 @@ type BackendParams struct {
|
||||
FlashAttention bool
|
||||
}
|
||||
|
||||
var backends = make(map[string]func(context.Context, *os.File, BackendParams) (Backend, error))
|
||||
var backends = make(map[string]func(string, BackendParams) (Backend, error))
|
||||
|
||||
func RegisterBackend(name string, f func(context.Context, *os.File, BackendParams) (Backend, error)) {
|
||||
func RegisterBackend(name string, f func(string, BackendParams) (Backend, error)) {
|
||||
if _, ok := backends[name]; ok {
|
||||
panic("backend: backend already registered")
|
||||
}
|
||||
@@ -82,9 +78,9 @@ func RegisterBackend(name string, f func(context.Context, *os.File, BackendParam
|
||||
backends[name] = f
|
||||
}
|
||||
|
||||
func NewBackend(ctx context.Context, f *os.File, params BackendParams) (Backend, error) {
|
||||
func NewBackend(modelPath string, params BackendParams) (Backend, error) {
|
||||
if backend, ok := backends["ggml"]; ok {
|
||||
return backend(ctx, f, params)
|
||||
return backend(modelPath, params)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported backend")
|
||||
|
||||
Reference in New Issue
Block a user