mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-12 00:37:04 +00:00
This provides integration with the new Ollama engine
(5824541 next ollama runner (#7913)) and the rest of the Ollama
infrastructure such as the runner and Ollama server.
In addition, it also builds out the KV cache infrastructure to
support requirements of how Ollama runs models such as:
- Parallel processing
- Memory management for defragmentation and shifting
- Multi-modal models
Both old and new engines continue to be supported. By default, only
the old engine is used. To enable the new engine:
Start the server with the OLLAMA_NEW_ENGINE environment variable set:
OLLAMA_NEW_ENGINE=1 ./ollama serve
Start a model that is supported by the Ollama engine. This one is Llama 3.1 8b Q4_K_M:
./ollama run jessegross/llama3.1
250 lines
4.8 KiB
Go
250 lines
4.8 KiB
Go
package model
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"image"
|
|
_ "image/jpeg"
|
|
_ "image/png"
|
|
"log/slog"
|
|
"os"
|
|
"reflect"
|
|
"strconv"
|
|
"strings"
|
|
|
|
_ "golang.org/x/image/bmp"
|
|
_ "golang.org/x/image/tiff"
|
|
_ "golang.org/x/image/webp"
|
|
|
|
"github.com/ollama/ollama/kvcache"
|
|
"github.com/ollama/ollama/ml"
|
|
_ "github.com/ollama/ollama/ml/backend"
|
|
)
|
|
|
|
// Options bundles the per-call inputs that are handed to a model's Forward
// method.
//
// Forward (the package-level helper) requires Positions and Sequences to have
// the same, non-zero length and passes both to the KV cache before running the
// model.
type Options struct {
	// Inputs, Positions, Sequences and Outputs describe the batch.
	// NOTE(review): presumably Inputs are token IDs and Outputs are the
	// indices whose logits are wanted — confirm against the model code.
	Inputs    []int32
	Positions []int32
	Sequences []int
	Outputs   []int32

	// Images holds decoded images supplied alongside the batch.
	Images []image.Image
}
|
|
|
|
type config struct {
|
|
Cache kvcache.Cache
|
|
}
|
|
|
|
type Base struct {
|
|
b ml.Backend
|
|
config
|
|
}
|
|
|
|
func (m *Base) Backend() ml.Backend {
|
|
return m.b
|
|
}
|
|
|
|
func (m *Base) Config() config {
|
|
return m.config
|
|
}
|
|
|
|
type Model interface {
|
|
Forward(ml.Context, Options) (ml.Tensor, error)
|
|
|
|
Backend() ml.Backend
|
|
Config() config
|
|
}
|
|
|
|
// models maps an architecture name to the constructor registered for it via
// Register. New looks constructors up here.
var models = map[string]func(ml.Config) (Model, error){}
|
|
|
|
func Register(name string, f func(ml.Config) (Model, error)) {
|
|
if _, ok := models[name]; ok {
|
|
panic("model: model already registered")
|
|
}
|
|
|
|
models[name] = f
|
|
}
|
|
|
|
func New(s string) (Model, error) {
|
|
r, err := os.Open(s)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer r.Close()
|
|
|
|
b, err := ml.NewBackend(r)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
arch := b.Config().Architecture()
|
|
f, ok := models[arch]
|
|
if !ok {
|
|
return nil, fmt.Errorf("unsupported model architecture %q", arch)
|
|
}
|
|
|
|
m, err := f(b.Config())
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
base := Base{b: b, config: m.Config()}
|
|
|
|
v := reflect.ValueOf(m)
|
|
v.Elem().Set(populateFields(base, v.Elem()))
|
|
return m, nil
|
|
}
|
|
|
|
func populateFields(base Base, v reflect.Value, tags ...Tag) reflect.Value {
|
|
t := v.Type()
|
|
|
|
if t.Kind() == reflect.Struct {
|
|
allNil := true
|
|
for i := range t.NumField() {
|
|
tt := t.Field(i).Type
|
|
vv := v.Field(i)
|
|
if !vv.CanSet() {
|
|
continue
|
|
}
|
|
|
|
// make a copy
|
|
tagsCopy := tags
|
|
if tag := t.Field(i).Tag.Get("gguf"); tag != "" {
|
|
tagsCopy = append(tagsCopy, ParseTags(tag))
|
|
}
|
|
|
|
if tt == reflect.TypeOf((*Base)(nil)).Elem() {
|
|
vv.Set(reflect.ValueOf(base))
|
|
} else if tt == reflect.TypeOf((*ml.Tensor)(nil)).Elem() {
|
|
var fn func([]Tag) [][]string
|
|
fn = func(tags []Tag) (values [][]string) {
|
|
if len(tags) < 1 {
|
|
return nil
|
|
}
|
|
|
|
values = [][]string{{tags[0].Name}}
|
|
for _, alt := range tags[0].Alternate {
|
|
values = append(values, []string{alt})
|
|
}
|
|
|
|
for i, value := range values {
|
|
for _, rest := range fn(tags[1:]) {
|
|
value = append(value, rest...)
|
|
}
|
|
|
|
values[i] = value
|
|
}
|
|
|
|
return values
|
|
}
|
|
|
|
names := fn(tagsCopy)
|
|
for _, name := range names {
|
|
if tensor := base.Backend().Get(strings.Join(name, ".")); tensor != nil {
|
|
slog.Debug("found tensor", "", tensor)
|
|
vv.Set(reflect.ValueOf(tensor))
|
|
break
|
|
}
|
|
}
|
|
} else if tt.Kind() == reflect.Pointer || tt.Kind() == reflect.Interface {
|
|
setPointer(base, vv, tagsCopy)
|
|
} else if tt.Kind() == reflect.Slice || tt.Kind() == reflect.Array {
|
|
for i := range vv.Len() {
|
|
vvv := vv.Index(i)
|
|
if vvv.Kind() == reflect.Pointer || vvv.Kind() == reflect.Interface {
|
|
setPointer(base, vvv, append(tagsCopy, Tag{Name: strconv.Itoa(i)}))
|
|
} else {
|
|
vvv.Set(populateFields(base, vvv, append(tagsCopy, Tag{Name: strconv.Itoa(i)})...))
|
|
}
|
|
}
|
|
}
|
|
|
|
if !canNil(tt) || !vv.IsNil() {
|
|
allNil = false
|
|
}
|
|
}
|
|
|
|
if allNil {
|
|
return reflect.Zero(t)
|
|
}
|
|
}
|
|
|
|
return v
|
|
}
|
|
|
|
func setPointer(base Base, v reflect.Value, tags []Tag) {
|
|
vv := v
|
|
if v.Kind() == reflect.Interface {
|
|
if v.IsNil() {
|
|
return
|
|
}
|
|
|
|
vv = vv.Elem()
|
|
}
|
|
|
|
vv = vv.Elem()
|
|
if v.IsNil() {
|
|
vv = reflect.New(v.Type().Elem()).Elem()
|
|
}
|
|
|
|
if f := populateFields(base, vv, tags...); f.CanAddr() {
|
|
v.Set(f.Addr())
|
|
}
|
|
}
|
|
|
|
// Tag is one component of a tensor name path, as parsed from a `gguf` struct
// tag by ParseTags.
type Tag struct {
	// Name is the primary name of the component.
	Name string
	// Alternate lists other names that may be tried in place of Name.
	Alternate []string
}
|
|
|
|
func ParseTags(s string) (tag Tag) {
|
|
parts := strings.Split(s, ",")
|
|
if len(parts) > 0 {
|
|
tag.Name = parts[0]
|
|
|
|
for _, part := range parts[1:] {
|
|
if value, ok := strings.CutPrefix(part, "alt:"); ok {
|
|
tag.Alternate = append(tag.Alternate, value)
|
|
}
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// canNil reports whether values of type t are of a kind that can be nil:
// channel, function, interface, map, pointer or slice.
func canNil(t reflect.Type) bool {
	switch t.Kind() {
	case reflect.Chan,
		reflect.Func,
		reflect.Interface,
		reflect.Map,
		reflect.Pointer,
		reflect.Slice:
		return true
	default:
		return false
	}
}
|
|
|
|
func Forward(ctx ml.Context, m Model, opts Options) (ml.Tensor, error) {
|
|
if len(opts.Positions) != len(opts.Sequences) {
|
|
return nil, fmt.Errorf("length of positions (%v) must match length of seqs (%v)", len(opts.Positions), len(opts.Sequences))
|
|
}
|
|
|
|
if len(opts.Positions) < 1 {
|
|
return nil, errors.New("batch size cannot be less than 1")
|
|
}
|
|
|
|
cache := m.Config().Cache
|
|
if cache != nil {
|
|
err := cache.StartForward(ctx, opts.Positions, opts.Sequences)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
t, err := m.Forward(ctx, opts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
ctx.Forward(t)
|
|
ctx.Compute(t)
|
|
|
|
return t, nil
|
|
}
|