mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-14 01:37:04 +00:00
next ollama runner (#7913)
feat: add new Ollama engine using ggml through cgo This change introduces a new way to run pretrained models. It introduces 3 high level interfaces and a bunch of smaller helper interfaces to facilitate this. - `model.Model` defines the interface for a model architecture. Models such as `llama` and `mllama`, which are provided as examples, can implement the model's forward propagation in the `Forward` method. This method will be called to generate completions. This interface can be found in `model/model.go` - `ml.Backend` defines the interface for a backend tensor library, in this case `ggml`. Among other things, a Backend is responsible for loading a pretrained model into hardware (GPU, CPU, etc) and providing an interface for Models to access loaded tensors. This interface can be found in `ml/backend.go` - `ml.Tensor` defines the interface for a tensor and tensor operations This is the first implementation of the new engine. Follow up PRs will implement more features: - non-greedy sampling (#8410) - integration with Ollama and KV caching (#8301) - more model support (#9080) with more coming soon Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
This commit is contained in:
@@ -15,7 +15,7 @@ import (
|
||||
"text/template/parse"
|
||||
|
||||
"github.com/ollama/ollama/api"
|
||||
"github.com/ollama/ollama/llm"
|
||||
"github.com/ollama/ollama/fs/ggml"
|
||||
"github.com/ollama/ollama/template"
|
||||
"github.com/ollama/ollama/types/model"
|
||||
)
|
||||
@@ -24,7 +24,7 @@ var intermediateBlobs map[string]string = make(map[string]string)
|
||||
|
||||
type layerGGML struct {
|
||||
Layer
|
||||
*llm.GGML
|
||||
*ggml.GGML
|
||||
}
|
||||
|
||||
func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressResponse)) (layers []*layerGGML, err error) {
|
||||
@@ -64,12 +64,12 @@ func parseFromModel(ctx context.Context, name model.Name, fn func(api.ProgressRe
|
||||
}
|
||||
defer blob.Close()
|
||||
|
||||
ggml, _, err := llm.DecodeGGML(blob, 0)
|
||||
f, _, err := ggml.Decode(blob, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
layers = append(layers, &layerGGML{layer, ggml})
|
||||
layers = append(layers, &layerGGML{layer, f})
|
||||
default:
|
||||
layers = append(layers, &layerGGML{layer, nil})
|
||||
}
|
||||
@@ -118,7 +118,7 @@ func detectContentType(r io.Reader) (string, error) {
|
||||
return "", err
|
||||
}
|
||||
|
||||
if contentType := llm.DetectGGMLType(b.Bytes()); contentType != "" {
|
||||
if contentType := ggml.DetectContentType(b.Bytes()); contentType != "" {
|
||||
return contentType, nil
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user