Mirror of https://github.com/dogkeeper886/ollama37.git, synced 2025-12-10 15:57:04 +00:00
partial decode ggml bin for more info
@@ -19,7 +19,7 @@ import (
 	"strings"
 
 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/llama"
+	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/parser"
 	"github.com/jmorganca/ollama/vector"
 )
@@ -98,9 +98,14 @@ type LayerReader struct {
 }
 
 type ConfigV2 struct {
+	ModelFamily llm.ModelFamily `json:"model_family"`
+	ModelType   llm.ModelType   `json:"model_type"`
+	FileType    llm.FileType    `json:"file_type"`
+	RootFS      RootFS          `json:"rootfs"`
+
+	// required by spec
 	Architecture string `json:"architecture"`
 	OS           string `json:"os"`
-	RootFS       RootFS `json:"rootfs"`
 }
 
 type RootFS struct {
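The new fields carry the metadata decoded from the GGML file alongside the spec-required image config fields. A minimal sketch of how such a config serializes, with the llm.ModelFamily/ModelType/FileType types reduced to plain strings, the sample values made up, and the RootFS JSON tags assumed to follow the usual "type"/"diff_ids" naming:

package main

import (
	"encoding/json"
	"fmt"
)

// Illustrative stand-ins: the real field types live in the llm package.
type rootFS struct {
	Type    string   `json:"type"`
	DiffIDs []string `json:"diff_ids"`
}

type configV2 struct {
	ModelFamily string `json:"model_family"`
	ModelType   string `json:"model_type"`
	FileType    string `json:"file_type"`
	RootFS      rootFS `json:"rootfs"`

	// required by spec
	Architecture string `json:"architecture"`
	OS           string `json:"os"`
}

func main() {
	cfg := configV2{
		ModelFamily:  "llama", // placeholder values
		ModelType:    "7B",
		FileType:     "Q4_0",
		Architecture: "amd64",
		OS:           "linux",
		RootFS:       rootFS{Type: "layers", DiffIDs: []string{"sha256:example"}},
	}

	out, err := json.MarshalIndent(cfg, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out))
}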
@@ -245,6 +250,11 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 		return err
 	}
 
+	config := ConfigV2{
+		Architecture: "amd64",
+		OS:           "linux",
+	}
+
 	var layers []*LayerReader
 	params := make(map[string][]string)
 	embed := EmbeddingParams{fn: fn, opts: api.DefaultOptions()}
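CreateModel now seeds the config with a hardcoded "amd64"/"linux" platform, while the old createConfigLayer hardcoded "arm64" behind a TODO (removed further down). If the intent of that TODO was to track the build platform instead, Go's runtime package exposes it; a hypothetical alternative, not what this commit does:

package main

import (
	"fmt"
	"runtime"
)

func main() {
	// runtime.GOARCH / runtime.GOOS report the platform the binary was
	// built for, e.g. "amd64" and "linux".
	fmt.Println(runtime.GOARCH, runtime.GOOS)
}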
@@ -283,6 +293,18 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			}
 			defer file.Close()
 
+			ggml, err := llm.DecodeGGML(file, llm.ModelFamilyLlama)
+			if err != nil {
+				return err
+			}
+
+			config.ModelFamily = ggml.ModelFamily
+			config.ModelType = ggml.ModelType
+			config.FileType = ggml.FileType
+
+			// reset the file
+			file.Seek(0, io.SeekStart)
+
 			l, err := CreateLayer(file)
 			if err != nil {
 				return fmt.Errorf("failed to create layer: %v", err)
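The new block decodes just enough of the GGML file to learn the model family, model type and file type, then rewinds the handle so CreateLayer can hash and store the same file from the beginning. A rough sketch of that decode-then-rewind flow; decodeGGML and the file path below are hypothetical stand-ins, since llm.DecodeGGML itself is not shown in this diff:

package main

import (
	"fmt"
	"io"
	"os"
)

// ggmlInfo mirrors the fields the diff reads off the decoded header.
type ggmlInfo struct {
	ModelFamily string
	ModelType   string
	FileType    string
}

// decodeGGML is a placeholder for llm.DecodeGGML: it would read only the
// header, leaving the file offset wherever decoding stopped.
func decodeGGML(r io.ReadSeeker) (*ggmlInfo, error) {
	return &ggmlInfo{ModelFamily: "llama", ModelType: "7B", FileType: "Q4_0"}, nil
}

func main() {
	file, err := os.Open("model.bin") // hypothetical path
	if err != nil {
		fmt.Println(err)
		return
	}
	defer file.Close()

	info, err := decodeGGML(file)
	if err != nil {
		fmt.Println(err)
		return
	}
	fmt.Printf("family=%s type=%s filetype=%s\n", info.ModelFamily, info.ModelType, info.FileType)

	// Rewind before reusing the handle, as the diff does with
	// file.Seek(0, io.SeekStart) ahead of CreateLayer(file).
	if _, err := file.Seek(0, io.SeekStart); err != nil {
		fmt.Println(err)
	}
}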
@@ -291,6 +313,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			layers = append(layers, l)
 		}
 	}
+
 	if mf != nil {
 		log.Printf("manifest = %#v", mf)
 		for _, l := range mf.Layers {
@@ -320,7 +343,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 			layers = append(layers, layer)
 		case "template", "system", "prompt":
 			fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
-			// remove the prompt layer if one exists
+			// remove the layer if one exists
 			mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
 			layers = removeLayerFromLayers(layers, mediaType)
 
@@ -382,7 +405,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
 
 	// Create a layer for the config object
 	fn(api.ProgressResponse{Status: "creating config layer"})
-	cfg, err := createConfigLayer(digests)
+	cfg, err := createConfigLayer(config, digests)
 	if err != nil {
 		return err
 	}
@@ -429,13 +452,13 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
 	}
 
 	e.opts.EmbeddingOnly = true
-	llm, err := llama.New(e.model, e.opts)
+	llmModel, err := llm.New(e.model, e.opts)
 	if err != nil {
 		return nil, fmt.Errorf("load model to generate embeddings: %v", err)
 	}
 	defer func() {
-		if llm != nil {
-			llm.Close()
+		if llmModel != nil {
+			llmModel.Close()
 		}
 	}()
 
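The renamed handle keeps the existing cleanup shape: close the model on the way out, but only if it was actually created. A small sketch of that defer-with-nil-check pattern, with a hypothetical handle type and constructor standing in for the llm instance:

package main

import (
	"errors"
	"fmt"
)

// handle stands in for the model handle returned by llm.New.
type handle struct{}

func (h *handle) Close() { fmt.Println("model closed") }

// newHandle is hypothetical; it models llm.New succeeding or failing.
func newHandle(ok bool) (*handle, error) {
	if !ok {
		return nil, errors.New("no such model")
	}
	return &handle{}, nil
}

func embed(ok bool) error {
	llmModel, err := newHandle(ok)
	if err != nil {
		return fmt.Errorf("load model to generate embeddings: %v", err)
	}
	defer func() {
		// Only close a handle that exists, mirroring the diff.
		if llmModel != nil {
			llmModel.Close()
		}
	}()

	// ... generate embeddings with llmModel ...
	return nil
}

func main() {
	fmt.Println(embed(true))
	fmt.Println(embed(false))
}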
@@ -479,7 +502,7 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
 				Total:     len(data) - 1,
 				Completed: i,
 			})
-			embed, err := llm.Embedding(d)
+			embed, err := llmModel.Embedding(d)
 			if err != nil {
 				log.Printf("failed to generate embedding for '%s' line %d: %v", filePath, i+1, err)
 				continue
@@ -675,7 +698,7 @@ func getLayerDigests(layers []*LayerReader) ([]string, error) {
 // CreateLayer creates a Layer object from a given file
 func CreateLayer(f io.ReadSeeker) (*LayerReader, error) {
 	digest, size := GetSHA256Digest(f)
-	f.Seek(0, 0)
+	f.Seek(0, io.SeekStart)
 
 	layer := &LayerReader{
 		Layer: Layer{
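f.Seek(0, io.SeekStart) behaves exactly like f.Seek(0, 0); io.SeekStart is simply the named constant for whence 0, so the rewind after hashing reads more clearly. A sketch of that hash-then-rewind pattern; GetSHA256Digest is not part of this diff, so sha256Digest below is only an assumption about its shape:

package main

import (
	"crypto/sha256"
	"fmt"
	"io"
	"strings"
)

// sha256Digest is an illustrative stand-in for GetSHA256Digest: it consumes
// the reader to hash it, so callers must rewind afterwards.
func sha256Digest(r io.Reader) (string, int64, error) {
	h := sha256.New()
	n, err := io.Copy(h, r)
	if err != nil {
		return "", 0, err
	}
	return fmt.Sprintf("sha256:%x", h.Sum(nil)), n, nil
}

func main() {
	f := strings.NewReader("fake layer contents")

	digest, size, err := sha256Digest(f)
	if err != nil {
		panic(err)
	}
	fmt.Println(digest, size)

	// io.SeekStart (== 0) names the whence value that f.Seek(0, 0) used.
	if _, err := f.Seek(0, io.SeekStart); err != nil {
		panic(err)
	}
}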
@@ -767,10 +790,6 @@ func DeleteModel(name string) error {
 		return err
 	}
 
-	if err != nil {
-		return err
-	}
-
 	// only delete the files which are still in the deleteMap
 	for k, v := range deleteMap {
 		if v {
@@ -969,15 +988,10 @@ func pullModelManifest(mp ModelPath, regOpts *RegistryOptions) (*ManifestV2, err
 	return m, err
 }
 
-func createConfigLayer(layers []string) (*LayerReader, error) {
-	// TODO change architecture and OS
-	config := ConfigV2{
-		Architecture: "arm64",
-		OS:           "linux",
-		RootFS: RootFS{
-			Type:    "layers",
-			DiffIDs: layers,
-		},
+func createConfigLayer(config ConfigV2, layers []string) (*LayerReader, error) {
+	config.RootFS = RootFS{
+		Type:    "layers",
+		DiffIDs: layers,
 	}
 
 	configJSON, err := json.Marshal(config)
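createConfigLayer no longer builds the config from scratch; the caller passes one in and only the rootfs section is filled here. Because the parameter is a value, the mutation stays local to the function. A trimmed sketch with stand-in types (the real ConfigV2 and RootFS carry more fields, and the JSON tags are assumed):

package main

import (
	"encoding/json"
	"fmt"
)

type rootFS struct {
	Type    string   `json:"type"`
	DiffIDs []string `json:"diff_ids"`
}

// config is a trimmed, illustrative stand-in for ConfigV2.
type config struct {
	Architecture string `json:"architecture"`
	OS           string `json:"os"`
	RootFS       rootFS `json:"rootfs"`
}

// createConfigLayer mirrors the new signature: the caller supplies the
// config, and only the rootfs section is filled in here. Because cfg is
// passed by value, the caller's copy is left untouched.
func createConfigLayer(cfg config, layers []string) ([]byte, error) {
	cfg.RootFS = rootFS{Type: "layers", DiffIDs: layers}
	return json.Marshal(cfg)
}

func main() {
	cfg := config{Architecture: "amd64", OS: "linux"}
	b, err := createConfigLayer(cfg, []string{"sha256:aaa", "sha256:bbb"})
	if err != nil {
		panic(err)
	}
	fmt.Println(string(b))
	fmt.Println(cfg.RootFS.Type == "") // true: caller's copy unchanged
}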
@@ -21,14 +21,14 @@ import (
 	"gonum.org/v1/gonum/mat"
 
 	"github.com/jmorganca/ollama/api"
-	"github.com/jmorganca/ollama/llama"
+	"github.com/jmorganca/ollama/llm"
 	"github.com/jmorganca/ollama/vector"
 )
 
 var loaded struct {
 	mu sync.Mutex
 
-	llm *llama.LLM
+	llm llm.LLM
 	Embeddings []vector.Embedding
 
 	expireAt time.Time
@@ -63,11 +63,16 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 			loaded.Embeddings = model.Embeddings
 		}
 
-		llm, err := llama.New(model.ModelPath, opts)
+		llmModel, err := llm.New(model.ModelPath, opts)
 		if err != nil {
 			return err
 		}
 
+		// set cache values before modifying opts
+		loaded.llm = llmModel
+		loaded.digest = model.Digest
+		loaded.options = opts
+
 		if opts.NumKeep < 0 {
 			promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
 			if err != nil {
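The moved assignments follow the new comment: the options are cached before NumKeep is recomputed below. Assuming opts is a struct value (api.DefaultOptions() returns one), the assignment copies it, so later changes to the local opts are not reflected in the cached copy. A tiny sketch of that copy semantics:

package main

import "fmt"

// options is a tiny stand-in for api.Options; only NumKeep matters here.
type options struct {
	NumKeep int
}

var cached options

func main() {
	opts := options{NumKeep: -1}

	// Cache before modifying: the assignment copies the struct, so the
	// cached value keeps the caller-supplied settings...
	cached = opts

	// ...and later adjustments to the local copy are not reflected in it.
	opts.NumKeep = 42

	fmt.Println(cached.NumKeep, opts.NumKeep) // -1 42
}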
@@ -79,15 +84,13 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
 				return err
 			}
 
-			tokensWithSystem := llm.Encode(promptWithSystem)
-			tokensNoSystem := llm.Encode(promptNoSystem)
+			tokensWithSystem := llmModel.Encode(promptWithSystem)
+			tokensNoSystem := llmModel.Encode(promptNoSystem)
 
-			llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+			opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
+
+			llmModel.SetOptions(opts)
 		}
-
-		loaded.llm = llm
-		loaded.digest = model.Digest
-		loaded.options = opts
 	}
 	loaded.expireAt = time.Now().Add(sessionDuration)
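NumKeep is now stored on the options and pushed to the model with SetOptions rather than written onto the handle. The value itself is the number of tokens the system prompt contributes, plus one: the prompt is rendered with and without the system prompt, both are tokenized, and the difference is kept. A worked sketch of the arithmetic, with a hypothetical whitespace tokenizer standing in for Encode:

package main

import (
	"fmt"
	"strings"
)

// encode is a hypothetical stand-in for the model's Encode: it just splits
// on whitespace so the arithmetic is easy to follow.
func encode(prompt string) []string {
	return strings.Fields(prompt)
}

func main() {
	promptWithSystem := "You are a helpful assistant. Hello"
	promptNoSystem := "Hello"

	tokensWithSystem := encode(promptWithSystem)
	tokensNoSystem := encode(promptNoSystem)

	// Keep the tokens that belong to the system prompt, plus one extra,
	// mirroring the diff's opts.NumKeep computation.
	numKeep := len(tokensWithSystem) - len(tokensNoSystem) + 1
	fmt.Println(numKeep) // 6
}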