partial decode ggml bin for more info

Author: Michael Yang
Date: 2023-07-21 13:33:56 -07:00
Parent: 5b5cc9c9f1
Commit: fccf8d179f
26 changed files with 336 additions and 69 deletions


@@ -19,7 +19,7 @@ import (
"strings"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llama"
"github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/parser"
"github.com/jmorganca/ollama/vector"
)
@@ -98,9 +98,14 @@ type LayerReader struct {
}
type ConfigV2 struct {
ModelFamily llm.ModelFamily `json:"model_family"`
ModelType llm.ModelType `json:"model_type"`
FileType llm.FileType `json:"file_type"`
RootFS RootFS `json:"rootfs"`
// required by spec
Architecture string `json:"architecture"`
OS string `json:"os"`
RootFS RootFS `json:"rootfs"`
}
type RootFS struct {
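These new fields let the image config record which model family, parameter size, and file (quantization) type a model was built from, next to the spec-required architecture and OS. The later hunks in this file populate them in stages; condensed, using only code that appears elsewhere in this diff, the flow looks like:

    // Seed the spec-required fields, fill in the decoded GGML metadata,
    // then let createConfigLayer attach the RootFS before marshaling.
    config := ConfigV2{Architecture: "amd64", OS: "linux"}

    ggml, err := llm.DecodeGGML(file, llm.ModelFamilyLlama)
    if err != nil {
        return err
    }
    config.ModelFamily = ggml.ModelFamily
    config.ModelType = ggml.ModelType
    config.FileType = ggml.FileType

    cfg, err := createConfigLayer(config, digests) // RootFS is filled in here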
@@ -245,6 +250,11 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
return err
}
config := ConfigV2{
Architecture: "amd64",
OS: "linux",
}
var layers []*LayerReader
params := make(map[string][]string)
embed := EmbeddingParams{fn: fn, opts: api.DefaultOptions()}
@@ -283,6 +293,18 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
}
defer file.Close()
ggml, err := llm.DecodeGGML(file, llm.ModelFamilyLlama)
if err != nil {
return err
}
config.ModelFamily = ggml.ModelFamily
config.ModelType = ggml.ModelType
config.FileType = ggml.FileType
// reset the file
file.Seek(0, io.SeekStart)
l, err := CreateLayer(file)
if err != nil {
return fmt.Errorf("failed to create layer: %v", err)
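This is the "partial decode" from the commit title: instead of loading the whole model through the llama backend, the GGML file's header is read just far enough to learn the model family, parameter size, and quantization, and those values go into the image config before the file is rewound and stored as a layer. The decoder itself isn't shown in this diff; as a rough sketch of what a partial header read can look like for a llama-family GGML file (the layout below is an assumption about "ggjt"-era files, not the actual llm.DecodeGGML implementation):

    // Read only the fixed-size GGML header: magic, version, hyperparameters.
    var header struct {
        Magic   uint32
        Version uint32
        NVocab  int32
        NEmbd   int32
        NMult   int32
        NHead   int32
        NLayer  int32
        NRot    int32
        FType   int32
    }
    if err := binary.Read(file, binary.LittleEndian, &header); err != nil { // encoding/binary
        return err
    }
    // NLayer is enough to bucket the parameter count (32 layers ≈ 7B, 40 ≈ 13B,
    // 60 ≈ 30B, 80 ≈ 65B), and FType identifies the quantization (F16, Q4_0, ...).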
@@ -291,6 +313,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
layers = append(layers, l)
}
}
if mf != nil {
log.Printf("manifest = %#v", mf)
for _, l := range mf.Layers {
@@ -320,7 +343,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
layers = append(layers, layer)
case "template", "system", "prompt":
fn(api.ProgressResponse{Status: fmt.Sprintf("creating model %s layer", c.Name)})
// remove the prompt layer if one exists
// remove the layer if one exists
mediaType := fmt.Sprintf("application/vnd.ollama.image.%s", c.Name)
layers = removeLayerFromLayers(layers, mediaType)
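The comment now matches the code: this branch handles template, system, and prompt layers alike, replacing whichever one already exists. removeLayerFromLayers itself is outside these hunks; a plausible implementation, inferred from how it is called here:

    // Inferred from usage, not part of this diff: drop any existing layer
    // with the given media type so the freshly created one replaces it.
    func removeLayerFromLayers(layers []*LayerReader, mediaType string) []*LayerReader {
        filtered := layers[:0]
        for _, l := range layers {
            if l.MediaType != mediaType {
                filtered = append(filtered, l)
            }
        }
        return filtered
    }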
@@ -382,7 +405,7 @@ func CreateModel(ctx context.Context, name string, path string, fn func(resp api
// Create a layer for the config object
fn(api.ProgressResponse{Status: "creating config layer"})
cfg, err := createConfigLayer(digests)
cfg, err := createConfigLayer(config, digests)
if err != nil {
return err
}
@@ -429,13 +452,13 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
}
e.opts.EmbeddingOnly = true
llm, err := llama.New(e.model, e.opts)
llmModel, err := llm.New(e.model, e.opts)
if err != nil {
return nil, fmt.Errorf("load model to generate embeddings: %v", err)
}
defer func() {
if llm != nil {
llm.Close()
if llmModel != nil {
llmModel.Close()
}
}()
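The embedding path now obtains its model through the new llm package instead of constructing a llama.LLM directly, so this code no longer depends on a single backend. The constructor is not part of this diff; one hypothetical shape consistent with how it is called here is a small factory that decodes the header and dispatches on the model family:

    // Hypothetical constructor shape; only its call sites appear in this diff.
    func New(model string, opts api.Options) (LLM, error) {
        f, err := os.Open(model)
        if err != nil {
            return nil, err
        }
        defer f.Close()

        ggml, err := DecodeGGML(f, ModelFamilyLlama)
        if err != nil {
            return nil, err
        }

        switch ggml.ModelFamily {
        case ModelFamilyLlama:
            return llama.New(model, opts) // assumes the llama backend satisfies LLM
        default:
            return nil, fmt.Errorf("unsupported model family: %v", ggml.ModelFamily)
        }
    }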
@@ -479,7 +502,7 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
Total: len(data) - 1,
Completed: i,
})
embed, err := llm.Embedding(d)
embed, err := llmModel.Embedding(d)
if err != nil {
log.Printf("failed to generate embedding for '%s' line %d: %v", filePath, i+1, err)
continue
@@ -675,7 +698,7 @@ func getLayerDigests(layers []*LayerReader) ([]string, error) {
// CreateLayer creates a Layer object from a given file
func CreateLayer(f io.ReadSeeker) (*LayerReader, error) {
digest, size := GetSHA256Digest(f)
f.Seek(0, 0)
f.Seek(0, io.SeekStart)
layer := &LayerReader{
Layer: Layer{
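Swapping the bare 0 for io.SeekStart changes nothing at runtime (io.SeekStart is the constant 0); it just spells out why the seek exists: computing the digest consumes the reader, so it has to be rewound before the layer is stored. GetSHA256Digest is not shown in this diff; a plausible version consistent with its two return values here:

    // Plausible helper, inferred from usage; hashing reads the stream to EOF,
    // which is why callers rewind with Seek afterwards.
    func GetSHA256Digest(r io.Reader) (string, int) {
        h := sha256.New() // crypto/sha256
        n, err := io.Copy(h, r)
        if err != nil {
            log.Fatal(err)
        }
        return fmt.Sprintf("sha256:%x", h.Sum(nil)), int(n)
    }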
@@ -767,10 +790,6 @@ func DeleteModel(name string) error {
return err
}
if err != nil {
return err
}
// only delete the files which are still in the deleteMap
for k, v := range deleteMap {
if v {
@@ -969,15 +988,10 @@ func pullModelManifest(mp ModelPath, regOpts *RegistryOptions) (*ManifestV2, err
return m, err
}
func createConfigLayer(layers []string) (*LayerReader, error) {
// TODO change architecture and OS
config := ConfigV2{
Architecture: "arm64",
OS: "linux",
RootFS: RootFS{
Type: "layers",
DiffIDs: layers,
},
func createConfigLayer(config ConfigV2, layers []string) (*LayerReader, error) {
config.RootFS = RootFS{
Type: "layers",
DiffIDs: layers,
}
configJSON, err := json.Marshal(config)
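createConfigLayer no longer fabricates its own config (complete with a hard-coded arm64 architecture); it takes the config assembled in CreateModel and only attaches the RootFS before marshaling. The hunk is cut off after the Marshal call; a plausible continuation that turns the JSON into a layer (the real tail of the function is outside this diff):

    if err != nil {
        return nil, err
    }

    // Hash one buffer, hand a fresh one to the layer so its contents stay intact.
    digest, size := GetSHA256Digest(bytes.NewBuffer(configJSON))
    return &LayerReader{
        Layer: Layer{
            MediaType: "application/vnd.docker.container.image.v1+json",
            Digest:    digest,
            Size:      size,
        },
        Reader: bytes.NewBuffer(configJSON),
    }, nil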

View File

@@ -21,14 +21,14 @@ import (
"gonum.org/v1/gonum/mat"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llama"
"github.com/jmorganca/ollama/llm"
"github.com/jmorganca/ollama/vector"
)
var loaded struct {
mu sync.Mutex
llm *llama.LLM
llm llm.LLM
Embeddings []vector.Embedding
expireAt time.Time
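The cached handle changes from the concrete *llama.LLM to the llm.LLM interface, so the server no longer cares which backend produced it. The interface itself is not in these hunks; judging only from the methods called in this commit, it needs at least something like the following (the real definition almost certainly has more, e.g. a prediction method):

    // Minimal interface inferred from call sites in this diff; names and
    // signatures are assumptions, not the actual llm package definition.
    type LLM interface {
        Encode(prompt string) []int
        Embedding(input string) ([]float64, error)
        SetOptions(opts api.Options)
        Close()
    }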
@@ -63,11 +63,16 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
loaded.Embeddings = model.Embeddings
}
llm, err := llama.New(model.ModelPath, opts)
llmModel, err := llm.New(model.ModelPath, opts)
if err != nil {
return err
}
// set cache values before modifying opts
loaded.llm = llmModel
loaded.digest = model.Digest
loaded.options = opts
if opts.NumKeep < 0 {
promptWithSystem, err := model.Prompt(api.GenerateRequest{}, "")
if err != nil {
@@ -79,15 +84,13 @@ func load(model *Model, reqOpts map[string]interface{}, sessionDuration time.Dur
return err
}
tokensWithSystem := llm.Encode(promptWithSystem)
tokensNoSystem := llm.Encode(promptNoSystem)
tokensWithSystem := llmModel.Encode(promptWithSystem)
tokensNoSystem := llmModel.Encode(promptNoSystem)
llm.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
llmModel.SetOptions(opts)
}
loaded.llm = llm
loaded.digest = model.Digest
loaded.options = opts
}
loaded.expireAt = time.Now().Add(sessionDuration)
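The NumKeep handling no longer writes to a field on the llama struct: the value is computed into opts and pushed to the backend through SetOptions, and the cache bookkeeping (loaded.llm, loaded.digest, loaded.options) now happens before opts is adjusted. Reassembled from the hunks above, the post-change logic reads roughly like this (context lines omitted):

    loaded.llm = llmModel
    loaded.digest = model.Digest
    loaded.options = opts

    if opts.NumKeep < 0 {
        tokensWithSystem := llmModel.Encode(promptWithSystem)
        tokensNoSystem := llmModel.Encode(promptNoSystem)

        // the difference is the length of the system prompt, plus one
        opts.NumKeep = len(tokensWithSystem) - len(tokensNoSystem) + 1
        llmModel.SetOptions(opts)
    }

    loaded.expireAt = time.Now().Add(sessionDuration)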