mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
Convert Safetensors to an Ollama model (#2824)
This commit is contained in:
@@ -163,9 +163,9 @@ func DecodeGGML(r io.ReadSeeker) (*GGML, error) {
|
||||
case FILE_MAGIC_GGLA:
|
||||
c = &containerLORA{}
|
||||
case FILE_MAGIC_GGUF_LE:
|
||||
c = &containerGGUF{bo: binary.LittleEndian}
|
||||
c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
|
||||
case FILE_MAGIC_GGUF_BE:
|
||||
c = &containerGGUF{bo: binary.BigEndian}
|
||||
c = &ContainerGGUF{ByteOrder: binary.BigEndian}
|
||||
default:
|
||||
return nil, errors.New("invalid file magic")
|
||||
}
|
||||
|
||||
711
llm/gguf.go
711
llm/gguf.go
@@ -5,12 +5,20 @@ import (
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"regexp"
|
||||
|
||||
"github.com/d4l3k/go-bfloat16"
|
||||
"github.com/pdevine/tensor"
|
||||
"github.com/pdevine/tensor/native"
|
||||
"github.com/x448/float16"
|
||||
|
||||
"github.com/jmorganca/ollama/format"
|
||||
)
|
||||
|
||||
type containerGGUF struct {
|
||||
bo binary.ByteOrder
|
||||
type ContainerGGUF struct {
|
||||
ByteOrder binary.ByteOrder
|
||||
|
||||
Version uint32
|
||||
|
||||
@@ -23,23 +31,28 @@ type containerGGUF struct {
|
||||
NumTensor uint64
|
||||
NumKV uint64
|
||||
}
|
||||
|
||||
V3 struct {
|
||||
NumTensor uint64
|
||||
NumKV uint64
|
||||
}
|
||||
}
|
||||
|
||||
func (c *containerGGUF) Name() string {
|
||||
func (c *ContainerGGUF) Name() string {
|
||||
return "gguf"
|
||||
}
|
||||
|
||||
func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
|
||||
binary.Read(rso, c.bo, &c.Version)
|
||||
func (c *ContainerGGUF) Decode(rso *readSeekOffset) (model, error) {
|
||||
binary.Read(rso, c.ByteOrder, &c.Version)
|
||||
|
||||
switch c.Version {
|
||||
case 1:
|
||||
binary.Read(rso, c.bo, &c.V1)
|
||||
binary.Read(rso, c.ByteOrder, &c.V1)
|
||||
default:
|
||||
binary.Read(rso, c.bo, &c.V2)
|
||||
binary.Read(rso, c.ByteOrder, &c.V2)
|
||||
}
|
||||
|
||||
model := newGGUFModel(c)
|
||||
model := NewGGUFModel(c)
|
||||
if err := model.Decode(rso); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -48,47 +61,61 @@ func (c *containerGGUF) Decode(rso *readSeekOffset) (model, error) {
|
||||
}
|
||||
|
||||
const (
|
||||
ggufTypeUint8 uint32 = iota
|
||||
ggufTypeInt8
|
||||
ggufTypeUint16
|
||||
ggufTypeInt16
|
||||
ggufTypeUint32
|
||||
ggufTypeInt32
|
||||
ggufTypeFloat32
|
||||
ggufTypeBool
|
||||
ggufTypeString
|
||||
ggufTypeArray
|
||||
ggufTypeUint64
|
||||
ggufTypeInt64
|
||||
ggufTypeFloat64
|
||||
_ uint32 = iota
|
||||
GGUFTokenNormal
|
||||
GGUFTokenUnknown
|
||||
GGUFTokenControl
|
||||
GGUFTokenUserDefined
|
||||
GGUFTokenUnused
|
||||
GGUFTokenByte
|
||||
)
|
||||
|
||||
type kv map[string]any
|
||||
const (
|
||||
GGUFTypeUint8 uint32 = iota
|
||||
GGUFTypeInt8
|
||||
GGUFTypeUint16
|
||||
GGUFTypeInt16
|
||||
GGUFTypeUint32
|
||||
GGUFTypeInt32
|
||||
GGUFTypeFloat32
|
||||
GGUFTypeBool
|
||||
GGUFTypeString
|
||||
GGUFTypeArray
|
||||
GGUFTypeUint64
|
||||
GGUFTypeInt64
|
||||
GGUFTypeFloat64
|
||||
)
|
||||
|
||||
type tensor struct {
|
||||
name string
|
||||
kind uint32
|
||||
offset uint64
|
||||
type KV map[string]any
|
||||
|
||||
type Tensor struct {
|
||||
Name string
|
||||
Kind uint32
|
||||
Offset uint64
|
||||
|
||||
// shape is the number of elements in each dimension
|
||||
shape [4]uint64
|
||||
Shape [4]uint64
|
||||
|
||||
FileName string
|
||||
OffsetPadding uint64
|
||||
FileOffsets []uint64
|
||||
}
|
||||
|
||||
func (t tensor) blockSize() uint64 {
|
||||
func (t Tensor) BlockSize() uint64 {
|
||||
switch {
|
||||
case t.kind < 2:
|
||||
case t.Kind < 2:
|
||||
return 1
|
||||
case t.kind < 10:
|
||||
case t.Kind < 10:
|
||||
return 32
|
||||
default:
|
||||
return 256
|
||||
}
|
||||
}
|
||||
|
||||
func (t tensor) typeSize() uint64 {
|
||||
blockSize := t.blockSize()
|
||||
func (t Tensor) TypeSize() uint64 {
|
||||
blockSize := t.BlockSize()
|
||||
|
||||
switch t.kind {
|
||||
switch t.Kind {
|
||||
case 0: // FP32
|
||||
return 4
|
||||
case 1: // FP16
|
||||
@@ -128,31 +155,63 @@ func (t tensor) typeSize() uint64 {
|
||||
}
|
||||
}
|
||||
|
||||
func (t tensor) parameters() uint64 {
|
||||
return t.shape[0] * t.shape[1] * t.shape[2] * t.shape[3]
|
||||
func (t Tensor) Parameters() uint64 {
|
||||
return t.Shape[0] * t.Shape[1] * t.Shape[2] * t.Shape[3]
|
||||
}
|
||||
|
||||
func (t tensor) size() uint64 {
|
||||
return t.parameters() * t.typeSize() / t.blockSize()
|
||||
func (t Tensor) Size() uint64 {
|
||||
return t.Parameters() * t.TypeSize() / t.BlockSize()
|
||||
}
|
||||
|
||||
type ggufModel struct {
|
||||
*containerGGUF
|
||||
func (t Tensor) Repack(data []uint16, heads int) ([]uint16, error) {
|
||||
n := tensor.New(tensor.WithShape(int(t.Shape[0]), int(t.Shape[1])), tensor.WithBacking(data))
|
||||
origShape := n.Shape().Clone()
|
||||
|
||||
kv
|
||||
tensors []tensor
|
||||
// reshape the tensor and swap axes 1 and 2 to unpack the layer for gguf
|
||||
if err := n.Reshape(heads, 2, origShape[0]/heads/2, origShape[1]); err != nil {
|
||||
return []uint16{}, err
|
||||
}
|
||||
|
||||
if err := n.T(0, 2, 1, 3); err != nil {
|
||||
return []uint16{}, err
|
||||
}
|
||||
|
||||
if err := n.Reshape(origShape...); err != nil {
|
||||
return []uint16{}, err
|
||||
}
|
||||
|
||||
if err := n.Transpose(); err != nil {
|
||||
return []uint16{}, err
|
||||
}
|
||||
newN, err := native.SelectU16(n, 1)
|
||||
if err != nil {
|
||||
return []uint16{}, err
|
||||
}
|
||||
|
||||
var fullTensor []uint16
|
||||
for _, v := range newN {
|
||||
fullTensor = append(fullTensor, v...)
|
||||
}
|
||||
return fullTensor, nil
|
||||
}
|
||||
|
||||
type GGUFModel struct {
|
||||
*ContainerGGUF
|
||||
|
||||
KV
|
||||
Tensors []Tensor
|
||||
|
||||
parameters uint64
|
||||
}
|
||||
|
||||
func newGGUFModel(container *containerGGUF) *ggufModel {
|
||||
return &ggufModel{
|
||||
containerGGUF: container,
|
||||
kv: make(kv),
|
||||
func NewGGUFModel(container *ContainerGGUF) *GGUFModel {
|
||||
return &GGUFModel{
|
||||
ContainerGGUF: container,
|
||||
KV: make(KV),
|
||||
}
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumTensor() uint64 {
|
||||
func (llm *GGUFModel) NumTensor() uint64 {
|
||||
if llm.Version == 1 {
|
||||
return uint64(llm.V1.NumTensor)
|
||||
}
|
||||
@@ -160,7 +219,7 @@ func (llm *ggufModel) NumTensor() uint64 {
|
||||
return llm.V2.NumTensor
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumKV() uint64 {
|
||||
func (llm *GGUFModel) NumKV() uint64 {
|
||||
if llm.Version == 1 {
|
||||
return uint64(llm.V1.NumKV)
|
||||
}
|
||||
@@ -168,15 +227,15 @@ func (llm *ggufModel) NumKV() uint64 {
|
||||
return llm.V2.NumKV
|
||||
}
|
||||
|
||||
func (llm *ggufModel) ModelFamily() string {
|
||||
if t, ok := llm.kv["general.architecture"].(string); ok {
|
||||
func (llm *GGUFModel) ModelFamily() string {
|
||||
if t, ok := llm.KV["general.architecture"].(string); ok {
|
||||
return t
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
func (llm *ggufModel) ModelType() string {
|
||||
func (llm *GGUFModel) ModelType() string {
|
||||
if llm.parameters > 0 {
|
||||
return format.HumanNumber(llm.parameters)
|
||||
}
|
||||
@@ -184,15 +243,393 @@ func (llm *ggufModel) ModelType() string {
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
func (llm *ggufModel) FileType() string {
|
||||
if t, ok := llm.kv["general.file_type"].(uint32); ok {
|
||||
func (llm *GGUFModel) FileType() string {
|
||||
if t, ok := llm.KV["general.file_type"].(uint32); ok {
|
||||
return fileType(t)
|
||||
}
|
||||
|
||||
return "unknown"
|
||||
}
|
||||
|
||||
func (llm *ggufModel) Decode(rso *readSeekOffset) error {
|
||||
func (llm *GGUFModel) Encode(f *os.File) error {
|
||||
// this mimics the order of the llama.cpp convert script
|
||||
kOrder := []string{
|
||||
"general.architecture",
|
||||
"general.name",
|
||||
"llama.context_length",
|
||||
"llama.embedding_length",
|
||||
"llama.block_count",
|
||||
"llama.feed_forward_length",
|
||||
"llama.rope.dimension_count",
|
||||
"llama.attention.head_count",
|
||||
"llama.attention.head_count_kv",
|
||||
"llama.attention.layer_norm_rms_epsilon",
|
||||
"llama.rope.freq_base",
|
||||
"general.file_type",
|
||||
"tokenizer.ggml.model",
|
||||
"tokenizer.ggml.tokens",
|
||||
"tokenizer.ggml.scores",
|
||||
"tokenizer.ggml.token_type",
|
||||
"tokenizer.ggml.bos_token_id",
|
||||
"tokenizer.ggml.eos_token_id",
|
||||
"tokenizer.ggml.unknown_token_id",
|
||||
"tokenizer.ggml.add_bos_token",
|
||||
"tokenizer.ggml.add_eos_token",
|
||||
"tokenizer.chat_template",
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, []byte("GGUF")); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint32(3)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumTensor)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(llm.V3.NumKV)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, k := range kOrder {
|
||||
val, ok := llm.KV[k]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(len(k))); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := binary.Write(f, llm.ByteOrder, []byte(k)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
switch v := val.(type) {
|
||||
case uint32:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := llm.writeUint32(f, v); err != nil {
|
||||
return err
|
||||
}
|
||||
case float32:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := llm.writeF32(f, v); err != nil {
|
||||
return err
|
||||
}
|
||||
case bool:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeBool); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := llm.writeBool(f, v); err != nil {
|
||||
return err
|
||||
}
|
||||
case string:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := llm.writeString(f, v); err != nil {
|
||||
return err
|
||||
}
|
||||
case []int32:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeInt32); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, i := range v {
|
||||
if err := llm.writeInt32(f, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case []uint32:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeUint32); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, i := range v {
|
||||
if err := llm.writeUint32(f, i); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case []float32:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeFloat32); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
for _, fl := range v {
|
||||
if err := llm.writeF32(f, fl); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
case []string:
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeArray); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, GGUFTypeString); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(len(v))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, s := range v {
|
||||
if err := llm.writeString(f, s); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// write layer metadata
|
||||
for _, t := range llm.Tensors {
|
||||
if err := llm.writeString(f, t.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// the dimensions of the tensor
|
||||
dims := 1
|
||||
if t.Shape[1] > 0 {
|
||||
dims = 2
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint32(dims)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for i := 0; i < dims; i++ {
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(t.Shape[dims-1-i])); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint32(t.Kind)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(t.Offset)); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
offset, terr := f.Seek(0, io.SeekCurrent)
|
||||
if terr != nil {
|
||||
return terr
|
||||
}
|
||||
slog.Debug(fmt.Sprintf("tensors offset = %x", offset))
|
||||
|
||||
if err := llm.writePadding(f, 32); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var dataFile *os.File
|
||||
var currentFile string
|
||||
var err error
|
||||
for _, t := range llm.Tensors {
|
||||
if currentFile != t.FileName {
|
||||
if f != nil {
|
||||
dataFile.Close()
|
||||
}
|
||||
currentFile = t.FileName
|
||||
dataFile, err = os.Open(t.FileName)
|
||||
if err != nil {
|
||||
fmt.Println(err)
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
dataFile.Seek(int64(t.OffsetPadding+t.FileOffsets[0]), 0)
|
||||
|
||||
pattern := `^blk\.[0-9]+\.attn_(?P<layer>q|k)\.weight$`
|
||||
re, err := regexp.Compile(pattern)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
matches := re.FindAllStringSubmatch(t.Name, -1)
|
||||
if len(matches) > 0 {
|
||||
layerSize := t.FileOffsets[1] - t.FileOffsets[0]
|
||||
|
||||
var err error
|
||||
tData := make([]uint16, layerSize/2)
|
||||
if err = binary.Read(dataFile, llm.ByteOrder, tData); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
layerType := matches[0][re.SubexpIndex("layer")]
|
||||
var heads uint32
|
||||
switch layerType {
|
||||
case "q":
|
||||
heads = llm.KV["llama.attention.head_count"].(uint32)
|
||||
case "k":
|
||||
heads = llm.KV["llama.attention.head_count_kv"].(uint32)
|
||||
if heads == 0 {
|
||||
heads = llm.KV["llama.attention.head_count"].(uint32)
|
||||
}
|
||||
}
|
||||
|
||||
tData, err = t.Repack(tData, int(heads))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
var buf []byte
|
||||
for _, n := range tData {
|
||||
buf = binary.LittleEndian.AppendUint16(buf, n)
|
||||
}
|
||||
|
||||
tempBuf := make([]uint16, len(tData))
|
||||
tDataF32 := bfloat16.DecodeFloat32(buf)
|
||||
for cnt, v := range tDataF32 {
|
||||
tDataF16 := float16.Fromfloat32(v)
|
||||
tempBuf[cnt] = uint16(tDataF16)
|
||||
}
|
||||
|
||||
if err = binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := llm.writePadding(f, 32); err != nil {
|
||||
return err
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
remaining := t.FileOffsets[1] - t.FileOffsets[0]
|
||||
|
||||
bufSize := uint64(10240)
|
||||
var finished bool
|
||||
for {
|
||||
data := make([]byte, min(bufSize, remaining))
|
||||
|
||||
b, err := io.ReadFull(dataFile, data)
|
||||
remaining -= uint64(b)
|
||||
|
||||
if err == io.EOF || remaining <= 0 {
|
||||
finished = true
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// convert bfloat16 -> ieee float32
|
||||
tDataF32 := bfloat16.DecodeFloat32(data)
|
||||
|
||||
switch t.Kind {
|
||||
case 0:
|
||||
if err := binary.Write(f, llm.ByteOrder, tDataF32); err != nil {
|
||||
return err
|
||||
}
|
||||
case 1:
|
||||
// convert float32 -> float16
|
||||
tempBuf := make([]uint16, len(data)/2)
|
||||
for cnt, v := range tDataF32 {
|
||||
tDataF16 := float16.Fromfloat32(v)
|
||||
tempBuf[cnt] = uint16(tDataF16)
|
||||
}
|
||||
if err := binary.Write(f, llm.ByteOrder, tempBuf); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if finished {
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if err := llm.writePadding(f, 32); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
f.Close()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) writePadding(f *os.File, align int64) error {
|
||||
// gguf file padding is defined in https://github.com/ggerganov/ggml/blob/master/docs/gguf.md#file-structure
|
||||
offset, err := f.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
padding := ((offset + align - 1) / align) * align
|
||||
buf := make([]byte, padding-offset)
|
||||
if err := binary.Write(f, llm.ByteOrder, buf); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) writeInt32(f *os.File, v int32) error {
|
||||
if err := binary.Write(f, llm.ByteOrder, v); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) writeUint32(f *os.File, v uint32) error {
|
||||
if err := binary.Write(f, llm.ByteOrder, v); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) writeF32(f *os.File, v float32) error {
|
||||
if err := binary.Write(f, llm.ByteOrder, v); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) writeBool(f *os.File, b bool) error {
|
||||
if err := binary.Write(f, llm.ByteOrder, b); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) writeString(f *os.File, s string) error {
|
||||
if err := binary.Write(f, llm.ByteOrder, uint64(len(s))); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := binary.Write(f, llm.ByteOrder, []byte(s)); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *GGUFModel) Decode(rso *readSeekOffset) error {
|
||||
// decode key-values
|
||||
for i := 0; uint64(i) < llm.NumKV(); i++ {
|
||||
k, err := llm.readString(rso)
|
||||
@@ -204,36 +641,36 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
|
||||
|
||||
var v any
|
||||
switch vtype {
|
||||
case ggufTypeUint8:
|
||||
case GGUFTypeUint8:
|
||||
v = llm.readU8(rso)
|
||||
case ggufTypeInt8:
|
||||
case GGUFTypeInt8:
|
||||
v = llm.readI8(rso)
|
||||
case ggufTypeUint16:
|
||||
case GGUFTypeUint16:
|
||||
v = llm.readU16(rso)
|
||||
case ggufTypeInt16:
|
||||
case GGUFTypeInt16:
|
||||
v = llm.readI16(rso)
|
||||
case ggufTypeUint32:
|
||||
case GGUFTypeUint32:
|
||||
v = llm.readU32(rso)
|
||||
case ggufTypeInt32:
|
||||
case GGUFTypeInt32:
|
||||
v = llm.readI32(rso)
|
||||
case ggufTypeUint64:
|
||||
case GGUFTypeUint64:
|
||||
v = llm.readU64(rso)
|
||||
case ggufTypeInt64:
|
||||
case GGUFTypeInt64:
|
||||
v = llm.readI64(rso)
|
||||
case ggufTypeFloat32:
|
||||
case GGUFTypeFloat32:
|
||||
v = llm.readF32(rso)
|
||||
case ggufTypeFloat64:
|
||||
case GGUFTypeFloat64:
|
||||
v = llm.readF64(rso)
|
||||
case ggufTypeBool:
|
||||
case GGUFTypeBool:
|
||||
v = llm.readBool(rso)
|
||||
case ggufTypeString:
|
||||
case GGUFTypeString:
|
||||
s, err := llm.readString(rso)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
v = s
|
||||
case ggufTypeArray:
|
||||
case GGUFTypeArray:
|
||||
a, err := llm.readArray(rso)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -244,7 +681,7 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
|
||||
return fmt.Errorf("invalid type: %d", vtype)
|
||||
}
|
||||
|
||||
llm.kv[k] = v
|
||||
llm.KV[k] = v
|
||||
}
|
||||
|
||||
// decode tensors
|
||||
@@ -262,33 +699,33 @@ func (llm *ggufModel) Decode(rso *readSeekOffset) error {
|
||||
shape[i] = llm.readU64(rso)
|
||||
}
|
||||
|
||||
tensor := tensor{
|
||||
name: name,
|
||||
kind: llm.readU32(rso),
|
||||
offset: llm.readU64(rso),
|
||||
shape: shape,
|
||||
tensor := Tensor{
|
||||
Name: name,
|
||||
Kind: llm.readU32(rso),
|
||||
Offset: llm.readU64(rso),
|
||||
Shape: shape,
|
||||
}
|
||||
|
||||
llm.tensors = append(llm.tensors, tensor)
|
||||
llm.parameters += tensor.parameters()
|
||||
llm.Tensors = append(llm.Tensors, tensor)
|
||||
llm.parameters += tensor.Parameters()
|
||||
}
|
||||
|
||||
alignment, ok := llm.kv["general.alignment"].(uint32)
|
||||
alignment, ok := llm.KV["general.alignment"].(uint32)
|
||||
if !ok {
|
||||
alignment = 32
|
||||
}
|
||||
|
||||
rso.Seek(int64(alignment)-rso.offset%int64(alignment), io.SeekCurrent)
|
||||
for _, tensor := range llm.tensors {
|
||||
padded := (int64(tensor.size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
|
||||
for _, tensor := range llm.Tensors {
|
||||
padded := (int64(tensor.Size()) + int64(alignment) - 1) & ^(int64(alignment) - 1)
|
||||
rso.Seek(padded, io.SeekCurrent)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumLayers() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
|
||||
func (llm *GGUFModel) NumLayers() uint32 {
|
||||
value, exists := llm.KV[fmt.Sprintf("%s.block_count", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
@@ -296,8 +733,8 @@ func (llm *ggufModel) NumLayers() uint32 {
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumHead() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
|
||||
func (llm *GGUFModel) NumHead() uint32 {
|
||||
value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
@@ -305,8 +742,8 @@ func (llm *ggufModel) NumHead() uint32 {
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumEmbed() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
|
||||
func (llm *GGUFModel) NumEmbed() uint32 {
|
||||
value, exists := llm.KV[fmt.Sprintf("%s.embedding_length", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
@@ -314,8 +751,8 @@ func (llm *ggufModel) NumEmbed() uint32 {
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumHeadKv() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
|
||||
func (llm *GGUFModel) NumHeadKv() uint32 {
|
||||
value, exists := llm.KV[fmt.Sprintf("%s.attention.head_count_kv", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
@@ -323,8 +760,8 @@ func (llm *ggufModel) NumHeadKv() uint32 {
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumCtx() uint32 {
|
||||
value, exists := llm.kv[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
|
||||
func (llm *GGUFModel) NumCtx() uint32 {
|
||||
value, exists := llm.KV[fmt.Sprintf("%s.context_length", llm.ModelFamily())]
|
||||
if !exists {
|
||||
return 0
|
||||
}
|
||||
@@ -332,7 +769,7 @@ func (llm *ggufModel) NumCtx() uint32 {
|
||||
return value.(uint32)
|
||||
}
|
||||
|
||||
func (llm *ggufModel) NumGQA() uint32 {
|
||||
func (llm *GGUFModel) NumGQA() uint32 {
|
||||
numHeadKv := llm.NumHeadKv()
|
||||
if numHeadKv == 0 {
|
||||
return 0
|
||||
@@ -341,75 +778,75 @@ func (llm *ggufModel) NumGQA() uint32 {
|
||||
return llm.NumHead() / numHeadKv
|
||||
}
|
||||
|
||||
func (llm ggufModel) readU8(r io.Reader) uint8 {
|
||||
func (llm GGUFModel) readU8(r io.Reader) uint8 {
|
||||
var u8 uint8
|
||||
binary.Read(r, llm.bo, &u8)
|
||||
binary.Read(r, llm.ByteOrder, &u8)
|
||||
return u8
|
||||
}
|
||||
|
||||
func (llm ggufModel) readI8(r io.Reader) int8 {
|
||||
func (llm GGUFModel) readI8(r io.Reader) int8 {
|
||||
var i8 int8
|
||||
binary.Read(r, llm.bo, &i8)
|
||||
binary.Read(r, llm.ByteOrder, &i8)
|
||||
return i8
|
||||
}
|
||||
|
||||
func (llm ggufModel) readU16(r io.Reader) uint16 {
|
||||
func (llm GGUFModel) readU16(r io.Reader) uint16 {
|
||||
var u16 uint16
|
||||
binary.Read(r, llm.bo, &u16)
|
||||
binary.Read(r, llm.ByteOrder, &u16)
|
||||
return u16
|
||||
}
|
||||
|
||||
func (llm ggufModel) readI16(r io.Reader) int16 {
|
||||
func (llm GGUFModel) readI16(r io.Reader) int16 {
|
||||
var i16 int16
|
||||
binary.Read(r, llm.bo, &i16)
|
||||
binary.Read(r, llm.ByteOrder, &i16)
|
||||
return i16
|
||||
}
|
||||
|
||||
func (llm ggufModel) readU32(r io.Reader) uint32 {
|
||||
func (llm GGUFModel) readU32(r io.Reader) uint32 {
|
||||
var u32 uint32
|
||||
binary.Read(r, llm.bo, &u32)
|
||||
binary.Read(r, llm.ByteOrder, &u32)
|
||||
return u32
|
||||
}
|
||||
|
||||
func (llm ggufModel) readI32(r io.Reader) int32 {
|
||||
func (llm GGUFModel) readI32(r io.Reader) int32 {
|
||||
var i32 int32
|
||||
binary.Read(r, llm.bo, &i32)
|
||||
binary.Read(r, llm.ByteOrder, &i32)
|
||||
return i32
|
||||
}
|
||||
|
||||
func (llm ggufModel) readU64(r io.Reader) uint64 {
|
||||
func (llm GGUFModel) readU64(r io.Reader) uint64 {
|
||||
var u64 uint64
|
||||
binary.Read(r, llm.bo, &u64)
|
||||
binary.Read(r, llm.ByteOrder, &u64)
|
||||
return u64
|
||||
}
|
||||
|
||||
func (llm ggufModel) readI64(r io.Reader) int64 {
|
||||
func (llm GGUFModel) readI64(r io.Reader) int64 {
|
||||
var i64 int64
|
||||
binary.Read(r, llm.bo, &i64)
|
||||
binary.Read(r, llm.ByteOrder, &i64)
|
||||
return i64
|
||||
}
|
||||
|
||||
func (llm ggufModel) readF32(r io.Reader) float32 {
|
||||
func (llm GGUFModel) readF32(r io.Reader) float32 {
|
||||
var f32 float32
|
||||
binary.Read(r, llm.bo, &f32)
|
||||
binary.Read(r, llm.ByteOrder, &f32)
|
||||
return f32
|
||||
}
|
||||
|
||||
func (llm ggufModel) readF64(r io.Reader) float64 {
|
||||
func (llm GGUFModel) readF64(r io.Reader) float64 {
|
||||
var f64 float64
|
||||
binary.Read(r, llm.bo, &f64)
|
||||
binary.Read(r, llm.ByteOrder, &f64)
|
||||
return f64
|
||||
}
|
||||
|
||||
func (llm ggufModel) readBool(r io.Reader) bool {
|
||||
func (llm GGUFModel) readBool(r io.Reader) bool {
|
||||
var b bool
|
||||
binary.Read(r, llm.bo, &b)
|
||||
binary.Read(r, llm.ByteOrder, &b)
|
||||
return b
|
||||
}
|
||||
|
||||
func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
|
||||
func (llm GGUFModel) readStringV1(r io.Reader) (string, error) {
|
||||
var nameLength uint32
|
||||
binary.Read(r, llm.bo, &nameLength)
|
||||
binary.Read(r, llm.ByteOrder, &nameLength)
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
||||
@@ -422,13 +859,13 @@ func (llm ggufModel) readStringV1(r io.Reader) (string, error) {
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
func (llm ggufModel) readString(r io.Reader) (string, error) {
|
||||
func (llm GGUFModel) readString(r io.Reader) (string, error) {
|
||||
if llm.Version == 1 {
|
||||
return llm.readStringV1(r)
|
||||
}
|
||||
|
||||
var nameLength uint64
|
||||
binary.Read(r, llm.bo, &nameLength)
|
||||
binary.Read(r, llm.ByteOrder, &nameLength)
|
||||
|
||||
var b bytes.Buffer
|
||||
if _, err := io.CopyN(&b, r, int64(nameLength)); err != nil {
|
||||
@@ -438,29 +875,29 @@ func (llm ggufModel) readString(r io.Reader) (string, error) {
|
||||
return b.String(), nil
|
||||
}
|
||||
|
||||
func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
|
||||
func (llm *GGUFModel) readArrayV1(r io.Reader) (arr []any, err error) {
|
||||
atype := llm.readU32(r)
|
||||
n := llm.readU32(r)
|
||||
|
||||
for i := 0; uint32(i) < n; i++ {
|
||||
switch atype {
|
||||
case ggufTypeUint8:
|
||||
case GGUFTypeUint8:
|
||||
arr = append(arr, llm.readU8(r))
|
||||
case ggufTypeInt8:
|
||||
case GGUFTypeInt8:
|
||||
arr = append(arr, llm.readI8(r))
|
||||
case ggufTypeUint16:
|
||||
case GGUFTypeUint16:
|
||||
arr = append(arr, llm.readU16(r))
|
||||
case ggufTypeInt16:
|
||||
case GGUFTypeInt16:
|
||||
arr = append(arr, llm.readI16(r))
|
||||
case ggufTypeUint32:
|
||||
case GGUFTypeUint32:
|
||||
arr = append(arr, llm.readU32(r))
|
||||
case ggufTypeInt32:
|
||||
case GGUFTypeInt32:
|
||||
arr = append(arr, llm.readI32(r))
|
||||
case ggufTypeFloat32:
|
||||
case GGUFTypeFloat32:
|
||||
arr = append(arr, llm.readF32(r))
|
||||
case ggufTypeBool:
|
||||
case GGUFTypeBool:
|
||||
arr = append(arr, llm.readBool(r))
|
||||
case ggufTypeString:
|
||||
case GGUFTypeString:
|
||||
s, err := llm.readStringV1(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -475,7 +912,7 @@ func (llm *ggufModel) readArrayV1(r io.Reader) (arr []any, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
|
||||
func (llm *GGUFModel) readArray(r io.Reader) (arr []any, err error) {
|
||||
if llm.Version == 1 {
|
||||
return llm.readArrayV1(r)
|
||||
}
|
||||
@@ -485,29 +922,29 @@ func (llm *ggufModel) readArray(r io.Reader) (arr []any, err error) {
|
||||
|
||||
for i := 0; uint64(i) < n; i++ {
|
||||
switch atype {
|
||||
case ggufTypeUint8:
|
||||
case GGUFTypeUint8:
|
||||
arr = append(arr, llm.readU8(r))
|
||||
case ggufTypeInt8:
|
||||
case GGUFTypeInt8:
|
||||
arr = append(arr, llm.readI8(r))
|
||||
case ggufTypeUint16:
|
||||
case GGUFTypeUint16:
|
||||
arr = append(arr, llm.readU16(r))
|
||||
case ggufTypeInt16:
|
||||
case GGUFTypeInt16:
|
||||
arr = append(arr, llm.readI16(r))
|
||||
case ggufTypeUint32:
|
||||
case GGUFTypeUint32:
|
||||
arr = append(arr, llm.readU32(r))
|
||||
case ggufTypeInt32:
|
||||
case GGUFTypeInt32:
|
||||
arr = append(arr, llm.readI32(r))
|
||||
case ggufTypeUint64:
|
||||
case GGUFTypeUint64:
|
||||
arr = append(arr, llm.readU64(r))
|
||||
case ggufTypeInt64:
|
||||
case GGUFTypeInt64:
|
||||
arr = append(arr, llm.readI64(r))
|
||||
case ggufTypeFloat32:
|
||||
case GGUFTypeFloat32:
|
||||
arr = append(arr, llm.readF32(r))
|
||||
case ggufTypeFloat64:
|
||||
case GGUFTypeFloat64:
|
||||
arr = append(arr, llm.readF64(r))
|
||||
case ggufTypeBool:
|
||||
case GGUFTypeBool:
|
||||
arr = append(arr, llm.readBool(r))
|
||||
case ggufTypeString:
|
||||
case GGUFTypeString:
|
||||
s, err := llm.readString(r)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
||||
Reference in New Issue
Block a user