Add gemma safetensors conversion (#3250)

Co-authored-by: Michael Yang <mxyng@pm.me>
This commit is contained in:
Patrick Devine
2024-03-28 18:54:01 -07:00
committed by GitHub
parent 97ae517fbf
commit 5a5efee46b
11 changed files with 949 additions and 833 deletions

View File

@@ -7,16 +7,18 @@ import (
"slices"
)
type ContainerGGLA struct {
type containerGGLA struct {
version uint32
}
func (c *ContainerGGLA) Name() string {
func (c *containerGGLA) Name() string {
return "ggla"
}
func (c *ContainerGGLA) Decode(rs io.ReadSeeker) (model, error) {
binary.Read(rs, binary.LittleEndian, &c.version)
func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
return nil, err
}
switch c.version {
case 1:
@@ -24,26 +26,26 @@ func (c *ContainerGGLA) Decode(rs io.ReadSeeker) (model, error) {
return nil, errors.New("invalid version")
}
model := newModelGGLA(c)
model := newGGLA(c)
err := model.decode(rs)
return model, err
}
type ModelGGLA struct {
*ContainerGGLA
type ggla struct {
*containerGGLA
kv KV
tensors []Tensor
}
func newModelGGLA(container *ContainerGGLA) *ModelGGLA {
return &ModelGGLA{
ContainerGGLA: container,
func newGGLA(container *containerGGLA) *ggla {
return &ggla{
containerGGLA: container,
kv: make(KV),
}
}
func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
func (m *ggla) decode(rs io.ReadSeeker) error {
var r uint32
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
return err
@@ -109,7 +111,7 @@ func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
t.Offset = uint64(offset)
if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
if _, err := rs.Seek(int64(t.size()), io.SeekCurrent); err != nil {
return err
}
@@ -117,46 +119,46 @@ func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
}
}
func (m *ModelGGLA) KV() KV {
func (m *ggla) KV() KV {
return m.kv
}
func (m *ModelGGLA) Tensor() []Tensor {
func (m *ggla) Tensor() []Tensor {
return m.tensors
}
func (*ModelGGLA) ModelFamily() string {
func (*ggla) ModelFamily() string {
return "ggla"
}
func (*ModelGGLA) ModelType() string {
func (*ggla) ModelType() string {
panic("not implemented")
}
func (*ModelGGLA) FileType() string {
func (*ggla) FileType() string {
panic("not implemented")
}
func (*ModelGGLA) NumLayers() uint32 {
func (*ggla) NumLayers() uint32 {
panic("not implemented")
}
func (*ModelGGLA) NumGQA() uint32 {
func (*ggla) NumGQA() uint32 {
panic("not implemented")
}
func (*ModelGGLA) NumEmbed() uint32 {
func (*ggla) NumEmbed() uint32 {
panic("not implemented")
}
func (*ModelGGLA) NumHead() uint32 {
func (*ggla) NumHead() uint32 {
panic("not implemented")
}
func (*ModelGGLA) NumHeadKv() uint32 {
func (*ggla) NumHeadKv() uint32 {
panic("not implemented")
}
func (*ModelGGLA) NumCtx() uint32 {
func (*ggla) NumCtx() uint32 {
panic("not implemented")
}

View File

@@ -101,6 +101,85 @@ type model interface {
NumCtx() uint32
}
// KV is a string-keyed map whose values may be of any type.
type KV map[string]any
// Tensor describes a single tensor entry: its name, numeric type code,
// offset and shape.
type Tensor struct {
	// Name is the tensor's name.
	Name string
	// Kind is the numeric type code; typeSize labels the codes it recognises.
	Kind uint32
	// Offset is presumably the position of the tensor's data in its file.
	// NOTE(review): confirm against the decoders that populate it.
	Offset uint64

	// Shape is the number of elements in each dimension
	Shape []uint64

	// WriterTo is embedded so that a Tensor can be used as an io.WriterTo.
	// It is nil unless the code constructing the Tensor sets it.
	io.WriterTo
}

// blockSize returns the number of elements grouped into one block for the
// tensor's Kind: 1 for kinds 0 and 1, 32 for kinds 2 through 9, and 256 for
// every other kind.
func (t Tensor) blockSize() uint64 {
	switch {
	case t.Kind < 2:
		return 1
	case t.Kind < 10:
		return 32
	default:
		return 256
	}
}

// typeSize returns the number of bytes occupied by one block of the tensor's
// Kind. Kinds that are not listed here yield 0.
func (t Tensor) typeSize() uint64 {
	blockSize := t.blockSize()

	switch t.Kind {
	case 0: // FP32
		return 4
	case 1: // FP16
		return 2
	case 2: // Q4_0
		return 2 + blockSize/2
	case 3: // Q4_1
		return 2 + 2 + blockSize/2
	case 6: // Q5_0
		return 2 + 4 + blockSize/2
	case 7: // Q5_1
		return 2 + 2 + 4 + blockSize/2
	case 8: // Q8_0
		return 2 + blockSize
	case 9: // Q8_1
		return 4 + 4 + blockSize
	case 10: // Q2_K
		return blockSize/16 + blockSize/4 + 2 + 2
	case 11: // Q3_K
		return blockSize/8 + blockSize/4 + 12 + 2
	case 12: // Q4_K
		return 2 + 2 + 12 + blockSize/2
	case 13: // Q5_K
		return 2 + 2 + 12 + blockSize/8 + blockSize/2
	case 14: // Q6_K
		return blockSize/2 + blockSize/4 + blockSize/16 + 2
	case 15: // Q8_K
		// ggml's block_q8_K starts with a 4-byte float32 scale (not a 2-byte
		// half like the other K-quants), followed by one int8 per element
		// and one int16 sum per group of 16 elements.
		return 4 + blockSize + 2*blockSize/16
	case 16: // IQ2_XXS
		return 2 + 2*blockSize/8
	case 17: // IQ2_XS
		return 2 + 2*blockSize/8 + blockSize/32
	case 18: // IQ3_XXS
		return 2 + 3*blockSize/8
	default:
		return 0
	}
}

// parameters returns the total element count, i.e. the product of every
// entry in Shape. An empty Shape yields 1.
func (t Tensor) parameters() uint64 {
	var count uint64 = 1
	for _, n := range t.Shape {
		count *= n
	}

	return count
}

// size returns the tensor's data size in bytes: the element count scaled by
// bytes-per-block over elements-per-block. The division is integer division,
// so the result truncates when the element count is not a multiple of the
// block size.
func (t Tensor) size() uint64 {
	return t.parameters() * t.typeSize() / t.blockSize()
}
type container interface {
Name() string
Decode(io.ReadSeeker) (model, error)
@@ -133,11 +212,11 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, error) {
case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
return nil, ErrUnsupportedFormat
case FILE_MAGIC_GGLA:
c = &ContainerGGLA{}
c = &containerGGLA{}
case FILE_MAGIC_GGUF_LE:
c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
c = &containerGGUF{ByteOrder: binary.LittleEndian}
case FILE_MAGIC_GGUF_BE:
c = &ContainerGGUF{ByteOrder: binary.BigEndian}
c = &containerGGUF{ByteOrder: binary.BigEndian}
default:
return nil, errors.New("invalid file magic")
}

File diff suppressed because it is too large Load Diff