Add gemma safetensors conversion (#3250)

Co-authored-by: Michael Yang <mxyng@pm.me>
2025-12-10 07:46:59 +00:00 · 2024-03-28 18:54:01 -07:00
parent 97ae517fbf
commit 5a5efee46b
11 changed files with 949 additions and 833 deletions
--- a/llm/ggla.go
+++ b/llm/ggla.go
@@ -7,16 +7,18 @@ import (
 	"slices"
 )

-type ContainerGGLA struct {
+type containerGGLA struct {
 	version uint32
 }

-func (c *ContainerGGLA) Name() string {
+func (c *containerGGLA) Name() string {
 	return "ggla"
 }

-func (c *ContainerGGLA) Decode(rs io.ReadSeeker) (model, error) {
-	binary.Read(rs, binary.LittleEndian, &c.version)
+func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
+	if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
+		return nil, err
+	}

 	switch c.version {
 	case 1:
@@ -24,26 +26,26 @@ func (c *ContainerGGLA) Decode(rs io.ReadSeeker) (model, error) {
 		return nil, errors.New("invalid version")
 	}

-	model := newModelGGLA(c)
+	model := newGGLA(c)
 	err := model.decode(rs)
 	return model, err
 }

-type ModelGGLA struct {
-	*ContainerGGLA
+type ggla struct {
+	*containerGGLA

 	kv      KV
 	tensors []Tensor
 }

-func newModelGGLA(container *ContainerGGLA) *ModelGGLA {
-	return &ModelGGLA{
-		ContainerGGLA: container,
+func newGGLA(container *containerGGLA) *ggla {
+	return &ggla{
+		containerGGLA: container,
 		kv:            make(KV),
 	}
 }

-func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
+func (m *ggla) decode(rs io.ReadSeeker) error {
 	var r uint32
 	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
 		return err
@@ -109,7 +111,7 @@ func (m *ModelGGLA) decode(rs io.ReadSeeker) error {

 		t.Offset = uint64(offset)

-		if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
+		if _, err := rs.Seek(int64(t.size()), io.SeekCurrent); err != nil {
 			return err
 		}

@@ -117,46 +119,46 @@ func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
 	}
 }

-func (m *ModelGGLA) KV() KV {
+func (m *ggla) KV() KV {
 	return m.kv
 }

-func (m *ModelGGLA) Tensor() []Tensor {
+func (m *ggla) Tensor() []Tensor {
 	return m.tensors
 }

-func (*ModelGGLA) ModelFamily() string {
+func (*ggla) ModelFamily() string {
 	return "ggla"
 }

-func (*ModelGGLA) ModelType() string {
+func (*ggla) ModelType() string {
 	panic("not implemented")
 }

-func (*ModelGGLA) FileType() string {
+func (*ggla) FileType() string {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumLayers() uint32 {
+func (*ggla) NumLayers() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumGQA() uint32 {
+func (*ggla) NumGQA() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumEmbed() uint32 {
+func (*ggla) NumEmbed() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumHead() uint32 {
+func (*ggla) NumHead() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumHeadKv() uint32 {
+func (*ggla) NumHeadKv() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumCtx() uint32 {
+func (*ggla) NumCtx() uint32 {
 	panic("not implemented")
 }
--- a/llm/ggml.go
+++ b/llm/ggml.go
@@ -101,6 +101,85 @@ type model interface {
 	NumCtx() uint32
 }

+type KV map[string]any
+
+type Tensor struct {
+	Name   string
+	Kind   uint32
+	Offset uint64
+
+	// Shape is the number of elements in each dimension
+	Shape []uint64
+
+	io.WriterTo
+}
+
+func (t Tensor) blockSize() uint64 {
+	switch {
+	case t.Kind < 2:
+		return 1
+	case t.Kind < 10:
+		return 32
+	default:
+		return 256
+	}
+}
+
+func (t Tensor) typeSize() uint64 {
+	blockSize := t.blockSize()
+
+	switch t.Kind {
+	case 0: // FP32
+		return 4
+	case 1: // FP16
+		return 2
+	case 2: // Q4_0
+		return 2 + blockSize/2
+	case 3: // Q4_1
+		return 2 + 2 + blockSize/2
+	case 6: // Q5_0
+		return 2 + 4 + blockSize/2
+	case 7: // Q5_1
+		return 2 + 2 + 4 + blockSize/2
+	case 8: // Q8_0
+		return 2 + blockSize
+	case 9: // Q8_1
+		return 4 + 4 + blockSize
+	case 10: // Q2_K
+		return blockSize/16 + blockSize/4 + 2 + 2
+	case 11: // Q3_K
+		return blockSize/8 + blockSize/4 + 12 + 2
+	case 12: // Q4_K
+		return 2 + 2 + 12 + blockSize/2
+	case 13: // Q5_K
+		return 2 + 2 + 12 + blockSize/8 + blockSize/2
+	case 14: // Q6_K
+		return blockSize/2 + blockSize/4 + blockSize/16 + 2
+	case 15: // Q8_K
+		return 2 + blockSize + 2*blockSize/16
+	case 16: // IQ2_XXS
+		return 2 + 2*blockSize/8
+	case 17: // IQ2_XS
+		return 2 + 2*blockSize/8 + blockSize/32
+	case 18: // IQ3_XXS
+		return 2 + 3*blockSize/8
+	default:
+		return 0
+	}
+}
+
+func (t Tensor) parameters() uint64 {
+	var count uint64 = 1
+	for _, n := range t.Shape {
+		count *= n
+	}
+	return count
+}
+
+func (t Tensor) size() uint64 {
+	return t.parameters() * t.typeSize() / t.blockSize()
+}
+
 type container interface {
 	Name() string
 	Decode(io.ReadSeeker) (model, error)
@@ -133,11 +212,11 @@ func DecodeGGML(rs io.ReadSeeker) (*GGML, error) {
 	case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
 		return nil, ErrUnsupportedFormat
 	case FILE_MAGIC_GGLA:
-		c = &ContainerGGLA{}
+		c = &containerGGLA{}
 	case FILE_MAGIC_GGUF_LE:
-		c = &ContainerGGUF{ByteOrder: binary.LittleEndian}
+		c = &containerGGUF{ByteOrder: binary.LittleEndian}
 	case FILE_MAGIC_GGUF_BE:
-		c = &ContainerGGUF{ByteOrder: binary.BigEndian}
+		c = &containerGGUF{ByteOrder: binary.BigEndian}
 	default:
 		return nil, errors.New("invalid file magic")
 	}
--- a/llm/gguf.go
+++ b/llm/gguf.go