Add gemma safetensors conversion (#3250)

Co-authored-by: Michael Yang <mxyng@pm.me>
2025-12-11 16:26:59 +00:00 · 2024-03-28 18:54:01 -07:00
parent 97ae517fbf
commit 5a5efee46b
11 changed files with 949 additions and 833 deletions
--- a/llm/ggla.go
+++ b/llm/ggla.go
@@ -7,16 +7,18 @@ import (
 	"slices"
 )

-type ContainerGGLA struct {
+type containerGGLA struct {
 	version uint32
 }

-func (c *ContainerGGLA) Name() string {
+func (c *containerGGLA) Name() string {
 	return "ggla"
 }

-func (c *ContainerGGLA) Decode(rs io.ReadSeeker) (model, error) {
-	binary.Read(rs, binary.LittleEndian, &c.version)
+func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
+	if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
+		return nil, err
+	}

 	switch c.version {
 	case 1:
@@ -24,26 +26,26 @@ func (c *ContainerGGLA) Decode(rs io.ReadSeeker) (model, error) {
 		return nil, errors.New("invalid version")
 	}

-	model := newModelGGLA(c)
+	model := newGGLA(c)
 	err := model.decode(rs)
 	return model, err
 }

-type ModelGGLA struct {
-	*ContainerGGLA
+type ggla struct {
+	*containerGGLA

 	kv      KV
 	tensors []Tensor
 }

-func newModelGGLA(container *ContainerGGLA) *ModelGGLA {
-	return &ModelGGLA{
-		ContainerGGLA: container,
+func newGGLA(container *containerGGLA) *ggla {
+	return &ggla{
+		containerGGLA: container,
 		kv:            make(KV),
 	}
 }

-func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
+func (m *ggla) decode(rs io.ReadSeeker) error {
 	var r uint32
 	if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
 		return err
@@ -109,7 +111,7 @@ func (m *ModelGGLA) decode(rs io.ReadSeeker) error {

 		t.Offset = uint64(offset)

-		if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
+		if _, err := rs.Seek(int64(t.size()), io.SeekCurrent); err != nil {
 			return err
 		}

@@ -117,46 +119,46 @@ func (m *ModelGGLA) decode(rs io.ReadSeeker) error {
 	}
 }

-func (m *ModelGGLA) KV() KV {
+func (m *ggla) KV() KV {
 	return m.kv
 }

-func (m *ModelGGLA) Tensor() []Tensor {
+func (m *ggla) Tensor() []Tensor {
 	return m.tensors
 }

-func (*ModelGGLA) ModelFamily() string {
+func (*ggla) ModelFamily() string {
 	return "ggla"
 }

-func (*ModelGGLA) ModelType() string {
+func (*ggla) ModelType() string {
 	panic("not implemented")
 }

-func (*ModelGGLA) FileType() string {
+func (*ggla) FileType() string {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumLayers() uint32 {
+func (*ggla) NumLayers() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumGQA() uint32 {
+func (*ggla) NumGQA() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumEmbed() uint32 {
+func (*ggla) NumEmbed() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumHead() uint32 {
+func (*ggla) NumHead() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumHeadKv() uint32 {
+func (*ggla) NumHeadKv() uint32 {
 	panic("not implemented")
 }

-func (*ModelGGLA) NumCtx() uint32 {
+func (*ggla) NumCtx() uint32 {
 	panic("not implemented")
 }