split `FROM` into one or more models

This commit is contained in:
Michael Yang
2023-11-24 11:57:20 -08:00
parent 7232f1fa41
commit 2cb0fa7d40
3 changed files with 179 additions and 71 deletions

View File

@@ -23,26 +23,24 @@ type containerGGUF struct {
NumTensor uint64
NumKV uint64
}
parameters uint64
}
// Name returns the identifier of this container format, the constant
// string "gguf".
func (c *containerGGUF) Name() string {
return "gguf"
}
func (c *containerGGUF) Decode(r io.Reader) (model, error) {
binary.Read(r, c.bo, &c.Version)
func (c *containerGGUF) Decode(ro *readOffset) (model, error) {
binary.Read(ro, c.bo, &c.Version)
switch c.Version {
case 1:
binary.Read(r, c.bo, &c.V1)
binary.Read(ro, c.bo, &c.V1)
default:
binary.Read(r, c.bo, &c.V2)
binary.Read(ro, c.bo, &c.V2)
}
model := newGGUFModel(c)
if err := model.Decode(r); err != nil {
if err := model.Decode(ro); err != nil {
return nil, err
}
@@ -67,9 +65,23 @@ const (
type kv map[string]any
// tensor holds the metadata recorded for a single tensor entry while
// decoding a GGUF file. It describes the tensor; it does not hold the
// tensor's data.
type tensor struct {
// name is the tensor's name string as read from the file.
name string
// kind is the numeric type id read for the tensor. The decoder switches
// on it to choose a block size and per-block byte size (for example
// 0 is FP32, 1 is FP16, 2 is Q4_0).
kind uint32
// offset is the 64-bit offset value read for the tensor.
// NOTE(review): presumably relative to the start of the aligned tensor
// data section — confirm against the GGUF specification.
offset uint64
// size is the tensor's data size in bytes, computed by the decoder as
// (product of shape) * typeSize / blockSize. It is 0 for a kind the
// decoder has no typeSize for.
size uint64
// shape is the number of elements in each dimension
// (dimensions not present in the file are left at 1 by the decoder)
shape [4]uint64
}
// ggufModel is the decoded form of a GGUF file: the embedded container
// header, the key-value metadata, and the per-tensor metadata gathered by
// Decode.
type ggufModel struct {
// containerGGUF is the container passed to newGGUFModel.
// NOTE(review): assumed to be stored here as-is; newGGUFModel's body is
// not visible in this view.
*containerGGUF
// kv is the embedded map of decoded metadata keys to values; Decode
// reads "general.alignment" from it.
kv

// tensors lists one entry per tensor, appended in the order they are
// decoded.
tensors []tensor

// parameters is the running total of element counts (product of each
// tensor's shape) accumulated across all decoded tensors.
parameters uint64
}
func newGGUFModel(container *containerGGUF) *ggufModel {
@@ -142,49 +154,49 @@ func (llm *ggufModel) FileType() string {
return "unknown"
}
func (llm *ggufModel) Decode(r io.Reader) error {
func (llm *ggufModel) Decode(ro *readOffset) error {
// decode key-values
for i := 0; uint64(i) < llm.NumKV(); i++ {
k, err := llm.readString(r)
k, err := llm.readString(ro)
if err != nil {
return err
}
vtype := llm.readU32(r)
vtype := llm.readU32(ro)
var v any
switch vtype {
case ggufTypeUint8:
v = llm.readU8(r)
v = llm.readU8(ro)
case ggufTypeInt8:
v = llm.readI8(r)
v = llm.readI8(ro)
case ggufTypeUint16:
v = llm.readU16(r)
v = llm.readU16(ro)
case ggufTypeInt16:
v = llm.readI16(r)
v = llm.readI16(ro)
case ggufTypeUint32:
v = llm.readU32(r)
v = llm.readU32(ro)
case ggufTypeInt32:
v = llm.readI32(r)
v = llm.readI32(ro)
case ggufTypeUint64:
v = llm.readU64(r)
v = llm.readU64(ro)
case ggufTypeInt64:
v = llm.readI64(r)
v = llm.readI64(ro)
case ggufTypeFloat32:
v = llm.readF32(r)
v = llm.readF32(ro)
case ggufTypeFloat64:
v = llm.readF64(r)
v = llm.readF64(ro)
case ggufTypeBool:
v = llm.readBool(r)
v = llm.readBool(ro)
case ggufTypeString:
s, err := llm.readString(r)
s, err := llm.readString(ro)
if err != nil {
return err
}
v = s
case ggufTypeArray:
a, err := llm.readArray(r)
a, err := llm.readArray(ro)
if err != nil {
return err
}
@@ -199,21 +211,84 @@ func (llm *ggufModel) Decode(r io.Reader) error {
// decode tensors
for i := 0; uint64(i) < llm.NumTensor(); i++ {
if _, err := llm.readString(r); err != nil {
name, err := llm.readString(ro)
if err != nil {
return err
}
dimensions := llm.readU32(r)
dims := llm.readU32(ro)
var elements uint64 = 1
for i := 0; uint32(i) < dimensions; i++ {
elements *= llm.readU64(r)
shape := [4]uint64{1, 1, 1, 1}
for i := 0; uint32(i) < dims; i++ {
shape[i] = llm.readU64(ro)
}
llm.readU32(r) // type
llm.readU64(r) // offset
kind := llm.readU32(ro)
offset := llm.readU64(ro)
llm.parameters += elements
var blockSize uint64
switch {
case kind < 2:
blockSize = 1
case kind < 10:
blockSize = 32
default:
blockSize = 256
}
var typeSize uint64
switch kind {
case 0: // FP32
typeSize = 4
case 1: // FP16
typeSize = 2
case 2: // Q4_0
typeSize = 2 + blockSize/2
case 3: // Q4_1
typeSize = 2 + 2 + blockSize/2
case 6: // Q5_0
typeSize = 2 + 4 + blockSize/2
case 7: // Q5_1
typeSize = 2 + 2 + 4 + blockSize/2
case 8: // Q8_0
typeSize = 2 + blockSize
case 9: // Q8_1
typeSize = 4 + 4 + blockSize
case 10: // Q2_K
typeSize = blockSize/16 + blockSize/4 + 2 + 2
case 11: // Q3_K
typeSize = blockSize/8 + blockSize/4 + 12 + 2
case 12: // Q4_K
typeSize = 2 + 2 + 12 + blockSize/2
case 13: // Q5_K
typeSize = 2 + 2 + 12 + blockSize/8 + blockSize/2
case 14: // Q6_K
typeSize = blockSize/2 + blockSize/4 + blockSize/16 + 2
}
parameters := shape[0] * shape[1] * shape[2] * shape[3]
size := parameters * typeSize / blockSize
llm.tensors = append(llm.tensors, tensor{
name: name,
kind: kind,
offset: offset,
size: size,
shape: shape,
})
llm.parameters += parameters
}
alignment, ok := llm.kv["general.alignment"].(uint32)
if !ok {
alignment = 32
}
io.CopyN(io.Discard, ro, int64(alignment)-ro.offset%int64(alignment))
for _, tensor := range llm.tensors {
padded := (int64(tensor.size) + int64(alignment) - 1) & ^(int64(alignment) - 1)
io.CopyN(io.Discard, ro, padded)
}
return nil