mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
Merge branch 'main' into drifkin/array-head-count-simple
This commit is contained in:
@@ -15,6 +15,7 @@ import (
|
||||
type GGML struct {
|
||||
container
|
||||
model
|
||||
Length int64
|
||||
}
|
||||
|
||||
type model interface {
|
||||
@@ -170,6 +171,8 @@ func (kv KV) OllamaEngineRequired() bool {
|
||||
"gemma3",
|
||||
"mistral3",
|
||||
"llama4",
|
||||
"mllama",
|
||||
"qwen25vl",
|
||||
}, kv.Architecture())
|
||||
}
|
||||
|
||||
@@ -429,12 +432,12 @@ func DetectContentType(b []byte) string {
|
||||
//
|
||||
// It collects array values for arrays with a size less than or equal to
|
||||
// maxArraySize. If the maxArraySize is negative, all arrays are collected.
|
||||
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
|
||||
func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, error) {
|
||||
rs = bufioutil.NewBufferedSeeker(rs, 32<<10)
|
||||
|
||||
var magic uint32
|
||||
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
|
||||
return nil, 0, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var c container
|
||||
@@ -444,24 +447,25 @@ func Decode(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
|
||||
case FILE_MAGIC_GGUF_BE:
|
||||
c = &containerGGUF{ByteOrder: binary.BigEndian, maxArraySize: maxArraySize}
|
||||
default:
|
||||
return nil, 0, errors.New("invalid file magic")
|
||||
return nil, errors.New("invalid file magic")
|
||||
}
|
||||
|
||||
model, err := c.Decode(rs)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
offset, err := rs.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// final model type
|
||||
return &GGML{
|
||||
container: c,
|
||||
model: model,
|
||||
}, offset, nil
|
||||
Length: offset,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (f GGML) GraphSize(context, batch uint64, numParallel int, kvCacheType string) (kv []uint64, partialOffload, fullOffload uint64) {
|
||||
@@ -693,6 +697,20 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
|
||||
graphSize = 4 * (imageSize*imageSize*numChannels +
|
||||
embeddingLength*patchSize +
|
||||
numPatches*numPatches*headCount)
|
||||
case "qwen25vl":
|
||||
maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
|
||||
|
||||
numPatches := maxPixels / (patchSize * patchSize)
|
||||
|
||||
graphSize = 4 * (maxPixels*numChannels + // Original image storage
|
||||
// Normalized pixels
|
||||
maxPixels*numChannels +
|
||||
// Patches storage (numPatches * channels * patchSize^2)
|
||||
numPatches*numChannels*patchSize*patchSize +
|
||||
// Self-attention calculations
|
||||
numPatches*numPatches*headCount +
|
||||
// Additional buffer for processing
|
||||
embeddingLength*numPatches)
|
||||
case "llama4":
|
||||
// vision graph is computed independently in the same schedule
|
||||
// and is negligible compared to the worst case text graph
|
||||
|
||||
@@ -527,23 +527,17 @@ func WriteGGUF(f *os.File, kv KV, ts []*Tensor) error {
|
||||
return err
|
||||
}
|
||||
|
||||
keys := slices.Collect(maps.Keys(kv))
|
||||
slices.Sort(keys)
|
||||
|
||||
for _, key := range keys {
|
||||
for _, key := range slices.Sorted(maps.Keys(kv)) {
|
||||
if err := ggufWriteKV(f, key, kv[key]); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
slices.SortStableFunc(ts, func(a, b *Tensor) int {
|
||||
if i, j := a.block(), b.block(); i < 0 && j > 0 {
|
||||
return 1
|
||||
} else if i > 0 && j < 0 {
|
||||
return -1
|
||||
} else {
|
||||
if i, j := a.block(), b.block(); i > 0 && j > 0 {
|
||||
return cmp.Compare(i, j)
|
||||
}
|
||||
return cmp.Compare(a.Name, b.Name)
|
||||
})
|
||||
|
||||
var s uint64
|
||||
|
||||
@@ -2,62 +2,82 @@ package ggml
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"math/rand/v2"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/google/go-cmp/cmp"
|
||||
)
|
||||
|
||||
func TestWriteGGUF(t *testing.T) {
|
||||
w, err := os.CreateTemp(t.TempDir(), "*.bin")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
r := rand.New(rand.NewPCG(0, 0))
|
||||
for range 8 {
|
||||
t.Run("shuffle", func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
if err := WriteGGUF(w, KV{
|
||||
"general.alignment": uint32(16),
|
||||
}, []*Tensor{
|
||||
{Name: "test.0", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
|
||||
{Name: "test.1", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
|
||||
{Name: "test.2", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
|
||||
{Name: "test.3", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
|
||||
{Name: "test.4", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
|
||||
{Name: "test.5", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(slices.Repeat([]byte{0}, 2*3*4))},
|
||||
}); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
ts := []*Tensor{
|
||||
{Name: "token_embd.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "blk.0.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "blk.1.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "blk.2.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "blk.3.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "blk.4.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "blk.5.attn_norm.weight", Shape: []uint64{2, 3}, WriterTo: bytes.NewBuffer(make([]byte, 2*3))},
|
||||
{Name: "output_norm.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))},
|
||||
{Name: "output.weight", Shape: []uint64{3, 2}, WriterTo: bytes.NewBuffer(make([]byte, 3*2))},
|
||||
}
|
||||
|
||||
r, err := os.Open(w.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
r.Shuffle(len(ts), func(i, j int) {
|
||||
ts[i], ts[j] = ts[j], ts[i]
|
||||
})
|
||||
|
||||
ff, _, err := Decode(r, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
w, err := os.CreateTemp(t.TempDir(), strings.ReplaceAll(t.Name(), "/", "_")+"*.bin")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer w.Close()
|
||||
|
||||
if diff := cmp.Diff(ff.KV(), KV{
|
||||
"general.alignment": uint32(16),
|
||||
"general.parameter_count": uint64(36),
|
||||
}); diff != "" {
|
||||
t.Errorf("Mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
if err := WriteGGUF(w, KV{
|
||||
"general.alignment": uint32(16),
|
||||
}, ts); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(ff.Tensors(), Tensors{
|
||||
Offset: 336,
|
||||
items: []*Tensor{
|
||||
{Name: "test.0", Offset: 0, Shape: []uint64{2, 3}},
|
||||
{Name: "test.1", Offset: 32, Shape: []uint64{2, 3}},
|
||||
{Name: "test.2", Offset: 64, Shape: []uint64{2, 3}},
|
||||
{Name: "test.3", Offset: 96, Shape: []uint64{2, 3}},
|
||||
{Name: "test.4", Offset: 128, Shape: []uint64{2, 3}},
|
||||
{Name: "test.5", Offset: 160, Shape: []uint64{2, 3}},
|
||||
},
|
||||
}, cmp.AllowUnexported(Tensors{})); diff != "" {
|
||||
t.Errorf("Mismatch (-want +got):\n%s", diff)
|
||||
r, err := os.Open(w.Name())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer r.Close()
|
||||
|
||||
ff, err := Decode(r, 0)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(KV{
|
||||
"general.alignment": uint32(16),
|
||||
"general.parameter_count": uint64(54),
|
||||
}, ff.KV()); diff != "" {
|
||||
t.Errorf("Mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
|
||||
if diff := cmp.Diff(Tensors{
|
||||
Offset: 608,
|
||||
items: []*Tensor{
|
||||
{Name: "blk.0.attn_norm.weight", Offset: 0, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.1.attn_norm.weight", Offset: 32, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.2.attn_norm.weight", Offset: 64, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.3.attn_norm.weight", Offset: 96, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.4.attn_norm.weight", Offset: 128, Shape: []uint64{2, 3}},
|
||||
{Name: "blk.5.attn_norm.weight", Offset: 160, Shape: []uint64{2, 3}},
|
||||
{Name: "output.weight", Offset: 192, Shape: []uint64{3, 2}},
|
||||
{Name: "output_norm.weight", Offset: 224, Shape: []uint64{3, 2}},
|
||||
{Name: "token_embd.weight", Offset: 256, Shape: []uint64{2, 3}},
|
||||
},
|
||||
}, ff.Tensors(), cmp.AllowUnexported(Tensors{})); diff != "" {
|
||||
t.Errorf("Mismatch (-want +got):\n%s", diff)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
125
fs/ggml/type.go
125
fs/ggml/type.go
@@ -12,42 +12,42 @@ type FileType uint32
|
||||
const (
|
||||
FileTypeF32 FileType = iota
|
||||
FileTypeF16
|
||||
FileTypeQ4_0
|
||||
FileTypeQ4_1
|
||||
fileTypeQ4_0
|
||||
fileTypeQ4_1
|
||||
fileTypeQ4_1_F16 // unused by GGML
|
||||
fileTypeQ4_2 // unused by GGML
|
||||
fileTypeQ4_3 // unused by GGML
|
||||
FileTypeQ8_0
|
||||
FileTypeQ5_0
|
||||
FileTypeQ5_1
|
||||
FileTypeQ2_K
|
||||
FileTypeQ3_K_S
|
||||
FileTypeQ3_K_M
|
||||
FileTypeQ3_K_L
|
||||
fileTypeQ5_0
|
||||
fileTypeQ5_1
|
||||
fileTypeQ2_K
|
||||
fileTypeQ3_K_S
|
||||
fileTypeQ3_K_M
|
||||
fileTypeQ3_K_L
|
||||
FileTypeQ4_K_S
|
||||
FileTypeQ4_K_M
|
||||
FileTypeQ5_K_S
|
||||
FileTypeQ5_K_M
|
||||
FileTypeQ6_K
|
||||
fileTypeIQ2_XXS // not supported by ollama
|
||||
fileTypeIQ2_XS // not supported by ollama
|
||||
FileTypeQ2_K_S
|
||||
fileTypeIQ3_XS // not supported by ollama
|
||||
fileTypeIQ3_XXS // not supported by ollama
|
||||
fileTypeIQ1_S // not supported by ollama
|
||||
fileTypeIQ4_NL // not supported by ollama
|
||||
fileTypeIQ3_S // not supported by ollama
|
||||
fileTypeIQ3_M // not supported by ollama
|
||||
fileTypeIQ2_S // not supported by ollama
|
||||
fileTypeIQ2_M // not supported by ollama
|
||||
fileTypeIQ4_XS // not supported by ollama
|
||||
fileTypeIQ1_M // not supported by ollama
|
||||
fileTypeQ5_K_S
|
||||
fileTypeQ5_K_M
|
||||
fileTypeQ6_K
|
||||
fileTypeIQ2_XXS
|
||||
fileTypeIQ2_XS
|
||||
fileTypeQ2_K_S
|
||||
fileTypeIQ3_XS
|
||||
fileTypeIQ3_XXS
|
||||
fileTypeIQ1_S
|
||||
fileTypeIQ4_NL
|
||||
fileTypeIQ3_S
|
||||
fileTypeIQ3_M
|
||||
fileTypeIQ2_S
|
||||
fileTypeIQ2_M
|
||||
fileTypeIQ4_XS
|
||||
fileTypeIQ1_M
|
||||
FileTypeBF16
|
||||
fileTypeQ4_0_4_4 // unused by GGML
|
||||
fileTypeQ4_0_4_8 // unused by GGML
|
||||
fileTypeQ4_0_8_8 // unused by GGML
|
||||
fileTypeTQ1_0 // not supported by ollama
|
||||
fileTypeTQ2_0 // not supported by ollama
|
||||
fileTypeTQ1_0
|
||||
fileTypeTQ2_0
|
||||
|
||||
FileTypeUnknown = 1024
|
||||
)
|
||||
@@ -60,36 +60,12 @@ func ParseFileType(s string) (FileType, error) {
|
||||
return FileTypeF32, nil
|
||||
case "F16":
|
||||
return FileTypeF16, nil
|
||||
case "Q4_0":
|
||||
return FileTypeQ4_0, nil
|
||||
case "Q4_1":
|
||||
return FileTypeQ4_1, nil
|
||||
case "Q8_0":
|
||||
return FileTypeQ8_0, nil
|
||||
case "Q5_0":
|
||||
return FileTypeQ5_0, nil
|
||||
case "Q5_1":
|
||||
return FileTypeQ5_1, nil
|
||||
case "Q2_K":
|
||||
return FileTypeQ2_K, nil
|
||||
case "Q3_K_S":
|
||||
return FileTypeQ3_K_S, nil
|
||||
case "Q3_K_M":
|
||||
return FileTypeQ3_K_M, nil
|
||||
case "Q3_K_L":
|
||||
return FileTypeQ3_K_L, nil
|
||||
case "Q4_K_S":
|
||||
return FileTypeQ4_K_S, nil
|
||||
case "Q4_K_M", "Q4_K":
|
||||
return FileTypeQ4_K_M, nil
|
||||
case "Q5_K_S":
|
||||
return FileTypeQ5_K_S, nil
|
||||
case "Q5_K_M", "Q5_K":
|
||||
return FileTypeQ5_K_M, nil
|
||||
case "Q6_K":
|
||||
return FileTypeQ6_K, nil
|
||||
case "Q2_K_S":
|
||||
return FileTypeQ2_K_S, nil
|
||||
case "BF16":
|
||||
return FileTypeBF16, nil
|
||||
default:
|
||||
@@ -111,40 +87,41 @@ func ParseFileType(s string) (FileType, error) {
|
||||
}
|
||||
|
||||
func (t FileType) String() string {
|
||||
// Note: this routine will return a broader set of file types for existing models
|
||||
switch t {
|
||||
case FileTypeF32:
|
||||
return "F32"
|
||||
case FileTypeF16:
|
||||
return "F16"
|
||||
case FileTypeQ4_0:
|
||||
case fileTypeQ4_0:
|
||||
return "Q4_0"
|
||||
case FileTypeQ4_1:
|
||||
case fileTypeQ4_1:
|
||||
return "Q4_1"
|
||||
case FileTypeQ8_0:
|
||||
return "Q8_0"
|
||||
case FileTypeQ5_0:
|
||||
case fileTypeQ5_0:
|
||||
return "Q5_0"
|
||||
case FileTypeQ5_1:
|
||||
case fileTypeQ5_1:
|
||||
return "Q5_1"
|
||||
case FileTypeQ2_K:
|
||||
case fileTypeQ2_K:
|
||||
return "Q2_K"
|
||||
case FileTypeQ3_K_S:
|
||||
case fileTypeQ3_K_S:
|
||||
return "Q3_K_S"
|
||||
case FileTypeQ3_K_M:
|
||||
case fileTypeQ3_K_M:
|
||||
return "Q3_K_M"
|
||||
case FileTypeQ3_K_L:
|
||||
case fileTypeQ3_K_L:
|
||||
return "Q3_K_L"
|
||||
case FileTypeQ4_K_S:
|
||||
return "Q4_K_S"
|
||||
case FileTypeQ4_K_M:
|
||||
return "Q4_K_M"
|
||||
case FileTypeQ5_K_S:
|
||||
case fileTypeQ5_K_S:
|
||||
return "Q5_K_S"
|
||||
case FileTypeQ5_K_M:
|
||||
case fileTypeQ5_K_M:
|
||||
return "Q5_K_M"
|
||||
case FileTypeQ6_K:
|
||||
case fileTypeQ6_K:
|
||||
return "Q6_K"
|
||||
case FileTypeQ2_K_S:
|
||||
case fileTypeQ2_K_S:
|
||||
return "Q2_K_S"
|
||||
case FileTypeBF16:
|
||||
return "BF16"
|
||||
@@ -163,35 +140,35 @@ func (ftype FileType) ToTensorType() TensorType {
|
||||
return TensorTypeF32
|
||||
case FileTypeF16:
|
||||
return TensorTypeF16
|
||||
case FileTypeQ4_0:
|
||||
case fileTypeQ4_0:
|
||||
return TensorTypeQ4_0
|
||||
case FileTypeQ4_1:
|
||||
case fileTypeQ4_1:
|
||||
return TensorTypeQ4_1
|
||||
case FileTypeQ8_0:
|
||||
return TensorTypeQ8_0
|
||||
case FileTypeQ5_0:
|
||||
case fileTypeQ5_0:
|
||||
return TensorTypeQ5_0
|
||||
case FileTypeQ5_1:
|
||||
case fileTypeQ5_1:
|
||||
return TensorTypeQ5_1
|
||||
case FileTypeQ2_K:
|
||||
case fileTypeQ2_K:
|
||||
return TensorTypeQ2_K
|
||||
case FileTypeQ3_K_S:
|
||||
case fileTypeQ3_K_S:
|
||||
return TensorTypeQ3_K
|
||||
case FileTypeQ3_K_M:
|
||||
case fileTypeQ3_K_M:
|
||||
return TensorTypeQ3_K
|
||||
case FileTypeQ3_K_L:
|
||||
case fileTypeQ3_K_L:
|
||||
return TensorTypeQ3_K
|
||||
case FileTypeQ4_K_S:
|
||||
return TensorTypeQ4_K
|
||||
case FileTypeQ4_K_M:
|
||||
return TensorTypeQ4_K
|
||||
case FileTypeQ5_K_S:
|
||||
case fileTypeQ5_K_S:
|
||||
return TensorTypeQ5_K
|
||||
case FileTypeQ5_K_M:
|
||||
case fileTypeQ5_K_M:
|
||||
return TensorTypeQ5_K
|
||||
case FileTypeQ6_K:
|
||||
case fileTypeQ6_K:
|
||||
return TensorTypeQ6_K
|
||||
case FileTypeQ2_K_S:
|
||||
case fileTypeQ2_K_S:
|
||||
return TensorTypeQ2_K
|
||||
case FileTypeBF16:
|
||||
return TensorTypeBF16
|
||||
|
||||
Reference in New Issue
Block a user