feat: incremental gguf parser (#10822)

* incremental gguf parser
* gguf: update test to not rely on gguf on disc
* re-use existing create gguf
* read capabilities from gguf kv
* kv exists
* update tests
* s/doneFunc/successFunc/g
* new buffered reader

---------

Co-authored-by: Bruce MacDonald <brucewmacdonald@gmail.com>
This commit is contained in:
Michael Yang
2025-06-12 11:04:11 -07:00
committed by GitHub
parent 45f56355d5
commit 6b04cad7e8
13 changed files with 1357 additions and 164 deletions

View File

@@ -1,123 +1,42 @@
package server
import (
"bytes"
"encoding/binary"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"github.com/ollama/ollama/fs/ggml"
"github.com/ollama/ollama/template"
"github.com/ollama/ollama/types/model"
)
// Constants for GGUF magic bytes and version
var (
ggufMagic = []byte{0x47, 0x47, 0x55, 0x46} // "GGUF"
ggufVer = uint32(3) // Version 3
)
// Helper function to create mock GGUF data
func createMockGGUFData(architecture string, vision bool) []byte {
var buf bytes.Buffer
// Write GGUF header
buf.Write(ggufMagic)
binary.Write(&buf, binary.LittleEndian, ggufVer)
// Write tensor count (0 for our test)
var numTensors uint64 = 0
binary.Write(&buf, binary.LittleEndian, numTensors)
// Calculate number of metadata entries
numMetaEntries := uint64(1) // architecture entry
if vision {
numMetaEntries++
}
// Add embedding entry if architecture is "bert"
if architecture == "bert" {
numMetaEntries++
}
binary.Write(&buf, binary.LittleEndian, numMetaEntries)
// Write architecture metadata
archKey := "general.architecture"
keyLen := uint64(len(archKey))
binary.Write(&buf, binary.LittleEndian, keyLen)
buf.WriteString(archKey)
// String type (8)
var strType uint32 = 8
binary.Write(&buf, binary.LittleEndian, strType)
// String length
strLen := uint64(len(architecture))
binary.Write(&buf, binary.LittleEndian, strLen)
buf.WriteString(architecture)
if vision {
visionKey := architecture + ".vision.block_count"
keyLen = uint64(len(visionKey))
binary.Write(&buf, binary.LittleEndian, keyLen)
buf.WriteString(visionKey)
// uint32 type (4)
var uint32Type uint32 = 4
binary.Write(&buf, binary.LittleEndian, uint32Type)
// uint32 value (1)
var countVal uint32 = 1
binary.Write(&buf, binary.LittleEndian, countVal)
}
// Write embedding metadata if architecture is "bert"
if architecture == "bert" {
poolKey := architecture + ".pooling_type"
keyLen = uint64(len(poolKey))
binary.Write(&buf, binary.LittleEndian, keyLen)
buf.WriteString(poolKey)
// uint32 type (4)
var uint32Type uint32 = 4
binary.Write(&buf, binary.LittleEndian, uint32Type)
// uint32 value (1)
var poolingVal uint32 = 1
binary.Write(&buf, binary.LittleEndian, poolingVal)
}
return buf.Bytes()
}
func TestModelCapabilities(t *testing.T) {
// Create a temporary directory for test files
tempDir := t.TempDir()
// Create completion model (llama architecture without vision)
completionModelPath, _ := createBinFile(t, ggml.KV{
"general.architecture": "llama",
}, []*ggml.Tensor{})
// Create different types of mock model files
completionModelPath := filepath.Join(tempDir, "model.bin")
visionModelPath := filepath.Join(tempDir, "vision_model.bin")
embeddingModelPath := filepath.Join(tempDir, "embedding_model.bin")
// Create a simple model file for tests that don't depend on GGUF content
simpleModelPath := filepath.Join(tempDir, "simple_model.bin")
// Create vision model (llama architecture with vision block count)
visionModelPath, _ := createBinFile(t, ggml.KV{
"general.architecture": "llama",
"llama.vision.block_count": uint32(1),
}, []*ggml.Tensor{})
if err := errors.Join(
os.WriteFile(completionModelPath, createMockGGUFData("llama", false), 0o644),
os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
); err != nil {
t.Fatalf("Failed to create model files: %v", err)
}
// Create embedding model (bert architecture with pooling type)
embeddingModelPath, _ := createBinFile(t, ggml.KV{
"general.architecture": "bert",
"bert.pooling_type": uint32(1),
}, []*ggml.Tensor{})
toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
if err != nil {
t.Fatalf("Failed to parse template: %v", err)
}
chatTemplate, err := template.Parse("{{ .prompt }}")
if err != nil {
t.Fatalf("Failed to parse template: %v", err)
}
toolsTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}")
if err != nil {
t.Fatalf("Failed to parse template: %v", err)
@@ -145,21 +64,13 @@ func TestModelCapabilities(t *testing.T) {
},
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools, model.CapabilityInsert},
},
{
name: "model with tools and insert capability",
model: Model{
ModelPath: simpleModelPath,
Template: toolsInsertTemplate,
},
expectedCaps: []model.Capability{model.CapabilityTools, model.CapabilityInsert},
},
{
name: "model with tools capability",
model: Model{
ModelPath: simpleModelPath,
ModelPath: completionModelPath,
Template: toolsTemplate,
},
expectedCaps: []model.Capability{model.CapabilityTools},
expectedCaps: []model.Capability{model.CapabilityCompletion, model.CapabilityTools},
},
{
name: "model with vision capability",
@@ -224,29 +135,33 @@ func TestModelCapabilities(t *testing.T) {
}
func TestModelCheckCapabilities(t *testing.T) {
// Create a temporary directory for test files
tempDir := t.TempDir()
// Create simple model file for tests that don't depend on GGUF content
completionModelPath, _ := createBinFile(t, ggml.KV{
"general.architecture": "llama",
}, []*ggml.Tensor{})
visionModelPath := filepath.Join(tempDir, "vision_model.bin")
simpleModelPath := filepath.Join(tempDir, "model.bin")
embeddingModelPath := filepath.Join(tempDir, "embedding_model.bin")
// Create vision model (llama architecture with vision block count)
visionModelPath, _ := createBinFile(t, ggml.KV{
"general.architecture": "llama",
"llama.vision.block_count": uint32(1),
}, []*ggml.Tensor{})
if err := errors.Join(
os.WriteFile(simpleModelPath, []byte("dummy model data"), 0o644),
os.WriteFile(visionModelPath, createMockGGUFData("llama", true), 0o644),
os.WriteFile(embeddingModelPath, createMockGGUFData("bert", false), 0o644),
); err != nil {
t.Fatalf("Failed to create model files: %v", err)
}
// Create embedding model (bert architecture with pooling type)
embeddingModelPath, _ := createBinFile(t, ggml.KV{
"general.architecture": "bert",
"bert.pooling_type": uint32(1),
}, []*ggml.Tensor{})
toolsInsertTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}{{ if .suffix }}{{ .suffix }}{{ end }}")
if err != nil {
t.Fatalf("Failed to parse template: %v", err)
}
chatTemplate, err := template.Parse("{{ .prompt }}")
if err != nil {
t.Fatalf("Failed to parse template: %v", err)
}
toolsTemplate, err := template.Parse("{{ .prompt }}{{ if .tools }}{{ .tools }}{{ end }}")
if err != nil {
t.Fatalf("Failed to parse template: %v", err)
@@ -261,7 +176,7 @@ func TestModelCheckCapabilities(t *testing.T) {
{
name: "completion model without tools capability",
model: Model{
ModelPath: simpleModelPath,
ModelPath: completionModelPath,
Template: chatTemplate,
},
checkCaps: []model.Capability{model.CapabilityTools},
@@ -270,7 +185,7 @@ func TestModelCheckCapabilities(t *testing.T) {
{
name: "model with all needed capabilities",
model: Model{
ModelPath: simpleModelPath,
ModelPath: completionModelPath,
Template: toolsInsertTemplate,
},
checkCaps: []model.Capability{model.CapabilityTools, model.CapabilityInsert},
@@ -278,7 +193,7 @@ func TestModelCheckCapabilities(t *testing.T) {
{
name: "model missing insert capability",
model: Model{
ModelPath: simpleModelPath,
ModelPath: completionModelPath,
Template: toolsTemplate,
},
checkCaps: []model.Capability{model.CapabilityInsert},
@@ -287,7 +202,7 @@ func TestModelCheckCapabilities(t *testing.T) {
{
name: "model missing vision capability",
model: Model{
ModelPath: simpleModelPath,
ModelPath: completionModelPath,
Template: toolsTemplate,
},
checkCaps: []model.Capability{model.CapabilityVision},
@@ -312,7 +227,7 @@ func TestModelCheckCapabilities(t *testing.T) {
{
name: "unknown capability",
model: Model{
ModelPath: simpleModelPath,
ModelPath: completionModelPath,
Template: chatTemplate,
},
checkCaps: []model.Capability{"unknown"},