Reapply "feat: incremental gguf parser (#10822)" (#11114) (#11119)

* Reapply "feat: incremental gguf parser (#10822)" (#11114)

This reverts commit a6e64fbdf2.

* fix older ggufs
This commit is contained in:
Michael Yang
2025-06-20 11:11:40 -07:00
committed by GitHub
parent 87b7af6cee
commit 0a066cfd91
13 changed files with 1357 additions and 164 deletions

View File

@@ -112,11 +112,7 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
b.ctx, b.ctxDone = context.WithCancel(ctx)
t.Helper()
f, err := os.CreateTemp(t.TempDir(), modelName)
require.NoError(t, err)
defer f.Close()
require.NoError(t, ggml.WriteGGUF(f, ggml.KV{
p, _ := createBinFile(t, ggml.KV{
"general.architecture": "llama",
"llama.context_length": uint32(32),
"llama.embedding_length": uint32(4096),
@@ -129,14 +125,14 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
}, []*ggml.Tensor{
{Name: "blk.0.attn.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
{Name: "output.weight", Kind: uint32(0), Offset: uint64(0), Shape: []uint64{1, 1, 1, 1}, WriterTo: bytes.NewReader(make([]byte, 32))},
}))
require.NoError(t, err)
fname := f.Name()
model := &Model{Name: modelName, ModelPath: fname}
b.f, err = llm.LoadModel(model.ModelPath, 0)
require.NoError(t, err)
})
model := &Model{Name: modelName, ModelPath: p}
f, err := llm.LoadModel(model.ModelPath, 0)
if err != nil {
t.Fatal(err)
}
b.f = f
if duration == nil {
duration = &api.Duration{Duration: 5 * time.Millisecond}
}