Move quantization to new backend (#10363)

* Move quantization logic to GGML via new backend

This moves the model aware logic to Go code and calls GGMLs quantization code for model creation.

* Remove "add model quantizations"

This is no longer needed now that quantization is implemented in Go+GGML code directly.
This commit is contained in:
Daniel Hiltgen
2025-05-06 11:20:48 -07:00
committed by GitHub
parent 95e744beeb
commit 424810450f
39 changed files with 1854 additions and 440 deletions

View File

@@ -68,19 +68,19 @@ func (p *phi3Model) KV(t *Tokenizer) ggml.KV {
return kv
}
func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
func (p *phi3Model) Tensors(ts []Tensor) []*ggml.Tensor {
var addRopeFactors sync.Once
out := make([]ggml.Tensor, 0, len(ts)+2)
out := make([]*ggml.Tensor, 0, len(ts)+2)
for _, t := range ts {
if strings.HasPrefix(t.Name(), "blk.0.") {
addRopeFactors.Do(func() {
out = append(out, ggml.Tensor{
out = append(out, &ggml.Tensor{
Name: "rope_factors_long.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.LongFactor))},
WriterTo: p.RopeScaling.LongFactor,
}, ggml.Tensor{
}, &ggml.Tensor{
Name: "rope_factors_short.weight",
Kind: 0,
Shape: []uint64{uint64(len(p.RopeScaling.ShortFactor))},
@@ -89,7 +89,7 @@ func (p *phi3Model) Tensors(ts []Tensor) []ggml.Tensor {
})
}
out = append(out, ggml.Tensor{
out = append(out, &ggml.Tensor{
Name: t.Name(),
Kind: t.Kind(),
Shape: t.Shape(),