quantize any fp16/fp32 model

- FROM /path/to/{safetensors,pytorch}
- FROM /path/to/fp{16,32}.bin
- FROM model:fp{16,32}
This commit is contained in:
Michael Yang
2024-04-12 13:55:12 -07:00
parent d091fe3c21
commit 9685c34509
12 changed files with 654 additions and 556 deletions

View File

@@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error {
return nil
}
func (m *LlamaModel) WriteGGUF() (string, error) {
func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
@@ -161,16 +161,9 @@ func (m *LlamaModel) WriteGGUF() (string, error) {
f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
return err
}
defer f.Close()
mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}
slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))
return f.Name(), nil
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors)
}