quantize any fp16/fp32 model

- FROM /path/to/{safetensors,pytorch}
- FROM /path/to/fp{16,32}.bin
- FROM model:fp{16,32}

This commit is contained in:
Michael Yang
2024-04-12 13:55:12 -07:00
parent d091fe3c21
commit 9685c34509
12 changed files with 654 additions and 556 deletions

View File

@@ -560,7 +560,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel()
-if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), req.Quantization, modelfile, fn); err != nil {
+if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
ch <- gin.H{"error": err.Error()}
}
}()
@@ -852,11 +852,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
return
}
if _, err := layer.Commit(); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.Status(http.StatusCreated)
}