Merge pull request #3682 from ollama/mxyng/quantize-all-the-things

quantize any fp16/fp32 model
This commit is contained in:
Michael Yang
2024-05-07 15:20:49 -07:00
committed by GitHub
14 changed files with 641 additions and 606 deletions

View File

@@ -560,7 +560,7 @@ func (s *Server) CreateModelHandler(c *gin.Context) {
ctx, cancel := context.WithCancel(c.Request.Context())
defer cancel()
if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), req.Quantization, modelfile, fn); err != nil {
if err := CreateModel(ctx, name.String(), filepath.Dir(req.Path), strings.ToUpper(req.Quantization), modelfile, fn); err != nil {
ch <- gin.H{"error": err.Error()}
}
}()
@@ -852,11 +852,6 @@ func (s *Server) CreateBlobHandler(c *gin.Context) {
return
}
if _, err := layer.Commit(); err != nil {
c.AbortWithStatusJSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return
}
c.Status(http.StatusCreated)
}