pr comments

- default to embeddings enabled
- move embedding logic for loaded model to request
- allow embedding full directory (see the sketch after this list)
- close llm on reload
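
The directory-embedding change lives in one of the changed files that is not shown on this page, so the following is only a minimal sketch, assuming the server stats the path and, for a directory, walks it and embeds every regular file. embedPath and embedFile are hypothetical names used for illustration, not identifiers from this commit.

// Illustrative sketch of "allow embedding full directory": embed a single
// file directly, or walk a directory and embed each regular file in it.
package main

import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
)

// embedFile stands in for whatever per-file embedding the server performs.
func embedFile(path string) error {
	fmt.Println("embedding", path)
	return nil
}

// embedPath embeds one file, or every regular file under a directory.
func embedPath(path string) error {
	info, err := os.Stat(path)
	if err != nil {
		return err
	}
	if !info.IsDir() {
		return embedFile(path)
	}
	return filepath.WalkDir(path, func(p string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if d.IsDir() {
			return nil // keep descending; only files are embedded
		}
		return embedFile(p)
	})
}

func main() {
	if err := embedPath("./docs"); err != nil {
		fmt.Println("error:", err)
	}
}

Under that assumption, a single embedPath call covers both the existing single-file case and the new directory case.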
Bruce MacDonald
2023-08-08 13:49:37 -04:00
parent a6f6d18f83
commit 21ddcaa1f1
3 changed files with 97 additions and 82 deletions


@@ -17,6 +17,7 @@ import (
"github.com/gin-contrib/cors"
"github.com/gin-gonic/gin"
"gonum.org/v1/gonum/mat"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/llama"
@@ -114,7 +115,22 @@ func GenerateHandler(c *gin.Context) {
 	checkpointLoaded := time.Now()
-	prompt, err := model.Prompt(req)
+	embedding := ""
+	if model.Embeddings != nil && len(model.Embeddings) > 0 {
+		promptEmbed, err := loaded.llm.Embedding(req.Prompt)
+		if err != nil {
+			c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
+			return
+		}
+		// TODO: set embed_top from specified parameters in modelfile
+		embed_top := 3
+		topK := vector.TopK(embed_top, mat.NewVecDense(len(promptEmbed), promptEmbed), loaded.Embeddings)
+		for _, e := range topK {
+			embedding = fmt.Sprintf("%s %s", embedding, e.Embedding.Data)
+		}
+	}
+	prompt, err := model.Prompt(req, embedding)
 	if err != nil {
 		c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
 		return
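
vector.TopK and loaded.Embeddings come from other parts of the repository that are not shown in this diff, so as context only, here is a minimal sketch of what a cosine-similarity top-K selection over stored embeddings could look like with gonum. The Embedding and scored types below are assumptions for illustration, not the project's actual definitions.

// Sketch of a top-K nearest-embedding lookup in the spirit of the
// vector.TopK call above; types and names are illustrative only.
package main

import (
	"fmt"
	"sort"

	"gonum.org/v1/gonum/mat"
)

// Embedding pairs a stored vector with the text it was computed from.
type Embedding struct {
	Vector []float64
	Data   string
}

// scored is one ranked result: the stored embedding plus its similarity.
type scored struct {
	Embedding Embedding
	Score     float64
}

// cosine returns the cosine similarity between two dense vectors.
func cosine(a, b *mat.VecDense) float64 {
	return mat.Dot(a, b) / (mat.Norm(a, 2) * mat.Norm(b, 2))
}

// topK ranks stored embeddings by similarity to the query and keeps the best k.
func topK(k int, query *mat.VecDense, store []Embedding) []scored {
	results := make([]scored, 0, len(store))
	for _, e := range store {
		v := mat.NewVecDense(len(e.Vector), e.Vector)
		results = append(results, scored{Embedding: e, Score: cosine(query, v)})
	}
	sort.Slice(results, func(i, j int) bool { return results[i].Score > results[j].Score })
	if k > len(results) {
		k = len(results)
	}
	return results[:k]
}

func main() {
	store := []Embedding{
		{Vector: []float64{1, 0}, Data: "notes about llamas"},
		{Vector: []float64{0, 1}, Data: "notes about boats"},
	}
	query := mat.NewVecDense(2, []float64{0.9, 0.1})
	for _, r := range topK(1, query, store) {
		fmt.Printf("%.2f %s\n", r.Score, r.Embedding.Data)
	}
}

In the handler above, the Data of each selected embedding is concatenated into the embedding string and passed to model.Prompt as extra context for the generation request.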