mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
Add Metrics to `/api/embed` response (#5709)
* add prompt tokens to embed response
* rm slog
* metrics
* types
* prompt n
* clean up
* reset submodule
* update tests
* test name
* list metrics
This commit is contained in:
@@ -284,6 +284,7 @@ func (s *Server) GenerateHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
func (s *Server) EmbedHandler(c *gin.Context) {
|
||||
checkpointStart := time.Now()
|
||||
var req api.EmbedRequest
|
||||
err := c.ShouldBindJSON(&req)
|
||||
switch {
|
||||
@@ -332,6 +333,8 @@ func (s *Server) EmbedHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
checkpointLoaded := time.Now()
|
||||
|
||||
kvData, err := getKVData(m.ModelPath, false)
|
||||
if err != nil {
|
||||
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
|
||||
@@ -370,13 +373,16 @@ func (s *Server) EmbedHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
for i, e := range embeddings {
|
||||
embeddings[i] = normalize(e)
|
||||
for i, e := range embeddings.Embedding {
|
||||
embeddings.Embedding[i] = normalize(e)
|
||||
}
|
||||
|
||||
resp := api.EmbedResponse{
|
||||
Model: req.Model,
|
||||
Embeddings: embeddings,
|
||||
Model: req.Model,
|
||||
Embeddings: embeddings.Embedding,
|
||||
TotalDuration: time.Since(checkpointStart),
|
||||
LoadDuration: checkpointLoaded.Sub(checkpointStart),
|
||||
PromptEvalCount: embeddings.PromptEvalCount,
|
||||
}
|
||||
c.JSON(http.StatusOK, resp)
|
||||
}
|
||||
@@ -428,9 +434,9 @@ func (s *Server) EmbeddingsHandler(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
embedding := make([]float64, len(embeddings[0]))
|
||||
embedding := make([]float64, len(embeddings.Embedding[0]))
|
||||
|
||||
for i, v := range embeddings[0] {
|
||||
for i, v := range embeddings.Embedding[0] {
|
||||
embedding[i] = float64(v)
|
||||
}
|
||||
|
||||
|
||||
@@ -709,7 +709,7 @@ type mockLlm struct {
|
||||
pingResp error
|
||||
waitResp error
|
||||
completionResp error
|
||||
embedResp [][]float32
|
||||
embedResp *llm.EmbedResponse
|
||||
embedRespErr error
|
||||
tokenizeResp []int
|
||||
tokenizeRespErr error
|
||||
@@ -727,7 +727,7 @@ func (s *mockLlm) WaitUntilRunning(ctx context.Context) error { return s.waitRes
|
||||
func (s *mockLlm) Completion(ctx context.Context, req llm.CompletionRequest, fn func(llm.CompletionResponse)) error {
|
||||
return s.completionResp
|
||||
}
|
||||
func (s *mockLlm) Embed(ctx context.Context, input []string) ([][]float32, error) {
|
||||
func (s *mockLlm) Embed(ctx context.Context, input []string) (*llm.EmbedResponse, error) {
|
||||
return s.embedResp, s.embedRespErr
|
||||
}
|
||||
func (s *mockLlm) Tokenize(ctx context.Context, content string) ([]int, error) {
|
||||
|
||||
Reference in New Issue
Block a user