mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 16:26:59 +00:00
restore model load duration on generate response (#1524)
* restore model load duration on generate response
  - set model load duration on generate and chat done response
  - calculate createdAt time when response created
* remove checkpoints predict opts
* Update routes.go
This commit is contained in:
llm/llama.go — 17 lines changed
@@ -548,17 +548,12 @@ const maxBufferSize = 512 * format.KiloByte

 const maxRetries = 6

 type PredictOpts struct {
-	Prompt           string
-	Format           string
-	Images           []api.ImageData
-	CheckpointStart  time.Time
-	CheckpointLoaded time.Time
+	Prompt string
+	Format string
+	Images []api.ImageData
 }

 type PredictResult struct {
+	CreatedAt          time.Time
+	TotalDuration      time.Duration
+	LoadDuration       time.Duration
 	Content            string
 	Done               bool
 	PromptEvalCount    int
@@ -681,16 +676,12 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred

 		if p.Content != "" {
 			fn(PredictResult{
+				CreatedAt: time.Now().UTC(),
-				Content: p.Content,
+				Content:   p.Content,
 			})
 		}

 		if p.Stop {
 			fn(PredictResult{
+				CreatedAt:     time.Now().UTC(),
-				TotalDuration: time.Since(predict.CheckpointStart),

 				Done:               true,
 				PromptEvalCount:    p.Timings.PromptN,
 				PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
Reference in New Issue
Block a user