restore model load duration on generate response (#1524)

* restore model load duration on generate response

- set model load duration on generate and chat done response
- calculate createAt time when response created

* remove checkpoints predict opts

* Update routes.go
This commit is contained in:
Bruce MacDonald
2023-12-14 12:15:50 -05:00
committed by GitHub
parent 31f0551dab
commit 6ee8c80199
2 changed files with 27 additions and 36 deletions

View File

@@ -548,17 +548,12 @@ const maxBufferSize = 512 * format.KiloByte
const maxRetries = 6
type PredictOpts struct {
Prompt string
Format string
Images []api.ImageData
CheckpointStart time.Time
CheckpointLoaded time.Time
Prompt string
Format string
Images []api.ImageData
}
type PredictResult struct {
CreatedAt time.Time
TotalDuration time.Duration
LoadDuration time.Duration
Content string
Done bool
PromptEvalCount int
@@ -681,16 +676,12 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred
if p.Content != "" {
fn(PredictResult{
CreatedAt: time.Now().UTC(),
Content: p.Content,
Content: p.Content,
})
}
if p.Stop {
fn(PredictResult{
CreatedAt: time.Now().UTC(),
TotalDuration: time.Since(predict.CheckpointStart),
Done: true,
PromptEvalCount: p.Timings.PromptN,
PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),