mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 16:26:59 +00:00
restore model load duration on generate response (#1524)
* restore model load duration on generate response
  - set model load duration on generate and chat done response
  - calculate createdAt time when response created
* remove checkpoints predict opts
* Update routes.go
This commit is contained in:
llm/llama.go — 17 lines changed
@@ -548,17 +548,12 @@ const maxBufferSize = 512 * format.KiloByte

 const maxRetries = 6

 type PredictOpts struct {
-	Prompt           string
-	Format           string
-	Images           []api.ImageData
-	CheckpointStart  time.Time
-	CheckpointLoaded time.Time
+	Prompt string
+	Format string
+	Images []api.ImageData
 }

 type PredictResult struct {
+	CreatedAt          time.Time
+	TotalDuration      time.Duration
+	LoadDuration       time.Duration
 	Content            string
 	Done               bool
 	PromptEvalCount    int
@@ -681,16 +676,12 @@ func (llm *llama) Predict(ctx context.Context, predict PredictOpts, fn func(Pred

 		if p.Content != "" {
 			fn(PredictResult{
+				CreatedAt: time.Now().UTC(),
-				Content: p.Content,
+				Content:   p.Content,
 			})
 		}

 		if p.Stop {
 			fn(PredictResult{
+				CreatedAt:     time.Now().UTC(),
-				TotalDuration: time.Since(predict.CheckpointStart),

 				Done:               true,
 				PromptEvalCount:    p.Timings.PromptN,
 				PromptEvalDuration: parseDurationMs(p.Timings.PromptMS),
Reference in New Issue
Block a user