llm: limit generation to 10x context size to avoid run-on generations (#3918)

* llm: limit generation to 10x context size to avoid run-on generations

* add comment

* simplify condition statement
This commit is contained in:
Jeffrey Morgan
2024-04-25 19:02:30 -04:00
committed by GitHub
parent 5f73c08729
commit 993cf8bf55
2 changed files with 11 additions and 2 deletions

View File

@@ -560,6 +560,13 @@ func (s *llmServer) Completion(ctx context.Context, req CompletionRequest, fn fu
return err
}
defer s.sem.Release(1)
// only allow maximum 10 "context shifts" to avoid infinite generation
if req.Options.NumPredict < 0 || req.Options.NumPredict > 10*s.options.NumCtx {
req.Options.NumPredict = 10 * s.options.NumCtx
slog.Debug("setting token limit to 10x num_ctx", "num_ctx", s.options.NumCtx, "num_predict", req.Options.NumPredict)
}
request := map[string]any{
"prompt": req.Prompt,
"stream": true,