mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 07:46:59 +00:00
Don't clamp ctx size in PredictServerFit (#4317)
* don't clamp ctx size in `PredictServerFit`
* enforce a minimum context size of 4
* remove context warning
This commit is contained in:
@@ -61,6 +61,10 @@ func InitScheduler(ctx context.Context) *Scheduler {
|
||||
// context must be canceled to decrement ref count and release the runner
|
||||
func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options, sessionDuration time.Duration) (chan *runnerRef, chan error) {
|
||||
// allocate a large enough kv cache for all parallel requests
|
||||
if opts.NumCtx < 4 {
|
||||
opts.NumCtx = 4
|
||||
}
|
||||
|
||||
opts.NumCtx = opts.NumCtx * envconfig.NumParallel
|
||||
|
||||
req := &LlmRequest{
|
||||
|
||||
Reference in New Issue
Block a user