mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 08:17:03 +00:00
chore: update mllama to use ollama engine (#10637)
This commit is contained in:
@@ -8,6 +8,7 @@ import (
|
||||
"os"
|
||||
"reflect"
|
||||
"runtime"
|
||||
"slices"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
@@ -132,11 +133,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
|
||||
continue
|
||||
}
|
||||
numParallel := int(envconfig.NumParallel())
|
||||
// TODO (jmorganca): mllama doesn't support parallel yet
|
||||
// see https://github.com/ollama/ollama/issues/4165
|
||||
if checkMllamaModelFamily(pending.model) && numParallel != 1 {
|
||||
// `mllama` is a snowflake and uses an encoder cache which cannot be used with num_parallel > 1
|
||||
// ref: https://github.com/ollama/ollama/issues/4165
|
||||
if slices.Contains(pending.model.Config.ModelFamilies, "mllama") && numParallel != 1 {
|
||||
numParallel = 1
|
||||
slog.Warn("mllama doesn't support parallel requests yet")
|
||||
slog.Warn("mllama does not currently support parallel requests")
|
||||
}
|
||||
|
||||
for {
|
||||
|
||||
Reference in New Issue
Block a user