Merge pull request #6402 from rick-github/numParallel

Override numParallel in pickBestPartialFitByLibrary() only if unset.
This commit is contained in:
Daniel Hiltgen
2024-08-19 11:07:22 -07:00
committed by GitHub

View File

@@ -734,7 +734,10 @@ func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoL
// If multiple Libraries are detected, pick the Library which loads the most layers for the model
func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList { func pickBestPartialFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus gpu.GpuInfoList, numParallel *int) gpu.GpuInfoList {
*numParallel = 1 if *numParallel <= 0 {
*numParallel = 1
req.opts.NumCtx = req.origNumCtx
}
byLibrary := gpus.ByLibrary() byLibrary := gpus.ByLibrary()
if len(byLibrary) <= 1 { if len(byLibrary) <= 1 {
return gpus return gpus