all: fix typos in documentation, code, and comments (#7021)

This commit is contained in:
Stefan Weil
2024-12-10 21:58:06 +01:00
committed by GitHub
parent 82a02e18d9
commit abfdc4710f
22 changed files with 35 additions and 35 deletions

View File

@@ -54,7 +54,7 @@ type Scheduler struct {
var defaultModelsPerGPU = 3
// Default automatic value for parallel setting
// Model will still need to fit in VRAM. If this setting wont fit
// Model will still need to fit in VRAM. If this setting won't fit
// we'll back off down to 1 to try to get it to fit
var defaultParallel = 4
@@ -501,7 +501,7 @@ func (s *Scheduler) updateFreeSpace(allGpus discover.GpuInfoList) {
} else if (allGpus[i].TotalMemory - p) < allGpus[i].FreeMemory { // predicted free is smaller than reported free, use it
// TODO maybe we should just always trust our numbers, since cuda's free memory reporting is laggy
// and we might unload models we didn't actually need to. The risk is if some other GPU intensive app is loaded
// after we start our first runner, then we'll never acount for that, so picking the smallest free value seems prudent.
// after we start our first runner, then we'll never account for that, so picking the smallest free value seems prudent.
allGpus[i].FreeMemory = allGpus[i].TotalMemory - p
}
slog.Info("updated VRAM based on existing loaded models", "gpu", allGpus[i].ID, "library", allGpus[i].Library, "total", format.HumanBytes2(allGpus[i].TotalMemory), "available", format.HumanBytes2(allGpus[i].FreeMemory))
@@ -683,7 +683,7 @@ func (a ByDuration) Less(i, j int) bool {
// pickBestFullFitByLibrary will try to find the optimal placement of the model in the available GPUs where the model fully fits
// The list of GPUs returned will always be the same brand (library)
// If the model can not be fit fully within the available GPU(s) nil is returned
// If numParallel is <= 0, this will attempt try to optimize parallism based on available VRAM, and adjust
// If numParallel is <= 0, this will attempt to optimize parallelism based on available VRAM, and adjust
// opts.NumCtx accordingly
func pickBestFullFitByLibrary(req *LlmRequest, ggml *llm.GGML, gpus discover.GpuInfoList, numParallel *int) discover.GpuInfoList {
var estimatedVRAM uint64