mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-10 15:57:04 +00:00
Reintroduce nvidia nvml library for windows
This library will give us the most reliable free VRAM reporting on windows to enable concurrent model scheduling.
This commit is contained in:
@@ -487,8 +487,10 @@ func (runner *runnerRef) needsReload(ctx context.Context, req *LlmRequest) bool
|
||||
func (runner *runnerRef) waitForVRAMRecovery() chan interface{} {
|
||||
finished := make(chan interface{}, 1)
|
||||
|
||||
// CPU or Metal don't need checking, so no waiting required, windows can page VRAM, and the APIs we query tend to be optimistic on free space
|
||||
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) || runtime.GOOS == "windows" {
|
||||
// CPU or Metal don't need checking, so no waiting required
|
||||
// windows can page VRAM, only cuda currently can report accurate used vram usage
|
||||
if (len(runner.gpus) == 1 && (runner.gpus[0].Library == "cpu" || runner.gpus[0].Library == "metal")) ||
|
||||
(runtime.GOOS == "windows" && runner.gpus[0].Library != "cuda") {
|
||||
finished <- struct{}{}
|
||||
return finished
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user