mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 16:26:59 +00:00
use 10% vram overhead for cuda
This commit is contained in:
@@ -117,6 +117,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
|
||||
bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
|
||||
log.Println("bytes per layer:", bytesPerLayer)
|
||||
layers := available / bytesPerLayer
|
||||
log.Println("total required with split:", requiredAlloc+(layers*bytesPerLayer))
|
||||
if layers < int64(opts.NumGPU) {
|
||||
opts.NumGPU = int(layers)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user