Use 10% VRAM overhead for CUDA

This commit is contained in:
Jeffrey Morgan
2024-01-08 23:17:44 -05:00
parent 58ce2d8273
commit cb534e6ac2
2 changed files with 6 additions and 4 deletions

View File

@@ -117,6 +117,7 @@ func New(workDir, model string, adapters, projectors []string, opts api.Options)
bytesPerLayer := int64((requiredModel + requiredKv) / int64(ggml.NumLayers()))
log.Println("bytes per layer:", bytesPerLayer)
layers := available / bytesPerLayer
log.Println("total required with split:", requiredAlloc+(layers*bytesPerLayer))
if layers < int64(opts.NumGPU) {
opts.NumGPU = int(layers)
}