mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 08:17:03 +00:00
Verify permissions for AMD GPU (#6736)
This adds back a check, lost many releases ago, that verifies /dev/kfd permissions. When those permissions are lacking, the result can be confusing failure modes such as: "rocBLAS error: Could not initialize Tensile host: No devices found". This implementation does not hard-fail the serve command; instead it falls back to CPU with an error log. In the future we can include this in the GPU discovery UX to show devices that were detected but are unsupported.
This commit is contained in:
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
@@ -359,6 +360,10 @@ func AMDGetGPUInfo() []RocmGPUInfo {
|
||||
if len(resp) == 0 {
|
||||
slog.Info("no compatible amdgpu devices detected")
|
||||
}
|
||||
if err := verifyKFDDriverAccess(); err != nil {
|
||||
slog.Error("amdgpu devices detected but permission problems block access", "error", err)
|
||||
return nil
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
@@ -455,3 +460,19 @@ func getFreeMemory(usedFile string) (uint64, error) {
|
||||
}
|
||||
return usedMemory, nil
|
||||
}
|
||||
|
||||
// verifyKFDDriverAccess reports whether the current process can open the
// /dev/kfd device node for reading and writing.
//
// It returns nil when the open succeeds. Otherwise it returns an error that
// wraps the underlying open failure, so callers can still classify it with
// errors.Is:
//   - fs.ErrPermission: the process lacks access; the message tells the user
//     to run as root or join the render group.
//   - fs.ErrNotExist: the device node is missing; the message hints at the
//     container flags needed to expose it.
//   - anything else: a generic failure message naming /dev/kfd.
func verifyKFDDriverAccess() error {
	// Verify we have permissions - either running as root, or we have group access to the driver.
	// No O_CREATE is requested, so the permission bits argument is unused.
	fd, err := os.OpenFile("/dev/kfd", os.O_RDWR, 0)
	if err != nil {
		switch {
		case errors.Is(err, fs.ErrPermission):
			return fmt.Errorf("permissions not set up properly. Either run ollama as root, or add your user account to the render group. %w", err)
		case errors.Is(err, fs.ErrNotExist):
			// Container runtime failure?
			return fmt.Errorf("kfd driver not loaded. If running in a container, remember to include '--device /dev/kfd --device /dev/dri': %w", err)
		default:
			return fmt.Errorf("failed to check permission on /dev/kfd: %w", err)
		}
	}

	// The handle was only opened to probe access; nothing was written, so a
	// close failure carries no information worth reporting.
	fd.Close()
	return nil
}
|
||||
|
||||
Reference in New Issue
Block a user