mirror of
https://github.com/dogkeeper886/ollama37.git
synced 2025-12-11 16:26:59 +00:00
Harden GPU mgmt library lookup
When there are multiple management libraries installed on a system not every one will be compatible with the current driver. This change improves our management library algorithm to build up a set of discovered libraries based on glob patterns, and then try all of them until we're able to load one without error.
This commit is contained in:
@@ -4,26 +4,9 @@
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#ifndef _WIN32
|
||||
const char *cuda_lib_paths[] = {
|
||||
"libnvidia-ml.so",
|
||||
"/usr/local/cuda/lib64/libnvidia-ml.so",
|
||||
"/usr/lib/x86_64-linux-gnu/nvidia/current/libnvidia-ml.so",
|
||||
"/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
|
||||
"/usr/lib/wsl/lib/libnvidia-ml.so.1", // TODO Maybe glob?
|
||||
NULL,
|
||||
};
|
||||
#else
|
||||
const char *cuda_lib_paths[] = {
|
||||
"nvml.dll",
|
||||
"",
|
||||
NULL,
|
||||
};
|
||||
#endif
|
||||
|
||||
#define CUDA_LOOKUP_SIZE 6
|
||||
|
||||
void cuda_init(cuda_init_resp_t *resp) {
|
||||
void cuda_init(char *cuda_lib_path, cuda_init_resp_t *resp) {
|
||||
nvmlReturn_t ret;
|
||||
resp->err = NULL;
|
||||
const int buflen = 256;
|
||||
@@ -42,16 +25,12 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||
{"nvmlDeviceGetCudaComputeCapability", (void *)&resp->ch.getComputeCapability},
|
||||
};
|
||||
|
||||
for (i = 0; cuda_lib_paths[i] != NULL && resp->ch.handle == NULL; i++) {
|
||||
resp->ch.handle = LOAD_LIBRARY(cuda_lib_paths[i], RTLD_LAZY);
|
||||
}
|
||||
resp->ch.handle = LOAD_LIBRARY(cuda_lib_path, RTLD_LAZY);
|
||||
if (!resp->ch.handle) {
|
||||
// TODO improve error message, as the LOAD_ERR will have typically have the
|
||||
// final path that was checked which might be confusing.
|
||||
char *msg = LOAD_ERR();
|
||||
snprintf(buf, buflen,
|
||||
"Unable to load %s library to query for Nvidia GPUs: %s",
|
||||
cuda_lib_paths[0], msg);
|
||||
cuda_lib_path, msg);
|
||||
free(msg);
|
||||
resp->err = strdup(buf);
|
||||
return;
|
||||
@@ -73,6 +52,8 @@ void cuda_init(cuda_init_resp_t *resp) {
|
||||
|
||||
ret = (*resp->ch.initFn)();
|
||||
if (ret != NVML_SUCCESS) {
|
||||
UNLOAD_LIBRARY(resp->ch.handle);
|
||||
resp->ch.handle = NULL;
|
||||
snprintf(buf, buflen, "nvml vram init failure: %d", ret);
|
||||
resp->err = strdup(buf);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user