review comments and coverage

This commit is contained in:
Daniel Hiltgen
2024-06-05 12:07:20 -07:00
parent ff4f0cbd1d
commit 6f351bf586
18 changed files with 375 additions and 456 deletions

View File

@@ -4,8 +4,7 @@
#include <string.h>
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
{
void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp) {
ze_result_t ret;
resp->err = NULL;
resp->oh.devices = NULL;
@@ -15,8 +14,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
const int buflen = 256;
char buf[buflen + 1];
int i, d, count;
struct lookup
{
struct lookup {
char *s;
void **p;
} l[] = {
@@ -32,8 +30,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
};
resp->oh.handle = LOAD_LIBRARY(oneapi_lib_path, RTLD_LAZY);
if (!resp->oh.handle)
{
if (!resp->oh.handle) {
char *msg = LOAD_ERR();
snprintf(buf, buflen,
"Unable to load %s library to query for Intel GPUs: %s\n",
@@ -48,14 +45,12 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
"wiring Level-Zero management library functions in %s\n",
oneapi_lib_path);
for (i = 0; l[i].s != NULL; i++)
{
for (i = 0; l[i].s != NULL; i++) {
// TODO once we've squashed the remaining corner cases remove this log
LOG(resp->oh.verbose, "dlsym: %s\n", l[i].s);
*l[i].p = LOAD_SYMBOL(resp->oh.handle, l[i].s);
if (!l[i].p)
{
if (!l[i].p) {
resp->oh.handle = NULL;
char *msg = LOAD_ERR();
LOG(resp->oh.verbose, "dlerr: %s\n", msg);
@@ -68,8 +63,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
}
ret = (*resp->oh.zesInit)(0);
if (ret != ZE_RESULT_SUCCESS)
{
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesInit err: %x\n", ret);
snprintf(buf, buflen, "oneapi vram init failure: %x", ret);
resp->err = strdup(buf);
@@ -79,8 +73,7 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
count = 0;
ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, NULL);
if (ret != ZE_RESULT_SUCCESS)
{
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get driver count: %x", ret);
resp->err = strdup(buf);
@@ -91,10 +84,10 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
resp->oh.drivers = malloc(resp->oh.num_drivers * sizeof(zes_driver_handle_t));
resp->oh.num_devices = malloc(resp->oh.num_drivers * sizeof(uint32_t));
memset(&resp->oh.num_devices[0], 0, resp->oh.num_drivers * sizeof(uint32_t));
resp->oh.devices = malloc(resp->oh.num_drivers * sizeof(zes_device_handle_t*));
resp->oh.devices =
malloc(resp->oh.num_drivers * sizeof(zes_device_handle_t *));
ret = (*resp->oh.zesDriverGet)(&resp->oh.num_drivers, &resp->oh.drivers[0]);
if (ret != ZE_RESULT_SUCCESS)
{
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDriverGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get driver count: %x", ret);
resp->err = strdup(buf);
@@ -103,19 +96,20 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
}
for (d = 0; d < resp->oh.num_drivers; d++) {
ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d], &resp->oh.num_devices[d], NULL);
if (ret != ZE_RESULT_SUCCESS)
{
ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d],
&resp->oh.num_devices[d], NULL);
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get device count: %x", ret);
resp->err = strdup(buf);
oneapi_release(resp->oh);
return;
}
resp->oh.devices[d] = malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
ret = (*resp->oh.zesDeviceGet)(resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
if (ret != ZE_RESULT_SUCCESS)
{
resp->oh.devices[d] =
malloc(resp->oh.num_devices[d] * sizeof(zes_device_handle_t));
ret = (*resp->oh.zesDeviceGet)(
resp->oh.drivers[d], &resp->oh.num_devices[d], resp->oh.devices[d]);
if (ret != ZE_RESULT_SUCCESS) {
LOG(resp->oh.verbose, "zesDeviceGet err: %x\n", ret);
snprintf(buf, buflen, "unable to get device count: %x", ret);
resp->err = strdup(buf);
@@ -128,8 +122,8 @@ void oneapi_init(char *oneapi_lib_path, oneapi_init_resp_t *resp)
return;
}
void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *resp)
{
void oneapi_check_vram(oneapi_handle_t h, int driver, int device,
mem_info_t *resp) {
ze_result_t ret;
resp->err = NULL;
uint64_t totalMem = 0;
@@ -138,12 +132,11 @@ void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *re
char buf[buflen + 1];
int i, d, m;
if (h.handle == NULL)
{
if (h.handle == NULL) {
resp->err = strdup("Level-Zero handle not initialized");
return;
}
if (driver > h.num_drivers || device > h.num_devices[driver]) {
resp->err = strdup("driver of device index out of bounds");
return;
@@ -161,8 +154,7 @@ void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *re
props.pNext = &ext_props;
ret = (*h.zesDeviceGetProperties)(h.devices[driver][device], &props);
if (ret != ZE_RESULT_SUCCESS)
{
if (ret != ZE_RESULT_SUCCESS) {
snprintf(buf, buflen, "unable to get device properties: %d", ret);
resp->err = strdup(buf);
return;
@@ -175,8 +167,7 @@ void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *re
// TODO - the driver isn't included - what if there are multiple drivers?
snprintf(&resp->gpu_id[0], GPU_ID_LEN, "%d", device);
if (h.verbose)
{
if (h.verbose) {
// When in verbose mode, report more information about
// the card we discover.
LOG(h.verbose, "[%d:%d] oneAPI device name: %s\n", driver, device,
@@ -195,11 +186,11 @@ void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *re
// Compute Capability equivalent in resp->major, resp->minor, resp->patch
uint32_t memCount = 0;
ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, NULL);
if (ret != ZE_RESULT_SUCCESS)
{
snprintf(buf, buflen,
"unable to enumerate Level-Zero memory modules: %x", ret);
ret = (*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount,
NULL);
if (ret != ZE_RESULT_SUCCESS) {
snprintf(buf, buflen, "unable to enumerate Level-Zero memory modules: %x",
ret);
resp->err = strdup(buf);
return;
}
@@ -209,14 +200,12 @@ void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *re
zes_mem_handle_t *mems = malloc(memCount * sizeof(zes_mem_handle_t));
(*h.zesDeviceEnumMemoryModules)(h.devices[driver][device], &memCount, mems);
for (m = 0; m < memCount; m++)
{
for (m = 0; m < memCount; m++) {
zes_mem_state_t state;
state.stype = ZES_STRUCTURE_TYPE_MEM_STATE;
state.pNext = NULL;
ret = (*h.zesMemoryGetState)(mems[m], &state);
if (ret != ZE_RESULT_SUCCESS)
{
if (ret != ZE_RESULT_SUCCESS) {
snprintf(buf, buflen, "unable to get memory state: %x", ret);
resp->err = strdup(buf);
free(mems);
@@ -230,29 +219,23 @@ void oneapi_check_vram(oneapi_handle_t h, int driver, int device, mem_info_t *re
free(mems);
}
void oneapi_release(oneapi_handle_t h)
{
void oneapi_release(oneapi_handle_t h) {
int d;
LOG(h.verbose, "releasing oneapi library\n");
for (d = 0; d < h.num_drivers; d++)
{
if (h.devices != NULL && h.devices[d] != NULL)
{
for (d = 0; d < h.num_drivers; d++) {
if (h.devices != NULL && h.devices[d] != NULL) {
free(h.devices[d]);
}
}
if (h.devices != NULL)
{
if (h.devices != NULL) {
free(h.devices);
h.devices = NULL;
}
if (h.num_devices != NULL)
{
if (h.num_devices != NULL) {
free(h.num_devices);
h.num_devices = NULL;
}
if (h.drivers != NULL)
{
if (h.drivers != NULL) {
free(h.drivers);
h.drivers = NULL;
}
@@ -261,14 +244,11 @@ void oneapi_release(oneapi_handle_t h)
h.handle = NULL;
}
int oneapi_get_device_count(oneapi_handle_t h, int driver)
{
if (h.handle == NULL || h.num_devices == NULL)
{
int oneapi_get_device_count(oneapi_handle_t h, int driver) {
if (h.handle == NULL || h.num_devices == NULL) {
return 0;
}
if (driver > h.num_drivers)
{
if (driver > h.num_drivers) {
return 0;
}
return (int)h.num_devices[driver];