Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PR from fork/443 #459

Closed
wants to merge 4 commits
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 0 additions & 60 deletions src/variorum/AMD_GPU/amd_gpu_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,6 @@ void get_power_data(int chipid, int total_sockets, int verbose, FILE *output)

gethostname(hostname, 1024);

ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}

ret = rsmi_num_monitor_devices(&num_devices);
if (ret != RSMI_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -143,16 +133,6 @@ void get_power_limit_data(int chipid, int total_sockets, int verbose,

gethostname(hostname, 1024);

ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}

ret = rsmi_num_monitor_devices(&num_devices);
if (ret != RSMI_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -277,16 +257,6 @@ void get_thermals_data(int chipid, int total_sockets, int verbose, FILE *output)

gethostname(hostname, 1024);

ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}

ret = rsmi_num_monitor_devices(&num_devices);
if (ret != RSMI_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -471,16 +441,6 @@ void get_clocks_data(int chipid, int total_sockets, int verbose, FILE *output)

gethostname(hostname, 1024);

ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}

ret = rsmi_num_monitor_devices(&num_devices);
if (ret != RSMI_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -678,16 +638,6 @@ void get_gpu_utilization_data(int chipid, int total_sockets, int verbose,

gethostname(hostname, 1024);

ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}

ret = rsmi_num_monitor_devices(&num_devices);
if (ret != RSMI_STATUS_SUCCESS)
{
Expand Down Expand Up @@ -886,16 +836,6 @@ void cap_each_gpu_power_limit(int chipid, int total_sockets,

gethostname(hostname, 1024);

ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}

ret = rsmi_num_monitor_devices(&num_devices);
if (ret != RSMI_STATUS_SUCCESS)
{
Expand Down
15 changes: 15 additions & 0 deletions src/variorum/AMD_GPU/config_amd_gpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ uint64_t *detect_amd_gpu_arch(void)

int set_amd_gpu_func_ptrs(int idx)
{
static int init_complete = 0;
int err = 0;
rsmi_status_t ret;

if (*g_platform[idx].arch_id == AMD_INSTINCT)
{
Expand All @@ -46,5 +48,18 @@ int set_amd_gpu_func_ptrs(int idx)
err = VARIORUM_ERROR_UNSUPPORTED_PLATFORM;
}

if (init_complete == 0)
{
ret = rsmi_init(0);
if (ret != RSMI_STATUS_SUCCESS)
{
variorum_error_handler("Could not initialize RSMI",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}
init_complete = 1;
}
return err;
}
84 changes: 45 additions & 39 deletions src/variorum/ARM/config_arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,49 +25,55 @@ uint64_t *detect_arm_arch(void)
uint64_t cpu0_id_val;
uint64_t cpu1_id_val;

/* This logic reads the IDs for the first two cores on the system.
* On the ARM big.LITTLE system, the first two cores report the CPU IDs
* from the big and LITTLE clusters, respectively, on the SoC.
* On the Neoverse N1 system, the first two cores report identical
* CPU IDs. Variorum checks the combined CPU IDs returned by the system.
*/
char *cpu0_id_reg_path =
"/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
char *cpu1_id_reg_path =
"/sys/devices/system/cpu/cpu1/regs/identification/midr_el1";
unsigned long *model = (unsigned long *) malloc(sizeof(uint64_t));

int cpu0_id_reg_fd = open(cpu0_id_reg_path, O_RDONLY);
int cpu1_id_reg_fd = open(cpu1_id_reg_path, O_RDONLY);

if (!cpu0_id_reg_fd || !cpu1_id_reg_fd)
{
variorum_error_handler("Error encountered in accessing CPU ID information",
VARIORUM_ERROR_INVAL, getenv("HOSTNAME"),
__FILE__, __FUNCTION__, __LINE__);
return NULL;
}
static unsigned long model_id = -1;

int cpu0_id_bytes = read(cpu0_id_reg_fd, cpu0_id_str, CPU_ID_SIZE);
int cpu1_id_bytes = read(cpu1_id_reg_fd, cpu1_id_str, CPU_ID_SIZE);

if (!cpu0_id_bytes || !cpu1_id_bytes)
if (model_id == -1)
{
variorum_error_handler("Error encountered in accessing CPU ID information",
VARIORUM_ERROR_INVAL, getenv("HOSTNAME"),
__FILE__, __FUNCTION__, __LINE__);
return NULL;
/* This logic reads the IDs for the first two cores on the system.
* On the ARM big.LITTLE system, the first two cores report the CPU IDs
* from the big and LITTLE clusters, respectively, on the SoC.
* On the Neoverse N1 system, the first two cores report identical
* CPU IDs. Variorum checks the combined CPU IDs returned by the system.
*/
char *cpu0_id_reg_path =
"/sys/devices/system/cpu/cpu0/regs/identification/midr_el1";
char *cpu1_id_reg_path =
"/sys/devices/system/cpu/cpu1/regs/identification/midr_el1";

int cpu0_id_reg_fd = open(cpu0_id_reg_path, O_RDONLY);
int cpu1_id_reg_fd = open(cpu1_id_reg_path, O_RDONLY);

if (!cpu0_id_reg_fd || !cpu1_id_reg_fd)
{
variorum_error_handler("Error encountered in accessing CPU ID information",
VARIORUM_ERROR_INVAL, getenv("HOSTNAME"),
__FILE__, __FUNCTION__, __LINE__);
return NULL;
}

int cpu0_id_bytes = read(cpu0_id_reg_fd, cpu0_id_str, CPU_ID_SIZE);
int cpu1_id_bytes = read(cpu1_id_reg_fd, cpu1_id_str, CPU_ID_SIZE);

if (!cpu0_id_bytes || !cpu1_id_bytes)
{
variorum_error_handler("Error encountered in accessing CPU ID information",
VARIORUM_ERROR_INVAL, getenv("HOSTNAME"),
__FILE__, __FUNCTION__, __LINE__);
return NULL;
}

cpu0_id_val = strtol(cpu0_id_str, NULL, 16);
cpu1_id_val = strtol(cpu1_id_str, NULL, 16);

model_id = (cpu0_id_val & 0x000000000000fff0) << 8;
model_id |= ((cpu1_id_val & 0x000000000000fff0) >> 4);

close(cpu0_id_reg_fd);
close(cpu1_id_reg_fd);
}

cpu0_id_val = strtol(cpu0_id_str, NULL, 16);
cpu1_id_val = strtol(cpu1_id_str, NULL, 16);

*model = (cpu0_id_val & 0x000000000000fff0) << 8;
*model |= ((cpu1_id_val & 0x000000000000fff0) >> 4);

close(cpu0_id_reg_fd);
close(cpu1_id_reg_fd);

unsigned long *model = (unsigned long *) malloc(sizeof(uint64_t));
*model = model_id;
return model;
}

Expand Down
7 changes: 6 additions & 1 deletion src/variorum/Nvidia_GPU/config_nvidia.c
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ uint64_t *detect_gpu_arch(void)

int set_nvidia_func_ptrs(int idx)
{
static int init_complete = 0;
int err = 0;

if (*g_platform[idx].arch_id == VOLTA)
Expand All @@ -43,6 +44,10 @@ int set_nvidia_func_ptrs(int idx)
err = VARIORUM_ERROR_UNSUPPORTED_PLATFORM;
}

initNVML();
if (init_complete == 0)
{
initNVML();
init_complete = 1;
}
return err;
}
18 changes: 17 additions & 1 deletion src/variorum/Nvidia_GPU/nvidia_gpu_power_features.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,23 @@ void initNVML(void)
/* Initialize GPU reading */
m_unit_devices_file_desc = NULL;
nvmlReturn_t result = nvmlInit();
nvmlDeviceGetCount(&m_total_unit_devices);
if (result != NVML_SUCCESS)
{
variorum_error_handler("Could not initialize NVML",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}
result = nvmlDeviceGetCount(&m_total_unit_devices);
if (result != NVML_SUCCESS)
{
variorum_error_handler("Could not query GPU devices on the system",
VARIORUM_ERROR_PLATFORM_ENV,
getenv("HOSTNAME"), __FILE__, __FUNCTION__,
__LINE__);
exit(-1);
}
m_unit_devices_file_desc = (nvmlDevice_t *) malloc(sizeof(
nvmlDevice_t) * m_total_unit_devices);
if (m_unit_devices_file_desc == NULL)
Expand Down
Loading