Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[openmp] Use core_siblings_list if physical_package_id not available #111831

Merged
merged 1 commit into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 71 additions & 29 deletions openmp/runtime/src/kmp_affinity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1589,15 +1589,13 @@ kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
return buf;
}

// Return (possibly empty) affinity mask representing the offline CPUs
// Caller must free the mask
kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
kmp_affin_mask_t *offline;
KMP_CPU_ALLOC(offline);
KMP_CPU_ZERO(offline);
static kmp_affin_mask_t *__kmp_parse_cpu_list(const char *path) {
kmp_affin_mask_t *mask;
KMP_CPU_ALLOC(mask);
KMP_CPU_ZERO(mask);
#if KMP_OS_LINUX
int n, begin_cpu, end_cpu;
kmp_safe_raii_file_t offline_file;
kmp_safe_raii_file_t file;
auto skip_ws = [](FILE *f) {
int c;
do {
Expand All @@ -1606,29 +1604,29 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
if (c != EOF)
ungetc(c, f);
};
// File contains CSV of integer ranges representing the offline CPUs
// File contains CSV of integer ranges representing the CPUs
// e.g., 1,2,4-7,9,11-15
int status = offline_file.try_open("/sys/devices/system/cpu/offline", "r");
int status = file.try_open(path, "r");
if (status != 0)
return offline;
while (!feof(offline_file)) {
skip_ws(offline_file);
n = fscanf(offline_file, "%d", &begin_cpu);
return mask;
while (!feof(file)) {
skip_ws(file);
n = fscanf(file, "%d", &begin_cpu);
if (n != 1)
break;
skip_ws(offline_file);
int c = fgetc(offline_file);
skip_ws(file);
int c = fgetc(file);
if (c == EOF || c == ',') {
// Just single CPU
end_cpu = begin_cpu;
} else if (c == '-') {
// Range of CPUs
skip_ws(offline_file);
n = fscanf(offline_file, "%d", &end_cpu);
skip_ws(file);
n = fscanf(file, "%d", &end_cpu);
if (n != 1)
break;
skip_ws(offline_file);
c = fgetc(offline_file); // skip ','
skip_ws(file);
c = fgetc(file); // skip ','
} else {
// Syntax problem
break;
Expand All @@ -1638,13 +1636,19 @@ kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
end_cpu >= __kmp_xproc || begin_cpu > end_cpu) {
continue;
}
// Insert [begin_cpu, end_cpu] into offline mask
// Insert [begin_cpu, end_cpu] into mask
for (int cpu = begin_cpu; cpu <= end_cpu; ++cpu) {
KMP_CPU_SET(cpu, offline);
KMP_CPU_SET(cpu, mask);
}
}
#endif
return offline;
return mask;
}

// Return (possibly empty) affinity mask representing the offline CPUs.
// The mask is produced by __kmp_parse_cpu_list() from the CPU list in
// /sys/devices/system/cpu/offline; that helper hands back an all-zero mask
// when the file cannot be opened, which is why the result may be empty.
// Caller must free the mask
kmp_affin_mask_t *__kmp_affinity_get_offline_cpus() {
return __kmp_parse_cpu_list("/sys/devices/system/cpu/offline");
}

// Return the number of available procs
Expand Down Expand Up @@ -3175,6 +3179,37 @@ static inline const char *__kmp_cpuinfo_get_envvar() {
return envvar;
}

// Reconstruct a package id for the CPU described by threadInfo[idx] (and for
// every other available CPU sharing its package) from that CPU's sysfs
// topology/core_siblings_list file.
//
// threadInfo - table of per-CPU records, indexed [cpu][field]; the osIdIndex
//              and pkgIdIndex fields are read, pkgIdIndex may be written.
// num_avail  - number of valid rows in threadInfo.
// idx        - row whose package id is missing (UINT_MAX); idx itself is used
//              as the synthetic package id for all of its siblings.
//
// Returns true when threadInfo[idx] ends up with a package id. Returns false
// when affinity is not capable, when a sibling already carries a different
// package id (contradictory sibling lists), or when the sibling list does not
// contain the queried CPU at all (e.g. the sysfs file is missing or
// unreadable, in which case the parsed mask is empty). On a false return some
// rows may already have been assigned idx; the caller is expected to discard
// threadInfo in that case.
static bool __kmp_package_id_from_core_siblings_list(unsigned **threadInfo,
                                                     unsigned num_avail,
                                                     unsigned idx) {
  if (!KMP_AFFINITY_CAPABLE())
    return false;

  char path[256];
  KMP_SNPRINTF(path, sizeof(path),
               "/sys/devices/system/cpu/cpu%u/topology/core_siblings_list",
               threadInfo[idx][osIdIndex]);
  kmp_affin_mask_t *siblings = __kmp_parse_cpu_list(path);
  bool success = true;
  for (unsigned i = 0; i < num_avail; ++i) {
    unsigned cpu_id = threadInfo[i][osIdIndex];
    // Guard the mask lookup below against an out-of-range OS id.
    KMP_ASSERT(cpu_id < __kmp_affin_mask_size * CHAR_BIT);
    if (!KMP_CPU_ISSET(cpu_id, siblings))
      continue;
    if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
      // Arbitrarily pick the first index we encounter, it only matters that
      // the value is the same for all siblings.
      threadInfo[i][pkgIdIndex] = idx;
    } else if (threadInfo[i][pkgIdIndex] != idx) {
      // Contradictory sibling lists.
      success = false;
      break;
    }
  }
  // The parsed mask is empty when the sysfs file could not be opened, and a
  // malformed list may omit the queried CPU. Report failure instead of
  // asserting so the caller can emit its MissingPhysicalIDField diagnostic
  // rather than aborting the process.
  if (success && threadInfo[idx][pkgIdIndex] == UINT_MAX)
    success = false;
  KMP_CPU_FREE(siblings);
  return success;
}

// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// affinity map. On AIX, the map is obtained through system SRAD (Scheduler
// Resource Allocation Domain).
Expand Down Expand Up @@ -3550,18 +3585,13 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
return false;
}

// Check for missing fields. The osId field must be there, and we
// currently require that the physical id field is specified, also.
// Check for missing fields. The osId field must be there. The physical
// id field will be checked later.
if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_MissingProcField;
return false;
}
if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_MissingPhysicalIDField;
return false;
}

// Skip this proc if it is not included in the machine model.
if (KMP_AFFINITY_CAPABLE() &&
Expand Down Expand Up @@ -3591,6 +3621,18 @@ static bool __kmp_affinity_create_cpuinfo_map(int *line,
}
*line = 0;

// At least on powerpc, Linux may return -1 for physical_package_id. Try
// to reconstruct topology from core_siblings_list in that case.
for (i = 0; i < num_avail; ++i) {
if (threadInfo[i][pkgIdIndex] == UINT_MAX) {
if (!__kmp_package_id_from_core_siblings_list(threadInfo, num_avail, i)) {
CLEANUP_THREAD_INFO;
*msg_id = kmp_i18n_str_MissingPhysicalIDField;
return false;
}
}
}

#if KMP_MIC && REDUCE_TEAM_SIZE
unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/test/affinity/kmp-hw-subset.c
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ static int compare_hw_subset_places(const place_list_t *openmp_places,
expected_per_place = nthreads_per_core;
} else {
expected_total = nsockets;
expected_per_place = ncores_per_socket;
expected_per_place = ncores_per_socket * nthreads_per_core;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Unless I'm misunderstanding something, the count should always be in terms of threads. I think this test has been getting away with it because on x86 the number of threads per core is at most 2, so after halving it is always 1 and this multiplication does not matter. On the ppc system I'm testing, the number of threads per core is 6, so after halving it's 3, and the test would fail if we don't multiply here.

}
if (openmp_places->num_places != expected_total) {
fprintf(stderr, "error: KMP_HW_SUBSET did not half each resource layer!\n");
Expand Down
Loading