Skip to content

Commit

Permalink
Merge pull request #77 from Qthreads/release-1.16-RC
Browse files Browse the repository at this point in the history
Release 1.16 rc
  • Loading branch information
janciesko authored Jan 18, 2021
2 parents 54c6912 + 0c09373 commit dc89e67
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 69 deletions.
8 changes: 6 additions & 2 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
--- 1.16 (anticipated 2020) ---
--- 1.16 (current) ---

Improvements:
- Add support for Spack testing (spack test run qthreads)
- Add support for hwloc2.x (minimal required version is hwloc1.5)

Documentation:
- User guide for basic Qthreads usage (see userguide subdirectory for LaTex and included code source files)

--- 1.15 (current) ---
--- 1.15 ---

New Features:
- Add experimental support for Thread Local Storage and new Condition variable initializers
Expand Down
12 changes: 6 additions & 6 deletions src/affinity/binders.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,12 @@ void INTERNAL qt_affinity_balanced(int num_workers,
} else {
// No children
workers.binds[start] = hwloc_bitmap_alloc();
hwloc_bitmap_copy(workers.binds[start], obj->allowed_cpuset);
hwloc_bitmap_copy(workers.binds[start], obj->cpuset);
if (num_workers > 1) {
printf("warning: PU oversubscribed\n");
}
if (hwloc_bitmap_weight(obj->allowed_cpuset) != 1) {
printf("error: expected pu, got weight %d\n", hwloc_bitmap_weight(obj->allowed_cpuset));
if (hwloc_bitmap_weight(obj->cpuset) != 1) {
printf("error: expected pu, got weight %d\n", hwloc_bitmap_weight(obj->cpuset));
exit(-1);
}
}
Expand All @@ -73,9 +73,9 @@ int INTERNAL qt_affinity_compact(int num_workers,
} else {
// No children, should be PU
workers.binds[workers.num - n] = hwloc_bitmap_alloc();
hwloc_bitmap_copy(workers.binds[workers.num - n], obj->allowed_cpuset);
if (hwloc_bitmap_weight(obj->allowed_cpuset) != 1){
printf("error: expected pu, got weight %d\n", hwloc_bitmap_weight(obj->allowed_cpuset));
hwloc_bitmap_copy(workers.binds[workers.num - n], obj->cpuset);
if (hwloc_bitmap_weight(obj->cpuset) != 1){
printf("error: expected pu, got weight %d\n", hwloc_bitmap_weight(obj->cpuset));
exit(-1);
}
return n - 1;
Expand Down
54 changes: 27 additions & 27 deletions src/affinity/hwloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ static unsigned int num_usable_by_depth(unsigned int depth)
qthread_debug(AFFINITY_DETAILS, "obj %i of this type is NULL!!!\n", i);
continue;
}
if (hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->allowed_cpuset, HWLOC_OBJ_PU) == 0) {
if (hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_PU) == 0) {
qthread_debug(AFFINITY_DETAILS, "obj %i of this type has no PUs!!!\n", i);
continue;
}
Expand All @@ -80,7 +80,7 @@ static unsigned int num_usable_by_type(hwloc_obj_type_t tp)
qthread_debug(AFFINITY_DETAILS, "obj %i of this type is NULL!!!\n", i);
continue;
}
if (hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->allowed_cpuset, HWLOC_OBJ_PU) == 0) {
if (hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_PU) == 0) {
qthread_debug(AFFINITY_DETAILS, "obj %i of this type has no PUs!!!\n", i);
continue;
}
Expand Down Expand Up @@ -123,9 +123,9 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
* below is used to compare to the cache level */
const size_t shepindexofL1cache = 5;
hwloc_obj_type_t shep_type_options[] = {
HWLOC_OBJ_NODE, HWLOC_OBJ_CACHE, HWLOC_OBJ_SOCKET, HWLOC_OBJ_CORE,
HWLOC_OBJ_NODE, HWLOC_OBJ_CACHE_UNIFIED, HWLOC_OBJ_SOCKET, HWLOC_OBJ_CORE,
HWLOC_OBJ_PU,
HWLOC_OBJ_CACHE, HWLOC_OBJ_CACHE, HWLOC_OBJ_CACHE, HWLOC_OBJ_CACHE
HWLOC_OBJ_CACHE_UNIFIED, HWLOC_OBJ_CACHE_UNIFIED, HWLOC_OBJ_CACHE_UNIFIED, HWLOC_OBJ_CACHE_UNIFIED
};
{
const char *qsh = qt_internal_get_env_str("SHEPHERD_BOUNDARY", "node");
Expand Down Expand Up @@ -200,7 +200,7 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
if ((shep_depth == HWLOC_TYPE_DEPTH_UNKNOWN) ||
(shep_depth == HWLOC_TYPE_DEPTH_MULTIPLE)) {
if ((shep_type_idx > 0) &&
(shep_type_options[shep_type_idx] == HWLOC_OBJ_CACHE)) {
(shep_type_options[shep_type_idx] == HWLOC_OBJ_CACHE_UNIFIED)) {
/* caches are almost always weird; so if the user asked for them, just give best effort */
unsigned int maxdepth = hwloc_topology_get_depth(topology);
unsigned int curdepth;
Expand All @@ -211,7 +211,7 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
for (curdepth = maxdepth; curdepth > 0; --curdepth) {
unsigned int realdepth = curdepth - 1;
hwloc_obj_type_t t = hwloc_get_depth_type(topology, realdepth);
if (t == HWLOC_OBJ_CACHE) {
if (t == HWLOC_OBJ_CACHE_UNIFIED) {
level++;
qthread_debug(AFFINITY_DETAILS,
"L%u at depth %u (nbobjs is %u)\n",
Expand Down Expand Up @@ -261,7 +261,7 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
shep_depth = hwloc_get_type_depth(topology, shep_type_options[shep_type_idx]);
if (shep_depth <= 0) {
qthread_debug(AFFINITY_DETAILS, "invalid shepherd type (%s), finding another one...\n", typenames[shep_type_idx]);
if (shep_type_options[shep_type_idx] == HWLOC_OBJ_CACHE) {
if (shep_type_options[shep_type_idx] == HWLOC_OBJ_CACHE_UNIFIED) {
shep_type_idx = 1;
}
shep_type_idx++;
Expand Down Expand Up @@ -333,7 +333,7 @@ void INTERNAL qt_affinity_mem_tonode(void *addr,

DEBUG_ONLY(hwloc_topology_check(topology));
hwloc_bitmap_set(nodeset, node);
hwloc_set_area_membind_nodeset(topology, addr, bytes, nodeset,
hwloc_set_area_membind(topology, addr, bytes, nodeset,
HWLOC_MEMBIND_BIND,
HWLOC_MEMBIND_NOCPUBIND);
hwloc_bitmap_free(nodeset);
Expand All @@ -354,7 +354,7 @@ void INTERNAL *qt_affinity_alloc_onnode(size_t bytes,
DEBUG_ONLY(hwloc_topology_check(topology));
nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(nodeset, node);
ret = hwloc_alloc_membind_nodeset(topology, bytes, nodeset,
ret = hwloc_alloc_membind(topology, bytes, nodeset,
HWLOC_MEMBIND_BIND,
HWLOC_MEMBIND_NOCPUBIND);
hwloc_bitmap_free(nodeset);
Expand Down Expand Up @@ -405,7 +405,7 @@ qthread_worker_id_t INTERNAL guess_num_workers_per_shep(qthread_shepherd_id_t ns
hwloc_obj_t obj = hwloc_get_obj_inside_cpuset_by_depth(topology, allowed_cpuset, shep_depth, 0);
int workerobjs_per_shep;

if (hwloc_compare_types(wkr_type, HWLOC_OBJ_CACHE) == 0) {
if (hwloc_compare_types(wkr_type, HWLOC_OBJ_CACHE_UNIFIED) == 0) {
qthread_debug(AFFINITY_DETAILS, "worker type is a cache; must handle specially...\n");
wkr_depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PU);
qthread_debug(AFFINITY_DETAILS, "PU wkr_depth = %u\n", wkr_depth);
Expand All @@ -414,18 +414,18 @@ qthread_worker_id_t INTERNAL guess_num_workers_per_shep(qthread_shepherd_id_t ns
qthread_debug(AFFINITY_DETAILS, "wkridx=%i wkr_index=%i\n", wkridx, wkr_index);
do {
wkr_depth--;
tmp = hwloc_get_obj_inside_cpuset_by_depth(topology, obj->allowed_cpuset, wkr_depth, 0);
qthread_debug(AFFINITY_DETAILS, "wkr_depth %i, type = %s %i\n", wkr_depth, hwloc_obj_type_string(tmp->type), hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE));
} while (hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE) > 0);
if (hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE) != 0) {
tmp = hwloc_get_obj_inside_cpuset_by_depth(topology, obj->cpuset, wkr_depth, 0);
qthread_debug(AFFINITY_DETAILS, "wkr_depth %i, type = %s %i\n", wkr_depth, hwloc_obj_type_string(tmp->type), hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE_UNIFIED));
} while (hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE_UNIFIED) > 0);
if (hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE_UNIFIED) != 0) {
fprintf(stderr, "QTHREADS: worker unit (%s) does not exist on this machine.\n", typenames[wkr_index]);
assert(hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE) == 0);
assert(hwloc_compare_types(tmp->type, HWLOC_OBJ_CACHE_UNIFIED) == 0);
goto guess_my_weight;
}
}
qthread_debug(AFFINITY_DETAILS, "found the desired level of cache!!!\n");
assert(wkr_depth >= shep_depth);
workerobjs_per_shep = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->allowed_cpuset, wkr_depth);
workerobjs_per_shep = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->cpuset, wkr_depth);
qthread_debug(AFFINITY_CALLS, "workerobjs type = %s, per_shep = %u\n", hwloc_obj_type_string(wkr_type), workerobjs_per_shep);
switch (workerobjs_per_shep) {
case 0:
Expand All @@ -439,7 +439,7 @@ qthread_worker_id_t INTERNAL guess_num_workers_per_shep(qthread_shepherd_id_t ns
}
} else {
qthread_debug(AFFINITY_DETAILS, "worker type is NOT a cache...\n");
workerobjs_per_shep = hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->allowed_cpuset, wkr_type);
workerobjs_per_shep = hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, wkr_type);
qthread_debug(AFFINITY_CALLS, "workerobjs = %s, per_shep = %u\n", hwloc_obj_type_string(wkr_type), workerobjs_per_shep);
switch (workerobjs_per_shep) {
case 0:
Expand Down Expand Up @@ -474,7 +474,7 @@ qthread_worker_id_t INTERNAL guess_num_workers_per_shep(qthread_shepherd_id_t ns

for (size_t idx = 0; idx < nshepherds && idx < max_idx; ++idx) {
hwloc_obj_t obj = hwloc_get_obj_inside_cpuset_by_depth(topology, allowed_cpuset, shep_depth, idx);
unsigned int weight = WEIGHT(obj->allowed_cpuset);
unsigned int weight = WEIGHT(obj->cpuset);
qthread_debug(AFFINITY_DETAILS, "%s %u has %u weight\n", hwloc_obj_type_string(hwloc_get_depth_type(topology, shep_depth)), (unsigned int)idx, weight);
total += weight;
if ((idx == 0) || (ret < weight)) {
Expand Down Expand Up @@ -508,9 +508,9 @@ void INTERNAL qt_affinity_set(qthread_worker_t *me,
assert(wkr_depth >= 0);
qthread_debug(AFFINITY_DETAILS, "wkr_depth = %u\n", wkr_depth);
qthread_debug(AFFINITY_DETAILS, "num_wkrs = %u\n", hwloc_get_nbobjs_inside_cpuset_by_depth(topology, allowed_cpuset, wkr_depth));
qthread_debug(AFFINITY_DETAILS, "num_wkrs = %u\n", hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->allowed_cpuset, wkr_depth));
hwloc_obj_t worker0 = hwloc_get_obj_inside_cpuset_by_depth(topology, obj->allowed_cpuset, wkr_depth, 0);
unsigned int workerobjs_per_shep = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->allowed_cpuset, wkr_depth);
qthread_debug(AFFINITY_DETAILS, "num_wkrs = %u\n", hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->cpuset, wkr_depth));
hwloc_obj_t worker0 = hwloc_get_obj_inside_cpuset_by_depth(topology, obj->cpuset, wkr_depth, 0);
unsigned int workerobjs_per_shep = hwloc_get_nbobjs_inside_cpuset_by_depth(topology, obj->cpuset, wkr_depth);

assert(workerobjs_per_shep > 0);
assert(worker0);
Expand All @@ -524,13 +524,13 @@ void INTERNAL qt_affinity_set(qthread_worker_t *me,
maxshepobjs,
hwloc_obj_type_string(hwloc_get_depth_type(topology, shep_depth)),
(int)hwloc_get_nbobjs_inside_cpuset_by_type(topology, allowed_cpuset, HWLOC_OBJ_PU));
int shep_pus = hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->allowed_cpuset, HWLOC_OBJ_PU);
int shep_pus = hwloc_get_nbobjs_inside_cpuset_by_type(topology, obj->cpuset, HWLOC_OBJ_PU);
assert(shep_pus > 0);
unsigned int worker_pus = hwloc_get_nbobjs_inside_cpuset_by_type(topology, worker0->allowed_cpuset, HWLOC_OBJ_PU);
unsigned int worker_pus = hwloc_get_nbobjs_inside_cpuset_by_type(topology, worker0->cpuset, HWLOC_OBJ_PU);
unsigned int wraparounds = me->packed_worker_id / (maxshepobjs * nworkerspershep);
unsigned int worker_wraparounds = me->worker_id / workerobjs_per_shep;
hwloc_obj_t sub_obj =
hwloc_get_obj_inside_cpuset_by_type(topology, obj->allowed_cpuset,
hwloc_get_obj_inside_cpuset_by_type(topology, obj->cpuset,
HWLOC_OBJ_PU,
((me->worker_id * worker_pus) +
(wraparounds * nworkerspershep) +
Expand All @@ -548,7 +548,7 @@ void INTERNAL qt_affinity_set(qthread_worker_t *me,
qthread_debug(AFFINITY_DETAILS, "%u: (%i*%i) + (((%i * %i) + (%i * %i) + %i) % %i)\n",
(unsigned)me->packed_worker_id,
(int)shep_pus, (int)myshep->node, (int)me->worker_id, (int)worker_pus, (int)wraparounds, (int)nworkerspershep, (int)worker_wraparounds, (int)shep_pus);
ASPRINTF(&str, sub_obj->allowed_cpuset);
ASPRINTF(&str, sub_obj->cpuset);
qthread_debug(AFFINITY_BEHAVIOR,
"binding shep %i worker %i (%i) to PU %i, newPU %i, mask %s\n",
(int)myshep->shepherd_id, (int)me->worker_id,
Expand All @@ -559,15 +559,15 @@ void INTERNAL qt_affinity_set(qthread_worker_t *me,
FREE(str, strlen(str));
}
#endif /* ifdef QTHREAD_DEBUG_AFFINITY */
if (hwloc_set_cpubind(topology, sub_obj->allowed_cpuset, HWLOC_CPUBIND_THREAD)) {
if (hwloc_set_cpubind(topology, sub_obj->cpuset, HWLOC_CPUBIND_THREAD)) {
char *str;
int i = errno;
#ifdef __APPLE__
if (i == ENOSYS) {
return;
}
#endif
ASPRINTF(&str, sub_obj->allowed_cpuset);
ASPRINTF(&str, sub_obj->cpuset);
fprintf(stderr, "Couldn't bind to cpuset %s because %s (%i)\n", str,
strerror(i), i);
FREE(str, strlen(str));
Expand Down
12 changes: 6 additions & 6 deletions src/affinity/hwloc_v2.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ static void print_system_view(hwloc_topology_t sys_topo)

int const num_cores =
hwloc_get_nbobjs_inside_cpuset_by_type(
sys_topo, allowed_cpuset, HWLOC_OBJ_CORE);
sys_topo, allowed_cpuset, HWLOC_OBJ_CACHE_UNIFIED);

printf("TOPO: number of available COREs: %d\n", num_cores);
}
Expand Down Expand Up @@ -179,7 +179,7 @@ static void init_type_options(void)
while (NULL != obj) {
topo_types[type_id] = obj->type;

if (0 == hwloc_compare_types(HWLOC_OBJ_CACHE, obj->type)) {
if (0 == hwloc_compare_types(HWLOC_OBJ_CACHE_UNIFIED, obj->type)) {
snprintf(topo_type_names[type_id], 8, "L%dcache", cache_level);
cache_level += 1;
} else {
Expand Down Expand Up @@ -559,7 +559,7 @@ void INTERNAL qt_affinity_init(qthread_shepherd_id_t *nbshepherds,
sys_topo, allowed_cpuset, qt_topo.shep_level, i);
hwloc_obj_t logical_core_obj =
hwloc_get_obj_inside_cpuset_by_type(
sys_topo, shep_obj->allowed_cpuset, HWLOC_OBJ_CORE, j);
sys_topo, shep_obj->cpuset, HWLOC_OBJ_CORE, j);
qt_topo.worker_map[uid].bind_obj =
hwloc_get_ancestor_obj_by_depth(
sys_topo, qt_topo.worker_obj->depth, logical_core_obj);
Expand Down Expand Up @@ -587,7 +587,7 @@ void INTERNAL qt_affinity_set(qthread_worker_t *me,
ASSERT_ONLY(hwloc_topology_check(sys_topo));

hwloc_obj_t target_obj = qt_topo.worker_map[me->unique_id - 1].bind_obj;
if (hwloc_set_cpubind(sys_topo, target_obj->allowed_cpuset,
if (hwloc_set_cpubind(sys_topo, target_obj->cpuset,
HWLOC_CPUBIND_THREAD)) {
char *str;
int i = errno;
Expand All @@ -596,7 +596,7 @@ void INTERNAL qt_affinity_set(qthread_worker_t *me,
return;
}
#endif
hwloc_bitmap_asprintf(&str, target_obj->allowed_cpuset);
hwloc_bitmap_asprintf(&str, target_obj->cpuset);
fprintf(stderr, "Couldn't bind to cpuset %s because %s (%i)\n", str,
strerror(i), i);
FREE(str, strlen(str));
Expand Down Expand Up @@ -683,7 +683,7 @@ void INTERNAL qt_affinity_mem_tonode(void *addr,

DEBUG_ONLY(hwloc_topology_check(sys_topo));
hwloc_bitmap_set(nodeset, node);
hwloc_set_area_membind_nodeset(sys_topo, addr, bytes, nodeset,
hwloc_set_area_membind(sys_topo, addr, bytes, nodeset,
HWLOC_MEMBIND_BIND,
HWLOC_MEMBIND_NOCPUBIND);
hwloc_bitmap_free(nodeset);
Expand Down
Loading

0 comments on commit dc89e67

Please sign in to comment.