Skip to content

Commit

Permalink
Change behavior on systems that have hyperthreading enabled for the a…
Browse files Browse the repository at this point in the history
…ssign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU]) hook. The previous behaviour was to launch one task per hardware thread. We now change that to launch one task per physical core. A new COMPUTE_UNIT is introduced (COMPUTE_UNIT[HWTHREAD]) in case one wants to retain the previous behaviour of launching one task per hardware thread.
  • Loading branch information
Caspar van Leeuwen committed Jun 28, 2024
1 parent 9b44c4e commit 8167eee
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 7 deletions.
2 changes: 2 additions & 0 deletions eessi/testsuite/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

AMD = 'AMD'
CI = 'CI'
HWTHREAD = 'HWTHREAD'
CPU = 'CPU'
CPU_SOCKET = 'CPU_SOCKET'
GPU = 'GPU'
Expand All @@ -19,6 +20,7 @@
}

COMPUTE_UNIT = {
HWTHREAD: 'hwthread',
CPU: 'cpu',
CPU_SOCKET: 'cpu_socket',
GPU: 'gpu',
Expand Down
54 changes: 47 additions & 7 deletions eessi/testsuite/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,18 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n
Total task count is determined based on the number of nodes to be used in the test.
Behaviour of this function is (usually) sensible for MPI tests.
WARNING: when using COMPUTE_UNIT[HWTHREAD] and invoking a hook for process binding, please verify that process binding happens correctly
Arguments:
- test: the ReFrame test to which this hook should apply
- compute_unit: a device as listed in eessi.testsuite.constants.COMPUTE_UNIT
Examples:
On a single node with 2 sockets, 64 cores and 128 hyperthreads:
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU]) will launch 64 tasks with 1 thread
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET]) will launch 2 tasks with 32 threads per task
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[HWTHREAD]) will launch 128 tasks with 1 thread per task
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU]) will launch 64 tasks with 2 threads per task
- assign_tasks_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET]) will launch 2 tasks with 64 threads per task
Future work:
Currently, on a single node with 2 sockets, 64 cores and 128 hyperthreads, this
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU], true) launches 128 tasks with 1 thread
- assign_one_task_per_compute_unit(test, COMPUTE_UNIT[CPU_SOCKET], true) launches 2 tasks with 64 threads per task
In the future, we'd like to add an argument that disables spawning tasks for hyperthreads.
"""
if num_per != 1 and compute_unit in [COMPUTE_UNIT[GPU], COMPUTE_UNIT[CPU], COMPUTE_UNIT[CPU_SOCKET]]:
raise NotImplementedError(
Expand All @@ -106,6 +104,8 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, n

if compute_unit == COMPUTE_UNIT[GPU]:
_assign_one_task_per_gpu(test)
elif compute_unit == COMPUTE_UNIT[HWTHREAD]:
_assign_one_task_per_hwthread(test)
elif compute_unit == COMPUTE_UNIT[CPU]:
_assign_one_task_per_cpu(test)
elif compute_unit == COMPUTE_UNIT[CPU_SOCKET]:
Expand Down Expand Up @@ -223,6 +223,41 @@ def _assign_one_task_per_cpu(test: rfm.RegressionTest):
--setvar num_tasks_per_node=<x> and/or
--setvar num_cpus_per_task=<y>.
Default resources requested:
- num_tasks_per_node = default_num_cpus_per_node / num_cpus_per_core
- num_cpus_per_task = default_num_cpus_per_node / num_tasks_per_node
"""
# neither num_tasks_per_node nor num_cpus_per_task are set
if not test.num_tasks_per_node and not test.num_cpus_per_task:
check_proc_attribute_defined(test, 'num_cpus_per_core')
test.num_tasks_per_node = int(test.default_num_cpus_per_node / test.current_partition.processor.num_cpus_per_core)
test.num_cpus_per_task = int(test.default_num_cpus_per_node / test.num_tasks_per_node)

# num_tasks_per_node is not set, but num_cpus_per_task is
elif not test.num_tasks_per_node:
test.num_tasks_per_node = int(test.default_num_cpus_per_node / test.num_cpus_per_task)

# num_cpus_per_task is not set, but num_tasks_per_node is
elif not test.num_cpus_per_task:
test.num_cpus_per_task = int(test.default_num_cpus_per_node / test.num_tasks_per_node)

else:
pass # both num_tasks_per_node and num_cpus_per_task are already set

test.num_tasks = test.num_nodes * test.num_tasks_per_node

log(f'num_tasks_per_node set to {test.num_tasks_per_node}')
log(f'num_cpus_per_task set to {test.num_cpus_per_task}')
log(f'num_tasks set to {test.num_tasks}')


def _assign_one_task_per_hwthread(test: rfm.RegressionTest):
"""
Sets num_tasks_per_node and num_cpus_per_task such that it will run one task per hardware thread,
unless specified with:
--setvar num_tasks_per_node=<x> and/or
--setvar num_cpus_per_task=<y>.
Default resources requested:
- num_tasks_per_node = default_num_cpus_per_node
- num_cpus_per_task = default_num_cpus_per_node / num_tasks_per_node
Expand Down Expand Up @@ -508,6 +543,10 @@ def set_compact_process_binding(test: rfm.RegressionTest):
This hook sets a binding policy for process binding.
More specifically, it will bind each process to subsequent domains of test.num_cpus_per_task cores.
Arguments:
- test: the ReFrame test to which this hook should apply
A few examples:
- Pure MPI (test.num_cpus_per_task = 1) will result in binding 1 process to each core.
this will happen in a compact way, i.e. rank 0 to core 0, rank 1 to core 1, etc
Expand All @@ -522,6 +561,7 @@ def set_compact_process_binding(test: rfm.RegressionTest):

# Check if hyperthreading is enabled. If so, divide the number of cpus per task by the number
# of hw threads per core to get a physical core count
# TODO: check if this also leads to sensible binding when using COMPUTE_UNIT[HWTHREAD]
check_proc_attribute_defined(test, 'num_cpus_per_core')
num_cpus_per_core = test.current_partition.processor.num_cpus_per_core
physical_cpus_per_task = int(test.num_cpus_per_task / num_cpus_per_core)
Expand Down
4 changes: 4 additions & 0 deletions eessi/testsuite/tests/apps/espresso/espresso.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,10 @@ def set_mem(self):
mem_required_per_node = self.num_tasks_per_node * 0.9
hooks.req_memory_per_node(test=self, app_mem_req=mem_required_per_node * 1024)

@run_after('setup')
def set_binding(self):
    """Apply the compact process-binding hook to this test after the setup stage."""
    hooks.set_compact_process_binding(test=self)

@deferrable
def assert_completion(self):
'''Check completion'''
Expand Down

0 comments on commit 8167eee

Please sign in to comment.