support always requesting GPUs on partitions that require it #116

Merged
merged 14 commits on Feb 29, 2024
2 changes: 2 additions & 0 deletions eessi/testsuite/constants.py
@@ -11,6 +11,7 @@
INTEL = 'INTEL'
NODE = 'NODE'
NVIDIA = 'NVIDIA'
ALWAYS_REQUEST_GPUS = 'ALWAYS_REQUEST_GPUS'

DEVICE_TYPES = {
CPU: 'cpu',
@@ -31,6 +32,7 @@
FEATURES = {
CPU: 'cpu',
GPU: 'gpu',
ALWAYS_REQUEST_GPUS: 'always_request_gpus',
}

GPU_VENDORS = {
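Reviewer note for context: the new 'always_request_gpus' string is a partition feature. The hooks below match it against test.current_partition.features, which ReFrame populates from the features list of a partition in the site configuration. A minimal sketch of how a site could opt in (system and partition names are hypothetical):

site_configuration = {
    'systems': [{
        'name': 'example_cluster',       # hypothetical system name
        'hostnames': ['login.*'],
        'partitions': [{
            'name': 'gpu_partition',     # hypothetical partition name
            'scheduler': 'slurm',
            'launcher': 'mpirun',
            'environs': ['default'],
            # 'gpu' marks a GPU partition; 'always_request_gpus' additionally
            # enforces the policy introduced by this PR
            'features': ['gpu', 'always_request_gpus'],
        }],
    }],
}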
74 changes: 44 additions & 30 deletions eessi/testsuite/hooks.py
@@ -7,7 +7,7 @@

import reframe as rfm

from eessi.testsuite.constants import * # noqa
from eessi.testsuite.constants import *
from eessi.testsuite.utils import (get_max_avail_gpus_per_node, is_cuda_required_module, log,
check_proc_attribute_defined)

@@ -34,6 +34,27 @@ def assign_default_num_cpus_per_node(test: rfm.RegressionTest):
log(f'default_num_cpus_per_node set to {test.default_num_cpus_per_node}')


def assign_default_num_gpus_per_node(test: rfm.RegressionTest):
"""
Check if the default number of GPUs per node is already defined in the test
(e.g. by earlier hooks like set_tag_scale).
If so, check that it doesn't exceed the maximum available.
If not, set default_num_gpus_per_node based on the maximum available GPUs and node_part.
"""

test.max_avail_gpus_per_node = get_max_avail_gpus_per_node(test)
if test.default_num_gpus_per_node:
# may skip if not enough GPUs
test.skip_if(
test.default_num_gpus_per_node > test.max_avail_gpus_per_node,
f'Number of GPUs per node in selected scale ({test.default_num_gpus_per_node}) is higher than max available'
f' ({test.max_avail_gpus_per_node}) in current partition ({test.current_partition.name}).'
)
else:
# no default set yet, so setting one
test.default_num_gpus_per_node = math.ceil(test.max_avail_gpus_per_node / test.node_part)
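# Worked example of the fallback above (reviewer note, illustrative numbers):
# on a node with max_avail_gpus_per_node = 4, a half-node scale (node_part = 2)
# gets math.ceil(4 / 2) = 2 GPUs per node by default, and a quarter-node scale
# (node_part = 4) gets math.ceil(4 / 4) = 1.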


def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, num_per: int = 1):
"""
Assign one task per compute unit (COMPUTE_UNIT[CPU], COMPUTE_UNIT[CPU_SOCKET] or COMPUTE_UNIT[GPU]).
@@ -69,8 +90,8 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, num_per: int = 1):

# Check if either node_part, or default_num_cpus_per_node and default_num_gpus_per_node are set correctly
if not (
type(test.node_part) == int or
(type(test.default_num_cpus_per_node) == int and type(test.default_num_gpus_per_node) == int)
type(test.node_part) == int
or (type(test.default_num_cpus_per_node) == int and type(test.default_num_gpus_per_node) == int)
):
raise ValueError(
f'Either node_part ({test.node_part}), or default_num_cpus_per_node ({test.default_num_cpus_per_node}) and'
@@ -79,6 +100,9 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, num_per: int = 1):

assign_default_num_cpus_per_node(test)

if FEATURES[GPU] in test.current_partition.features:
assign_default_num_gpus_per_node(test)

if compute_unit == COMPUTE_UNIT[GPU]:
_assign_one_task_per_gpu(test)
elif compute_unit == COMPUTE_UNIT[CPU]:
@@ -90,6 +114,8 @@ def assign_tasks_per_compute_unit(test: rfm.RegressionTest, compute_unit: str, num_per: int = 1):
else:
raise ValueError(f'compute unit {compute_unit} is currently not supported')

check_always_request_gpus(test)


def _assign_num_tasks_per_node(test: rfm.RegressionTest, num_per: int = 1):
"""
@@ -112,7 +138,6 @@ def _assign_num_tasks_per_node(test: rfm.RegressionTest, num_per: int = 1):
test.num_tasks_per_node = num_per
test.num_cpus_per_task = int(test.default_num_cpus_per_node / test.num_tasks_per_node)


# num_tasks_per_node is not set, but num_cpus_per_task is
elif not test.num_tasks_per_node:
test.num_tasks_per_node = int(test.default_num_cpus_per_node / test.num_cpus_per_task)
@@ -222,11 +247,6 @@ def _assign_one_task_per_gpu(test: rfm.RegressionTest):
--setvar num_cpus_per_task=<y> and/or
--setvar num_gpus_per_node=<z>.

Variables:
- max_avail_gpus_per_node: maximum available number of GPUs per node
- default_num_gpus_per_node: default number of GPUs per node as defined in the test
(e.g. by earlier hooks like set_tag_scale)

Default resources requested:
- num_gpus_per_node = default_num_gpus_per_node
- num_tasks_per_node = num_gpus_per_node
@@ -235,22 +255,6 @@ def _assign_one_task_per_gpu(test: rfm.RegressionTest):
If num_tasks_per_node is set, set num_gpus_per_node equal to either num_tasks_per_node or default_num_gpus_per_node
(whichever is smaller), unless num_gpus_per_node is also set.
"""
max_avail_gpus_per_node = get_max_avail_gpus_per_node(test)

# Check if the default number of gpus per node is already defined in the test
# (e.g. by earlier hooks like set_tag_scale).
# If so, check if it doesn't exceed the maximum available.
# If not, set default_num_gpus_per_node based on the maximum available gpus and node_part
if test.default_num_gpus_per_node:
# may skip if not enough GPUs
test.skip_if(
test.default_num_gpus_per_node > max_avail_gpus_per_node,
f'Requested GPUs per node ({test.default_num_gpus_per_node}) is higher than max available'
f' ({max_avail_gpus_per_node}) in current partition ({test.current_partition.name}).'
)
else:
# no default set yet, so setting one
test.default_num_gpus_per_node = math.ceil(max_avail_gpus_per_node / test.node_part)

# neither num_tasks_per_node nor num_gpus_per_node are set
if not test.num_tasks_per_node and not test.num_gpus_per_node:
@@ -273,7 +277,7 @@ def _assign_one_task_per_gpu(test: rfm.RegressionTest):
# limit num_cpus_per_task to the maximum available cpus per gpu
test.num_cpus_per_task = min(
int(test.default_num_cpus_per_node / test.num_tasks_per_node),
int(test.max_avail_cpus_per_node / max_avail_gpus_per_node)
int(test.max_avail_cpus_per_node / test.max_avail_gpus_per_node)
)

test.num_tasks = test.num_nodes * test.num_tasks_per_node
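# Worked example of the min() above (reviewer note, illustrative numbers): with
# default_num_cpus_per_node = 128, num_tasks_per_node = 4, max_avail_cpus_per_node = 128
# and max_avail_gpus_per_node = 4, num_cpus_per_task = min(32, 32) = 32, i.e. each
# task gets the share of CPUs that belongs to one GPU.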
@@ -303,8 +307,8 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str):
return

# test.valid_systems wasn't set yet, so set it
if len(test.valid_systems) == 0:
# test.valid_systems is empty, meaning all tests are filtered out. This hook shouldn't change that
if len(test.valid_systems) == 0 or test.valid_systems == [INVALID_SYSTEM]:
# test.valid_systems is empty or invalid, meaning all tests are filtered out. This hook shouldn't change that
return
# test.valid_systems still at default value, so overwrite
elif len(test.valid_systems) == 1 and test.valid_systems[0] == '*':
Expand All @@ -314,8 +318,8 @@ def _set_or_append_valid_systems(test: rfm.RegressionTest, valid_systems: str):
test.valid_systems[0] = f'{test.valid_systems[0]} {valid_systems}'
else:
warn_msg = f"valid_systems has multiple ({len(test.valid_systems)}) items,"
warn_msg += f" which is not supported by this hook."
warn_msg += f" Make sure to handle filtering yourself."
warn_msg += " which is not supported by this hook."
warn_msg += " Make sure to handle filtering yourself."
warnings.warn(warn_msg)
return
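# Illustrative outcomes of this hook (reviewer note, example values):
#   ['*']        + 'feat' -> ['feat']            (still at default, overwritten)
#   ['sys:part'] + 'feat' -> ['sys:part feat']   (appended to the single entry)
#   [] or [INVALID_SYSTEM]  -> unchanged         (everything already filtered out)
#   two or more entries     -> unchanged, with a warning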

@@ -333,6 +337,7 @@ def filter_supported_scales(test: rfm.RegressionTest):

log(f'valid_systems set to {test.valid_systems}')


def filter_valid_systems_by_device_type(test: rfm.RegressionTest, required_device_type: str):
"""
Filter valid_systems by required device type and by whether the module supports CUDA,
@@ -455,3 +460,12 @@ def set_compact_thread_binding(test: rfm.RegressionTest):
log(f'Set environment variable OMP_PLACES to {test.env_vars["OMP_PLACES"]}')
log(f'Set environment variable OMP_PROC_BIND to {test.env_vars["OMP_PROC_BIND"]}')
log(f'Set environment variable KMP_AFFINITY to {test.env_vars["KMP_AFFINITY"]}')


def check_always_request_gpus(test: rfm.RegressionTest):
"""
Make sure we always request enough GPUs if required for the current GPU partition (cluster-specific policy)
"""
if FEATURES[ALWAYS_REQUEST_GPUS] in test.current_partition.features and not test.num_gpus_per_node:
test.num_gpus_per_node = test.default_num_gpus_per_node
log(f'num_gpus_per_node set to {test.num_gpus_per_node} for partition {test.current_partition.name}')
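To illustrate the new policy end to end, here is a minimal, self-contained sketch (hypothetical stand-in objects, not the real ReFrame classes) of what check_always_request_gpus does for a CPU-only test that lands on a partition flagged with the feature:

class FakePartition:
    name = 'gpu_partition'                      # hypothetical name
    features = ['gpu', 'always_request_gpus']   # policy enabled

class FakeTest:
    current_partition = FakePartition()
    num_gpus_per_node = None        # the test itself did not request GPUs
    default_num_gpus_per_node = 4   # set earlier by assign_default_num_gpus_per_node

test = FakeTest()
if 'always_request_gpus' in test.current_partition.features and not test.num_gpus_per_node:
    test.num_gpus_per_node = test.default_num_gpus_per_node

print(test.num_gpus_per_node)  # 4: GPUs are requested even though the test won't use them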
30 changes: 11 additions & 19 deletions eessi/testsuite/tests/apps/osu.py
@@ -125,7 +125,7 @@ def set_environment(self):
def set_num_tasks_per_node(self):
""" Setting number of tasks per node and cpus per task in this function. This function sets num_cpus_per_task
for 1 node and 2 node options where the request is for full nodes."""
if(SCALES.get(self.scale).get('num_nodes') == 1):
if SCALES.get(self.scale).get('num_nodes') == 1:
hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE], 2)
else:
hooks.assign_tasks_per_compute_unit(self, COMPUTE_UNIT[NODE])
@@ -136,24 +136,16 @@ def set_num_gpus_per_node(self):
This test does not require GPUs and is for host-to-host communication within GPU nodes. But some systems do require a GPU
allocation to perform any activity on the GPU nodes.
"""
if(FEATURES[GPU] in self.current_partition.features and not utils.is_cuda_required_module(self.module_name)):
max_avail_gpus_per_node = utils.get_max_avail_gpus_per_node(self)
# Here for the 2_node test we assign max_avail_gpus_per_node, but some systems cannot allocate 1_cpn_2_nodes
# for GPUs and instead need all GPUs within the 2 nodes allocated for this to work. The test may fail under
# such conditions for the scale 1_cpn_2_nodes because it is simply not allowed.
self.num_gpus_per_node = self.default_num_gpus_per_node or max_avail_gpus_per_node
elif(FEATURES[GPU] in self.current_partition.features and utils.is_cuda_required_module(self.module_name)):
max_avail_gpus_per_node = utils.get_max_avail_gpus_per_node(self)
if(SCALES.get(self.scale).get('num_nodes') == 1):
# Skip the single node test if there is only 1 device in the node.
if(max_avail_gpus_per_node == 1):
self.skip(msg="There is only 1 device within the node. Skipping tests involving only 1 node.")
else:
self.num_gpus_per_node = 2
else:
# Note these settings are for 1_cpn_2_nodes. In that case we want to test for only 1 GPU per node since
# we have not requested for full nodes.
self.num_gpus_per_node = self.default_num_gpus_per_node or max_avail_gpus_per_node
if self.device_type == DEVICE_TYPES[GPU]:
# Skip single-node tests that would run with only 1 GPU (either because the scale
# requests only 1 GPU or because the node only has 1 GPU device)
self.skip_if(
    SCALES[self.scale]['num_nodes'] == 1 and self.default_num_gpus_per_node == 1,
    f"There is only 1 GPU device for scale={self.scale} or present in the node."
    f" Skipping tests with device_type={DEVICE_TYPES[GPU]} involving only 1 GPU."
)
if not self.num_gpus_per_node:
self.num_gpus_per_node = self.default_num_gpus_per_node
log(f'num_gpus_per_node set to {self.num_gpus_per_node} for partition {self.current_partition.name}')
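# Illustrative behavior of the skip above (reviewer note, hypothetical values):
# a single-node scale on a node exposing 1 GPU has num_nodes == 1 and
# default_num_gpus_per_node == 1, so the test is skipped; the same scale on a
# node with 2 or more GPUs runs, and num_gpus_per_node falls back to
# default_num_gpus_per_node when the test did not set it.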


@rfm.simple_test
6 changes: 6 additions & 0 deletions setup.cfg
@@ -20,3 +20,9 @@ namespace_packages = eessi

[options.packages.find]
include = eessi*

[flake8]
max-line-length = 120
# ignore star imports (F403, F405)
# ignore obsolete warning (W503)
ignore = F403, F405, W503
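As a usage note: with this section in place, running flake8 from the repository root (e.g. flake8 eessi) should pick up these settings automatically, since flake8 reads the [flake8] section of setup.cfg by default.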