Commit

Tox fixes.
EddyMM authored and lukeseawalker committed Jan 22, 2024
1 parent 9168213 commit d1b8ed7
Showing 4 changed files with 21 additions and 18 deletions.
12 changes: 7 additions & 5 deletions tests/integration-tests/conftest_networking.py
@@ -307,10 +307,10 @@ def vpc_stack(vpc_stacks_shared, region, az_id):

 def _is_scaling_test(tests_config):
     logging.info(f"Checking any scaling stress tests in {tests_config}")
-    return tests_config.get(
-        "test-suites", {}).get(
-        "performance_tests", {}).get(
-        "test_scaling.py::test_scaling_stress_test"
+    return (
+        tests_config.get("test-suites", {})
+        .get("performance_tests", {})
+        .get("test_scaling.py::test_scaling_stress_test")
     )
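
Note: the reformatted lookup is behavior-preserving. Each .get(..., {}) falls back to an empty dict, so a partially populated config yields None instead of raising KeyError. A minimal sketch of the pattern, with an assumed config shape matching the keys above:

    # Assumed shape; the real tests_config is parsed from the test-suites config.
    tests_config = {
        "test-suites": {
            "performance_tests": {
                "test_scaling.py::test_scaling_stress_test": {"dimensions": [{}]},
            }
        }
    }

    def _is_scaling_test(tests_config):
        # Every level defaults to {}, so the chain is safe on partial configs.
        return (
            tests_config.get("test-suites", {})
            .get("performance_tests", {})
            .get("test_scaling.py::test_scaling_stress_test")
        )

    assert _is_scaling_test(tests_config)  # truthy when the test is configured
    assert not _is_scaling_test({})        # None when it is not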


@@ -351,7 +351,9 @@ def vpc_stacks_shared(cfn_stacks_factory, request, key_name):
             subnets.append(
                 SubnetConfig(
                     name=subnet_name(visibility="Private", az_id=az_id),
-                    cidr=CIDR_FOR_PRIVATE_SUBNETS_SCALING[index] if is_scaling_test else CIDR_FOR_PRIVATE_SUBNETS[index],
+                    cidr=CIDR_FOR_PRIVATE_SUBNETS_SCALING[index]
+                    if is_scaling_test
+                    else CIDR_FOR_PRIVATE_SUBNETS[index],
                     map_public_ip_on_launch=False,
                     has_nat_gateway=False,
                     availability_zone=az_name,
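Note: the CIDR selection itself is unchanged here; the one-line conditional is only wrapped to satisfy the formatter's line-length limit. A sketch of the equivalent forms, with illustrative CIDR values (the real constants are defined in the networking fixtures):

    # Illustrative values; the real CIDR_FOR_PRIVATE_SUBNETS* constants live elsewhere.
    CIDR_FOR_PRIVATE_SUBNETS = ["10.0.16.0/20", "10.0.32.0/20"]
    CIDR_FOR_PRIVATE_SUBNETS_SCALING = ["10.1.16.0/20", "10.1.32.0/20"]

    def pick_cidr(index, is_scaling_test):
        # Wrapped conditional expression, same result as the former one-liner.
        return (
            CIDR_FOR_PRIVATE_SUBNETS_SCALING[index]
            if is_scaling_test
            else CIDR_FOR_PRIVATE_SUBNETS[index]
        )

    assert pick_cidr(0, True) == "10.1.16.0/20"
    assert pick_cidr(0, False) == "10.0.16.0/20"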
5 changes: 3 additions & 2 deletions tests/integration-tests/tests/common/scaling_common.py
@@ -26,8 +26,9 @@
 def scaling_target_condition(
     ec2_capacity_time_series,
     compute_nodes_time_series,
-    target_cluster_size, use_ec2_limit=True, # Stop monitoring after all EC2 instances have been launched
-    use_compute_nodes_limit=True # Stop monitoring after all nodes have joined the cluster
+    target_cluster_size,
+    use_ec2_limit=True,  # Stop monitoring after all EC2 instances have been launched
+    use_compute_nodes_limit=True,  # Stop monitoring after all nodes have joined the cluster
 ):
     return (
         (use_ec2_limit and ec2_capacity_time_series[-1] != target_cluster_size)
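Note: the rest of the condition is truncated in this view; presumably it ORs an analogous check on compute_nodes_time_series, per the use_compute_nodes_limit comment. A hedged sketch of how such a stop condition can drive a metrics-polling loop (illustrative only, not the repository's get_scaling_metrics):

    import time

    def scaling_target_condition_sketch(
        ec2_capacity_time_series,
        compute_nodes_time_series,
        target_cluster_size,
        use_ec2_limit=True,
        use_compute_nodes_limit=True,
    ):
        # Assumed completion of the truncated body: keep monitoring while any
        # enabled limit has not yet reached the target.
        return (use_ec2_limit and ec2_capacity_time_series[-1] != target_cluster_size) or (
            use_compute_nodes_limit and compute_nodes_time_series[-1] != target_cluster_size
        )

    def monitor_until_target(sample_metrics, target_cluster_size, poll_secs=60):
        # Illustrative polling loop: sample_metrics() returns (ec2_capacity, compute_nodes).
        ec2_series, nodes_series = [0], [0]
        while scaling_target_condition_sketch(ec2_series, nodes_series, target_cluster_size):
            time.sleep(poll_secs)
            ec2_capacity, compute_nodes = sample_metrics()
            ec2_series.append(ec2_capacity)
            nodes_series.append(compute_nodes)
        return ec2_series, nodes_series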
17 changes: 8 additions & 9 deletions tests/integration-tests/tests/performance_tests/test_scaling.py
@@ -6,10 +6,10 @@
 from benchmarks.common.metrics_reporter import produce_benchmark_metrics_report
 from remote_command_executor import RemoteCommandExecutor
 from time_utils import minutes
+from utils import disable_protected_mode
 
 from tests.common.assertions import assert_no_msg_in_logs
 from tests.common.scaling_common import get_scaling_metrics
-from utils import disable_protected_mode
 
 
 @pytest.mark.parametrize(
@@ -76,7 +76,7 @@ def _get_scaling_time(ec2_capacity_time_series: list, timestamps: list, scaling_


 @pytest.mark.parametrize(
-    "scaling_max_time_in_mins, scaling_target, shared_headnode_storage, head_node_instance_type, scaling_strategy",
+    "max_monitoring_time_in_mins, scaling_target, shared_headnode_storage, head_node_instance_type, scaling_strategy",
     [
         (20, 1000, "Efs", "c5.24xlarge", "best-effort"),  # TODO: Pass these values from an external source
         (20, 2000, "Efs", "c5.24xlarge", "best-effort"),
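
Note: the rename has to touch both this string and the test signature below, because pytest matches the comma-separated names in the first parametrize argument against the test function's parameters by name. A minimal sketch of that contract:

    import pytest

    @pytest.mark.parametrize(
        "max_monitoring_time_in_mins, scaling_target",
        [(20, 1000), (20, 2000)],
    )
    def test_example(max_monitoring_time_in_mins, scaling_target):
        # pytest injects each tuple into the identically named parameters;
        # renaming one side without the other fails at collection time.
        assert scaling_target % max_monitoring_time_in_mins == 0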
@@ -93,7 +93,7 @@ def test_scaling_stress_test(
     pcluster_config_reader,
     scheduler_commands_factory,
     clusters_factory,
-    scaling_max_time_in_mins,
+    max_monitoring_time_in_mins,
     scaling_target,
     shared_headnode_storage,
     head_node_instance_type,
@@ -116,13 +116,12 @@
     # Creating cluster with intended head node instance type and scaling parameters
     cluster_config = pcluster_config_reader(
         # Prevent nodes being set down before we start monitoring the scale down metrics
-        scaledown_idletime=scaling_max_time_in_mins,
+        scaledown_idletime=max_monitoring_time_in_mins,
         scaling_target=scaling_target,
         head_node_instance_type=head_node_instance_type,
         shared_headnode_storage=shared_headnode_storage,
         scaling_strategy=scaling_strategy,
     )
-    logging.info(f"Cluster config: {cluster_config}")
     cluster = clusters_factory(cluster_config)
     remote_command_executor = RemoteCommandExecutor(cluster)
     scheduler_commands = scheduler_commands_factory(remote_command_executor)
@@ -133,7 +132,7 @@
     # Submit a simple job to trigger the launch all compute nodes
     scaling_job = {
         # Keep job running until we explicitly cancel it and start monitoring scale down
-        "command": f"srun sleep {minutes(scaling_max_time_in_mins) // 1000}",
+        "command": f"srun sleep {minutes(max_monitoring_time_in_mins) // 1000}",
         "nodes": scaling_target,
     }
     job_id = scheduler_commands.submit_command_and_assert_job_accepted(scaling_job)
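
Note: minutes() in this suite appears to return milliseconds (it is also passed to max_monitoring_time below), so // 1000 converts the monitoring window into the seconds that sleep expects. A quick worked check under that assumed convention:

    def minutes(value):
        # Assumed convention from time_utils: minutes -> milliseconds.
        return value * 60 * 1000

    max_monitoring_time_in_mins = 20
    sleep_secs = minutes(max_monitoring_time_in_mins) // 1000
    assert sleep_secs == 1200  # 20 min * 60 s/min: the job outlives the monitoring window
    command = f"srun sleep {sleep_secs}"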
@@ -144,7 +143,7 @@
     # Monitor the cluster during scale up
     ec2_capacity_time_series, compute_nodes_time_series, timestamps, end_time = get_scaling_metrics(
         remote_command_executor,
-        max_monitoring_time=minutes(scaling_max_time_in_mins),
+        max_monitoring_time=minutes(max_monitoring_time_in_mins),
         region=region,
         cluster_name=cluster.name,
         publish_metrics=True,
@@ -155,15 +154,15 @@
         ec2_capacity_time_series, timestamps, scaling_target, start_time
     )
 
-    # Cancel the running job and scale dow the cluster using the update-compute-fleet command
+    # Cancel the running job and scale down the cluster using the update-compute-fleet command
     scheduler_commands.cancel_job(job_id)
     cluster.stop()
 
     # Monitor the cluster during scale down
     scale_down_start_timestamp = _datetime_to_minute_granularity(datetime.datetime.now(tz=datetime.timezone.utc))
     ec2_capacity_time_series, compute_nodes_time_series, timestamps, end_time = get_scaling_metrics(
         remote_command_executor,
-        max_monitoring_time=minutes(scaling_max_time_in_mins),
+        max_monitoring_time=minutes(max_monitoring_time_in_mins),
         region=region,
         cluster_name=cluster.name,
         publish_metrics=True,
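Note: the scaling time asserted by the test comes from _get_scaling_time (signature in the hunk header above). A hedged sketch of one plausible implementation, assuming it reports the first timestamp at which EC2 capacity hits the target plus the elapsed seconds from start_time:

    import datetime

    def _get_scaling_time_sketch(ec2_capacity_time_series, timestamps, scaling_target, start_time):
        # Assumed behavior: locate the first sample where capacity reached the
        # target and report how long that took from the start of monitoring.
        index = ec2_capacity_time_series.index(scaling_target)
        scaling_target_time = timestamps[index]
        return scaling_target_time, int((scaling_target_time - start_time).total_seconds())

    start = datetime.datetime(2024, 1, 22, 12, 0, tzinfo=datetime.timezone.utc)
    stamps = [start + datetime.timedelta(minutes=m) for m in range(4)]
    reached_at, secs = _get_scaling_time_sketch([0, 400, 900, 1000], stamps, 1000, start)
    assert secs == 180  # capacity hit 1000 instances three minutes in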
5 changes: 3 additions & 2 deletions tests/integration-tests/tests/schedulers/test_slurm.py
@@ -26,8 +26,10 @@
     check_status,
     get_compute_nodes_instance_ids,
     get_instance_info,
+    retrieve_clustermgtd_conf_path,
+    set_protected_failure_count,
     test_cluster_health_metric,
-    wait_for_computefleet_changed, set_protected_failure_count, retrieve_clustermgtd_conf_path,
+    wait_for_computefleet_changed,
 )
 
 from tests.common.assertions import (
@@ -1911,7 +1913,6 @@ def _inject_bootstrap_failures(cluster, bucket_name, pcluster_config_reader, sca
     _update_and_start_cluster(cluster, updated_config_file)
 
 
-
 @retry(wait_fixed=seconds(30), stop_max_delay=minutes(20))
 def _wait_until_protected_mode_failure_count_set(cluster):
     """Retry setting the protected failure count until the clustermgtd is running."""
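Note: the @retry arguments follow the retrying library's convention of milliseconds, which is why the seconds()/minutes() helpers are used. A minimal sketch of the same pattern, with the helpers written out as assumptions:

    from retrying import retry

    def seconds(value):
        return value * 1000       # assumed convention: seconds -> milliseconds

    def minutes(value):
        return value * 60 * 1000  # assumed convention: minutes -> milliseconds

    @retry(wait_fixed=seconds(30), stop_max_delay=minutes(20))
    def wait_until_ready(check):
        # retrying re-invokes this every 30 s, for at most 20 min, until it
        # stops raising.
        if not check():
            raise AssertionError("not ready yet")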
