From 66a4346b0628ee3c04d8c868dcf060fd31ec24c2 Mon Sep 17 00:00:00 2001 From: Judy Ng Date: Tue, 26 Mar 2024 16:34:20 -0400 Subject: [PATCH] hi Signed-off-by: Judy Ng --- tests/integration-tests/configs/scaling_stress_test.yaml | 4 ++-- tests/integration-tests/tests/common/scaling_common.py | 4 ++-- .../tests/performance_tests/test_scaling.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/integration-tests/configs/scaling_stress_test.yaml b/tests/integration-tests/configs/scaling_stress_test.yaml index 12ccfbad35..dba56b3160 100644 --- a/tests/integration-tests/configs/scaling_stress_test.yaml +++ b/tests/integration-tests/configs/scaling_stress_test.yaml @@ -2,7 +2,7 @@ test-suites: performance_tests: test_scaling.py::test_scaling_stress_test: dimensions: - - regions: [ "euw1-az3" ] - instances: [ "p3.2xlarge" ] + - regions: [ "us-east-1" ] + instances: [ "c5.large" ] oss: [ "alinux2" ] schedulers: [ "slurm" ] diff --git a/tests/integration-tests/tests/common/scaling_common.py b/tests/integration-tests/tests/common/scaling_common.py index c8d944c573..5b4186fc90 100644 --- a/tests/integration-tests/tests/common/scaling_common.py +++ b/tests/integration-tests/tests/common/scaling_common.py @@ -78,8 +78,8 @@ def get_bootstrap_errors(remote_command_executor: RemoteCommandExecutor, cluster Filters=[{"Name": "private-ip-address", "Values": [ip_address]}] )["Reservations"][0]["Instances"][0]["InstanceId"] logging.info(f"Instance {instance_id} had bootstrap errors. Check the logs for details.") - compute_node_log = client.get_console_output(InstanceId=instance_id)["Output"] - with open(f"bootstrap_errors/{ip_address}-{cluster_name}-{instance_id}-bootstrap-error.txt", "w") as file: + compute_node_log = client.get_console_output(InstanceId=instance_id, Latest=True)["Output"] + with open(f"bootstrap_errors/{region}-{cluster_name}-{instance_id}-bootstrap-error.txt", "w") as file: file.write(compute_node_log) diff --git a/tests/integration-tests/tests/performance_tests/test_scaling.py b/tests/integration-tests/tests/performance_tests/test_scaling.py index d2aa259860..12363497d0 100644 --- a/tests/integration-tests/tests/performance_tests/test_scaling.py +++ b/tests/integration-tests/tests/performance_tests/test_scaling.py @@ -157,7 +157,7 @@ def test_scaling_stress_test( @pytest.mark.usefixtures("scheduler") -@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing", "best-effort"]) +@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing"]) def test_static_scaling_stress_test( test_datadir, instance, @@ -287,8 +287,8 @@ def _scale_up_and_down( # Get the compute node logs for bootstrap errors if compute nodes did not scale up to scaling target within time if scaling_target not in compute_nodes_time_series_up: get_bootstrap_errors(remote_command_executor, cluster.name, region) - raise Exception(f"Cluster did not scale up to {scaling_target} nodes." - f"Check the compute node logs for any bootstrap errors.") + raise Exception(f"Cluster did not scale up to {scaling_target} nodes. " + f"Check the compute node logs for any bootstrap errors in the test artifacts.") # Extract scale up duration and timestamp from the monitoring metrics collected above _, scale_up_time_ec2 = _get_scaling_time(ec2_capacity_time_series_up, timestamps, scaling_target, start_time)