From 373c2e4f69a6c3ff4d1bda0237083ca3bb722974 Mon Sep 17 00:00:00 2001 From: Judy Ng Date: Wed, 27 Mar 2024 11:13:45 -0400 Subject: [PATCH] testing Signed-off-by: Judy Ng --- .../configs/scaling_stress_test.yaml | 10 ++-------- .../tests/common/scaling/get_bootstrap_errors.sh | 2 +- .../integration-tests/tests/common/scaling_common.py | 11 ++++++----- .../tests/performance_tests/test_scaling.py | 2 +- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/tests/integration-tests/configs/scaling_stress_test.yaml b/tests/integration-tests/configs/scaling_stress_test.yaml index 546dd2a84d..8e79f651a0 100644 --- a/tests/integration-tests/configs/scaling_stress_test.yaml +++ b/tests/integration-tests/configs/scaling_stress_test.yaml @@ -2,13 +2,7 @@ test-suites: performance_tests: test_scaling.py::test_scaling_stress_test: dimensions: - - regions: [ "us-east-1" ] - instances: [ "c5.large" ] - oss: [ "alinux2" ] - schedulers: [ "slurm" ] - test_scaling.py::test_static_scaling_stress_test: - dimensions: - - regions: [ "us-east-1" ] - instances: [ "c5.large" ] + - regions: [ "use1-az6" ] + instances: [ "p3.2xlarge" ] oss: [ "alinux2" ] schedulers: [ "slurm" ] diff --git a/tests/integration-tests/tests/common/scaling/get_bootstrap_errors.sh b/tests/integration-tests/tests/common/scaling/get_bootstrap_errors.sh index ccde69b393..212b31e485 100644 --- a/tests/integration-tests/tests/common/scaling/get_bootstrap_errors.sh +++ b/tests/integration-tests/tests/common/scaling/get_bootstrap_errors.sh @@ -19,7 +19,7 @@ touch "bootstrap_errors.txt" # Find a log message like: # ... WARNING - Node bootstrap error: Node queue-0-dy-compute-resource-0-1690(192.168.90.197) ... # and get the IP address -sudo cat ${CLUSTERMGTD_LOG} | grep -i "Node bootstrap error" | awk -F"[()]" '{print $2}' | while read -r ip_address ; do +sudo cat ${CLUSTERMGTD_LOG} | grep -i "no corresponding instance in EC2 for node" | awk -F"[()]" '{print $2}' | while read -r ip_address ; do if ! grep -q "${ip_address}" "bootstrap_errors.txt"; then echo "${ip_address}" >> "bootstrap_errors.txt" fi diff --git a/tests/integration-tests/tests/common/scaling_common.py b/tests/integration-tests/tests/common/scaling_common.py index 85848b3d3a..6d82316c80 100644 --- a/tests/integration-tests/tests/common/scaling_common.py +++ b/tests/integration-tests/tests/common/scaling_common.py @@ -72,11 +72,12 @@ def _check_no_node_log_exists_for_ip_address(path, ip_address): return True -def _sort_instances_by_launch_time(describe_instance_response): +def _sort_instances_by_launch_time(describe_instances_page_iterator): instances = [] - for reservation in describe_instance_response["Reservations"]: - for instance in reservation["Instances"]: - instances.append(instance) + for page in describe_instances_page_iterator: + for reservation in page["Reservations"]: + for instance in reservation["Instances"]: + instances.append(instance) instances.sort(key=lambda inst: inst["LaunchTime"]) return instances @@ -105,7 +106,7 @@ def get_bootstrap_errors(remote_command_executor: RemoteCommandExecutor, cluster paginator.paginate(Filters=[{"Name": "private-ip-address", "Values": [ip_address]}]) )[-1]["InstanceId"] logging.warning(f"Instance {instance_id} had bootstrap errors. Check the test outputs for details.") - compute_node_log = client.get_console_output(InstanceId=instance_id, Latest=True)["Output"] + compute_node_log = client.get_console_output(InstanceId=instance_id)["Output"] with open(os.path.join(path, f"{ip_address}-{cluster_name}-{instance_id}-{region}-log.txt"), "w") as f: f.write(compute_node_log) except IndexError: diff --git a/tests/integration-tests/tests/performance_tests/test_scaling.py b/tests/integration-tests/tests/performance_tests/test_scaling.py index ada615eddb..737825d8ed 100644 --- a/tests/integration-tests/tests/performance_tests/test_scaling.py +++ b/tests/integration-tests/tests/performance_tests/test_scaling.py @@ -85,7 +85,7 @@ def _get_scaling_time(capacity_time_series: list, timestamps: list, scaling_targ @pytest.mark.usefixtures("scheduler") -@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing", "best-effort"]) +@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing"]) def test_scaling_stress_test( test_datadir, instance,