Skip to content

Commit

Permalink
actually final
Browse files Browse the repository at this point in the history
Signed-off-by: Judy Ng <[email protected]>
  • Loading branch information
judysng committed Mar 27, 2024
1 parent a8bb1eb commit 33ece39
Show file tree
Hide file tree
Showing 4 changed files with 12 additions and 6 deletions.
10 changes: 8 additions & 2 deletions tests/integration-tests/configs/scaling_stress_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,13 @@ test-suites:
performance_tests:
test_scaling.py::test_scaling_stress_test:
dimensions:
- regions: [ "euw1-az2" ]
instances: [ "p3.2xlarge" ]
- regions: [ "us-east-1" ]
instances: [ "c5.large" ]
oss: [ "alinux2" ]
schedulers: [ "slurm" ]
test_scaling.py::test_static_scaling_stress_test:
dimensions:
- regions: [ "us-east-1" ]
instances: [ "c5.large" ]
oss: [ "alinux2" ]
schedulers: [ "slurm" ]
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ touch "bootstrap_errors.txt"
# Find a log message like:
# ... WARNING - Node bootstrap error: Node queue-0-dy-compute-resource-0-1690(192.168.90.197) ...
# and get the IP address
sudo cat ${CLUSTERMGTD_LOG} | grep -i "no corresponding instance in EC2 for node" | awk -F"[()]" '{print $2}' | while read -r ip_address ; do
sudo cat ${CLUSTERMGTD_LOG} | grep -i "Node bootstrap error" | awk -F"[()]" '{print $2}' | while read -r ip_address ; do
if ! grep -q "${ip_address}" "bootstrap_errors.txt"; then
echo "${ip_address}" >> "bootstrap_errors.txt"
fi
Expand Down
2 changes: 1 addition & 1 deletion tests/integration-tests/tests/common/scaling_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_bootstrap_errors(remote_command_executor: RemoteCommandExecutor, cluster
Filters=[{"Name": "private-ip-address", "Values": [ip_address]}]
))[-1]["InstanceId"]
logging.warning(f"Instance {instance_id} had bootstrap errors. Check the logs for details.")
compute_node_log = client.get_console_output(InstanceId=instance_id)["Output"]
compute_node_log = client.get_console_output(InstanceId=instance_id, Latest=True)["Output"]
with open(os.path.join(path, f"{ip_address}-{cluster_name}-{instance_id}-{region}-log.txt"), "w") as f:
f.write(compute_node_log)
except IndexError:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _get_scaling_time(capacity_time_series: list, timestamps: list, scaling_targ


@pytest.mark.usefixtures("scheduler")
@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing"])
@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing", "best-effort"])
def test_scaling_stress_test(
test_datadir,
instance,
Expand Down Expand Up @@ -157,7 +157,7 @@ def test_scaling_stress_test(


@pytest.mark.usefixtures("scheduler")
@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing"])
@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing", "best-effort"])
def test_static_scaling_stress_test(
test_datadir,
instance,
Expand Down

0 comments on commit 33ece39

Please sign in to comment.