Skip to content

Commit

Permalink
ah
Browse files Browse the repository at this point in the history
Signed-off-by: Judy Ng <[email protected]>
  • Loading branch information
judysng committed Mar 27, 2024
1 parent 933a16d commit 8659768
Show file tree
Hide file tree
Showing 4 changed files with 5 additions and 11 deletions.
10 changes: 2 additions & 8 deletions tests/integration-tests/configs/scaling_stress_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,7 @@ test-suites:
performance_tests:
test_scaling.py::test_scaling_stress_test:
dimensions:
- regions: [ "us-east-1" ]
instances: [ "c5.large" ]
oss: [ "alinux2" ]
schedulers: [ "slurm" ]
test_scaling.py::test_static_scaling_stress_test:
dimensions:
- regions: [ "us-east-1" ]
instances: [ "c5.large" ]
- regions: [ "euw1-az2" ]
instances: [ "p3.2xlarge" ]
oss: [ "alinux2" ]
schedulers: [ "slurm" ]
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ touch "bootstrap_errors.txt"
# Find a log message like:
# ... WARNING - Node bootstrap error: Node queue-0-dy-compute-resource-0-1690(192.168.90.197) ...
# and get the IP address
sudo cat ${CLUSTERMGTD_LOG} | grep -i "Node bootstrap error" | awk -F"[()]" '{print $2}' | while read -r ip_address ; do
sudo cat ${CLUSTERMGTD_LOG} | grep -i "no corresponding instance in EC2 for node" | awk -F"[()]" '{print $2}' | while read -r ip_address ; do
if ! grep -q "${ip_address}" "bootstrap_errors.txt"; then
echo "${ip_address}" >> "bootstrap_errors.txt"
fi
Expand Down
2 changes: 1 addition & 1 deletion tests/integration-tests/tests/common/scaling_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def get_bootstrap_errors(remote_command_executor: RemoteCommandExecutor, cluster
# Since the same cluster is reused for multiple scale-up tests, the script may find the same bootstrap error
# multiple times and then fetch the wrong instance's logs, because the IP address may by then be attached to a
# new instance. Therefore, only write the compute node logs for the IP address if the file doesn't exist yet.
if _check_no_node_log_exists_for_ip_address("bootstrap_errors", ip_address):
if _check_no_node_log_exists_for_ip_address(path, ip_address):
try:
logging.warning(f"Compute node with IP address {ip_address} had bootstrap errors. Getting instance id...")
# Get the latest launched instance with the IP address since the most recent one should have the error
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _get_scaling_time(capacity_time_series: list, timestamps: list, scaling_targ


@pytest.mark.usefixtures("scheduler")
@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing", "best-effort"])
@pytest.mark.parametrize("scaling_strategy", ["all-or-nothing"])
def test_scaling_stress_test(
test_datadir,
instance,
Expand Down

0 comments on commit 8659768

Please sign in to comment.