Skip to content

Commit

Permalink
chore: extract yaml file to test_yamls/
Browse files Browse the repository at this point in the history
  • Loading branch information
andylizf committed Nov 26, 2024
1 parent 8da7604 commit 6518e44
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 39 deletions.
59 changes: 20 additions & 39 deletions tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -3287,46 +3287,27 @@ def test_managed_jobs_recovery_multi_node_gcp():
def test_managed_jobs_retry_logs():
    """Test managed job retry logs are properly displayed when a task fails.

    Launches the two-task pipeline stored in
    ``tests/test_yamls/test_managed_jobs_retry.yaml`` as a managed job,
    captures the streamed job logs into a temporary file, and then greps
    that file to verify that:

    * the failing first task was started and reported as failed,
    * both messages appear exactly twice (initial attempt plus one retry),
    * nothing about "Job 2" is ever logged, i.e. the second task is never
      reached.

    The managed job is cancelled by the teardown command.
    """
    name = _get_cluster_name()
    # The task definition lives in a checked-in YAML file instead of being
    # generated into a temporary file at test time.
    # NOTE(review): the path is relative, so this presumably expects pytest to
    # be invoked from the repository root -- confirm.
    yaml_path = 'tests/test_yamls/test_managed_jobs_retry.yaml'

    # The temp file is only used as a target for `tee`; it is never written
    # from Python. It is removed when the `with` block exits.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.log') as log_file:
        test = Test(
            'managed_jobs_retry_logs',
            [
                f'sky jobs launch -n {name} {yaml_path} -y -d',
                f'sky jobs logs -n {name} | tee {log_file.name}',
                # First attempt
                f'cat {log_file.name} | grep "Job started. Streaming logs..."',
                f'cat {log_file.name} | grep "Job 1 failed"',
                # Second attempt
                # `grep -c` prints the number of matching lines and
                # `grep -x 2` requires that number to be exactly "2"; a plain
                # `wc -l | grep 2` would also accept 12, 20, 22, ...
                f'cat {log_file.name} | grep -c "Job started. Streaming logs..." | grep -x 2',
                f'cat {log_file.name} | grep -c "Job 1 failed" | grep -x 2',
                # Task 2 is not reached
                f'! cat {log_file.name} | grep "Job 2"',
            ],
            f'sky jobs cancel -y -n {name}',
            timeout=7 * 60,  # 7 mins
        )
        run_one_test(test)


@pytest.mark.aws
Expand Down
14 changes: 14 additions & 0 deletions tests/test_yamls/test_managed_jobs_retry.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Two-task managed job pipeline used by test_managed_jobs_retry_logs in
# tests/test_smoke.py. The first task always fails, so the second task is
# never expected to run.
resources:
  cpus: 2+
  job_recovery:
    # One restart on error: the smoke test expects the "job started" and
    # "Job 1 failed" messages to appear exactly twice (first attempt + retry).
    max_restarts_on_errors: 1

# Task 1: Always fails
run: |
  echo "Task 1 starting"
  exit 1
---
# Task 2: Never reached due to Task 1 failure
run: |
  echo "Task 2 starting"
  exit 0

0 comments on commit 6518e44

Please sign in to comment.