Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Releases/0.7.1 rc manual #9

Open
wants to merge 4 commits into
base: releases/0.7.1_rc
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@ RUN conda install -c conda-forge google-cloud-sdk && \
rm -rf /var/lib/apt/lists/*

# Install sky
RUN pip install --no-cache-dir "skypilot[all]==0.7.0"
RUN pip install --no-cache-dir "skypilot[all]==0.7.1"
2 changes: 1 addition & 1 deletion sky/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def _get_git_commit():


__commit__ = _get_git_commit()
__version__ = '0.7.0'
__version__ = '0.7.1'
__root_dir__ = os.path.dirname(os.path.abspath(__file__))


Expand Down
3 changes: 2 additions & 1 deletion tests/test_optimizer_dryruns.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,8 @@ def test_infer_cloud_from_region_or_zone(monkeypatch):
_test_resources_launch(monkeypatch, zone='us-west2-a')

# Maps to AWS.
_test_resources_launch(monkeypatch, region='us-east-2')
# Avoid us-east-2 and us-west-1, as those regions are also supported by Lambda.
_test_resources_launch(monkeypatch, region='eu-south-1')
_test_resources_launch(monkeypatch, zone='us-west-2a')

# `sky launch`
Expand Down
55 changes: 37 additions & 18 deletions tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,20 +312,31 @@ def _terminate_gcp_replica(name: str, zone: str, replica_id: int) -> str:
def run_one_test(test: Test) -> Tuple[int, str, str]:
# Fail fast if `sky` CLI somehow errors out.
subprocess.run(['sky', 'status'], stdout=subprocess.DEVNULL, check=True)
log_file = tempfile.NamedTemporaryFile('a',
prefix=f'{test.name}-',
suffix='.log',
delete=False)
test.echo(f'Test started. Log: less {log_file.name}')
log_to_stdout = os.environ.get('LOG_TO_STDOUT', None)
if log_to_stdout:
write = test.echo
flush = lambda: None
subprocess_out = sys.stderr
test.echo(f'Test started. Log to stdout')
else:
log_file = tempfile.NamedTemporaryFile('a',
prefix=f'{test.name}-',
suffix='.log',
delete=False)
write = log_file.write
flush = log_file.flush
subprocess_out = log_file
test.echo(f'Test started. Log: less {log_file.name}')

env_dict = os.environ.copy()
if test.env:
env_dict.update(test.env)
for command in test.commands:
log_file.write(f'+ {command}\n')
log_file.flush()
write(f'+ {command}\n')
flush()
proc = subprocess.Popen(
command,
stdout=log_file,
stdout=subprocess_out,
stderr=subprocess.STDOUT,
shell=True,
executable='/bin/bash',
Expand All @@ -334,11 +345,11 @@ def run_one_test(test: Test) -> Tuple[int, str, str]:
try:
proc.wait(timeout=test.timeout)
except subprocess.TimeoutExpired as e:
log_file.flush()
flush()
test.echo(f'Timeout after {test.timeout} seconds.')
test.echo(str(e))
log_file.write(f'Timeout after {test.timeout} seconds.\n')
log_file.flush()
write(f'Timeout after {test.timeout} seconds.\n')
flush()
# Terminate the timed-out command's subprocess.
proc.terminate()
proc.returncode = 1 # None if we don't set it.
Expand All @@ -353,22 +364,29 @@ def run_one_test(test: Test) -> Tuple[int, str, str]:
if proc.returncode else f'{fore.GREEN}Passed{style.RESET_ALL}')
reason = f'\nReason: {command}' if proc.returncode else ''
msg = (f'{outcome}.'
f'{reason}'
f'\nLog: less {log_file.name}\n')
test.echo(msg)
log_file.write(msg)
f'{reason}')
if log_to_stdout:
test.echo(msg)
else:
msg += f'\nLog: less {log_file.name}\n'
test.echo(msg)
write(msg)

if (proc.returncode == 0 or
pytest.terminate_on_failure) and test.teardown is not None:
subprocess_utils.run(
test.teardown,
stdout=log_file,
stdout=subprocess_out,
stderr=subprocess.STDOUT,
timeout=10 * 60, # 10 mins
shell=True,
)

if proc.returncode:
raise Exception(f'test failed: less {log_file.name}')
if log_to_stdout:
raise Exception(f'test failed')
else:
raise Exception(f'test failed: less {log_file.name}')


def get_aws_region_for_quota_failover() -> Optional[str]:
Expand Down Expand Up @@ -1126,7 +1144,7 @@ def test_gcp_stale_job_manual_restart():
# Ensure the skylet updated the stale job status.
_get_cmd_wait_until_job_status_contains_without_matching_job(
cluster_name=name,
job_status=[JobStatus.FAILED.value],
job_status=[JobStatus.FAILED],
timeout=events.JobSchedulerEvent.EVENT_INTERVAL_SECONDS)
],
f'sky down -y {name}',
Expand Down Expand Up @@ -3510,6 +3528,7 @@ def test_managed_jobs_storage(generic_cloud: str):
job_name=name,
job_status=[ManagedJobStatus.SUCCEEDED],
timeout=60 + _BUMP_UP_SECONDS),
f'sleep 30',
f'[ $(aws s3api list-buckets --query "Buckets[?contains(Name, \'{storage_name}\')].Name" --output text | wc -l) -eq 0 ]',
# Check if file was written to the mounted output bucket
output_check_cmd
Expand Down
Loading