diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index a37b0d4708f..e98a82ea5e9 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2904,8 +2904,6 @@ def _provision( provisioner.ClusterName(handle.cluster_name, handle.cluster_name_on_cloud), handle.cluster_yaml, - local_wheel_path=local_wheel_path, - wheel_hash=wheel_hash, provision_record=provision_record, custom_resource=resources_vars.get('custom_resources'), log_dir=self.log_dir) diff --git a/sky/provision/instance_setup.py b/sky/provision/instance_setup.py index ca62d949844..9a21f6ed9ca 100644 --- a/sky/provision/instance_setup.py +++ b/sky/provision/instance_setup.py @@ -93,7 +93,8 @@ def _hint_worker_log_path(cluster_name: str, cluster_info: common.ClusterInfo, def _parallel_ssh_with_cache(func, cluster_name: str, stage_name: str, - digest: str, cluster_info: common.ClusterInfo, + digest: Optional[str], + cluster_info: common.ClusterInfo, ssh_credentials: Dict[str, Any]) -> List[Any]: with futures.ThreadPoolExecutor(max_workers=32) as pool: results = [] @@ -140,7 +141,7 @@ def _initialize_docker(runner: command_runner.SSHCommandRunner, stage_name='initialize_docker', # Should not cache docker setup, as it needs to be # run every time a cluster is restarted. - digest=str(time.time()), + digest=None, cluster_info=cluster_info, ssh_credentials=ssh_credentials) logger.debug(f'All docker users: {docker_users}') @@ -372,8 +373,7 @@ def _internal_file_mounts(file_mounts: Dict, @_log_start_end def internal_file_mounts(cluster_name: str, common_file_mounts: Dict, cluster_info: common.ClusterInfo, - ssh_credentials: Dict[str, - str], wheel_hash: str) -> None: + ssh_credentials: Dict[str, str]) -> None: """Executes file mounts - rsyncing internal local files""" _hint_worker_log_path(cluster_name, cluster_info, 'internal_file_mounts') @@ -382,9 +382,14 @@ def _setup_node(runner: command_runner.SSHCommandRunner, del metadata _internal_file_mounts(common_file_mounts, runner, log_path) - _parallel_ssh_with_cache(_setup_node, - cluster_name, - stage_name='internal_file_mounts', - digest=wheel_hash, - cluster_info=cluster_info, - ssh_credentials=ssh_credentials) + _parallel_ssh_with_cache( + _setup_node, + cluster_name, + stage_name='internal_file_mounts', + # Do not cache the file mounts, as the cloud + # credentials may change, and we should always + # update the remote files. The internal file_mounts + # is minimal and should not take too much time. + digest=None, + cluster_info=cluster_info, + ssh_credentials=ssh_credentials) diff --git a/sky/provision/metadata_utils.py b/sky/provision/metadata_utils.py index e534e24d37a..4a9d3c90ffc 100644 --- a/sky/provision/metadata_utils.py +++ b/sky/provision/metadata_utils.py @@ -4,6 +4,7 @@ import functools import pathlib import shutil +from typing import Optional from sky import sky_logging @@ -30,7 +31,7 @@ def _get_instance_metadata_dir(cluster_name: str, def cache_func(cluster_name: str, instance_id: str, stage_name: str, - hash_str: str): + hash_str: Optional[str]): """A helper function for caching function execution.""" def decorator(function): @@ -51,8 +52,11 @@ def wrapper(*args, **kwargs): @contextlib.contextmanager def check_cache_hash_or_update(cluster_name: str, instance_id: str, - stage_name: str, hash_str: str): + stage_name: str, hash_str: Optional[str]): """A decorator for 'cache_func'.""" + if hash_str is None: + yield True + return path = get_instance_cache_dir(cluster_name, instance_id) / stage_name if path.exists(): with open(path) as f: diff --git a/sky/provision/provisioner.py b/sky/provision/provisioner.py index 094ef33d81f..cc80ce0dbd5 100644 --- a/sky/provision/provisioner.py +++ b/sky/provision/provisioner.py @@ -3,7 +3,6 @@ import dataclasses import json import os -import pathlib import shlex import socket import subprocess @@ -311,7 +310,6 @@ def wait_for_ssh(cluster_info: provision_common.ClusterInfo, def _post_provision_setup( cloud_name: str, cluster_name: ClusterName, cluster_yaml: str, - local_wheel_path: pathlib.Path, wheel_hash: str, provision_record: provision_common.ProvisionRecord, custom_resource: Optional[str]) -> provision_common.ClusterInfo: cluster_info = provision.get_cluster_info(cloud_name, @@ -388,21 +386,15 @@ def _post_provision_setup( # (3) all instances need permission to mount storage for all clouds # It is possible to have a "smaller" permission model, but we leave that # for later. - file_mounts = { - backend_utils.SKY_REMOTE_PATH + '/' + wheel_hash: - str(local_wheel_path), - **config_from_yaml.get('file_mounts', {}) - } + file_mounts = config_from_yaml.get('file_mounts', {}) runtime_preparation_str = ('[bold cyan]Preparing SkyPilot ' 'runtime ({step}/3 - {step_name})') status.update( runtime_preparation_str.format(step=1, step_name='initializing')) instance_setup.internal_file_mounts(cluster_name.name_on_cloud, - file_mounts, - cluster_info, - ssh_credentials, - wheel_hash=wheel_hash) + file_mounts, cluster_info, + ssh_credentials) status.update( runtime_preparation_str.format(step=2, step_name='dependencies')) @@ -464,7 +456,6 @@ def _post_provision_setup( def post_provision_runtime_setup( cloud_name: str, cluster_name: ClusterName, cluster_yaml: str, - local_wheel_path: pathlib.Path, wheel_hash: str, provision_record: provision_common.ProvisionRecord, custom_resource: Optional[str], log_dir: str) -> provision_common.ClusterInfo: @@ -483,8 +474,6 @@ def post_provision_runtime_setup( return _post_provision_setup(cloud_name, cluster_name, cluster_yaml=cluster_yaml, - local_wheel_path=local_wheel_path, - wheel_hash=wheel_hash, provision_record=provision_record, custom_resource=custom_resource) except Exception: # pylint: disable=broad-except diff --git a/sky/skylet/skylet.py b/sky/skylet/skylet.py index 6bbb51e7a37..a36eb921660 100644 --- a/sky/skylet/skylet.py +++ b/sky/skylet/skylet.py @@ -2,14 +2,17 @@ import time +import sky from sky import sky_logging +from sky.skylet import constants from sky.skylet import events # Use the explicit logger name so that the logger is under the # `sky.skylet.skylet` namespace when executed directly, so as # to inherit the setup from the `sky` logger. logger = sky_logging.init_logger('sky.skylet.skylet') -logger.info('skylet started') +logger.info(f'Skylet started with version {constants.SKYLET_VERSION}; ' + f'SkyPilot v{sky.__version__} (commit: {sky.__commit__})') EVENTS = [ events.AutostopEvent(),