diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py
index 32fc069768b..121f96d8e8b 100644
--- a/sky/setup_files/setup.py
+++ b/sky/setup_files/setup.py
@@ -18,19 +18,28 @@
 import os
 import platform
 import re
+import runpy
 import subprocess
 import sys
-from typing import Dict, List
 
 import setuptools
 
+# __file__ is setup.py at the root of the repo. We shouldn't assume it's a
+# symlink - e.g. in the sdist it's resolved to a normal file.
 ROOT_DIR = os.path.dirname(__file__)
+DEPENDENCIES_FILE_PATH = os.path.join(ROOT_DIR, 'sky', 'setup_files',
+                                      'dependencies.py')
 INIT_FILE_PATH = os.path.join(ROOT_DIR, 'sky', '__init__.py')
 _COMMIT_FAILURE_MESSAGE = (
     'WARNING: SkyPilot fail to {verb} the commit hash in '
     f'{INIT_FILE_PATH!r} (SkyPilot can still be normally used): '
     '{error}')
 
+# setuptools does not include the script dir on the search path, so we can't
+# just do `import dependencies`. Instead, use runpy to manually load it. Note:
+# dependencies here is a dict, not a module, so we access it by subscripting.
+dependencies = runpy.run_path(DEPENDENCIES_FILE_PATH)
+
 original_init_content = None
 
 system = platform.system()
@@ -130,128 +139,6 @@ def parse_readme(readme: str) -> str:
     return readme
 
 
-install_requires = [
-    'wheel',
-    'cachetools',
-    # NOTE: ray requires click>=7.0.
-    'click >= 7.0',
-    'colorama',
-    'cryptography',
-    # Jinja has a bug in older versions because of the lack of pinning
-    # the version of the underlying markupsafe package. See:
-    # https://github.com/pallets/jinja/issues/1585
-    'jinja2 >= 3.0',
-    'jsonschema',
-    'networkx',
-    'pandas>=1.3.0',
-    'pendulum',
-    # PrettyTable with version >=2.0.0 is required for the support of
-    # `add_rows` method.
-    'PrettyTable >= 2.0.0',
-    'python-dotenv',
-    'rich',
-    'tabulate',
-    # Light weight requirement, can be replaced with "typing" once
-    # we deprecate Python 3.7 (this will take a while).
-    'typing_extensions',
-    'filelock >= 3.6.0',
-    'packaging',
-    'psutil',
-    'pulp',
-    # Cython 3.0 release breaks PyYAML 5.4.* (https://github.com/yaml/pyyaml/issues/601)
-    # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414
-    'pyyaml > 3.13, != 5.4.*',
-    'requests',
-]
-
-local_ray = [
-    # Lower version of ray will cause dependency conflict for
-    # click/grpcio/protobuf.
-    # Excluded 2.6.0 as it has a bug in the cluster launcher:
-    # https://github.com/ray-project/ray/releases/tag/ray-2.6.1
-    'ray[default] >= 2.2.0, != 2.6.0',
-]
-
-remote = [
-    # Adopted from ray's setup.py: https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py
-    # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard fails to start when
-    # ray start is called (#2054).
-    # Tracking issue: https://github.com/ray-project/ray/issues/30984
-    "grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < '3.10' and sys_platform == 'darwin'",  # noqa:E501
-    "grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= '3.10' and sys_platform == 'darwin'",  # noqa:E501
-    # Original issue: https://github.com/ray-project/ray/issues/33833
-    "grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < '3.10' and sys_platform != 'darwin'",  # noqa:E501
-    "grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= '3.10' and sys_platform != 'darwin'",  # noqa:E501
-    # Adopted from ray's setup.py:
-    # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343
-    'protobuf >= 3.15.3, != 3.19.5',
-    # Some pydantic versions are not compatible with ray. Adopted from ray's
-    # setup.py: https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254
-    'pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3',
-]
-
-# NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the
-# following packages dependencies are changed.
-aws_dependencies = [
-    # botocore does not work with urllib3>=2.0.0, according to https://github.com/boto/botocore/issues/2926
-    # We have to explicitly pin the version to optimize the time for
-    # poetry install. See https://github.com/orgs/python-poetry/discussions/7937
-    'urllib3<2',
-    # NOTE: this installs CLI V1. To use AWS SSO (e.g., `aws sso login`), users
-    # should instead use CLI V2 which is not pip-installable. See
-    # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html.
-    'awscli>=1.27.10',
-    'botocore>=1.29.10',
-    'boto3>=1.26.1',
-    # NOTE: required by awscli. To avoid ray automatically installing
-    # the latest version.
-    'colorama < 0.4.5',
-]
-
-extras_require: Dict[str, List[str]] = {
-    'aws': aws_dependencies,
-    # TODO(zongheng): azure-cli is huge and takes a long time to install.
-    # Tracked in: https://github.com/Azure/azure-cli/issues/7387
-    # azure-identity is needed in node_provider.
-    # We need azure-identity>=1.13.0 to enable the customization of the
-    # timeout of AzureCliCredential.
-    'azure': [
-        'azure-cli>=2.65.0', 'azure-core>=1.31.0', 'azure-identity>=1.19.0',
-        'azure-mgmt-network>=27.0.0', 'azure-mgmt-compute>=33.0.0',
-        'azure-storage-blob>=12.23.1', 'msgraph-sdk'
-    ] + local_ray,
-    # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd'
-    # parameter for stopping instances.
-    # Reference: https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6
-    'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'],
-    'ibm': [
-        'ibm-cloud-sdk-core', 'ibm-vpc', 'ibm-platform-services', 'ibm-cos-sdk'
-    ] + local_ray,
-    'docker': ['docker'] + local_ray,
-    'lambda': local_ray,
-    'cloudflare': aws_dependencies,
-    'scp': local_ray,
-    'oci': ['oci'] + local_ray,
-    'kubernetes': ['kubernetes>=20.0.0'],
-    'remote': remote,
-    'runpod': ['runpod>=1.5.1'],
-    'fluidstack': [],  # No dependencies needed for fluidstack
-    'cudo': ['cudo-compute>=0.1.10'],
-    'paperspace': [],  # No dependencies needed for paperspace
-    'vast': ['vastai_sdk>=0.1.2'],
-    'vsphere': [
-        'pyvmomi==8.0.1.0.2',
-        # vsphere-automation-sdk is also required, but it does not have
-        # pypi release, which cause failure of our pypi release.
-        # https://peps.python.org/pep-0440/#direct-references
-        # We have the instruction for its installation in our
-        # docs instead.
-        # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0'
-    ],
-}
-
-extras_require['all'] = sum(extras_require.values(), [])
-
 long_description = ''
 readme_filepath = 'README.md'
 # When sky/backends/wheel_utils.py builds wheels, it will not contain the
@@ -278,8 +165,8 @@ def parse_readme(readme: str) -> str:
     long_description_content_type='text/markdown',
     setup_requires=['wheel'],
     requires_python='>=3.7',
-    install_requires=install_requires,
-    extras_require=extras_require,
+    install_requires=dependencies['install_requires'],
+    extras_require=dependencies['extras_require'],
     entry_points={
         'console_scripts': ['sky = sky.cli:cli'],
     },
diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py
index 4b193d9ddd7..31f61d5dc07 100644
--- a/sky/utils/controller_utils.py
+++ b/sky/utils/controller_utils.py
@@ -25,7 +25,9 @@
 from sky.jobs import utils as managed_job_utils
 from sky.serve import constants as serve_constants
 from sky.serve import serve_utils
+from sky.setup_files import dependencies
 from sky.skylet import constants
+from sky.skylet import log_lib
 from sky.utils import common_utils
 from sky.utils import env_options
 from sky.utils import rich_utils
@@ -187,79 +189,49 @@ def from_type(cls, controller_type: str) -> Optional['Controllers']:
 
 # Install cli dependencies. Not using SkyPilot wheels because the wheel
 # can be cleaned up by another process.
-# TODO(zhwu): Keep the dependencies align with the ones in setup.py
 def _get_cloud_dependencies_installation_commands(
         controller: Controllers) -> List[str]:
-    # TODO(tian): Make dependency installation command a method of cloud
-    # class and get all installation command for enabled clouds.
-    commands = []
     # We use <step>/<total> instead of strong formatting, as we need to update
     # the <total> at the end of the for loop, and python does not support
     # partial string formatting.
     prefix_str = ('[<step>/<total>] Check & install cloud dependencies '
                   'on controller: ')
+    commands: List[str] = []
     # This is to make sure the shorter checking message does not have junk
     # characters from the previous message.
-    empty_str = ' ' * 10
-    aws_dependencies_installation = (
-        'pip list | grep boto3 > /dev/null 2>&1 || pip install '
-        'botocore>=1.29.10 boto3>=1.26.1; '
-        # Need to separate the installation of awscli from above because some
-        # other clouds will install boto3 but not awscli.
-        'pip list | grep awscli> /dev/null 2>&1 || pip install "urllib3<2" '
-        'awscli>=1.27.10 "colorama<0.4.5" > /dev/null 2>&1')
-    setup_clouds: List[str] = []
+    empty_str = ' ' * 20
+
+    # All python dependencies will be accumulated and then installed in one
+    # command at the end. This is very fast if the packages are already
+    # installed, so we don't check that.
+    python_packages: Set[str] = set()
+
+    step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
+    commands.append(f'echo -en "\\r{step_prefix}uv{empty_str}" &&'
+                    f'{constants.SKY_UV_INSTALL_CMD} >/dev/null 2>&1')
+
     for cloud in sky_check.get_cached_enabled_clouds_or_refresh():
-        if isinstance(
-                cloud,
-            (clouds.Lambda, clouds.SCP, clouds.Fluidstack, clouds.Paperspace)):
-            # no need to install any cloud dependencies for lambda, scp,
-            # fluidstack and paperspace
-            continue
-        if isinstance(cloud, clouds.AWS):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
-            commands.append(f'echo -en "\\r{step_prefix}AWS{empty_str}" && ' +
-                            aws_dependencies_installation)
-            setup_clouds.append(str(cloud))
-        elif isinstance(cloud, clouds.Azure):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
-            commands.append(
-                f'echo -en "\\r{step_prefix}Azure{empty_str}" && '
-                'pip list | grep azure-cli > /dev/null 2>&1 || '
-                'pip install "azure-cli>=2.31.0" azure-core '
-                '"azure-identity>=1.13.0" azure-mgmt-network > /dev/null 2>&1')
-            # Have to separate this installation of az blob storage from above
-            # because this is newly-introduced and not part of azure-cli. We
-            # need a separate installed check for this.
+        cloud_python_dependencies: List[str] = copy.deepcopy(
+            dependencies.extras_require[cloud.canonical_name()])
+
+        if isinstance(cloud, clouds.Azure):
+            # azure-cli cannot be normally installed by uv.
+            # See comments in sky/skylet/constants.py.
+            cloud_python_dependencies.remove(dependencies.AZURE_CLI)
+
+            step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
             commands.append(
-                'pip list | grep azure-storage-blob > /dev/null 2>&1 || '
-                'pip install azure-storage-blob msgraph-sdk > /dev/null 2>&1')
-            setup_clouds.append(str(cloud))
+                f'echo -en "\\r{step_prefix}azure-cli{empty_str}" &&'
+                f'{constants.SKY_UV_PIP_CMD} install --prerelease=allow '
+                f'"{dependencies.AZURE_CLI}" > /dev/null 2>&1')
         elif isinstance(cloud, clouds.GCP):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
-            commands.append(
-                f'echo -en "\\r{step_prefix}GCP{empty_str}" && '
-                'pip list | grep google-api-python-client > /dev/null 2>&1 || '
-                'pip install "google-api-python-client>=2.69.0" '
-                '> /dev/null 2>&1')
-            # Have to separate the installation of google-cloud-storage from
-            # above because for a VM launched on GCP, the VM may have
-            # google-api-python-client installed alone.
-            commands.append(
-                'pip list | grep google-cloud-storage > /dev/null 2>&1 || '
-                'pip install google-cloud-storage > /dev/null 2>&1')
-            commands.append(f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}')
-            setup_clouds.append(str(cloud))
+            step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
+            commands.append(f'echo -en "\\r{step_prefix}GCP SDK{empty_str}" &&'
+                            f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}')
         elif isinstance(cloud, clouds.Kubernetes):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
+            step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
             commands.append(
                 f'echo -en "\\r{step_prefix}Kubernetes{empty_str}" && '
-                'pip list | grep kubernetes > /dev/null 2>&1 || '
-                'pip install "kubernetes>=20.0.0" > /dev/null 2>&1 &&'
                 # Install k8s + skypilot dependencies
                 'sudo bash -c "if '
                 '! command -v curl &> /dev/null || '
@@ -275,31 +247,12 @@ def _get_cloud_dependencies_installation_commands(
                 '/bin/linux/amd64/kubectl" && '
                 'sudo install -o root -g root -m 0755 '
                 'kubectl /usr/local/bin/kubectl))')
-            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.Cudo):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
+            step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
             commands.append(
-                f'echo -en "\\r{step_prefix}Cudo{empty_str}" && '
-                'pip list | grep cudo-compute > /dev/null 2>&1 || '
-                'pip install "cudo-compute>=0.1.10" > /dev/null 2>&1 && '
+                f'echo -en "\\r{step_prefix}cudoctl{empty_str}" && '
                 'wget https://download.cudo.org/compute/cudoctl-0.3.2-amd64.deb -O ~/cudoctl.deb > /dev/null 2>&1 && '  # pylint: disable=line-too-long
                 'sudo dpkg -i ~/cudoctl.deb > /dev/null 2>&1')
-            setup_clouds.append(str(cloud))
-        elif isinstance(cloud, clouds.RunPod):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
-            commands.append(f'echo -en "\\r{step_prefix}RunPod{empty_str}" && '
-                            'pip list | grep runpod > /dev/null 2>&1 || '
-                            'pip install "runpod>=1.5.1" > /dev/null 2>&1')
-            setup_clouds.append(str(cloud))
-        elif isinstance(cloud, clouds.OCI):
-            step_prefix = prefix_str.replace('<step>',
-                                             str(len(setup_clouds) + 1))
-            commands.append(f'echo -en "\\r{prefix_str}OCI{empty_str}" && '
-                            'pip list | grep oci > /dev/null 2>&1 || '
-                            'pip install oci > /dev/null 2>&1')
-            setup_clouds.append(str(cloud))
         elif isinstance(cloud, clouds.Vast):
             step_prefix = prefix_str.replace('<step>',
                                              str(len(setup_clouds) + 1))
@@ -307,29 +260,30 @@ def _get_cloud_dependencies_installation_commands(
                 'pip list | grep vastai_sdk > /dev/null 2>&1 || '
                 'pip install "vastai_sdk>=0.1.2" > /dev/null 2>&1')
             setup_clouds.append(str(cloud))
-        if controller == Controllers.JOBS_CONTROLLER:
-            if isinstance(cloud, clouds.IBM):
-                step_prefix = prefix_str.replace('<step>',
-                                                 str(len(setup_clouds) + 1))
-                commands.append(
-                    f'echo -en "\\r{step_prefix}IBM{empty_str}" '
-                    '&& pip list | grep ibm-cloud-sdk-core > /dev/null 2>&1 || '
-                    'pip install ibm-cloud-sdk-core ibm-vpc '
-                    'ibm-platform-services ibm-cos-sdk > /dev/null 2>&1')
-                setup_clouds.append(str(cloud))
+        elif isinstance(cloud, clouds.IBM):
+            if controller != Controllers.JOBS_CONTROLLER:
+                # We only need IBM deps on the jobs controller.
+                cloud_python_dependencies = []
+
+        python_packages.update(cloud_python_dependencies)
 
     if (cloudflare.NAME in
             storage_lib.get_cached_enabled_storage_clouds_or_refresh()):
-        step_prefix = prefix_str.replace('<step>', str(len(setup_clouds) + 1))
-        commands.append(
-            f'echo -en "\\r{step_prefix}Cloudflare{empty_str}" && ' +
-            aws_dependencies_installation)
-        setup_clouds.append(cloudflare.NAME)
+        python_packages.update(dependencies.extras_require['cloudflare'])
 
+    packages_string = ' '.join([f'"{package}"' for package in python_packages])
+    step_prefix = prefix_str.replace('<step>', str(len(commands) + 1))
+    commands.append(
+        f'echo -en "\\r{step_prefix}cloud python packages{empty_str}" && '
+        f'{constants.SKY_UV_PIP_CMD} install {packages_string} > /dev/null 2>&1'
+    )
+    total_commands = len(commands)
     finish_prefix = prefix_str.replace('[<step>/<total>] ', '  ')
     commands.append(f'echo -e "\\r{finish_prefix}done.{empty_str}"')
+
     commands = [
-        command.replace('<total>', str(len(setup_clouds)))
-        for command in commands
+        command.replace('<total>', str(total_commands)) for command in commands
     ]
     return commands
@@ -387,11 +341,19 @@ def download_and_stream_latest_job_log(
     else:
         log_dir = list(log_dirs.values())[0]
         log_file = os.path.join(log_dir, 'run.log')
 
-        # Print the logs to the console.
+        # TODO(zhwu): refactor this into log_utils, along with the
+        # refactoring for the log_lib.tail_logs.
         try:
             with open(log_file, 'r', encoding='utf-8') as f:
-                print(f.read())
+                # Stream the logs to the console without reading the whole
+                # file into memory.
+                start_streaming = False
+                for line in f:
+                    if log_lib.LOG_FILE_START_STREAMING_AT in line:
+                        start_streaming = True
+                    if start_streaming:
+                        print(line, end='', flush=True)
         except FileNotFoundError:
             logger.error('Failed to find the logs for the user '
                          f'program at {log_file}.')
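
For context on the setup.py hunk: runpy.run_path() executes the target file in a fresh namespace and returns that namespace as a plain dict, which is why setup.py subscripts dependencies['install_requires'] rather than using attribute access on an imported module. A minimal sketch of the pattern, assuming only the dependencies.py path that already appears in the diff:

    import runpy

    # Execute sky/setup_files/dependencies.py and capture its module-level
    # names. run_path returns a Dict[str, Any], not a module object.
    dep_globals = runpy.run_path('sky/setup_files/dependencies.py')

    # Look up the lists defined in dependencies.py by key.
    install_requires = dep_globals['install_requires']
    extras_require = dep_globals['extras_require']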
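
For context on the download_and_stream_latest_job_log hunk: instead of print(f.read()), the log file is now iterated line by line and printing only begins once a line containing log_lib.LOG_FILE_START_STREAMING_AT is seen, so the whole file is never held in memory. A standalone sketch of the same pattern; the marker value and file path below are placeholders for illustration, not SkyPilot's real constant:

    # Placeholder marker; the real code uses log_lib.LOG_FILE_START_STREAMING_AT.
    START_MARKER = '=== start of logs ==='

    def stream_log_from_marker(path: str) -> None:
        # Print the log starting at the first line containing START_MARKER,
        # streaming line by line instead of reading the whole file at once.
        start_streaming = False
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                if START_MARKER in line:
                    start_streaming = True
                if start_streaming:
                    print(line, end='', flush=True)

    stream_log_from_marker('run.log')  # e.g. a downloaded controller run.log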