From 9ccb69e8d06c986057d5cf5f2dd9b9a5ab2e2b7e Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Mon, 25 Nov 2024 11:46:54 -0800 Subject: [PATCH 01/16] Revert "remove `uv` from runtime setup due to azure installation issue (#4401)" This reverts commit 0b20d568ee1af454bfec3e50ff62d239f976e52d. --- sky/skylet/constants.py | 28 +++++++++++++++++----------- sky/templates/kubernetes-ray.yml.j2 | 2 +- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index 97d745b2e26..e4b55033f16 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -39,6 +39,7 @@ 'which python3') # Python executable, e.g., /opt/conda/bin/python3 SKY_PYTHON_CMD = f'$({SKY_GET_PYTHON_PATH_CMD})' +# Prefer SKY_UV_PIP_CMD, which is faster. TODO(cooper): remove all usages. SKY_PIP_CMD = f'{SKY_PYTHON_CMD} -m pip' # Ray executable, e.g., /opt/conda/bin/ray # We need to add SKY_PYTHON_CMD before ray executable because: @@ -50,6 +51,10 @@ SKY_REMOTE_PYTHON_ENV_NAME = 'skypilot-runtime' SKY_REMOTE_PYTHON_ENV = f'~/{SKY_REMOTE_PYTHON_ENV_NAME}' ACTIVATE_SKY_REMOTE_PYTHON_ENV = f'source {SKY_REMOTE_PYTHON_ENV}/bin/activate' +# uv is used for venv and pip, much faster than python implementations. +SKY_UV_INSTALL_DIR = '"$HOME/.local/bin"' +SKY_UV_CMD = f'{SKY_UV_INSTALL_DIR}/uv' +SKY_UV_PIP_CMD = f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip' # Deleting the SKY_REMOTE_PYTHON_ENV_NAME from the PATH to deactivate the # environment. `deactivate` command does not work when conda is used. DEACTIVATE_SKY_REMOTE_PYTHON_ENV = ( @@ -148,12 +153,16 @@ 'echo "Creating conda env with Python 3.10" && ' f'conda create -y -n {SKY_REMOTE_PYTHON_ENV_NAME} python=3.10 && ' f'conda activate {SKY_REMOTE_PYTHON_ENV_NAME};' + # Install uv for venv management and pip installation. 
+ 'which uv >/dev/null 2>&1 || ' + 'curl -LsSf https://astral.sh/uv/install.sh ' + f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh;' # Create a separate conda environment for SkyPilot dependencies. f'[ -d {SKY_REMOTE_PYTHON_ENV} ] || ' # Do NOT use --system-site-packages here, because if users upgrade any # packages in the base env, they interfere with skypilot dependencies. # Reference: https://github.com/skypilot-org/skypilot/issues/4097 - f'{SKY_PYTHON_CMD} -m venv {SKY_REMOTE_PYTHON_ENV};' + f'{SKY_UV_CMD} venv {SKY_REMOTE_PYTHON_ENV};' f'echo "$(echo {SKY_REMOTE_PYTHON_ENV})/bin/python" > {SKY_PYTHON_PATH_FILE};' ) @@ -161,15 +170,12 @@ RAY_STATUS = f'RAY_ADDRESS=127.0.0.1:{SKY_REMOTE_RAY_PORT} {SKY_RAY_CMD} status' RAY_INSTALLATION_COMMANDS = ( 'mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app;' - # Disable the pip version check to avoid the warning message, which makes - # the output hard to read. - 'export PIP_DISABLE_PIP_VERSION_CHECK=1;' # Print the PATH in provision.log to help debug PATH issues. 'echo PATH=$PATH; ' # Install setuptools<=69.5.1 to avoid the issue with the latest setuptools # causing the error: # ImportError: cannot import name 'packaging' from 'pkg_resources'" - f'{SKY_PIP_CMD} install "setuptools<70"; ' + f'{SKY_UV_PIP_CMD} install "setuptools<70"; ' # Backward compatibility for ray upgrade (#3248): do not upgrade ray if the # ray cluster is already running, to avoid the ray cluster being restarted. # @@ -183,10 +189,10 @@ # latest ray port 6380, but those existing cluster launched before #1790 # that has ray cluster on the default port 6379 will be upgraded and # restarted. 
- f'{SKY_PIP_CMD} list | grep "ray " | ' + f'{SKY_UV_PIP_CMD} list | grep "ray " | ' f'grep {SKY_REMOTE_RAY_VERSION} 2>&1 > /dev/null ' f'|| {RAY_STATUS} || ' - f'{SKY_PIP_CMD} install --exists-action w -U ray[default]=={SKY_REMOTE_RAY_VERSION}; ' # pylint: disable=line-too-long + f'{SKY_UV_PIP_CMD} install -U ray[default]=={SKY_REMOTE_RAY_VERSION}; ' # pylint: disable=line-too-long # In some envs, e.g. pip does not have permission to write under /opt/conda # ray package will be installed under ~/.local/bin. If the user's PATH does # not include ~/.local/bin (the pip install will have the output: `WARNING: @@ -202,10 +208,10 @@ f'which ray > {SKY_RAY_PATH_FILE} || exit 1; }}; ') SKYPILOT_WHEEL_INSTALLATION_COMMANDS = ( - f'{{ {SKY_PIP_CMD} list | grep "skypilot " && ' + f'{{ {SKY_UV_PIP_CMD} list | grep "skypilot " && ' '[ "$(cat ~/.sky/wheels/current_sky_wheel_hash)" == "{sky_wheel_hash}" ]; } || ' # pylint: disable=line-too-long - f'{{ {SKY_PIP_CMD} uninstall skypilot -y; ' - f'{SKY_PIP_CMD} install "$(echo ~/.sky/wheels/{{sky_wheel_hash}}/' + f'{{ {SKY_UV_PIP_CMD} uninstall skypilot; ' + f'{SKY_UV_PIP_CMD} install "$(echo ~/.sky/wheels/{{sky_wheel_hash}}/' f'skypilot-{_sky_version}*.whl)[{{cloud}}, remote]" && ' 'echo "{sky_wheel_hash}" > ~/.sky/wheels/current_sky_wheel_hash || ' 'exit 1; }; ') @@ -220,7 +226,7 @@ # The ray installation above can be skipped due to the existing ray cluster # for backward compatibility. In this case, we should not patch the ray # files. 
- f'{SKY_PIP_CMD} list | grep "ray " | ' + f'{SKY_UV_PIP_CMD} list | grep "ray " | ' f'grep {SKY_REMOTE_RAY_VERSION} 2>&1 > /dev/null && ' f'{{ {SKY_PYTHON_CMD} -c ' '"from sky.skylet.ray_patches import patch; patch()" || exit 1; }; ') diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index e572b263924..535e6f0b1ae 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -414,7 +414,7 @@ available_node_types: done {{ conda_installation_commands }} {{ ray_installation_commands }} - ~/skypilot-runtime/bin/python -m pip install skypilot[kubernetes,remote] + VIRTUAL_ENV=~/skypilot-runtime ~/.local/bin/uv pip install skypilot[kubernetes,remote] touch /tmp/ray_skypilot_installation_complete echo "=== Ray and skypilot installation completed ===" From a31e4c18c7feb761d8bf03ff815b03d9f1c5d445 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Mon, 25 Nov 2024 15:00:03 -0800 Subject: [PATCH 02/16] on azure, use --prerelease=allow to install azure-cli --- sky/setup_files/setup.py | 12 +++++++++--- sky/skylet/constants.py | 10 ++++++++++ 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py index 0fd6978ec03..0533b2ede33 100644 --- a/sky/setup_files/setup.py +++ b/sky/setup_files/setup.py @@ -216,9 +216,15 @@ def parse_readme(readme: str) -> str: # We need azure-identity>=1.13.0 to enable the customization of the # timeout of AzureCliCredential. 'azure': [ - 'azure-cli>=2.65.0', 'azure-core>=1.31.0', 'azure-identity>=1.19.0', - 'azure-mgmt-network>=27.0.0', 'azure-mgmt-compute>=33.0.0', - 'azure-storage-blob>=12.23.1', 'msgraph-sdk' + # If you update the azure-cli dependency, update in sky/skylet/constants.py + # SKYPILOT_WHEEL_INSTALLATION_COMMANDS as well. 
+ 'azure-cli>=2.65.0', + 'azure-core>=1.31.0', + 'azure-identity>=1.19.0', + 'azure-mgmt-network>=27.0.0', + 'azure-mgmt-compute>=33.0.0', + 'azure-storage-blob>=12.23.1', + 'msgraph-sdk', ] + local_ray, # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd' # parameter for stopping instances. diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index e4b55033f16..43a1efe3aa5 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -211,6 +211,16 @@ f'{{ {SKY_UV_PIP_CMD} list | grep "skypilot " && ' '[ "$(cat ~/.sky/wheels/current_sky_wheel_hash)" == "{sky_wheel_hash}" ]; } || ' # pylint: disable=line-too-long f'{{ {SKY_UV_PIP_CMD} uninstall skypilot; ' + # uv cannot install azure-cli normally, since it depends on pre-release + # packages. Manually install azure-cli with the --prerelease=allow flag + # first. This will allow skypilot to successfully install. See + # https://docs.astral.sh/uv/pip/compatibility/#pre-release-compatibility. + # We don't want to use --prerelease=allow for all packages, because it will + # cause uv to use pre-releases for some other packages that have sufficient + # stable releases. 
+ 'if [ "{cloud}" = "azure" ]; then ' + f'{SKY_UV_PIP_CMD} install --prerelease=allow "azure-cli>=2.65.0"; fi;' + # Install skypilot from wheel f'{SKY_UV_PIP_CMD} install "$(echo ~/.sky/wheels/{{sky_wheel_hash}}/' f'skypilot-{_sky_version}*.whl)[{{cloud}}, remote]" && ' 'echo "{sky_wheel_hash}" > ~/.sky/wheels/current_sky_wheel_hash || ' From 7574b94fb96d9b9ca873c1c1eb1a1a4b648c26a1 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Mon, 25 Nov 2024 15:04:22 -0800 Subject: [PATCH 03/16] use uv venv --seed --- sky/skylet/constants.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index 43a1efe3aa5..98e02c11f5b 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -162,7 +162,9 @@ # Do NOT use --system-site-packages here, because if users upgrade any # packages in the base env, they interfere with skypilot dependencies. # Reference: https://github.com/skypilot-org/skypilot/issues/4097 - f'{SKY_UV_CMD} venv {SKY_REMOTE_PYTHON_ENV};' + # --seed will include pip and setuptools, which are present in venvs created + # with python -m venv. + f'{SKY_UV_CMD} venv --seed {SKY_REMOTE_PYTHON_ENV};' f'echo "$(echo {SKY_REMOTE_PYTHON_ENV})/bin/python" > {SKY_PYTHON_PATH_FILE};' ) From 11040e373bfe4912293507734da595c8c2481057 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Mon, 25 Nov 2024 15:42:48 -0800 Subject: [PATCH 04/16] fix backwards compatibility --- sky/skylet/constants.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index 98e02c11f5b..f97199efc04 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -54,6 +54,10 @@ # uv is used for venv and pip, much faster than python implementations. SKY_UV_INSTALL_DIR = '"$HOME/.local/bin"' SKY_UV_CMD = f'{SKY_UV_INSTALL_DIR}/uv' +# This won't reinstall uv if it's already installed, so it's safe to re-run. 
+SKY_UV_INSTALL_CMD = (f'{SKY_UV_CMD} -V >/dev/null 2>&1 || ' + 'curl -LsSf https://astral.sh/uv/install.sh ' + f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh') SKY_UV_PIP_CMD = f'VIRTUAL_ENV={SKY_REMOTE_PYTHON_ENV} {SKY_UV_CMD} pip' # Deleting the SKY_REMOTE_PYTHON_ENV_NAME from the PATH to deactivate the # environment. `deactivate` command does not work when conda is used. @@ -154,9 +158,7 @@ f'conda create -y -n {SKY_REMOTE_PYTHON_ENV_NAME} python=3.10 && ' f'conda activate {SKY_REMOTE_PYTHON_ENV_NAME};' # Install uv for venv management and pip installation. - 'which uv >/dev/null 2>&1 || ' - 'curl -LsSf https://astral.sh/uv/install.sh ' - f'| UV_INSTALL_DIR={SKY_UV_INSTALL_DIR} sh;' + f'{SKY_UV_INSTALL_CMD};' # Create a separate conda environment for SkyPilot dependencies. f'[ -d {SKY_REMOTE_PYTHON_ENV} ] || ' # Do NOT use --system-site-packages here, because if users upgrade any @@ -171,6 +173,7 @@ _sky_version = str(version.parse(sky.__version__)) RAY_STATUS = f'RAY_ADDRESS=127.0.0.1:{SKY_REMOTE_RAY_PORT} {SKY_RAY_CMD} status' RAY_INSTALLATION_COMMANDS = ( + f'{SKY_UV_INSTALL_CMD};' 'mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app;' # Print the PATH in provision.log to help debug PATH issues. 
'echo PATH=$PATH; ' From c8c910c274631fd050a20fe363449d2d0c799baf Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Tue, 26 Nov 2024 11:54:33 -0800 Subject: [PATCH 05/16] really fix backwards compatibility --- sky/skylet/constants.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index f97199efc04..1fc782fb1f8 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -213,6 +213,7 @@ f'which ray > {SKY_RAY_PATH_FILE} || exit 1; }}; ') SKYPILOT_WHEEL_INSTALLATION_COMMANDS = ( + f'{SKY_UV_INSTALL_CMD};' f'{{ {SKY_UV_PIP_CMD} list | grep "skypilot " && ' '[ "$(cat ~/.sky/wheels/current_sky_wheel_hash)" == "{sky_wheel_hash}" ]; } || ' # pylint: disable=line-too-long f'{{ {SKY_UV_PIP_CMD} uninstall skypilot; ' From 64a0981f2841925f7c6a0a99fed82da84692d16b Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Tue, 26 Nov 2024 23:57:14 -0800 Subject: [PATCH 06/16] use uv to set up controller dependencies --- sky/setup_files/setup.py | 140 +++---------------------------- sky/utils/controller_utils.py | 150 ++++++++++++---------------------- 2 files changed, 64 insertions(+), 226 deletions(-) diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py index 0533b2ede33..c0ff6b127c4 100644 --- a/sky/setup_files/setup.py +++ b/sky/setup_files/setup.py @@ -20,17 +20,24 @@ import re import subprocess import sys -from typing import Dict, List import setuptools +# __file__ is setup.py at the root of the repo. We shouldn't assume it's a +# symlink - e.g. in the sdist it's resolved to a normal file. 
ROOT_DIR = os.path.dirname(__file__) +SETUP_FILE_DIR = os.path.join(ROOT_DIR, 'sky', 'setup_files') INIT_FILE_PATH = os.path.join(ROOT_DIR, 'sky', '__init__.py') _COMMIT_FAILURE_MESSAGE = ( 'WARNING: SkyPilot fail to {verb} the commit hash in ' f'{INIT_FILE_PATH!r} (SkyPilot can still be normally used): ' '{error}') +# setuptools does not include the script dir on the search path, so manually add +# it so that we can import the dependencies file. +sys.path.append(SETUP_FILE_DIR) +import dependencies + original_init_content = None system = platform.system() @@ -130,133 +137,6 @@ def parse_readme(readme: str) -> str: return readme -install_requires = [ - 'wheel', - 'cachetools', - # NOTE: ray requires click>=7.0. - 'click >= 7.0', - 'colorama', - 'cryptography', - # Jinja has a bug in older versions because of the lack of pinning - # the version of the underlying markupsafe package. See: - # https://github.com/pallets/jinja/issues/1585 - 'jinja2 >= 3.0', - 'jsonschema', - 'networkx', - 'pandas>=1.3.0', - 'pendulum', - # PrettyTable with version >=2.0.0 is required for the support of - # `add_rows` method. - 'PrettyTable >= 2.0.0', - 'python-dotenv', - 'rich', - 'tabulate', - # Light weight requirement, can be replaced with "typing" once - # we deprecate Python 3.7 (this will take a while). - 'typing_extensions', - 'filelock >= 3.6.0', - 'packaging', - 'psutil', - 'pulp', - # Cython 3.0 release breaks PyYAML 5.4.* (https://github.com/yaml/pyyaml/issues/601) - # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414 - 'pyyaml > 3.13, != 5.4.*', - 'requests', -] - -local_ray = [ - # Lower version of ray will cause dependency conflict for - # click/grpcio/protobuf. 
- # Excluded 2.6.0 as it has a bug in the cluster launcher: - # https://github.com/ray-project/ray/releases/tag/ray-2.6.1 - 'ray[default] >= 2.2.0, != 2.6.0', -] - -remote = [ - # Adopted from ray's setup.py: https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py - # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard fails to start when - # ray start is called (#2054). - # Tracking issue: https://github.com/ray-project/ray/issues/30984 - "grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < '3.10' and sys_platform == 'darwin'", # noqa:E501 - "grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= '3.10' and sys_platform == 'darwin'", # noqa:E501 - # Original issue: https://github.com/ray-project/ray/issues/33833 - "grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < '3.10' and sys_platform != 'darwin'", # noqa:E501 - "grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= '3.10' and sys_platform != 'darwin'", # noqa:E501 - # Adopted from ray's setup.py: - # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343 - 'protobuf >= 3.15.3, != 3.19.5', - # Some pydantic versions are not compatible with ray. Adopted from ray's - # setup.py: https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254 - 'pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3', -] - -# NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the -# following packages dependencies are changed. -aws_dependencies = [ - # botocore does not work with urllib3>=2.0.0, according to https://github.com/boto/botocore/issues/2926 - # We have to explicitly pin the version to optimize the time for - # poetry install. See https://github.com/orgs/python-poetry/discussions/7937 - 'urllib3<2', - # NOTE: this installs CLI V1. To use AWS SSO (e.g., `aws sso login`), users - # should instead use CLI V2 which is not pip-installable. See - # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html. 
- 'awscli>=1.27.10', - 'botocore>=1.29.10', - 'boto3>=1.26.1', - # NOTE: required by awscli. To avoid ray automatically installing - # the latest version. - 'colorama < 0.4.5', -] - -extras_require: Dict[str, List[str]] = { - 'aws': aws_dependencies, - # TODO(zongheng): azure-cli is huge and takes a long time to install. - # Tracked in: https://github.com/Azure/azure-cli/issues/7387 - # azure-identity is needed in node_provider. - # We need azure-identity>=1.13.0 to enable the customization of the - # timeout of AzureCliCredential. - 'azure': [ - # If you update the azure-cli dependency, update in sky/skylet/constants.py - # SKYPILOT_WHEEL_INSTALLATION_COMMANDS as well. - 'azure-cli>=2.65.0', - 'azure-core>=1.31.0', - 'azure-identity>=1.19.0', - 'azure-mgmt-network>=27.0.0', - 'azure-mgmt-compute>=33.0.0', - 'azure-storage-blob>=12.23.1', - 'msgraph-sdk', - ] + local_ray, - # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd' - # parameter for stopping instances. - # Reference: https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6 - 'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'], - 'ibm': [ - 'ibm-cloud-sdk-core', 'ibm-vpc', 'ibm-platform-services', 'ibm-cos-sdk' - ] + local_ray, - 'docker': ['docker'] + local_ray, - 'lambda': local_ray, - 'cloudflare': aws_dependencies, - 'scp': local_ray, - 'oci': ['oci'] + local_ray, - 'kubernetes': ['kubernetes>=20.0.0'], - 'remote': remote, - 'runpod': ['runpod>=1.5.1'], - 'fluidstack': [], # No dependencies needed for fluidstack - 'cudo': ['cudo-compute>=0.1.10'], - 'paperspace': [], # No dependencies needed for paperspace - 'vsphere': [ - 'pyvmomi==8.0.1.0.2', - # vsphere-automation-sdk is also required, but it does not have - # pypi release, which cause failure of our pypi release. - # https://peps.python.org/pep-0440/#direct-references - # We have the instruction for its installation in our - # docs instead. 
- # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' - ], -} - -extras_require['all'] = sum(extras_require.values(), []) - long_description = '' readme_filepath = 'README.md' # When sky/backends/wheel_utils.py builds wheels, it will not contain the @@ -283,8 +163,8 @@ def parse_readme(readme: str) -> str: long_description_content_type='text/markdown', setup_requires=['wheel'], requires_python='>=3.7', - install_requires=install_requires, - extras_require=extras_require, + install_requires=dependencies.install_requires, + extras_require=dependencies.extras_require, entry_points={ 'console_scripts': ['sky = sky.cli:cli'], }, diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index a6657df960d..052f9be7d8b 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -25,6 +25,7 @@ from sky.jobs import utils as managed_job_utils from sky.serve import constants as serve_constants from sky.serve import serve_utils +from sky.setup_files import dependencies from sky.skylet import constants from sky.utils import common_utils from sky.utils import env_options @@ -187,79 +188,55 @@ def from_type(cls, controller_type: str) -> Optional['Controllers']: # Install cli dependencies. Not using SkyPilot wheels because the wheel # can be cleaned up by another process. -# TODO(zhwu): Keep the dependencies align with the ones in setup.py def _get_cloud_dependencies_installation_commands( controller: Controllers) -> List[str]: - # TODO(tian): Make dependency installation command a method of cloud - # class and get all installation command for enabled clouds. - commands = [] # We use step/total placeholders instead of string formatting, as we need to update # the total at the end of the for loop, and python does not support # partial string formatting. 
prefix_str = ('[/] Check & install cloud dependencies ' 'on controller: ') + commands: List[str] = [] # This is to make sure the shorter checking message does not have junk # characters from the previous message. - empty_str = ' ' * 10 - aws_dependencies_installation = ( - 'pip list | grep boto3 > /dev/null 2>&1 || pip install ' - 'botocore>=1.29.10 boto3>=1.26.1; ' - # Need to separate the installation of awscli from above because some - # other clouds will install boto3 but not awscli. - 'pip list | grep awscli> /dev/null 2>&1 || pip install "urllib3<2" ' - 'awscli>=1.27.10 "colorama<0.4.5" > /dev/null 2>&1') - setup_clouds: List[str] = [] + empty_str = ' ' * 20 + + # All python dependencies will be accumulated and then installed in one + # command at the end. This is very fast if the packages are already + # installed, so we don't check that. + python_packages: set[str] = set() + + step_prefix = prefix_str.replace('', str(len(commands) + 1)) + commands.append( + f'echo -en "\\r{step_prefix}uv{empty_str}" &&' + 'export PATH="$PATH:$HOME/.local/bin" &&' + f'uv -V > /dev/null 2>&1 ||' + 'curl -LsSf https://astral.sh/uv/install.sh 2>/dev/null |' + 'UV_INSTALL_DIR="$HOME/.local/bin" sh >/dev/null 2>&1') + for cloud in sky_check.get_cached_enabled_clouds_or_refresh(): - if isinstance( - clouds, - (clouds.Lambda, clouds.SCP, clouds.Fluidstack, clouds.Paperspace)): - # no need to install any cloud dependencies for lambda, scp, - # fluidstack and paperspace - continue - if isinstance(cloud, clouds.AWS): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) - commands.append(f'echo -en "\\r{step_prefix}AWS{empty_str}" && ' + - aws_dependencies_installation) - setup_clouds.append(str(cloud)) - elif isinstance(cloud, clouds.Azure): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) - commands.append( - f'echo -en "\\r{step_prefix}Azure{empty_str}" && ' - 'pip list | grep azure-cli > /dev/null 2>&1 || ' - 'pip install "azure-cli>=2.31.0" 
azure-core ' - '"azure-identity>=1.13.0" azure-mgmt-network > /dev/null 2>&1') - # Have to separate this installation of az blob storage from above - # because this is newly-introduced and not part of azure-cli. We - # need a separate installed check for this. - commands.append( - 'pip list | grep azure-storage-blob > /dev/null 2>&1 || ' - 'pip install azure-storage-blob msgraph-sdk > /dev/null 2>&1') - setup_clouds.append(str(cloud)) - elif isinstance(cloud, clouds.GCP): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) + cloud_python_dependencies: List[str] = dependencies.extras_require[ + cloud.canonical_name()] + + if isinstance(cloud, clouds.Azure): + # azure-cli cannot be normally installed by uv. + # See comments in sky/skylet/constants.py. + azure_cli_dep = [ + dep for dep in cloud_python_dependencies + if dep.startswith('azure-cli') + ] + assert len(azure_cli_dep) == 1, cloud_python_dependencies + azure_cli_dep = azure_cli_dep[0] + cloud_python_dependencies.remove(azure_cli_dep) + + step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( - f'echo -en "\\r{step_prefix}GCP{empty_str}" && ' - 'pip list | grep google-api-python-client > /dev/null 2>&1 || ' - 'pip install "google-api-python-client>=2.69.0" ' + f'echo -en "\\r{step_prefix}azure-cli{empty_str}" &&' + f'uv pip install --prerelease=allow "{azure_cli_dep}" ' '> /dev/null 2>&1') - # Have to separate the installation of google-cloud-storage from - # above because for a VM launched on GCP, the VM may have - # google-api-python-client installed alone. 
- commands.append( - 'pip list | grep google-cloud-storage > /dev/null 2>&1 || ' - 'pip install google-cloud-storage > /dev/null 2>&1') - commands.append(f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}') - setup_clouds.append(str(cloud)) elif isinstance(cloud, clouds.Kubernetes): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) + step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( f'echo -en "\\r{step_prefix}Kubernetes{empty_str}" && ' - 'pip list | grep kubernetes > /dev/null 2>&1 || ' - 'pip install "kubernetes>=20.0.0" > /dev/null 2>&1 &&' # Install k8s + skypilot dependencies 'sudo bash -c "if ' '! command -v curl &> /dev/null || ' @@ -275,54 +252,35 @@ def _get_cloud_dependencies_installation_commands( '/bin/linux/amd64/kubectl" && ' 'sudo install -o root -g root -m 0755 ' 'kubectl /usr/local/bin/kubectl))') - setup_clouds.append(str(cloud)) elif isinstance(cloud, clouds.Cudo): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) + step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( - f'echo -en "\\r{step_prefix}Cudo{empty_str}" && ' - 'pip list | grep cudo-compute > /dev/null 2>&1 || ' - 'pip install "cudo-compute>=0.1.10" > /dev/null 2>&1 && ' + f'echo -en "\\r{step_prefix}cudoctl{empty_str}" && ' 'wget https://download.cudo.org/compute/cudoctl-0.3.2-amd64.deb -O ~/cudoctl.deb > /dev/null 2>&1 && ' # pylint: disable=line-too-long 'sudo dpkg -i ~/cudoctl.deb > /dev/null 2>&1') - setup_clouds.append(str(cloud)) - elif isinstance(cloud, clouds.RunPod): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) - commands.append(f'echo -en "\\r{step_prefix}RunPod{empty_str}" && ' - 'pip list | grep runpod > /dev/null 2>&1 || ' - 'pip install "runpod>=1.5.1" > /dev/null 2>&1') - setup_clouds.append(str(cloud)) - elif isinstance(cloud, clouds.OCI): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) - commands.append(f'echo -en "\\r{prefix_str}OCI{empty_str}" 
&& ' - 'pip list | grep oci > /dev/null 2>&1 || ' - 'pip install oci > /dev/null 2>&1') - setup_clouds.append(str(cloud)) - if controller == Controllers.JOBS_CONTROLLER: - if isinstance(cloud, clouds.IBM): - step_prefix = prefix_str.replace('', - str(len(setup_clouds) + 1)) - commands.append( - f'echo -en "\\r{step_prefix}IBM{empty_str}" ' - '&& pip list | grep ibm-cloud-sdk-core > /dev/null 2>&1 || ' - 'pip install ibm-cloud-sdk-core ibm-vpc ' - 'ibm-platform-services ibm-cos-sdk > /dev/null 2>&1') - setup_clouds.append(str(cloud)) + elif isinstance(cloud, clouds.IBM): + if controller != Controllers.JOBS_CONTROLLER: + # We only need IBM deps on the jobs controller. + cloud_python_dependencies = [] + + python_packages.update(cloud_python_dependencies) + if (cloudflare.NAME in storage_lib.get_cached_enabled_storage_clouds_or_refresh()): - step_prefix = prefix_str.replace('', str(len(setup_clouds) + 1)) - commands.append( - f'echo -en "\\r{step_prefix}Cloudflare{empty_str}" && ' + - aws_dependencies_installation) - setup_clouds.append(cloudflare.NAME) + python_packages.update(dependencies.extras_require['cloudflare']) + packages_string = ' '.join([f'"{package}"' for package in python_packages]) + step_prefix = prefix_str.replace('', str(len(commands) + 1)) + commands.append( + f'echo -en "\\r{step_prefix}python dependencies{empty_str}" && ' + f'uv pip install {packages_string} > /dev/null 2>&1') + + total_commands = len(commands) finish_prefix = prefix_str.replace('[/] ', ' ') commands.append(f'echo -e "\\r{finish_prefix}done.{empty_str}"') + commands = [ - command.replace('', str(len(setup_clouds))) - for command in commands + command.replace('', str(total_commands)) for command in commands ] return commands From 46b7e15e4ed8680fa9980398c60b6fa9e3217c32 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Wed, 27 Nov 2024 09:56:21 -0800 Subject: [PATCH 07/16] fix python 3.8 --- sky/utils/controller_utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 
deletions(-) diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index 052f9be7d8b..d327a55f955 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -203,15 +203,14 @@ def _get_cloud_dependencies_installation_commands( # All python dependencies will be accumulated and then installed in one # command at the end. This is very fast if the packages are already # installed, so we don't check that. - python_packages: set[str] = set() + python_packages: Set[str] = set() step_prefix = prefix_str.replace('', str(len(commands) + 1)) - commands.append( - f'echo -en "\\r{step_prefix}uv{empty_str}" &&' - 'export PATH="$PATH:$HOME/.local/bin" &&' - f'uv -V > /dev/null 2>&1 ||' - 'curl -LsSf https://astral.sh/uv/install.sh 2>/dev/null |' - 'UV_INSTALL_DIR="$HOME/.local/bin" sh >/dev/null 2>&1') + commands.append(f'echo -en "\\r{step_prefix}uv{empty_str}" &&' + 'export PATH="$PATH:$HOME/.local/bin" &&' + f'uv -V > /dev/null 2>&1 ||' + 'curl -LsSf https://astral.sh/uv/install.sh 2>/dev/null |' + 'UV_INSTALL_DIR="$HOME/.local/bin" sh >/dev/null 2>&1') for cloud in sky_check.get_cached_enabled_clouds_or_refresh(): cloud_python_dependencies: List[str] = dependencies.extras_require[ From b06b77ca5985b533b15e2f6b9a4cbeedf0446191 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Wed, 27 Nov 2024 09:58:32 -0800 Subject: [PATCH 08/16] lint --- sky/setup_files/setup.py | 2 +- sky/utils/controller_utils.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py index c0ff6b127c4..c7820dd34a9 100644 --- a/sky/setup_files/setup.py +++ b/sky/setup_files/setup.py @@ -36,7 +36,7 @@ # setuptools does not include the script dir on the search path, so manually add # it so that we can import the dependencies file. 
sys.path.append(SETUP_FILE_DIR) -import dependencies +import dependencies # pylint: disable=wrong-import-position original_init_content = None diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index d327a55f955..9e881ac1b40 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -18,7 +18,6 @@ from sky import sky_logging from sky import skypilot_config from sky.adaptors import cloudflare -from sky.clouds import gcp from sky.data import data_utils from sky.data import storage as storage_lib from sky.jobs import constants as managed_job_constants From 62a780a079bf97df381af5e10ed2f907eb2be6b5 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Wed, 27 Nov 2024 09:59:58 -0800 Subject: [PATCH 09/16] add missing file --- sky/setup_files/dependencies.py | 135 ++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 sky/setup_files/dependencies.py diff --git a/sky/setup_files/dependencies.py b/sky/setup_files/dependencies.py new file mode 100644 index 00000000000..18f4bdabf35 --- /dev/null +++ b/sky/setup_files/dependencies.py @@ -0,0 +1,135 @@ +"""Dependencies for SkyPilot. + +This file is imported by setup.py, so: +- It may not be able to import other skypilot modules, since sys.path may not be + correct. +- It should not import any dependencies, as they may not be installed yet. +""" +from typing import Dict, List + +install_requires = [ + 'wheel', + 'cachetools', + # NOTE: ray requires click>=7.0. + 'click >= 7.0', + 'colorama', + 'cryptography', + # Jinja has a bug in older versions because of the lack of pinning + # the version of the underlying markupsafe package. See: + # https://github.com/pallets/jinja/issues/1585 + 'jinja2 >= 3.0', + 'jsonschema', + 'networkx', + 'pandas>=1.3.0', + 'pendulum', + # PrettyTable with version >=2.0.0 is required for the support of + # `add_rows` method. 
+ 'PrettyTable >= 2.0.0', + 'python-dotenv', + 'rich', + 'tabulate', + # Light weight requirement, can be replaced with "typing" once + # we deprecate Python 3.7 (this will take a while). + 'typing_extensions', + 'filelock >= 3.6.0', + 'packaging', + 'psutil', + 'pulp', + # Cython 3.0 release breaks PyYAML 5.4.* (https://github.com/yaml/pyyaml/issues/601) + # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414 + 'pyyaml > 3.13, != 5.4.*', + 'requests', +] + +local_ray = [ + # Lower version of ray will cause dependency conflict for + # click/grpcio/protobuf. + # Excluded 2.6.0 as it has a bug in the cluster launcher: + # https://github.com/ray-project/ray/releases/tag/ray-2.6.1 + 'ray[default] >= 2.2.0, != 2.6.0', +] + +remote = [ + # Adopted from ray's setup.py: https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py + # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard fails to start when + # ray start is called (#2054). + # Tracking issue: https://github.com/ray-project/ray/issues/30984 + "grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < '3.10' and sys_platform == 'darwin'", # noqa:E501 + "grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= '3.10' and sys_platform == 'darwin'", # noqa:E501 + # Original issue: https://github.com/ray-project/ray/issues/33833 + "grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < '3.10' and sys_platform != 'darwin'", # noqa:E501 + "grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= '3.10' and sys_platform != 'darwin'", # noqa:E501 + # Adopted from ray's setup.py: + # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343 + 'protobuf >= 3.15.3, != 3.19.5', + # Some pydantic versions are not compatible with ray. 
Adopted from ray's + # setup.py: https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254 + 'pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3', +] + +# NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the +# following packages dependencies are changed. +aws_dependencies = [ + # botocore does not work with urllib3>=2.0.0, according to https://github.com/boto/botocore/issues/2926 + # We have to explicitly pin the version to optimize the time for + # poetry install. See https://github.com/orgs/python-poetry/discussions/7937 + 'urllib3<2', + # NOTE: this installs CLI V1. To use AWS SSO (e.g., `aws sso login`), users + # should instead use CLI V2 which is not pip-installable. See + # https://docs.aws.amazon.com/cli/latest/userguide/getting-started-install.html. + 'awscli>=1.27.10', + 'botocore>=1.29.10', + 'boto3>=1.26.1', + # NOTE: required by awscli. To avoid ray automatically installing + # the latest version. + 'colorama < 0.4.5', +] + +extras_require: Dict[str, List[str]] = { + 'aws': aws_dependencies, + # TODO(zongheng): azure-cli is huge and takes a long time to install. + # Tracked in: https://github.com/Azure/azure-cli/issues/7387 + # azure-identity is needed in node_provider. + # We need azure-identity>=1.13.0 to enable the customization of the + # timeout of AzureCliCredential. + 'azure': [ + # If you update the azure-cli dependency, update in sky/skylet/constants.py + # SKYPILOT_WHEEL_INSTALLATION_COMMANDS as well. + 'azure-cli>=2.65.0', + 'azure-core>=1.31.0', + 'azure-identity>=1.19.0', + 'azure-mgmt-network>=27.0.0', + 'azure-mgmt-compute>=33.0.0', + 'azure-storage-blob>=12.23.1', + 'msgraph-sdk', + ] + local_ray, + # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd' + # parameter for stopping instances. 
+ # Reference: https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6 + 'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'], + 'ibm': [ + 'ibm-cloud-sdk-core', 'ibm-vpc', 'ibm-platform-services', 'ibm-cos-sdk' + ] + local_ray, + 'docker': ['docker'] + local_ray, + 'lambda': local_ray, + 'cloudflare': aws_dependencies, + 'scp': local_ray, + 'oci': ['oci'] + local_ray, + 'kubernetes': ['kubernetes>=20.0.0'], + 'remote': remote, + 'runpod': ['runpod>=1.5.1'], + 'fluidstack': [], # No dependencies needed for fluidstack + 'cudo': ['cudo-compute>=0.1.10'], + 'paperspace': [], # No dependencies needed for paperspace + 'vsphere': [ + 'pyvmomi==8.0.1.0.2', + # vsphere-automation-sdk is also required, but it does not have + # pypi release, which cause failure of our pypi release. + # https://peps.python.org/pep-0440/#direct-references + # We have the instruction for its installation in our + # docs instead. + # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' + ], +} + +extras_require['all'] = sum(extras_require.values(), []) From 2a1b494537cd86d708960a09a7311c3fcf480731 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Wed, 27 Nov 2024 11:10:15 -0800 Subject: [PATCH 10/16] update comment --- sky/skylet/constants.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index 1fc782fb1f8..b7e269d3130 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -39,7 +39,8 @@ 'which python3') # Python executable, e.g., /opt/conda/bin/python3 SKY_PYTHON_CMD = f'$({SKY_GET_PYTHON_PATH_CMD})' -# Prefer SKY_UV_PIP_CMD, which is faster. TODO(cooper): remove all usages. +# Prefer SKY_UV_PIP_CMD, which is faster. +# TODO(cooperc): remove remaining usage (GCP TPU setup). 
SKY_PIP_CMD = f'{SKY_PYTHON_CMD} -m pip' # Ray executable, e.g., /opt/conda/bin/ray # We need to add SKY_PYTHON_CMD before ray executable because: From 9ef8d221f2a574dae7c9ac130cd3a86e572b7b93 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Wed, 27 Nov 2024 11:39:44 -0800 Subject: [PATCH 11/16] split out azure-cli dep --- sky/setup_files/dependencies.py | 8 +++++--- sky/skylet/constants.py | 4 +++- sky/utils/controller_utils.py | 10 ++-------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/sky/setup_files/dependencies.py b/sky/setup_files/dependencies.py index 18f4bdabf35..742e69aa423 100644 --- a/sky/setup_files/dependencies.py +++ b/sky/setup_files/dependencies.py @@ -85,6 +85,10 @@ 'colorama < 0.4.5', ] +# azure-cli cannot be installed normally by uv, so we need to work around it in +# a few places. +AZURE_CLI = 'azure-cli>=2.65.0' + extras_require: Dict[str, List[str]] = { 'aws': aws_dependencies, # TODO(zongheng): azure-cli is huge and takes a long time to install. @@ -93,9 +97,7 @@ # We need azure-identity>=1.13.0 to enable the customization of the # timeout of AzureCliCredential. 'azure': [ - # If you update the azure-cli dependency, update in sky/skylet/constants.py - # SKYPILOT_WHEEL_INSTALLATION_COMMANDS as well. - 'azure-cli>=2.65.0', + AZURE_CLI, 'azure-core>=1.31.0', 'azure-identity>=1.19.0', 'azure-mgmt-network>=27.0.0', diff --git a/sky/skylet/constants.py b/sky/skylet/constants.py index b7e269d3130..0b2a5b08e1b 100644 --- a/sky/skylet/constants.py +++ b/sky/skylet/constants.py @@ -4,6 +4,7 @@ from packaging import version import sky +from sky.setup_files import dependencies SKY_LOGS_DIRECTORY = '~/sky_logs' SKY_REMOTE_WORKDIR = '~/sky_workdir' @@ -226,7 +227,8 @@ # cause uv to use pre-releases for some other packages that have sufficient # stable releases. 
'if [ "{cloud}" = "azure" ]; then ' - f'{SKY_UV_PIP_CMD} install --prerelease=allow "azure-cli>=2.65.0"; fi;' + f'{SKY_UV_PIP_CMD} install --prerelease=allow "{dependencies.AZURE_CLI}";' + 'fi;' # Install skypilot from wheel f'{SKY_UV_PIP_CMD} install "$(echo ~/.sky/wheels/{{sky_wheel_hash}}/' f'skypilot-{_sky_version}*.whl)[{{cloud}}, remote]" && ' diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index 9e881ac1b40..19b612d0532 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -218,18 +218,12 @@ def _get_cloud_dependencies_installation_commands( if isinstance(cloud, clouds.Azure): # azure-cli cannot be normally installed by uv. # See comments in sky/skylet/constants.py. - azure_cli_dep = [ - dep for dep in cloud_python_dependencies - if dep.startswith('azure-cli') - ] - assert len(azure_cli_dep) == 1, cloud_python_dependencies - azure_cli_dep = azure_cli_dep[0] - cloud_python_dependencies.remove(azure_cli_dep) + cloud_python_dependencies.remove(dependencies.AZURE_CLI) step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( f'echo -en "\\r{step_prefix}azure-cli{empty_str}" &&' - f'uv pip install --prerelease=allow "{azure_cli_dep}" ' + f'uv pip install --prerelease=allow "{dependencies.AZURE_CLI}" ' '> /dev/null 2>&1') elif isinstance(cloud, clouds.Kubernetes): step_prefix = prefix_str.replace('', str(len(commands) + 1)) From 1c0ee290b29b33f70a6633aeefa82dac72eff3dd Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Wed, 27 Nov 2024 11:47:40 -0800 Subject: [PATCH 12/16] fix lint for dependencies --- sky/setup_files/dependencies.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/sky/setup_files/dependencies.py b/sky/setup_files/dependencies.py index 742e69aa423..18d2f5cdc08 100644 --- a/sky/setup_files/dependencies.py +++ b/sky/setup_files/dependencies.py @@ -35,7 +35,8 @@ 'packaging', 'psutil', 'pulp', - # Cython 3.0 release breaks 
PyYAML 5.4.* (https://github.com/yaml/pyyaml/issues/601) + # Cython 3.0 release breaks PyYAML 5.4.* + # (https://github.com/yaml/pyyaml/issues/601) # <= 3.13 may encounter https://github.com/ultralytics/yolov5/issues/414 'pyyaml > 3.13, != 5.4.*', 'requests', @@ -50,27 +51,30 @@ ] remote = [ - # Adopted from ray's setup.py: https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py - # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard fails to start when - # ray start is called (#2054). + # Adopted from ray's setup.py: + # https://github.com/ray-project/ray/blob/ray-2.4.0/python/setup.py + # SkyPilot: != 1.48.0 is required to avoid the error where ray dashboard + # fails to start when ray start is called (#2054). # Tracking issue: https://github.com/ray-project/ray/issues/30984 - "grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < '3.10' and sys_platform == 'darwin'", # noqa:E501 - "grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= '3.10' and sys_platform == 'darwin'", # noqa:E501 + 'grpcio >= 1.32.0, <= 1.49.1, != 1.48.0; python_version < \'3.10\' and sys_platform == \'darwin\'', # noqa:E501 pylint: disable=line-too-long + 'grpcio >= 1.42.0, <= 1.49.1, != 1.48.0; python_version >= \'3.10\' and sys_platform == \'darwin\'', # noqa:E501 pylint: disable=line-too-long # Original issue: https://github.com/ray-project/ray/issues/33833 - "grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < '3.10' and sys_platform != 'darwin'", # noqa:E501 - "grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= '3.10' and sys_platform != 'darwin'", # noqa:E501 + 'grpcio >= 1.32.0, <= 1.51.3, != 1.48.0; python_version < \'3.10\' and sys_platform != \'darwin\'', # noqa:E501 pylint: disable=line-too-long + 'grpcio >= 1.42.0, <= 1.51.3, != 1.48.0; python_version >= \'3.10\' and sys_platform != \'darwin\'', # noqa:E501 pylint: disable=line-too-long # Adopted from ray's setup.py: # 
https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L343 'protobuf >= 3.15.3, != 3.19.5', # Some pydantic versions are not compatible with ray. Adopted from ray's - # setup.py: https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254 + # setup.py: + # https://github.com/ray-project/ray/blob/ray-2.9.3/python/setup.py#L254 'pydantic!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,<3', ] # NOTE: Change the templates/jobs-controller.yaml.j2 file if any of the # following packages dependencies are changed. aws_dependencies = [ - # botocore does not work with urllib3>=2.0.0, according to https://github.com/boto/botocore/issues/2926 + # botocore does not work with urllib3>=2.0.0, according to + # https://github.com/boto/botocore/issues/2926 # We have to explicitly pin the version to optimize the time for # poetry install. See https://github.com/orgs/python-poetry/discussions/7937 'urllib3<2', @@ -106,8 +110,8 @@ 'msgraph-sdk', ] + local_ray, # We need google-api-python-client>=2.69.0 to enable 'discardLocalSsd' - # parameter for stopping instances. - # Reference: https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6 + # parameter for stopping instances. Reference: + # https://github.com/googleapis/google-api-python-client/commit/f6e9d3869ed605b06f7cbf2e8cf2db25108506e6 'gcp': ['google-api-python-client>=2.69.0', 'google-cloud-storage'], 'ibm': [ 'ibm-cloud-sdk-core', 'ibm-vpc', 'ibm-platform-services', 'ibm-cos-sdk' @@ -130,7 +134,7 @@ # https://peps.python.org/pep-0440/#direct-references # We have the instruction for its installation in our # docs instead. 
- # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' + # 'vsphere-automation-sdk @ git+https://github.com/vmware/vsphere-automation-sdk-python.git@v8.0.1.0' pylint: disable=line-too-long ], } From a392afd15d87f04047f02e792fb0c6f83f535fee Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Tue, 3 Dec 2024 11:32:51 -0800 Subject: [PATCH 13/16] use runpy.run_path rather than modifying sys.path --- sky/setup_files/setup.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py index c7820dd34a9..921c0574e6d 100644 --- a/sky/setup_files/setup.py +++ b/sky/setup_files/setup.py @@ -18,6 +18,7 @@ import os import platform import re +import runpy import subprocess import sys @@ -26,17 +27,17 @@ # __file__ is setup.py at the root of the repo. We shouldn't assume it's a # symlink - e.g. in the sdist it's resolved to a normal file. ROOT_DIR = os.path.dirname(__file__) -SETUP_FILE_DIR = os.path.join(ROOT_DIR, 'sky', 'setup_files') +DEPENDENCIES_FILE_PATH = os.path.join(ROOT_DIR, 'sky', 'setup_files', 'dependencies.py') INIT_FILE_PATH = os.path.join(ROOT_DIR, 'sky', '__init__.py') _COMMIT_FAILURE_MESSAGE = ( 'WARNING: SkyPilot fail to {verb} the commit hash in ' f'{INIT_FILE_PATH!r} (SkyPilot can still be normally used): ' '{error}') -# setuptools does not include the script dir on the search path, so manually add -# it so that we can import the dependencies file. -sys.path.append(SETUP_FILE_DIR) -import dependencies # pylint: disable=wrong-import-position +# setuptools does not include the script dir on the search path, so we can't +# just do `import dependencies`. Instead, use runpy to manually load it. Note: +# dependencies here is a dict, not a module, so we access it by subscripting. 
+dependencies = runpy.run_path(DEPENDENCIES_FILE_PATH) original_init_content = None @@ -163,8 +164,8 @@ def parse_readme(readme: str) -> str: long_description_content_type='text/markdown', setup_requires=['wheel'], requires_python='>=3.7', - install_requires=dependencies.install_requires, - extras_require=dependencies.extras_require, + install_requires=dependencies['install_requires'], + extras_require=dependencies['extras_require'], entry_points={ 'console_scripts': ['sky = sky.cli:cli'], }, From 4fda5fef2dffca9694ea4312e6b5798be5dec879 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Tue, 3 Dec 2024 11:57:39 -0800 Subject: [PATCH 14/16] fix cloud dependency installation commands --- sky/utils/controller_utils.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index 19b612d0532..4b44b0d25bf 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -18,6 +18,7 @@ from sky import sky_logging from sky import skypilot_config from sky.adaptors import cloudflare +from sky.clouds import gcp from sky.data import data_utils from sky.data import storage as storage_lib from sky.jobs import constants as managed_job_constants @@ -206,10 +207,7 @@ def _get_cloud_dependencies_installation_commands( step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append(f'echo -en "\\r{step_prefix}uv{empty_str}" &&' - 'export PATH="$PATH:$HOME/.local/bin" &&' - f'uv -V > /dev/null 2>&1 ||' - 'curl -LsSf https://astral.sh/uv/install.sh 2>/dev/null |' - 'UV_INSTALL_DIR="$HOME/.local/bin" sh >/dev/null 2>&1') + f'{constants.SKY_UV_INSTALL_CMD} >/dev/null 2>&1') for cloud in sky_check.get_cached_enabled_clouds_or_refresh(): cloud_python_dependencies: List[str] = dependencies.extras_require[ @@ -223,8 +221,13 @@ def _get_cloud_dependencies_installation_commands( step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( f'echo -en 
"\\r{step_prefix}azure-cli{empty_str}" &&' - f'uv pip install --prerelease=allow "{dependencies.AZURE_CLI}" ' - '> /dev/null 2>&1') + f'{constants.SKY_UV_PIP_CMD} install --prerelease=allow ' + f'"{dependencies.AZURE_CLI}" > /dev/null 2>&1') + elif isinstance(cloud, clouds.GCP): + step_prefix = prefix_str.replace('', str(len(commands) + 1)) + commands.append( + f'echo -en "\\r{step_prefix}GCP SDK{empty_str}" &&' + f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}') elif isinstance(cloud, clouds.Kubernetes): step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( @@ -265,7 +268,8 @@ def _get_cloud_dependencies_installation_commands( step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( f'echo -en "\\r{step_prefix}python dependencies{empty_str}" && ' - f'uv pip install {packages_string} > /dev/null 2>&1') + f'{constants.SKY_UV_PIP_CMD} install {packages_string} > /dev/null 2>&1' + ) total_commands = len(commands) finish_prefix = prefix_str.replace('[/] ', ' ') From 9e188a6cf2dedf0e4b7bd2a071cd34f4965b2d44 Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Tue, 3 Dec 2024 12:03:58 -0800 Subject: [PATCH 15/16] lint --- sky/setup_files/setup.py | 3 ++- sky/utils/controller_utils.py | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py index 921c0574e6d..121f96d8e8b 100644 --- a/sky/setup_files/setup.py +++ b/sky/setup_files/setup.py @@ -27,7 +27,8 @@ # __file__ is setup.py at the root of the repo. We shouldn't assume it's a # symlink - e.g. in the sdist it's resolved to a normal file. 
ROOT_DIR = os.path.dirname(__file__) -DEPENDENCIES_FILE_PATH = os.path.join(ROOT_DIR, 'sky', 'setup_files', 'dependencies.py') +DEPENDENCIES_FILE_PATH = os.path.join(ROOT_DIR, 'sky', 'setup_files', + 'dependencies.py') INIT_FILE_PATH = os.path.join(ROOT_DIR, 'sky', '__init__.py') _COMMIT_FAILURE_MESSAGE = ( 'WARNING: SkyPilot fail to {verb} the commit hash in ' diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index 4b44b0d25bf..8ac96565862 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -225,9 +225,8 @@ def _get_cloud_dependencies_installation_commands( f'"{dependencies.AZURE_CLI}" > /dev/null 2>&1') elif isinstance(cloud, clouds.GCP): step_prefix = prefix_str.replace('', str(len(commands) + 1)) - commands.append( - f'echo -en "\\r{step_prefix}GCP SDK{empty_str}" &&' - f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}') + commands.append(f'echo -en "\\r{step_prefix}GCP SDK{empty_str}" &&' + f'{gcp.GOOGLE_SDK_INSTALLATION_COMMAND}') elif isinstance(cloud, clouds.Kubernetes): step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( From 478c56e9d107b609003d7790b6d8bb31c5c0047b Mon Sep 17 00:00:00 2001 From: Christopher Cooper Date: Tue, 3 Dec 2024 13:32:26 -0800 Subject: [PATCH 16/16] Update sky/utils/controller_utils.py Co-authored-by: Zhanghao Wu --- sky/utils/controller_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index 8ac96565862..624c8cd94f0 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -266,7 +266,7 @@ def _get_cloud_dependencies_installation_commands( packages_string = ' '.join([f'"{package}"' for package in python_packages]) step_prefix = prefix_str.replace('', str(len(commands) + 1)) commands.append( - f'echo -en "\\r{step_prefix}python dependencies{empty_str}" && ' + f'echo -en "\\r{step_prefix}cloud python packages{empty_str}" && ' f'{constants.SKY_UV_PIP_CMD} 
install {packages_string} > /dev/null 2>&1' )