Skip to content

Commit

Permalink
merge master
Browse files Browse the repository at this point in the history
  • Loading branch information
zpoint committed Dec 4, 2024
2 parents bef1cf1 + 51a7e17 commit 60c5c97
Show file tree
Hide file tree
Showing 41 changed files with 688 additions and 444 deletions.
20 changes: 13 additions & 7 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,35 @@ jobs:
python-version: ["3.8"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install yapf==0.32.0
pip install toml==0.10.2
pip install black==22.10.0
pip install isort==5.12.0
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install yapf==0.32.0
uv pip install toml==0.10.2
uv pip install black==22.10.0
uv pip install isort==5.12.0
- name: Running yapf
run: |
source ~/test-env/bin/activate
yapf --diff --recursive ./ --exclude 'sky/skylet/ray_patches/**' \
--exclude 'sky/skylet/providers/ibm/**'
- name: Running black
run: |
source ~/test-env/bin/activate
black --diff --check sky/skylet/providers/ibm/
- name: Running isort for black formatted files
run: |
source ~/test-env/bin/activate
isort --diff --check --profile black -l 88 -m 3 \
sky/skylet/providers/ibm/
- name: Running isort for yapf formatted files
run: |
source ~/test-env/bin/activate
isort --diff --check ./ --sg 'sky/skylet/ray_patches/**' \
--sg 'sky/skylet/providers/ibm/**'
22 changes: 0 additions & 22 deletions .github/workflows/mypy-generic.yml

This file was deleted.

15 changes: 10 additions & 5 deletions .github/workflows/mypy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ on:
branches:
- master
- 'releases/**'
merge_group:

jobs:
mypy:
runs-on: ubuntu-latest
Expand All @@ -19,15 +21,18 @@ jobs:
python-version: ["3.8"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install mypy==$(grep mypy requirements-dev.txt | cut -d'=' -f3)
pip install $(grep types- requirements-dev.txt | tr '\n' ' ')
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install mypy==$(grep mypy requirements-dev.txt | cut -d'=' -f3)
uv pip install $(grep types- requirements-dev.txt | tr '\n' ' ')
- name: Running mypy
run: |
source ~/test-env/bin/activate
mypy $(cat tests/mypy_files.txt)
16 changes: 10 additions & 6 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,20 @@ jobs:
python-version: ["3.8"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install ".[all]"
pip install pylint==2.14.5
pip install pylint-quotes==0.2.3
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install --prerelease=allow "azure-cli>=2.65.0"
uv pip install ".[all]"
uv pip install pylint==2.14.5
uv pip install pylint-quotes==0.2.3
- name: Analysing the code with pylint
run: |
source ~/test-env/bin/activate
pylint --load-plugins pylint_quotes sky
31 changes: 13 additions & 18 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,26 +35,21 @@ jobs:
steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}

- name: Cache dependencies
uses: actions/cache@v3
if: startsWith(runner.os, 'Linux')
with:
path: ~/.cache/pip
key: ${{ runner.os }}-pip-pytest-${{ matrix.python-version }}
restore-keys: |
${{ runner.os }}-pip-pytest-${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install -e ".[all]"
pip install pytest pytest-xdist pytest-env>=0.6 memory-profiler==0.61.0
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install --prerelease=allow "azure-cli>=2.65.0"
# Use -e to include examples and tests folder in the path for unit
# tests to access them.
uv pip install -e ".[all]"
uv pip install pytest pytest-xdist pytest-env>=0.6 memory-profiler==0.61.0
- name: Run tests with pytest
run: SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 pytest -n 0 --dist no ${{ matrix.test-path }}
run: |
source ~/test-env/bin/activate
SKYPILOT_DISABLE_USAGE_COLLECTION=1 SKYPILOT_SKIP_CLOUD_IDENTITY_CHECK=1 pytest -n 0 --dist no ${{ matrix.test-path }}
16 changes: 10 additions & 6 deletions .github/workflows/test-doc-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,28 @@ on:
merge_group:

jobs:
format:
doc-build:
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.10"]
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
- name: Install the latest version of uv
uses: astral-sh/setup-uv@v4
with:
version: "latest"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .
uv venv --seed ~/test-env
source ~/test-env/bin/activate
uv pip install --prerelease=allow "azure-cli>=2.65.0"
uv pip install ".[all]"
cd docs
pip install -r ./requirements-docs.txt
uv pip install -r ./requirements-docs.txt
- name: Build documentation
run: |
source ~/test-env/bin/activate
cd ./docs
./build.sh
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,10 +155,11 @@ SkyPilot then performs the heavy-lifting for you, including:
Refer to [Quickstart](https://skypilot.readthedocs.io/en/latest/getting-started/quickstart.html) to get started with SkyPilot.

## More Information
To learn more, see our [documentation](https://skypilot.readthedocs.io/en/latest/), [blog](https://blog.skypilot.co/), and [community integrations](https://blog.skypilot.co/community/).
To learn more, see [Concept: Sky Computing](https://docs.skypilot.co/en/latest/sky-computing.html), [SkyPilot docs](https://skypilot.readthedocs.io/en/latest/), and [SkyPilot blog](https://blog.skypilot.co/).

<!-- Keep this section in sync with index.rst in SkyPilot Docs -->
Runnable examples:
- [**AI Gallery**](https://docs.skypilot.co/en/latest/gallery/index.html)
- LLMs on SkyPilot
- [Llama 3.2: lightweight and vision models](./llm/llama-3_2/)
- [Pixtral](./llm/pixtral/)
Expand Down
4 changes: 4 additions & 0 deletions docs/source/docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,14 @@ You can chat with the SkyPilot team and community on the `SkyPilot Slack <http:/
Learn more
--------------------------

To learn more, see :ref:`Concept: Sky Computing <sky-computing>` and `SkyPilot blog <https://blog.skypilot.co/>`_.

Runnable examples:

.. Keep this section in sync with README.md in SkyPilot repo
* :ref:`AI Gallery <ai-gallery>`

* **LLMs on SkyPilot**

* `Llama 3.2: lightweight and vision models <https://github.com/skypilot-org/skypilot/tree/master/llm/llama-3_2>`_
Expand Down
2 changes: 2 additions & 0 deletions sky/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def set_proxy_env_var(proxy_var: str, urllib_var: Optional[str]):
from sky.data import StoreType
from sky.execution import exec # pylint: disable=redefined-builtin
from sky.execution import launch
from sky.jobs import ManagedJobStatus
# TODO (zhwu): These imports are for backward compatibility, and spot APIs
# should be called with `sky.spot.xxx` instead. Remove in release 0.8.0
from sky.jobs.core import spot_cancel
Expand Down Expand Up @@ -163,6 +164,7 @@ def set_proxy_env_var(proxy_var: str, urllib_var: Optional[str]):
'StoreType',
'ClusterStatus',
'JobStatus',
'ManagedJobStatus',
# APIs
'Dag',
'Task',
Expand Down
9 changes: 5 additions & 4 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,14 +1612,14 @@ def check_can_clone_disk_and_override_task(
The task to use and the resource handle of the source cluster.
Raises:
ValueError: If the source cluster does not exist.
exceptions.ClusterDoesNotExist: If the source cluster does not exist.
exceptions.NotSupportedError: If the source cluster is not valid or the
task is not compatible to clone disk from the source cluster.
"""
source_cluster_status, handle = refresh_cluster_status_handle(cluster_name)
if source_cluster_status is None:
with ux_utils.print_exception_no_traceback():
raise ValueError(
raise exceptions.ClusterDoesNotExist(
f'Cannot find cluster {cluster_name!r} to clone disk from.')

if not isinstance(handle, backends.CloudVmRayResourceHandle):
Expand Down Expand Up @@ -2136,7 +2136,7 @@ def check_cluster_available(
"""Check if the cluster is available.
Raises:
ValueError: if the cluster does not exist.
exceptions.ClusterDoesNotExist: if the cluster does not exist.
exceptions.ClusterNotUpError: if the cluster is not UP.
exceptions.NotSupportedError: if the cluster is not based on
CloudVmRayBackend.
Expand Down Expand Up @@ -2201,7 +2201,8 @@ def check_cluster_available(
error_msg += message

with ux_utils.print_exception_no_traceback():
raise ValueError(f'{colorama.Fore.YELLOW}{error_msg}{reset}')
raise exceptions.ClusterDoesNotExist(
f'{colorama.Fore.YELLOW}{error_msg}{reset}')
assert cluster_status is not None, 'handle is not None but status is None'
backend = get_backend_from_handle(handle)
if check_cloud_vm_ray_backend and not isinstance(
Expand Down
34 changes: 27 additions & 7 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@ def add_prologue(self, job_id: int) -> None:
)
def get_or_fail(futures, pg) -> List[int]:
\"\"\"Wait for tasks, if any fails, cancel all unready.\"\"\"
if not futures:
return []
returncodes = [1] * len(futures)
# Wait for 1 task to be ready.
ready = []
Expand Down Expand Up @@ -3460,15 +3462,33 @@ def _execute(
Returns:
Job id if the task is submitted to the cluster, None otherwise.
"""
if task.run is None:
if task.run is None and self._setup_cmd is None:
# This message is fine without mentioning setup, as there are three
# cases when run section is empty:
# 1. setup specified, no --detach-setup: setup is executed and this
# message is fine for saying no run command specified.
# 2. setup specified, with --detach-setup: setup is executed in
# detached mode and this message will not be shown.
# 3. no setup specified: this message is fine as a user is likely
# creating a cluster only, and ok with the empty run command.
logger.info('Run commands not specified or empty.')
return None
# Check the task resources vs the cluster resources. Since `sky exec`
# will not run the provision and _check_existing_cluster
# We need to check ports here since sky.exec shouldn't change resources
valid_resource = self.check_resources_fit_cluster(handle,
task,
check_ports=True)
if task.run is None:
# If the task has no run command, we still need to execute the
# generated ray driver program to run the setup command in detached
# mode.
# In this case, we reset the resources for the task, so that the
# detached setup does not need to wait for the task resources to be
# ready (which are not used for setup anyway).
valid_resource = sky.Resources()
else:
# Check the task resources vs the cluster resources. Since
# `sky exec` will not run the provision and _check_existing_cluster,
# we need to check ports here since sky.exec shouldn't change
# resources.
valid_resource = self.check_resources_fit_cluster(handle,
task,
check_ports=True)
task_copy = copy.copy(task)
# Handle multiple resources exec case.
task_copy.set_resources(valid_resource)
Expand Down
13 changes: 11 additions & 2 deletions sky/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3914,16 +3914,25 @@ def jobs_cancel(name: Optional[str], job_ids: Tuple[int], all: bool, yes: bool):
default=False,
help=('Show the controller logs of this job; useful for debugging '
'launching/recoveries, etc.'))
@click.option(
'--refresh',
'-r',
default=False,
is_flag=True,
required=False,
help='Query the latest job logs, restarting the jobs controller if stopped.'
)
@click.argument('job_id', required=False, type=int)
@usage_lib.entrypoint
def jobs_logs(name: Optional[str], job_id: Optional[int], follow: bool,
controller: bool):
controller: bool, refresh: bool):
"""Tail the log of a managed job."""
try:
managed_jobs.tail_logs(name=name,
job_id=job_id,
follow=follow,
controller=controller)
controller=controller,
refresh=refresh)
except exceptions.ClusterNotUpError:
with ux_utils.print_exception_no_traceback():
raise
Expand Down
5 changes: 3 additions & 2 deletions sky/clouds/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class Kubernetes(clouds.Cloud):
SKY_SSH_KEY_SECRET_NAME = 'sky-ssh-keys'
SKY_SSH_JUMP_NAME = 'sky-ssh-jump-pod'

LEGACY_SINGLETON_REGION = 'kubernetes'

# Limit the length of the cluster name to avoid exceeding the limit of 63
# characters for Kubernetes resources. We limit to 42 characters (63-21) to
# allow additional characters for creating ingress services to expose ports.
Expand All @@ -54,7 +56,6 @@ class Kubernetes(clouds.Cloud):
_DEFAULT_MEMORY_CPU_RATIO = 1
_DEFAULT_MEMORY_CPU_RATIO_WITH_GPU = 4 # Allocate more memory for GPU tasks
_REPR = 'Kubernetes'
_LEGACY_SINGLETON_REGION = 'kubernetes'
_CLOUD_UNSUPPORTED_FEATURES = {
# TODO(romilb): Stopping might be possible to implement with
# container checkpointing introduced in Kubernetes v1.25. See:
Expand Down Expand Up @@ -630,7 +631,7 @@ def instance_type_exists(self, instance_type: str) -> bool:
instance_type)

def validate_region_zone(self, region: Optional[str], zone: Optional[str]):
if region == self._LEGACY_SINGLETON_REGION:
if region == self.LEGACY_SINGLETON_REGION:
# For backward compatibility, we allow the region to be set to the
# legacy singleton region.
# TODO: Remove this after 0.9.0.
Expand Down
Loading

0 comments on commit 60c5c97

Please sign in to comment.