Skip to content

Commit

Permalink
[Core] Execute setup when --detach-setup and no run section (#4430)
Browse files Browse the repository at this point in the history
* Execute setup when --detach-setup and no run section

* Update sky/backends/cloud_vm_ray_backend.py

Co-authored-by: Tian Xia <[email protected]>

* add comments

* Fix types

* format

* minor

* Add test for detach setup only

---------

Co-authored-by: Tian Xia <[email protected]>
  • Loading branch information
Michaelvll and cblmemo authored Dec 3, 2024
1 parent 6f96e7a commit c3c1fde
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 76 deletions.
2 changes: 2 additions & 0 deletions sky/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ def set_proxy_env_var(proxy_var: str, urllib_var: Optional[str]):
from sky.data import StoreType
from sky.execution import exec # pylint: disable=redefined-builtin
from sky.execution import launch
from sky.jobs import ManagedJobStatus
# TODO (zhwu): These imports are for backward compatibility, and spot APIs
# should be called with `sky.spot.xxx` instead. Remove in release 0.8.0
from sky.jobs.core import spot_cancel
Expand Down Expand Up @@ -163,6 +164,7 @@ def set_proxy_env_var(proxy_var: str, urllib_var: Optional[str]):
'StoreType',
'ClusterStatus',
'JobStatus',
'ManagedJobStatus',
# APIs
'Dag',
'Task',
Expand Down
34 changes: 27 additions & 7 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,8 @@ def add_prologue(self, job_id: int) -> None:
)
def get_or_fail(futures, pg) -> List[int]:
\"\"\"Wait for tasks, if any fails, cancel all unready.\"\"\"
if not futures:
return []
returncodes = [1] * len(futures)
# Wait for 1 task to be ready.
ready = []
Expand Down Expand Up @@ -3460,15 +3462,33 @@ def _execute(
Returns:
Job id if the task is submitted to the cluster, None otherwise.
"""
if task.run is None:
if task.run is None and self._setup_cmd is None:
# This message is fine without mentioning setup, as there are three
# cases when run section is empty:
# 1. setup specified, no --detach-setup: setup is executed and this
# message is fine for saying no run command specified.
# 2. setup specified, with --detach-setup: setup is executed in
# detached mode and this message will not be shown.
# 3. no setup specified: this message is fine as a user is likely
# creating a cluster only, and ok with the empty run command.
logger.info('Run commands not specified or empty.')
return None
# Check the task resources vs the cluster resources. Since `sky exec`
# will not run the provision and _check_existing_cluster
# We need to check ports here since sky.exec shouldn't change resources
valid_resource = self.check_resources_fit_cluster(handle,
task,
check_ports=True)
if task.run is None:
# If the task has no run command, we still need to execute the
# generated ray driver program to run the setup command in detached
# mode.
# In this case, we reset the resources for the task, so that the
# detached setup does not need to wait for the task resources to be
# ready (they are not used for setup anyway).
valid_resource = sky.Resources()
else:
# Check the task resources vs the cluster resources. `sky exec`
# does not run provisioning or _check_existing_cluster, so we
# need to check ports here: sky.exec shouldn't change
# resources.
valid_resource = self.check_resources_fit_cluster(handle,
task,
check_ports=True)
task_copy = copy.copy(task)
# Handle multiple resources exec case.
task_copy.set_resources(valid_resource)
Expand Down
Loading

0 comments on commit c3c1fde

Please sign in to comment.