diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index b5e8f5e358a..74685e344e2 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -127,6 +127,10 @@ class ReservedClusterGroup(enum.Enum): f'sky spot queue{colorama.Style.RESET_ALL}'), decline_stop_hint=('Spot controller will be auto-stopped after all ' 'spot jobs finish.'), + decline_cancel_hint=( + 'Cancelling the spot controller\'s jobs is not allowed.\nTo cancel ' + f'spot jobs, use: {colorama.Style.BRIGHT}sky spot cancel [--all]{colorama.Style.RESET_ALL}'), check_cluster_name_hint=( f'Cluster {spot_lib.SPOT_CONTROLLER_NAME} is reserved for ' 'managed spot controller. ')) @@ -138,6 +142,8 @@ class ReservedClusterGroup(enum.Enum): f'sky serve status{colorama.Style.RESET_ALL}'), decline_stop_hint=(f'To teardown a service, use {colorama.Style.BRIGHT}' f'sky serve down{colorama.Style.RESET_ALL}.'), + decline_cancel_hint=( + 'Cancelling the sky serve controller\'s jobs is not allowed.'), check_cluster_name_hint=( f'Cluster prefix {serve_lib.CONTROLLER_PREFIX} is reserved for ' 'sky serve controller. ')) diff --git a/sky/cli.py b/sky/cli.py index 4ce6159fb79..d8b9b23d74d 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -51,7 +51,6 @@ from sky import core from sky import exceptions from sky import global_user_state -from sky import serve as serve_lib from sky import sky_logging from sky import spot as spot_lib from sky import status_lib @@ -2102,8 +2101,6 @@ def cancel(cluster: str, all: bool, jobs: List[int], yes: bool): # pylint: disa Job IDs can be looked up by ``sky queue cluster_name``. 
""" - bold = colorama.Style.BRIGHT - reset = colorama.Style.RESET_ALL job_identity_str = None job_ids_to_cancel = None if not jobs and not all: @@ -2131,18 +2128,9 @@ def cancel(cluster: str, all: bool, jobs: List[int], yes: bool): # pylint: disa try: core.cancel(cluster, all=all, job_ids=job_ids_to_cancel) except exceptions.NotSupportedError: - if cluster == spot_lib.SPOT_CONTROLLER_NAME: - # Friendly message for usage like 'sky cancel - # -a/'. - error_str = ( - 'Cancelling the spot controller\'s jobs is not allowed.' - f'\nTo cancel spot jobs, use: {bold}sky spot cancel [--all]{reset}') - else: - assert cluster.startswith(serve_lib.CONTROLLER_PREFIX) - error_str = ( - 'Cancelling the sky serve controller\'s jobs is not allowed.') - click.echo(error_str) + group = backend_utils.ReservedClusterGroup.get_group(cluster) + assert group is not None + click.echo(group.value.decline_cancel_hint) sys.exit(1) except ValueError as e: raise click.UsageError(str(e)) diff --git a/sky/core.py b/sky/core.py index a0cbc01207c..26a227a3add 100644 --- a/sky/core.py +++ b/sky/core.py @@ -184,17 +184,20 @@ def _start( f'Starting cluster {cluster_name!r} with backend {backend.NAME} ' 'is not supported.') - if cluster_name == spot.SPOT_CONTROLLER_NAME: + if backend_utils.ReservedClusterGroup.get_group(cluster_name) is not None: if down: raise ValueError('Using autodown (rather than autostop) is not ' - 'supported for the spot controller. Pass ' + 'supported for skypilot controllers. Pass ' '`down=False` or omit it instead.') if idle_minutes_to_autostop is not None: raise ValueError( 'Passing a custom autostop setting is currently not ' - 'supported when starting the spot controller. To ' + 'supported when starting skypilot controllers. To ' 'fix: omit the `idle_minutes_to_autostop` argument to use the ' f'default autostop settings (got: {idle_minutes_to_autostop}).') + # TODO(tian): Maybe we should merge the two MINUTES_TO_AUTOSTOP + # together. 
Currently, the two values are the same, so we just use the spot + # constant here. idle_minutes_to_autostop = spot.SPOT_CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP # NOTE: if spot_queue() calls _start() and hits here, that entrypoint