Skip to content

Commit

Permalink
use flag to control logging
Browse files Browse the repository at this point in the history
  • Loading branch information
cblmemo committed Sep 13, 2023
1 parent f4bdbdb commit 78d7342
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 22 deletions.
19 changes: 9 additions & 10 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2593,6 +2593,7 @@ def __init__(self):
self._dag = None
self._optimize_target = None
self._requested_features = set()
self._minimize_logging = False

# Command for running the setup script. It is only set when the
# setup needs to be run outside the self._setup() and as part of
Expand All @@ -2608,6 +2609,8 @@ def register_info(self, **kwargs) -> None:
self._optimize_target) or optimizer.OptimizeTarget.COST
self._requested_features = kwargs.pop('requested_features',
self._requested_features)
self._minimize_logging = kwargs.pop('minimize_logging',
self._minimize_logging)
assert len(kwargs) == 0, f'Unexpected kwargs: {kwargs}'

def check_resources_fit_cluster(self, handle: CloudVmRayResourceHandle,
Expand Down Expand Up @@ -3226,7 +3229,7 @@ def _exec_code_on_head(
f'Failed to submit job {job_id}.',
stderr=stdout + stderr)

if not handle.cluster_name.startswith(serve_lib.CONTROLLER_PREFIX):
if not self._minimize_logging:
logger.info('Job submitted with Job ID: '
f'{style.BRIGHT}{job_id}{style.RESET_ALL}')

Expand Down Expand Up @@ -3259,9 +3262,7 @@ def _exec_code_on_head(
'\nTo view the spot job dashboard:\t'
f'{backend_utils.BOLD}sky spot dashboard'
f'{backend_utils.RESET_BOLD}')
elif not name.startswith(serve_lib.CONTROLLER_PREFIX):
# Skip logging for submit controller & load balancer jobs
# to skyserve controller cluster
elif not self._minimize_logging:
logger.info(f'{fore.CYAN}Job ID: '
f'{style.BRIGHT}{job_id}{style.RESET_ALL}'
'\nTo cancel the job:\t'
Expand Down Expand Up @@ -3384,8 +3385,7 @@ def _post_execute(self, handle: CloudVmRayResourceHandle,
fore = colorama.Fore
style = colorama.Style
name = handle.cluster_name
if (name == spot_lib.SPOT_CONTROLLER_NAME or down or
name.startswith(serve_lib.CONTROLLER_PREFIX)):
if down or self._minimize_logging:
return
stop_str = ('\nTo stop the cluster:'
f'\t{backend_utils.BOLD}sky stop {name}'
Expand Down Expand Up @@ -3503,8 +3503,7 @@ def get_job_status(
def cancel_jobs(self,
handle: CloudVmRayResourceHandle,
jobs: Optional[List[int]],
cancel_all: bool = False,
silent: bool = False) -> None:
cancel_all: bool = False) -> None:
"""Cancels jobs.
CloudVMRayBackend specific method.
Expand Down Expand Up @@ -3537,7 +3536,7 @@ def cancel_jobs(self,

cancelled_ids = common_utils.decode_payload(stdout)

if silent:
if self._minimize_logging:
return

if cancelled_ids:
Expand Down Expand Up @@ -4284,7 +4283,7 @@ def _check_existing_cluster(
f'{cluster_name!r} [Username: {ssh_user}].'
f'{colorama.Style.RESET_ALL}\n'
'Run `sky status` to see existing clusters.')
elif not cluster_name.startswith(serve_lib.CONTROLLER_PREFIX):
elif not self._minimize_logging:
logger.info(
f'{colorama.Fore.CYAN}Creating a new cluster: "{cluster_name}" '
f'[{task.num_nodes}x {to_provision}].'
Expand Down
3 changes: 2 additions & 1 deletion sky/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,13 +1179,14 @@ def serve_down(service_name: str, purge: bool = False) -> None:
if handle is not None:
assert isinstance(handle, backends.CloudVmRayResourceHandle)
backend = backends.CloudVmRayBackend()
backend.register_info(minimize_logging=True)

# Cancel the controller and load balancer jobs.
# Needed when the controller / load_balancer job failed to submit.
jobs = []
if service_handle.job_id is not None:
jobs.append(service_handle.job_id)
backend.cancel_jobs(handle, jobs=jobs, silent=True)
backend.cancel_jobs(handle, jobs=jobs)

# Cleanup all files on controller related to this service.
# We have a 10-min grace period for the controller to autostop,
Expand Down
22 changes: 11 additions & 11 deletions sky/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,7 @@ def _execute(
idle_minutes_to_autostop: Optional[int] = None,
no_setup: bool = False,
clone_disk_from: Optional[str] = None,
minimize_logging: bool = False,
# Internal only:
# pylint: disable=invalid-name
_is_launched_by_spot_controller: bool = False,
Expand Down Expand Up @@ -323,7 +324,8 @@ def _execute(

backend.register_info(dag=dag,
optimize_target=optimize_target,
requested_features=requested_features)
requested_features=requested_features,
minimize_logging=minimize_logging)

if task.storage_mounts is not None:
# Optimizer should eventually choose where to store bucket
Expand Down Expand Up @@ -380,26 +382,21 @@ def _execute(
backend.teardown_ephemeral_storage(task)
backend.teardown(handle, terminate=True)
finally:
if (cluster_name != spot.SPOT_CONTROLLER_NAME and
cluster_name is not None and
not cluster_name.startswith(serve.CONTROLLER_PREFIX)):
if not minimize_logging:
# UX: print live clusters to make users aware (to save costs).
#
# Don't print if this job is launched by the spot controller,
# because spot jobs are serverless, there can be many of them, and
# users tend to continuously monitor spot jobs using `sky spot
# status`.
# status`. Also don't print if this job is a skyserve controller
# job.
#
# Disable the usage collection for this status command.
env = dict(os.environ,
**{env_options.Options.DISABLE_LOGGING.value: '1'})
subprocess_utils.run('sky status --no-show-spot-jobs', env=env)
# UX: Don't show cursor if we are initializing a skyserve controller,
# since it will mess up the progress bar.
if (cluster_name is None or
not cluster_name.startswith(serve.CONTROLLER_PREFIX)):
print()
print('\x1b[?25h', end='') # Show cursor.
print()
print('\x1b[?25h', end='') # Show cursor.
return job_id


Expand Down Expand Up @@ -798,6 +795,7 @@ def spot_launch(
idle_minutes_to_autostop=spot.
SPOT_CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP,
retry_until_up=True,
minimize_logging=True,
)


Expand Down Expand Up @@ -1126,6 +1124,7 @@ def serve_up(
# value and a previous controller could be reused.
idle_minutes_to_autostop=serve.CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP,
retry_until_up=True,
minimize_logging=True,
)

controller_record = global_user_state.get_cluster_from_name(
Expand All @@ -1140,6 +1139,7 @@ def serve_up(
assert isinstance(handle, backends.CloudVmRayResourceHandle)
backend = backend_utils.get_backend_from_handle(handle)
assert isinstance(backend, backends.CloudVmRayBackend), backend
backend.register_info(minimize_logging=True)
service_handle.endpoint_ip = handle.head_ip
global_user_state.set_service_handle(service_name, service_handle)

Expand Down

0 comments on commit 78d7342

Please sign in to comment.