Skip to content

Commit c8ea12b

Browse files
authored
[perf] optimizations for sky jobs launch (#4341)
* Cache AWS get_user_identities. With SSO enabled (and maybe without?) this takes about a second. We already use an lru_cache for Azure, so do the same here.
* Skip optimization for sky jobs launch --yes. The only reason we call optimize for jobs_launch is to give a preview of the resources we expect to use, and to give the user an opportunity to back out if it's not what they expect. If you use --yes or -y, you don't have a chance to back out, and you're probably running from a script, where you don't care. Optimization can take ~2 seconds, so just skip it.
* Update logging.
* Address PR comments.
1 parent 334b268 commit c8ea12b

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

sky/cli.py

+15-4
Original file line numberDiff line numberDiff line change
@@ -3699,13 +3699,24 @@ def jobs_launch(
36993699
dag_utils.maybe_infer_and_fill_dag_and_task_names(dag)
37003700
dag_utils.fill_default_config_in_dag_for_job_launch(dag)
37013701

3702-
click.secho(f'Managed job {dag.name!r} will be launched on (estimated):',
3703-
fg='cyan')
37043702
dag, _ = admin_policy_utils.apply(
37053703
dag, use_mutated_config_in_current_request=False)
3706-
dag = sky.optimize(dag)
37073704

3708-
if not yes:
3705+
if yes:
3706+
# Skip resource preview if -y is set, since we are probably running in
3707+
# a script and the user won't have a chance to review it anyway.
3708+
# This can save a couple of seconds.
3709+
click.secho(
3710+
f'Resources for managed job {dag.name!r} will be computed on the '
3711+
'managed jobs controller, since --yes is set.',
3712+
fg='cyan')
3713+
3714+
else:
3715+
click.secho(
3716+
f'Managed job {dag.name!r} will be launched on (estimated):',
3717+
fg='cyan')
3718+
dag = sky.optimize(dag)
3719+
37093720
prompt = f'Launching a managed job {dag.name!r}. Proceed?'
37103721
if prompt is not None:
37113722
click.confirm(prompt, default=True, abort=True, show_default=True)

sky/clouds/aws.py

+1
Original file line numberDiff line numberDiff line change
@@ -663,6 +663,7 @@ def _is_access_key_of_type(type_str: str) -> bool:
663663
return AWSIdentityType.SHARED_CREDENTIALS_FILE
664664

665665
@classmethod
666+
@functools.lru_cache(maxsize=1) # Cache since getting identity is slow.
666667
def get_user_identities(cls) -> Optional[List[List[str]]]:
667668
"""Returns a [UserId, Account] list that uniquely identifies the user.
668669

sky/execution.py

+6
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,12 @@ def _execute(
267267
# no-credential machine should not enter optimize(), which
268268
# would directly error out ('No cloud is enabled...'). Fix
269269
# by moving `sky check` checks out of optimize()?
270+
271+
controller = controller_utils.Controllers.from_name(
272+
cluster_name)
273+
if controller is not None:
274+
logger.info(
275+
f'Choosing resources for {controller.name}...')
270276
dag = sky.optimize(dag, minimize=optimize_target)
271277
task = dag.tasks[0] # Keep: dag may have been deep-copied.
272278
assert task.best_resources is not None, task

0 commit comments

Comments
 (0)