diff --git a/sky/backends/backend.py b/sky/backends/backend.py index 59887bcc847..7bb6091ea84 100644 --- a/sky/backends/backend.py +++ b/sky/backends/backend.py @@ -3,8 +3,13 @@ from typing import Dict, Generic, Optional import sky +from sky import sky_logging from sky.usage import usage_lib from sky.utils import timeline +from sky.utils.common_utils import adjust_cluster_name +from sky.utils.common_utils import check_cluster_name_is_valid + +logger = sky_logging.init_logger(__name__) if typing.TYPE_CHECKING: from sky import resources @@ -51,6 +56,14 @@ def provision( retry_until_up: bool = False) -> Optional[_ResourceHandleType]: if cluster_name is None: cluster_name = sky.backends.backend_utils.generate_cluster_name() + else: + prev_cluster_name = cluster_name + cluster_name = adjust_cluster_name(cluster_name) + if prev_cluster_name != cluster_name: + logger.info(f'Changed cluster name from ' + f'"{prev_cluster_name}" to "{cluster_name}" ' + f'to ensure name is valid for provisioning.') + check_cluster_name_is_valid(cluster_name) usage_lib.record_cluster_name_for_current_operation(cluster_name) usage_lib.messages.usage.update_actual_task(task) return self._provision(task, to_provision, dryrun, stream_logs, diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index b1518bb9340..5686ace890a 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2009,9 +2009,6 @@ def provision_with_retries( # Retrying launchable resources. while True: try: - # Recheck cluster name as the 'except:' block below may - # change the cloud assignment. - to_provision.cloud.check_cluster_name_is_valid(cluster_name) if dryrun: cloud_user = None else: @@ -4340,12 +4337,6 @@ def _check_existing_cluster( prev_handle=handle, prev_cluster_ever_up=cluster_ever_up) usage_lib.messages.usage.set_new_cluster() - # Use the task_cloud, because the cloud in `to_provision` can be changed - # later during the retry. - for resources in task.resources: - task_cloud = (resources.cloud - if resources.cloud is not None else clouds.Cloud) - task_cloud.check_cluster_name_is_valid(cluster_name) if to_provision is None: # The cluster is recently terminated either by autostop or manually diff --git a/sky/cli.py b/sky/cli.py index 8aa224ffc2c..6b75b480947 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4074,17 +4074,6 @@ def spot_launch( if prompt is not None: click.confirm(prompt, default=True, abort=True, show_default=True) - for task in dag.tasks: - # We try our best to validate the cluster name before we launch the - # task. If the cloud is not specified, this will only validate the - # cluster name against the regex, and the cloud-specific validation will - # be done by the spot controller when actually launching the spot - # cluster. - for resources in task.resources: - task_cloud = (resources.cloud - if resources.cloud is not None else clouds.Cloud) - task_cloud.check_cluster_name_is_valid(name) - sky.spot_launch(dag, name, detach_run=detach_run, diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 3f807019d4f..ba19b0e3eff 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -21,6 +21,7 @@ import jsonschema import yaml +from sky import exceptions from sky import sky_logging from sky.skylet import constants from sky.utils import ux_utils @@ -30,6 +31,11 @@ USER_HASH_LENGTH = 8 USER_HASH_LENGTH_IN_CLUSTER_NAME = 4 +# Arbitrary letter to prepend to cloud cluster name if proposed +# cluster name does not start with a letter. Certain clouds +# require a name that starts with a letter +CLUSTER_NAME_VALID_PREFIX = 'sky-' + # We are using base36 to reduce the length of the hash. 2 chars -> 36^2 = 1296 # possibilities. considering the final cluster name contains the prefix as well, # we should be fine with 2 chars. @@ -117,6 +123,40 @@ def _base36_encode(num: int) -> str: return _base36_encode(int_value) +def adjust_cluster_name(cluster_name: str) -> str: + adjusted_cluster_name_arr = [] + for ch in cluster_name: + if ch.isalnum() or ch == '-': + adjusted_cluster_name_arr.append(ch.lower()) + elif ch in ('.', '_'): + adjusted_cluster_name_arr.append('-') + if not adjusted_cluster_name_arr[0].isalpha(): + adjusted_cluster_name_arr.insert(0, CLUSTER_NAME_VALID_PREFIX) + return ''.join(adjusted_cluster_name_arr) + + +def check_cluster_name_is_valid(cluster_name: str) -> None: + """Errors out on invalid cluster names not supported by cloud providers. + + Bans (including but not limited to) names that: + - are digits-only + - contain underscore (_) + + Raises: + exceptions.InvalidClusterNameError: If the cluster name is invalid. + """ + if cluster_name is None: + return + valid_regex = constants.CLUSTER_NAME_VALID_REGEX + if re.fullmatch(valid_regex, cluster_name) is None: + with ux_utils.print_exception_no_traceback(): + raise exceptions.InvalidClusterNameError( + f'Cluster name "{cluster_name}" is invalid; ' + 'ensure it is fully matched by regex (e.g., ' + 'only contains lower letters, numbers and dash): ' + f'{valid_regex}') + + def make_cluster_name_on_cloud(cluster_name: str, max_length: Optional[int] = 15, add_user_hash: bool = True) -> str: diff --git a/tests/unit_tests/test_common_utils.py b/tests/unit_tests/test_common_utils.py new file mode 100644 index 00000000000..af6317d859f --- /dev/null +++ b/tests/unit_tests/test_common_utils.py @@ -0,0 +1,20 @@ +"""Tests for methods in common_utils.py""" +from sky.utils.common_utils import adjust_cluster_name + + +class TestAdjustClusterName: + + def test_adjust_cluster_name_with_uppercase_letters(self): + assert adjust_cluster_name("LoRA") == "lora" + + def test_adjust_cluster_name_with_underscore(self): + assert adjust_cluster_name("seed_1") == "seed-1" + + def test_adjust_cluster_name_with_period(self): + assert adjust_cluster_name("cuda11-8") == "cuda11-8" + + def test_adjust_cluster_names_starting_with_number(self): + assert adjust_cluster_name("2bert") == "sky-2bert" + + def test_adjust_cluster_name_with_multiple_invalid_characters(self): + assert adjust_cluster_name("2Lo_R.A") == "sky-2lo-r-a"