Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core] Relax cluster name restrictions and process cluster names #2743

Closed
Closed
Show file tree
Hide file tree
Changes from 31 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
71ba999
remove cluster name validation
Oct 29, 2023
7779065
Merge branch 'master' into remove-cluster-name-validation
dtran24 Nov 12, 2023
69fcedc
adjust cluster name
Nov 13, 2023
66cf85b
use single quotes
Nov 13, 2023
59f284b
Add cloud regex check
Nov 13, 2023
3e4a47f
renaming
Nov 13, 2023
bd832c2
make method private
Nov 13, 2023
ad1cc10
move import
Nov 13, 2023
29027e5
use single quotes
Nov 13, 2023
944e0b6
shorten comment
Nov 13, 2023
14c11cb
shorten comment
Nov 13, 2023
683bb3d
remove regex check
Nov 13, 2023
ce4f1e4
add regex check
Nov 13, 2023
4825f26
refactor cluster name validation method
Nov 13, 2023
cad5280
remove unused import
Nov 13, 2023
2392021
yapf formatting
Nov 13, 2023
d063745
add new line at EOF
Nov 13, 2023
cb80757
use adjusted cluster name
Nov 15, 2023
3ba668a
add tests
Nov 15, 2023
edf95c7
add more tests
Nov 15, 2023
0f36181
use single quotes
Nov 15, 2023
2dadb30
add module docstring
Nov 15, 2023
0bcd832
move test file
Nov 16, 2023
aca1f80
yapf test file
Nov 16, 2023
7dab47c
change cluster name locally
Dec 2, 2023
0b28bc0
yapf
Dec 2, 2023
da51f41
fix imports and shorten var name
Dec 2, 2023
7dd34d4
fix line
Dec 2, 2023
0432d50
log when cluster name changed
Dec 2, 2023
e78f750
remove lines
Dec 2, 2023
dd0acb2
change quotes
Dec 2, 2023
f570bc1
Merge branch 'master' into remove-cluster-name-validation
dtran24 Feb 8, 2024
4bc944b
[core] add imports missing from fixing merge conflict
Feb 8, 2024
084db05
[core] replace . and _ with -
Feb 8, 2024
bf59427
[core] prepend sky- when cluster name does not start with letter
Feb 8, 2024
fd6c7b7
[core] move tests to unit_tests dir
Feb 8, 2024
b090b31
[core] add period to log
Feb 8, 2024
c34eaf5
[core] fix usage of generate_cluster_cluster
Feb 8, 2024
280ff9e
Revert "[core] fix usage of generate_cluster_cluster"
Feb 8, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions sky/backends/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,13 @@
from typing import Dict, Generic, Optional

import sky
from sky import sky_logging
from sky.usage import usage_lib
from sky.utils import timeline
from sky.utils.common_utils import adjust_cluster_name
from sky.utils.common_utils import check_cluster_name_is_valid

logger = sky_logging.init_logger(__name__)

if typing.TYPE_CHECKING:
from sky import resources
Expand Down Expand Up @@ -51,6 +56,14 @@ def provision(
retry_until_up: bool = False) -> Optional[_ResourceHandleType]:
if cluster_name is None:
cluster_name = sky.backends.backend_utils.generate_cluster_name()
else:
prev_cluster_name = cluster_name
cluster_name = adjust_cluster_name(cluster_name)
dtran24 marked this conversation as resolved.
Show resolved Hide resolved
if prev_cluster_name != cluster_name:
logger.info(f'Changed cluster name from '
f'"{prev_cluster_name}" to "{cluster_name}" '
f'to ensure name is valid for provisioning')
dtran24 marked this conversation as resolved.
Show resolved Hide resolved
check_cluster_name_is_valid(cluster_name)
usage_lib.record_cluster_name_for_current_operation(cluster_name)
usage_lib.messages.usage.update_actual_task(task)
return self._provision(task, to_provision, dryrun, stream_logs,
Expand Down
9 changes: 0 additions & 9 deletions sky/backends/cloud_vm_ray_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -2185,9 +2185,6 @@ def provision_with_retries(
# Retrying launchable resources.
while True:
try:
# Recheck cluster name as the 'except:' block below may
# change the cloud assignment.
to_provision.cloud.check_cluster_name_is_valid(cluster_name)
if dryrun:
cloud_user = None
else:
Expand Down Expand Up @@ -4386,12 +4383,6 @@ def _check_existing_cluster(
prev_cluster_status=prev_cluster_status,
prev_handle=handle)
usage_lib.messages.usage.set_new_cluster()
# Use the task_cloud, because the cloud in `to_provision` can be changed
# later during the retry.
for resources in task.resources:
task_cloud = (resources.cloud
if resources.cloud is not None else clouds.Cloud)
task_cloud.check_cluster_name_is_valid(cluster_name)

if to_provision is None:
# The cluster is recently terminated either by autostop or manually
Expand Down
11 changes: 0 additions & 11 deletions sky/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3744,17 +3744,6 @@ def spot_launch(
if prompt is not None:
click.confirm(prompt, default=True, abort=True, show_default=True)

for task in dag.tasks:
# We try our best to validate the cluster name before we launch the
# task. If the cloud is not specified, this will only validate the
# cluster name against the regex, and the cloud-specific validation will
# be done by the spot controller when actually launching the spot
# cluster.
for resources in task.resources:
task_cloud = (resources.cloud
if resources.cloud is not None else clouds.Cloud)
task_cloud.check_cluster_name_is_valid(name)

sky.spot_launch(dag,
name,
detach_run=detach_run,
Expand Down
24 changes: 0 additions & 24 deletions sky/clouds/cloud.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,12 @@
"""Interfaces: clouds, regions, and zones."""
import collections
import enum
import re
import typing
from typing import Dict, Iterator, List, Optional, Set, Tuple

from sky import exceptions
from sky import skypilot_config
from sky.clouds import service_catalog
from sky.skylet import constants
from sky.utils import log_utils
from sky.utils import ux_utils

Expand Down Expand Up @@ -486,28 +484,6 @@ def check_features_are_supported(
f'The following features are not supported by {cls._REPR}:'
'\n\t' + table.get_string().replace('\n', '\n\t'))

@classmethod
def check_cluster_name_is_valid(cls, cluster_name: str) -> None:
"""Errors out on invalid cluster names not supported by cloud providers.

Bans (including but not limited to) names that:
- are digits-only
- contain underscore (_)

Raises:
exceptions.InvalidClusterNameError: If the cluster name is invalid.
"""
if cluster_name is None:
return
valid_regex = constants.CLUSTER_NAME_VALID_REGEX
if re.fullmatch(valid_regex, cluster_name) is None:
with ux_utils.print_exception_no_traceback():
raise exceptions.InvalidClusterNameError(
f'Cluster name "{cluster_name}" is invalid; '
'ensure it is fully matched by regex (e.g., '
'only contains lower letters, numbers and dash): '
f'{valid_regex}')

@classmethod
def check_disk_tier_enabled(cls, instance_type: str,
disk_tier: str) -> None:
Expand Down
38 changes: 38 additions & 0 deletions sky/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import jsonschema
import yaml

from sky import exceptions
from sky import sky_logging
from sky.skylet import constants
from sky.utils import ux_utils
Expand All @@ -29,6 +30,11 @@
USER_HASH_LENGTH = 8
USER_HASH_LENGTH_IN_CLUSTER_NAME = 4

# Arbitrary letter to prepend to cloud cluster name if proposed
# cluster name does not start with a letter. Certain clouds
# require a name that starts with a letter
CLUSTER_NAME_VALID_PREFIX = 'x'

# We are using base36 to reduce the length of the hash. 2 chars -> 36^2 = 1296
# possibilities. considering the final cluster name contains the prefix as well,
# we should be fine with 2 chars.
Expand Down Expand Up @@ -116,6 +122,38 @@ def _base36_encode(num: int) -> str:
return _base36_encode(int_value)


def adjust_cluster_name(cluster_name: str) -> str:
adjusted_cluster_name_arr = []
dtran24 marked this conversation as resolved.
Show resolved Hide resolved
for ch in cluster_name:
if ch.isalnum() or ch == '-':
adjusted_cluster_name_arr.append(ch.lower())
if not adjusted_cluster_name_arr[0].isalpha():
adjusted_cluster_name_arr.insert(0, CLUSTER_NAME_VALID_PREFIX)
dtran24 marked this conversation as resolved.
Show resolved Hide resolved
return ''.join(adjusted_cluster_name_arr)


def check_cluster_name_is_valid(cluster_name: str) -> None:
"""Errors out on invalid cluster names not supported by cloud providers.

Bans (including but not limited to) names that:
- are digits-only
- contain underscore (_)

Raises:
exceptions.InvalidClusterNameError: If the cluster name is invalid.
"""
if cluster_name is None:
return
valid_regex = constants.CLUSTER_NAME_VALID_REGEX
if re.fullmatch(valid_regex, cluster_name) is None:
with ux_utils.print_exception_no_traceback():
raise exceptions.InvalidClusterNameError(
f'Cluster name "{cluster_name}" is invalid; '
'ensure it is fully matched by regex (e.g., '
'only contains lower letters, numbers and dash): '
f'{valid_regex}')


def make_cluster_name_on_cloud(cluster_name: str,
max_length: Optional[int] = 15,
add_user_hash: bool = True) -> str:
Expand Down
20 changes: 20 additions & 0 deletions tests/test_common_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Tests for methods in common_utils.py"""
from sky.utils.common_utils import adjust_cluster_name


class TestAdjustClusterName:

def test_adjust_cluster_name_with_uppercase_letters(self):
assert adjust_cluster_name("LoRA") == "lora"

def test_adjust_cluster_name_with_underscore(self):
assert adjust_cluster_name("seed_1") == "seed1"

def test_adjust_cluster_name_with_period(self):
assert adjust_cluster_name("cuda11.8") == "cuda118"

def test_adjust_cluster_names_starting_with_number(self):
assert adjust_cluster_name("2bert") == "x2bert"

def test_adjust_cluster_name_with_multiple_invalid_characters(self):
assert adjust_cluster_name("2Lo_R.A") == "x2lora"
dtran24 marked this conversation as resolved.
Show resolved Hide resolved