[k8s] Add support for autoscaling kubernetes clusters (#3513)
* Add Karpenter label formatter.

* add autoscaler support

* lint

* lint

* comments

* comments

* lint
romilbhardwaj authored May 7, 2024
1 parent 5a0ecc7 commit 10340f8
Showing 6 changed files with 121 additions and 18 deletions.
19 changes: 19 additions & 0 deletions docs/source/reference/config.rst
@@ -326,6 +326,25 @@ Available fields and semantics:
# Default: 10 seconds
provision_timeout: 10
# Autoscaler configured in the Kubernetes cluster (optional)
#
# This field informs SkyPilot about the cluster autoscaler used in the
# Kubernetes cluster. Setting this field disables pre-launch checks for
# GPU capacity in the cluster; SkyPilot instead relies on the autoscaler
# to provision nodes with the required GPU capacity.
#
# Remember to set provision_timeout accordingly when using an autoscaler.
#
# Supported values: gke, karpenter, generic
# gke: uses cloud.google.com/gke-accelerator label to identify GPUs on nodes
# karpenter: uses karpenter.k8s.aws/instance-gpu-name label to identify GPUs on nodes
# generic: uses skypilot.co/accelerator labels to identify GPUs on nodes
# Refer to https://skypilot.readthedocs.io/en/latest/reference/kubernetes/kubernetes-setup.html#setting-up-gpu-support
# for more details on setting up labels for GPU support.
#
# Default: null (no autoscaler, autodetect label format for GPU nodes)
autoscaler: gke
# Additional fields to override the pod fields used by SkyPilot (optional)
#
# Any key:value pairs added here would get added to the pod spec used to
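For reference, a minimal sketch of how this new `autoscaler` value is consumed on the Python side. Both helpers used here (`get_autoscaler_type` and `AUTOSCALER_TO_LABEL_FORMATTER`) are added later in this commit in sky/provision/kubernetes/utils.py; the snippet itself is illustrative and not part of the diff.

# Sketch only -- not part of this commit's diff.
from sky.provision.kubernetes import utils as kubernetes_utils

autoscaler_type = kubernetes_utils.get_autoscaler_type()
if autoscaler_type is not None:
    # e.g. KubernetesAutoscalerType.GKE -> GKELabelFormatter
    label_formatter = kubernetes_utils.AUTOSCALER_TO_LABEL_FORMATTER[
        autoscaler_type]
    # e.g. 'cloud.google.com/gke-accelerator' for the GKE formatter.
    print(label_formatter.get_label_key())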
21 changes: 17 additions & 4 deletions sky/cli.py
@@ -2998,23 +2998,29 @@ def _output():

name, quantity = None, None

# Kubernetes-specific bools
cloud_is_kubernetes = isinstance(cloud_obj, clouds.Kubernetes)
kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type(
) is not None

if accelerator_str is None:
result = service_catalog.list_accelerator_counts(
gpus_only=True,
clouds=cloud,
region_filter=region,
)

if (len(result) == 0 and cloud_obj is not None and
cloud_obj.is_same_cloud(clouds.Kubernetes())):
if len(result) == 0 and cloud_is_kubernetes:
yield kubernetes_utils.NO_GPU_ERROR_MESSAGE
if kubernetes_autoscaling:
yield '\n'
yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE
return

# "Common" GPUs
# If cloud is kubernetes, we want to show all GPUs here, even if
# they are not listed as common in SkyPilot.
if (cloud_obj is not None and
cloud_obj.is_same_cloud(clouds.Kubernetes())):
if cloud_is_kubernetes:
for gpu, _ in sorted(result.items()):
gpu_table.add_row([gpu, _list_to_str(result.pop(gpu))])
else:
@@ -3038,9 +3044,16 @@ def _output():
other_table.add_row([gpu, _list_to_str(qty)])
yield from other_table.get_string()
yield '\n\n'
if (cloud_is_kubernetes or
cloud is None) and kubernetes_autoscaling:
yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE
yield '\n\n'
else:
yield ('\n\nHint: use -a/--all to see all accelerators '
'(including non-common ones) and pricing.')
if (cloud_is_kubernetes or
cloud is None) and kubernetes_autoscaling:
yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE
return
else:
# Parse accelerator string
22 changes: 14 additions & 8 deletions sky/clouds/kubernetes.py
@@ -337,14 +337,20 @@ def _make(instance_list):
gpu_task_cpus, gpu_task_memory, acc_count, acc_type).name)

# Check if requested instance type will fit in the cluster.
# TODO(romilb): This will fail early for autoscaling clusters.
fits, reason = kubernetes_utils.check_instance_fits(
chosen_instance_type)
if not fits:
logger.debug(f'Instance type {chosen_instance_type} does '
'not fit in the Kubernetes cluster. '
f'Reason: {reason}')
return [], []
autoscaler_type = kubernetes_utils.get_autoscaler_type()
if autoscaler_type is None:
# If autoscaler is not set, check if the instance type fits in the
# cluster. Else, rely on the autoscaler to provision the right
# instance type without running checks. Worst case, if autoscaling
# fails, the pod will be stuck in pending state until
# provision_timeout, after which failover will be triggered.
fits, reason = kubernetes_utils.check_instance_fits(
chosen_instance_type)
if not fits:
logger.debug(f'Instance type {chosen_instance_type} does '
'not fit in the Kubernetes cluster. '
f'Reason: {reason}')
return [], []

# No fuzzy lists for Kubernetes
return _make([chosen_instance_type]), []
63 changes: 57 additions & 6 deletions sky/provision/kubernetes/utils.py
Expand Up @@ -35,6 +35,12 @@
(e.g., skypilot.co/accelerator) are setup correctly. \
To further debug, run: sky check.'

KUBERNETES_AUTOSCALER_NOTE = (
'Note: Kubernetes cluster autoscaling is enabled. '
'Not all GPUs that can be provisioned may be listed '
'here. Refer to your autoscaler\'s node pool '
'configuration to see the list of supported GPUs.')

# TODO(romilb): Add links to docs for configuration instructions when ready.
ENDPOINTS_DEBUG_MESSAGE = ('Additionally, make sure your {endpoint_type} '
'is configured correctly. '
@@ -178,13 +184,31 @@ def get_accelerator_from_label_value(cls, value: str) -> str:
f'Invalid accelerator name in GKE cluster: {value}')


class KarpenterLabelFormatter(SkyPilotLabelFormatter):
"""Karpeneter label formatter
Karpenter uses the label `karpenter.k8s.aws/instance-gpu-name` to identify
the GPU type. Details: https://karpenter.sh/docs/reference/instance-types/
The naming scheme is the same as the SkyPilot formatter's, so we inherit from it.
"""
LABEL_KEY = 'karpenter.k8s.aws/instance-gpu-name'


# LABEL_FORMATTER_REGISTRY stores the label formats SkyPilot will try to
# discover the accelerator type from. The order of the list is important, as
# it will be used to determine the priority of the label formats.
# it will be used to determine the priority of the label formats when
# auto-detecting the GPU label type.
LABEL_FORMATTER_REGISTRY = [
SkyPilotLabelFormatter, CoreWeaveLabelFormatter, GKELabelFormatter
SkyPilotLabelFormatter, CoreWeaveLabelFormatter, GKELabelFormatter,
KarpenterLabelFormatter
]

# Mapping of autoscaler type to label formatter
AUTOSCALER_TO_LABEL_FORMATTER = {
kubernetes_enums.KubernetesAutoscalerType.GKE: GKELabelFormatter,
kubernetes_enums.KubernetesAutoscalerType.KARPENTER: KarpenterLabelFormatter, # pylint: disable=line-too-long
kubernetes_enums.KubernetesAutoscalerType.GENERIC: SkyPilotLabelFormatter,
}


def detect_gpu_label_formatter(
) -> Tuple[Optional[GPULabelFormatter], Dict[str, List[Tuple[str, str]]]]:
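To make the three label schemes concrete, the snippet below lists example node labels for each supported `autoscaler` value. The label keys come from this commit; the label values are typical of each scheme's naming convention and are assumptions for illustration only.

# Illustration only -- example GPU node labels per autoscaler setting.
# The label *values* are assumed examples, not taken from this diff.
EXAMPLE_GPU_NODE_LABELS = {
    'generic': {'skypilot.co/accelerator': 't4'},
    'karpenter': {'karpenter.k8s.aws/instance-gpu-name': 't4'},
    'gke': {'cloud.google.com/gke-accelerator': 'nvidia-tesla-t4'},
}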
@@ -348,10 +372,26 @@ def get_gpu_label_key_value(acc_type: str, check_mode=False) -> Tuple[str, str]:
# Check if the cluster has GPU resources
# TODO(romilb): This assumes the accelerator is a nvidia GPU. We
# need to support TPUs and other accelerators as well.
# TODO(romilb): This will fail early for autoscaling clusters.
# For AS clusters, we may need a way for users to specify GPU node pools
# to use since the cluster may be scaling up from zero nodes and may not
# have any GPU nodes yet.
# TODO(romilb): Currently, we broadly disable all GPU checks if autoscaling
# is configured in config.yaml since the cluster may be scaling up from
# zero nodes and may not have any GPU nodes yet. In the future, we should
# support polling the clusters for autoscaling information, such as the
# configured node pools.

autoscaler_type = get_autoscaler_type()
if autoscaler_type is not None:
# If autoscaler is set in config.yaml, override the label key and value
# to the autoscaler's format and bypass the GPU checks.
if check_mode:
# If check mode is enabled and autoscaler is set, we can return
# early since we assume the cluster autoscaler will handle GPU
# node provisioning.
return '', ''
formatter = AUTOSCALER_TO_LABEL_FORMATTER.get(autoscaler_type)
assert formatter is not None, ('Unsupported autoscaler type:'
f' {autoscaler_type}')
return formatter.get_label_key(), formatter.get_label_value(acc_type)

has_gpus, cluster_resources = detect_gpu_resource()
if has_gpus:
# Check if the cluster has GPU labels setup correctly
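A hedged usage sketch of the early return above: with an autoscaler configured, the GPU capacity checks are skipped and the label pair comes straight from the mapped formatter. The GKE-style value string shown is an assumption based on GKE's accelerator naming, not something this diff asserts.

# Sketch only. Assumes ~/.sky/config.yaml contains:
#   kubernetes:
#     autoscaler: gke
from sky.provision.kubernetes import utils as kubernetes_utils

key, value = kubernetes_utils.get_gpu_label_key_value('T4')
# key   -> 'cloud.google.com/gke-accelerator'
# value -> 'nvidia-tesla-t4'  (assumed GKE naming, for illustration)
# With check_mode=True and an autoscaler configured, the function returns
# ('', '') immediately, deferring GPU provisioning to the autoscaler.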
@@ -1310,3 +1350,14 @@ def get_head_pod_name(cluster_name_on_cloud: str):
# label, but since we know the naming convention, we can directly return the
# head pod name.
return f'{cluster_name_on_cloud}-head'


def get_autoscaler_type(
) -> Optional[kubernetes_enums.KubernetesAutoscalerType]:
"""Returns the autoscaler type by reading from config"""
autoscaler_type = skypilot_config.get_nested(['kubernetes', 'autoscaler'],
None)
if autoscaler_type is not None:
autoscaler_type = kubernetes_enums.KubernetesAutoscalerType(
autoscaler_type)
return autoscaler_type
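A small usage sketch for the helper above. In isolation, an unrecognized value raises ValueError from the enum constructor rather than returning None; in practice the config schema added below restricts the field to the enum's values first.

# Sketch of get_autoscaler_type() behaviour (not part of the diff):
#   kubernetes: {}                        -> None
#   kubernetes: {autoscaler: karpenter}   -> KubernetesAutoscalerType.KARPENTER
#   kubernetes: {autoscaler: unknown}     -> ValueError from the enum constructor
from sky.utils import kubernetes_enums

assert (kubernetes_enums.KubernetesAutoscalerType('karpenter') is
        kubernetes_enums.KubernetesAutoscalerType.KARPENTER)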
7 changes: 7 additions & 0 deletions sky/utils/kubernetes_enums.py
@@ -36,3 +36,10 @@ class KubernetesPortMode(enum.Enum):
INGRESS = 'ingress'
LOADBALANCER = 'loadbalancer'
PODIP = 'podip'


class KubernetesAutoscalerType(enum.Enum):
"""Enum for the different types of cluster autoscalers for Kubernetes."""
GKE = 'gke'
KARPENTER = 'karpenter'
GENERIC = 'generic'
7 changes: 7 additions & 0 deletions sky/utils/schemas.py
@@ -645,6 +645,13 @@ def get_config_schema():
'provision_timeout': {
'type': 'integer',
},
'autoscaler': {
'type': 'string',
'case_insensitive_enum': [
type.value
for type in kubernetes_enums.KubernetesAutoscalerType
]
},
}
},
'oci': {
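For reference, the comprehension above expands to the values of the enum added in kubernetes_enums.py. `case_insensitive_enum` is a SkyPilot-specific schema keyword whose validator lives outside this diff; the casing tolerance is therefore an assumption about that validator, not standard JSON Schema behaviour.

# Sketch: the allowed values produced by the list comprehension above.
from sky.utils import kubernetes_enums

allowed = [t.value for t in kubernetes_enums.KubernetesAutoscalerType]
print(allowed)  # ['gke', 'karpenter', 'generic']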
