[k8s] GPU Feature discovery label formatter #3493

Merged 27 commits on Jun 6, 2024
66 changes: 60 additions & 6 deletions sky/provision/kubernetes/utils.py
@@ -185,6 +185,62 @@ def get_accelerator_from_label_value(cls, value: str) -> str:
f'Invalid accelerator name in GKE cluster: {value}')


class GFDLabelFormatter(GPULabelFormatter):
"""GPU Feature Discovery label formatter

NVIDIA GPU nodes are labeled by GPU Feature Discovery,
e.g. nvidia.com/gpu.product=NVIDIA-H100-80GB-HBM3
https://github.com/NVIDIA/gpu-feature-discovery

GPU feature discovery is included as part of the
NVIDIA GPU Operator:
https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/overview.html

This LabelFormatter can't be used in autoscaling clusters since accelerators
may map to multiple labels, so `get_label_value` is not implemented.
"""

LABEL_KEY = 'nvidia.com/gpu.product'

@classmethod
def get_label_key(cls) -> str:
return cls.LABEL_KEY

@classmethod
def get_label_value(cls, accelerator: str) -> str:
"""An accelerator can map to many Nvidia GFD labels
(e.g., A100-80GB-PCIE vs. A100-SXM4-80GB).
As a result, we do not support get_label_value for GFDLabelFormatter."""
raise NotImplementedError

@classmethod
def get_accelerator_from_label_value(cls, value: str) -> str:
"""Searches against a canonical list of NVIDIA GPUs and pattern
matches the canonical GPU name against the GFD label.
"""
canonical_gpu_names = [
'A100-80GB', 'A100', 'A10G', 'H100', 'K80', 'M60', 'T4g', 'T4',
'V100', 'A10', 'P4000', 'P100', 'P40', 'P4', 'L4'
]
for canonical_name in canonical_gpu_names:
# The A100-80GB accelerator is labeled A100-SXM-80GB or A100-PCIE-80GB
if canonical_name == 'A100-80GB' and re.search(
r'A100.*-80GB', value):
return canonical_name
elif canonical_name in value:
return canonical_name

# If we didn't find a canonical name:
# 1. remove 'NVIDIA-' (e.g., 'NVIDIA-RTX-A6000' -> 'RTX-A6000')
# 2. remove 'GEFORCE-' (e.g., 'NVIDIA-GEFORCE-RTX-3070' -> 'RTX-3070')
# 3. replace 'RTX-' with 'RTX' (e.g. 'RTX-6000' -> 'RTX6000')
# Same logic, but uppercased, as the SkyPilot labeler job found in
# sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml
return value.upper().replace('NVIDIA-',
'').replace('GEFORCE-',
'').replace('RTX-', 'RTX')
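
As an illustration of the mapping above, here is a minimal sketch of what get_accelerator_from_label_value would return for a few typical nvidia.com/gpu.product values (the label strings are illustrative examples, assuming SkyPilot with this change is importable):

# Minimal sketch: expected outputs of GFDLabelFormatter.get_accelerator_from_label_value
# for a few example GFD product labels (illustrative, not exhaustive).
from sky.provision.kubernetes.utils import GFDLabelFormatter

examples = {
    'NVIDIA-H100-80GB-HBM3': 'H100',        # direct hit in the canonical list
    'NVIDIA-A100-SXM4-80GB': 'A100-80GB',   # matched by the r'A100.*-80GB' regex
    'Tesla-T4': 'T4',                       # plain substring match
    'NVIDIA-GeForce-RTX-3070': 'RTX3070',   # handled by the prefix-stripping fallback
}
for label_value, expected in examples.items():
    assert GFDLabelFormatter.get_accelerator_from_label_value(label_value) == expected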


class KarpenterLabelFormatter(SkyPilotLabelFormatter):
"""Karpeneter label formatter
Karpenter uses the label `karpenter.k8s.aws/instance-gpu-name` to identify
@@ -200,7 +256,7 @@ class KarpenterLabelFormatter(SkyPilotLabelFormatter):
# auto-detecting the GPU label type.
LABEL_FORMATTER_REGISTRY = [
SkyPilotLabelFormatter, CoreWeaveLabelFormatter, GKELabelFormatter,
KarpenterLabelFormatter
KarpenterLabelFormatter, GFDLabelFormatter
]

# Mapping of autoscaler type to label formatter
@@ -431,7 +487,6 @@ def get_gpu_label_key_value(acc_type: str, check_mode=False) -> Tuple[str, str]:
# conclude that the cluster is setup correctly and return.
return '', ''
k8s_acc_label_key = label_formatter.get_label_key()
k8s_acc_label_value = label_formatter.get_label_value(acc_type)
# Search in node_labels to see if any node has the requested
# GPU type.
# Note - this only checks if the label is available on a
@@ -441,10 +496,9 @@ def get_gpu_label_key_value(acc_type: str, check_mode=False) -> Tuple[str, str]:
for node_name, label_list in node_labels.items():
for label, value in label_list:
if (label == k8s_acc_label_key and
value == k8s_acc_label_value):
# If a node is found, we can break out of the loop
# and proceed to deploy.
return k8s_acc_label_key, k8s_acc_label_value
label_formatter.get_accelerator_from_label_value(
value) == acc_type):
return label, value
# If no node is found with the requested acc_type, raise error
with ux_utils.print_exception_no_traceback():
suffix = ''
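The get_gpu_label_key_value change above drops the forward lookup (get_label_value) in favor of a reverse lookup over the labels already present on nodes, which is the only direction GFD supports. Below is a simplified, standalone sketch of that matching idea using a hypothetical node_labels mapping; the real function also covers autoscaler-managed clusters and raises a descriptive error when nothing matches:

# Simplified, standalone sketch of the reverse-lookup matching shown above.
# node_labels is a hypothetical example of what the real code reads from the cluster.
from sky.provision.kubernetes.utils import GFDLabelFormatter

acc_type = 'A100-80GB'
node_labels = {
    'gpu-node-1': [('nvidia.com/gpu.product', 'NVIDIA-A100-SXM4-80GB')],
    'gpu-node-2': [('nvidia.com/gpu.product', 'Tesla-T4')],
}

k8s_acc_label_key = GFDLabelFormatter.get_label_key()
for node_name, label_list in node_labels.items():
    for label, value in label_list:
        if (label == k8s_acc_label_key and
                GFDLabelFormatter.get_accelerator_from_label_value(value)
                == acc_type):
            print(f'Matched {node_name}: {label}={value}')  # only gpu-node-1 matches

This is why get_label_value can remain unimplemented for GFD: matching works by normalizing the label values that already exist, rather than by predicting them.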
17 changes: 12 additions & 5 deletions tests/kubernetes/scripts/deploy_k3s.sh
@@ -5,6 +5,9 @@
# sky launch -c k3s --cloud gcp --gpus T4:1
# scp deploy_k3s.sh k3s:~/
# ssh k3s
# # (optional) skip the skypilot labeler job
# export SKY_SKIP_K8S_LABEL=1
# # deploy k3s
# chmod +x deploy_k3s.sh && ./deploy_k3s.sh

set -e
@@ -71,6 +74,7 @@ sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Wait for k3s to be ready
echo "Waiting for k3s to be ready"
sleep 5
kubectl wait --for=condition=ready node --all --timeout=5m

# =========== GPU support ===========
@@ -113,11 +117,14 @@ metadata:
handler: nvidia
EOF

# Label nodes with GPUs
echo "Labelling nodes with GPUs..."
python -m sky.utils.kubernetes.gpu_labeler
if [ ! "$SKY_SKIP_K8S_LABEL" == "1" ]
then
# Label nodes with GPUs
echo "Labelling nodes with GPUs..."
python -m sky.utils.kubernetes.gpu_labeler

# Wait for all the GPU labeling jobs to complete
wait_for_gpu_labeling_jobs
# Wait for all the GPU labeling jobs to complete
wait_for_gpu_labeling_jobs
fi

echo "K3s cluster ready! Run sky check to setup Kubernetes access."