[k8s] GPU Feature discovery label formatter #3493

Merged 27 commits on Jun 6, 2024
66 changes: 60 additions & 6 deletions sky/provision/kubernetes/utils.py
@@ -185,6 +185,62 @@ def get_accelerator_from_label_value(cls, value: str) -> str:
f'Invalid accelerator name in GKE cluster: {value}')


class GFDLabelFormatter(GPULabelFormatter):
"""GPU Feature Discovery label formatter

NVIDIA GPU nodes are labeled by GPU Feature Discovery,
e.g. nvidia.com/gpu.product=NVIDIA-H100-80GB-HBM3
https://github.com/NVIDIA/gpu-feature-discovery

GPU feature discovery is included as part of the
NVIDIA GPU Operator:
https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/overview.html

This LabelFormatter can't be used in autoscaling clusters since accelerators
may map to multiple labels, so `get_label_value` is not implemented.
"""

LABEL_KEY = 'nvidia.com/gpu.product'

@classmethod
def get_label_key(cls) -> str:
return cls.LABEL_KEY

@classmethod
def get_label_value(cls, accelerator: str) -> str:
"""An accelerator can map to many Nvidia GFD labels
(e.g., A100-80GB-PCIE vs. A100-SXM4-80GB).
As a result, we do not support get_label_value for GFDLabelFormatter."""
raise NotImplementedError

@classmethod
def get_accelerator_from_label_value(cls, value: str) -> str:
"""Searches against a canonical list of NVIDIA GPUs and pattern
matches the canonical GPU name against the GFD label.
"""
canonical_gpu_names = [
'A100-80GB', 'A100', 'A10G', 'H100', 'K80', 'M60', 'T4g', 'T4',
'V100', 'A10', 'P4000', 'P100', 'P40', 'P4', 'L4'
]
for canonical_name in canonical_gpu_names:
# The A100-80GB accelerator is labeled A100-SXM-80GB or A100-PCIE-80GB
if canonical_name == 'A100-80GB' and re.search(
r'A100.*-80GB', value):
return canonical_name
elif canonical_name in value:
return canonical_name

# If we didn't find a canonical name:
# 1. remove 'NVIDIA-' (e.g., 'NVIDIA-RTX-A6000' -> 'RTX-A6000')
# 2. remove 'GEFORCE-' (e.g., 'NVIDIA-GEFORCE-RTX-3070' -> 'RTX-3070')
# 3. replace 'RTX-' with 'RTX' (e.g. 'RTX-6000' -> 'RTX6000')
# Same logic, but uppercased, as the SkyPilot labeler job found in
# sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml
return value.upper().replace('NVIDIA-',
'').replace('GEFORCE-',
'').replace('RTX-', 'RTX')
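
As an illustration of the mapping above, here is a minimal sketch of what get_accelerator_from_label_value would return for a few typical nvidia.com/gpu.product values (the label strings are illustrative examples, assuming SkyPilot with this change is importable):

# Minimal sketch: expected outputs of GFDLabelFormatter.get_accelerator_from_label_value
# for a few example GFD product labels (illustrative, not exhaustive).
from sky.provision.kubernetes.utils import GFDLabelFormatter

examples = {
    'NVIDIA-H100-80GB-HBM3': 'H100',        # direct hit in the canonical list
    'NVIDIA-A100-SXM4-80GB': 'A100-80GB',   # matched by the r'A100.*-80GB' regex
    'Tesla-T4': 'T4',                       # plain substring match
    'NVIDIA-GeForce-RTX-3070': 'RTX3070',   # handled by the prefix-stripping fallback
}
for label_value, expected in examples.items():
    assert GFDLabelFormatter.get_accelerator_from_label_value(label_value) == expected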


class KarpenterLabelFormatter(SkyPilotLabelFormatter):
"""Karpeneter label formatter
Karpenter uses the label `karpenter.k8s.aws/instance-gpu-name` to identify
@@ -200,7 +256,7 @@ class KarpenterLabelFormatter(SkyPilotLabelFormatter):
# auto-detecting the GPU label type.
LABEL_FORMATTER_REGISTRY = [
SkyPilotLabelFormatter, CoreWeaveLabelFormatter, GKELabelFormatter,
KarpenterLabelFormatter
KarpenterLabelFormatter, GFDLabelFormatter
]

# Mapping of autoscaler type to label formatter
@@ -431,7 +487,6 @@ def get_gpu_label_key_value(acc_type: str, check_mode=False) -> Tuple[str, str]:
# conclude that the cluster is setup correctly and return.
return '', ''
k8s_acc_label_key = label_formatter.get_label_key()
k8s_acc_label_value = label_formatter.get_label_value(acc_type)
# Search in node_labels to see if any node has the requested
# GPU type.
# Note - this only checks if the label is available on a
@@ -441,10 +496,9 @@ def get_gpu_label_key_value(acc_type: str, check_mode=False) -> Tuple[str, str]:
for node_name, label_list in node_labels.items():
for label, value in label_list:
if (label == k8s_acc_label_key and
value == k8s_acc_label_value):
# If a node is found, we can break out of the loop
# and proceed to deploy.
return k8s_acc_label_key, k8s_acc_label_value
label_formatter.get_accelerator_from_label_value(
value) == acc_type):
return label, value
# If no node is found with the requested acc_type, raise error
with ux_utils.print_exception_no_traceback():
suffix = ''
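The get_gpu_label_key_value change above drops the forward lookup (get_label_value) in favor of a reverse lookup over the labels already present on nodes, which is the only direction GFD supports. Below is a simplified, standalone sketch of that matching idea using a hypothetical node_labels mapping; the real function also covers autoscaler-managed clusters and raises a descriptive error when nothing matches:

# Simplified, standalone sketch of the reverse-lookup matching shown above.
# node_labels is a hypothetical example of what the real code reads from the cluster.
from sky.provision.kubernetes.utils import GFDLabelFormatter

acc_type = 'A100-80GB'
node_labels = {
    'gpu-node-1': [('nvidia.com/gpu.product', 'NVIDIA-A100-SXM4-80GB')],
    'gpu-node-2': [('nvidia.com/gpu.product', 'Tesla-T4')],
}

k8s_acc_label_key = GFDLabelFormatter.get_label_key()
for node_name, label_list in node_labels.items():
    for label, value in label_list:
        if (label == k8s_acc_label_key and
                GFDLabelFormatter.get_accelerator_from_label_value(value)
                == acc_type):
            print(f'Matched {node_name}: {label}={value}')  # only gpu-node-1 matches

This is why get_label_value can remain unimplemented for GFD: matching works by normalizing the label values that already exist, rather than by predicting them.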
17 changes: 12 additions & 5 deletions tests/kubernetes/scripts/deploy_k3s.sh
@@ -5,6 +5,9 @@
# sky launch -c k3s --cloud gcp --gpus T4:1
# scp deploy_k3s.sh k3s:~/
# ssh k3s
# # (optional) skip the skypilot labeler job
# export SKY_SKIP_K8S_LABEL=1
# # deploy k3s
# chmod +x deploy_k3s.sh && ./deploy_k3s.sh

set -e
@@ -71,6 +74,7 @@ sudo chown $(id -u):$(id -g) $HOME/.kube/config

# Wait for k3s to be ready
echo "Waiting for k3s to be ready"
sleep 5
kubectl wait --for=condition=ready node --all --timeout=5m

# =========== GPU support ===========
@@ -113,11 +117,14 @@ metadata:
handler: nvidia
EOF

# Label nodes with GPUs
echo "Labelling nodes with GPUs..."
python -m sky.utils.kubernetes.gpu_labeler
if [ ! "$SKY_SKIP_K8S_LABEL" == "1" ]
then
# Label nodes with GPUs
echo "Labelling nodes with GPUs..."
python -m sky.utils.kubernetes.gpu_labeler

# Wait for all the GPU labeling jobs to complete
wait_for_gpu_labeling_jobs
# Wait for all the GPU labeling jobs to complete
wait_for_gpu_labeling_jobs
fi

echo "K3s cluster ready! Run sky check to setup Kubernetes access."