[k8s] On-demand single-host TPU support on GKE (#3947)
* initial version of TPU support on GKE

* revert unnecessary change

* revert

* use TPU_LABEL_KEY constant

* nit

* nit

* update detect_gpu_label_formatter() to use match_label_key()

* tidy get_gpu_label_key_value

* nit

* update method name

* update get_gke_accelerator_name to support TPU

* add support for get_label_keys method due to TPU label key

* syntax

* update get_tpu_topology_label_key_value

* nit

* refactor error surfacing methods to have them work with TPU support

* update toleration comment

* support listing available TPUs and show-gpus for TPUs

* nit

* update help message

* Update /tmp/tpu_logs dir's write permission

* nit

* nit

* comment update on error handling for insufficient TPU resources

* Update to use global constants instead of hard-coded strings for nvidia.com/gpu and google.com/tpu

* add smoke test and make exec work on TPU pods

* update smoke test to check if TPU is reachable.

* add comment

* nit

* Comment on the number of requested TPU chips for multi- and single-host TPU slices.

* update method to check GKE supported TPU name

* nit

* move is_tpu_pod_slice to kubernetes_utils

* update get_accelerator_from_label_value to use is_tpu_pod_slice method

* nit

* format

* nit

* check acc count support

* preemptive TPU check

* update check_tpu_fits

* error msg update

* merge get_tpu_topology_label_key_value into get_gpu_label_key_value

* Update sky/provision/kubernetes/utils.py

Co-authored-by: Tian Xia <[email protected]>

* nit fixes

* format

* nit

* Implement method for reading acc counts from node/pod object

* assertion update for is_tpu_vm

* Exclude multi-host TPUs from being displayed in show-gpus

* Notify users in 'sky show-gpus' that multi-host TPUs are not supported

* format

* nit

* display warning message from show-gpus conditionally

* update sky show-gpus

* update get_accelerator_label_key_value

* format

* format

* format

* update comment

* resolve review comments

* update tpuvm_mnist.yaml

* resolve comments

* update display message for show-gpus

* format

---------

Co-authored-by: Tian Xia <[email protected]>
landscapepainter and cblmemo authored Nov 13, 2024
1 parent 2030398 commit eea13cc
Showing 12 changed files with 691 additions and 267 deletions.
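
For context, this change makes a single-host TPU slice on GKE requestable like any other Kubernetes accelerator. Below is a minimal sketch using SkyPilot's Python API; the accelerator name and chip count are illustrative placeholders, not values taken from this commit:

```python
import sky

# Hypothetical single-host TPU slice name and chip count; check
# `sky show-gpus --cloud kubernetes` for what your cluster actually exposes.
task = sky.Task(run='python -c "import jax; print(jax.devices())"')
task.set_resources(
    sky.Resources(cloud=sky.Kubernetes(),
                  accelerators={'tpu-v5-lite-podslice': 4}))

sky.launch(task, cluster_name='tpu-test')
```

The names actually available in a given cluster are whatever `sky show-gpus --cloud kubernetes` reports after this change; multi-host TPU slices remain unsupported and are excluded from that listing.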
4 changes: 2 additions & 2 deletions examples/tpu/tpuvm_mnist.yaml
@@ -5,7 +5,7 @@ resources:
 
 # The setup command. Will be run under the working directory.
 setup: |
-  git clone https://github.com/google/flax.git --branch v0.8.2
+  git clone https://github.com/google/flax.git --branch v0.10.1
   conda activate flax
   if [ $? -eq 0 ]; then
@@ -15,7 +15,7 @@ setup: |
     conda activate flax
     # Make sure to install TPU related packages in a conda env to avoid package conflicts.
     pip install \
-      -f https://storage.googleapis.com/jax-releases/libtpu_releases.html "jax[tpu]==0.4.25" \
+      -f https://storage.googleapis.com/jax-releases/libtpu_releases.html "jax[tpu]==0.4.35" \
       clu \
       tensorflow tensorflow-datasets
   pip install -e flax
24 changes: 18 additions & 6 deletions sky/cli.py
@@ -3143,7 +3143,8 @@ def _get_kubernetes_realtime_gpu_table(
                        'in Kubernetes cluster. ')
         debug_msg = ('To show available accelerators on kubernetes,'
                      ' run: sky show-gpus --cloud kubernetes ')
-        full_err_msg = (err_msg + kubernetes_utils.NO_GPU_HELP_MESSAGE +
+        full_err_msg = (err_msg +
+                        kubernetes_utils.NO_ACCELERATOR_HELP_MESSAGE +
                         debug_msg)
         raise ValueError(full_err_msg)
     for gpu, _ in sorted(counts.items()):
@@ -3161,11 +3162,12 @@ def _get_kubernetes_node_info_table(context: Optional[str]):
 
     node_info_dict = kubernetes_utils.get_kubernetes_node_info(context)
     for node_name, node_info in node_info_dict.items():
-        available = node_info.free['nvidia.com/gpu'] if node_info.free[
-            'nvidia.com/gpu'] != -1 else no_permissions_str
+        available = node_info.free[
+            'accelerators_available'] if node_info.free[
+                'accelerators_available'] != -1 else no_permissions_str
         node_table.add_row([
-            node_name, node_info.gpu_type,
-            node_info.total['nvidia.com/gpu'], available
+            node_name, node_info.accelerator_type,
+            node_info.total['accelerator_count'], available
         ])
     return node_table
 
@@ -3220,8 +3222,18 @@ def _output():
                 yield from k8s_realtime_table.get_string()
                 k8s_node_table = _get_kubernetes_node_info_table(context)
                 yield '\n\n'
+                # TODO(Doyoung): Update the message with the multi-host TPU
+                # support.
+                k8s_per_node_acc_message = (
+                    'Kubernetes per node accelerator availability ')
+                if kubernetes_utils.multi_host_tpu_exists_in_cluster(
+                        context):
+                    k8s_per_node_acc_message += (
+                        '(Note: Multi-host TPUs are detected and excluded '
+                        'from the display as multi-host TPUs are not '
+                        'supported.)')
                 yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}'
-                       f'Kubernetes per node GPU availability'
+                       f'{k8s_per_node_acc_message}'
                        f'{colorama.Style.RESET_ALL}\n')
                 yield from k8s_node_table.get_string()
                 if kubernetes_autoscaling:
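The `multi_host_tpu_exists_in_cluster` check above (like the `is_multi_host_tpu` filter used later in this diff) is a new helper in `kubernetes_utils` whose body is not shown in this excerpt. A plausible sketch of the per-node check, assuming GKE's `cloud.google.com/gke-tpu-topology` label and the per-host chip count from the node's `google.com/tpu` allocatable; the real implementation may differ:

```python
import math
from typing import Dict

# Assumed GKE node label; not defined in this excerpt.
GKE_TPU_TOPOLOGY_LABEL_KEY = 'cloud.google.com/gke-tpu-topology'


def is_multi_host_tpu_sketch(node_labels: Dict[str, str],
                             chips_per_host: int) -> bool:
    """Heuristic: a TPU slice is multi-host if its topology requires more
    chips than a single node advertises. Illustrative only."""
    topology = node_labels.get(GKE_TPU_TOPOLOGY_LABEL_KEY)
    if topology is None:
        return False
    # Topology strings look like '2x4' or '2x2x4'; their product is the
    # total chip count of the slice.
    total_chips = math.prod(int(dim) for dim in topology.split('x'))
    return total_chips > chips_per_host
```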
22 changes: 19 additions & 3 deletions sky/clouds/kubernetes.py
@@ -362,11 +362,23 @@ def make_deploy_resources_variables(
 
         k8s_acc_label_key = None
         k8s_acc_label_value = None
+        k8s_topology_label_key = None
+        k8s_topology_label_value = None
+        k8s_resource_key = None
+        tpu_requested = False
 
-        # If GPUs are requested, set node label to match the GPU type.
+        # If GPU/TPUs are requested, set node label to match the GPU/TPU type.
         if acc_count > 0 and acc_type is not None:
-            k8s_acc_label_key, k8s_acc_label_value = \
-                kubernetes_utils.get_gpu_label_key_value(context, acc_type)
+            (k8s_acc_label_key, k8s_acc_label_value, k8s_topology_label_key,
+             k8s_topology_label_value) = (
+                 kubernetes_utils.get_accelerator_label_key_value(
+                     context, acc_type, acc_count))
+            if (k8s_acc_label_key ==
+                    kubernetes_utils.GKELabelFormatter.TPU_LABEL_KEY):
+                tpu_requested = True
+                k8s_resource_key = kubernetes_utils.TPU_RESOURCE_KEY
+            else:
+                k8s_resource_key = kubernetes_utils.GPU_RESOURCE_KEY
 
         port_mode = network_utils.get_port_mode(None)
 
@@ -428,6 +440,10 @@ def make_deploy_resources_variables(
             'k8s_skypilot_system_namespace': _SKYPILOT_SYSTEM_NAMESPACE,
             'k8s_spot_label_key': spot_label_key,
             'k8s_spot_label_value': spot_label_value,
+            'tpu_requested': tpu_requested,
+            'k8s_topology_label_key': k8s_topology_label_key,
+            'k8s_topology_label_value': k8s_topology_label_value,
+            'k8s_resource_key': k8s_resource_key,
             'image_id': image_id,
         }
 
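The new deploy variables above are consumed by the Kubernetes pod template at provisioning time. A rough sketch of how they would typically materialize in a pod spec, following GKE's convention of scheduling TPU slices via accelerator and topology node selectors plus a `google.com/tpu` resource request; the helper and container name are illustrative, not the actual template:

```python
from typing import Any, Dict


def render_pod_spec_sketch(deploy_vars: Dict[str, Any],
                           acc_count: int) -> Dict[str, Any]:
    """Illustrative mapping of the new deploy variables into a pod spec."""
    node_selector = {
        deploy_vars['k8s_acc_label_key']: deploy_vars['k8s_acc_label_value'],
    }
    if deploy_vars['tpu_requested']:
        # GKE schedules TPU slices by accelerator type *and* topology, so
        # both node selectors are needed.
        node_selector[deploy_vars['k8s_topology_label_key']] = (
            deploy_vars['k8s_topology_label_value'])
    # 'google.com/tpu' for TPU pods, 'nvidia.com/gpu' for GPU pods.
    resource_key = deploy_vars['k8s_resource_key']
    return {
        'nodeSelector': node_selector,
        'containers': [{
            'name': 'sky-container',  # placeholder name
            'resources': {
                'requests': {resource_key: acc_count},
                'limits': {resource_key: acc_count},
            },
        }],
    }
```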
148 changes: 83 additions & 65 deletions sky/clouds/service_catalog/kubernetes_catalog.py
@@ -104,16 +104,16 @@ def list_accelerators_realtime(
     ) or not kubernetes_utils.check_credentials(context)[0]:
         return {}, {}, {}
 
-    has_gpu = kubernetes_utils.detect_gpu_resource(context)
+    has_gpu = kubernetes_utils.detect_accelerator_resource(context)
     if not has_gpu:
         return {}, {}, {}
 
-    label_formatter, _ = kubernetes_utils.detect_gpu_label_formatter(context)
-    if not label_formatter:
+    lf, _ = kubernetes_utils.detect_gpu_label_formatter(context)
+    if not lf:
         return {}, {}, {}
 
     accelerators_qtys: Set[Tuple[str, int]] = set()
-    key = label_formatter.get_label_key()
+    keys = lf.get_label_keys()
     nodes = kubernetes_utils.get_kubernetes_nodes(context)
     # Get the pods to get the real-time GPU usage
     try:
@@ -134,67 +134,85 @@
     min_quantity_filter = quantity_filter if quantity_filter else 1
 
     for node in nodes:
-        if key in node.metadata.labels:
-            allocated_qty = 0
-            accelerator_name = label_formatter.get_accelerator_from_label_value(
-                node.metadata.labels.get(key))
-
-            # Check if name_filter regex matches the accelerator_name
-            regex_flags = 0 if case_sensitive else re.IGNORECASE
-            if name_filter and not re.match(
-                    name_filter, accelerator_name, flags=regex_flags):
-                continue
-
-            accelerator_count = int(
-                node.status.allocatable.get('nvidia.com/gpu', 0))
-
-            # Generate the GPU quantities for the accelerators
-            if accelerator_name and accelerator_count > 0:
-                count = 1
-                while count <= accelerator_count:
-                    accelerators_qtys.add((accelerator_name, count))
-                    count *= 2
-                # Add the accelerator count if it's not already in the set
-                # (e.g., if there's 12 GPUs, we should have qtys 1, 2, 4, 8, 12)
-                if accelerator_count not in accelerators_qtys:
-                    accelerators_qtys.add((accelerator_name, accelerator_count))
-
-                if accelerator_count >= min_quantity_filter:
-                    quantized_count = (min_quantity_filter *
-                                       (accelerator_count // min_quantity_filter))
-                    if accelerator_name not in total_accelerators_capacity:
-                        total_accelerators_capacity[
-                            accelerator_name] = quantized_count
-                    else:
-                        total_accelerators_capacity[
-                            accelerator_name] += quantized_count
-
-                if pods is None:
-                    # If we can't get the pods, we can't get the GPU usage
-                    total_accelerators_available[accelerator_name] = -1
-                    continue
-
-                for pod in pods:
-                    # Get all the pods running on the node
-                    if (pod.spec.node_name == node.metadata.name and
-                            pod.status.phase in ['Running', 'Pending']):
-                        # Iterate over all the containers in the pod and sum the
-                        # GPU requests
-                        for container in pod.spec.containers:
-                            if container.resources.requests:
-                                allocated_qty += int(
-                                    container.resources.requests.get(
-                                        'nvidia.com/gpu', 0))
-
-                accelerators_available = accelerator_count - allocated_qty
-
-                if accelerator_name not in total_accelerators_available:
-                    total_accelerators_available[accelerator_name] = 0
-                if accelerators_available >= min_quantity_filter:
-                    quantized_availability = min_quantity_filter * (
-                        accelerators_available // min_quantity_filter)
-                    total_accelerators_available[
-                        accelerator_name] += quantized_availability
+        for key in keys:
+            if key in node.metadata.labels:
+                allocated_qty = 0
+                accelerator_name = lf.get_accelerator_from_label_value(
+                    node.metadata.labels.get(key))
+
+                # Exclude multi-host TPUs from being processed.
+                # TODO(Doyoung): Remove the logic when adding support for
+                # multi-host TPUs.
+                if kubernetes_utils.is_multi_host_tpu(node.metadata.labels):
+                    continue
+
+                # Check if name_filter regex matches the accelerator_name
+                regex_flags = 0 if case_sensitive else re.IGNORECASE
+                if name_filter and not re.match(
+                        name_filter, accelerator_name, flags=regex_flags):
+                    continue
+
+                # Generate the accelerator quantities
+                accelerator_count = (
+                    kubernetes_utils.get_node_accelerator_count(
+                        node.status.allocatable))
+
+                if accelerator_name and accelerator_count > 0:
+                    # TPUs are counted in a different way compared to GPUs.
+                    # Multi-node GPUs can be split into smaller units and be
+                    # provisioned, but TPUs are considered as an atomic unit.
+                    if kubernetes_utils.is_tpu_on_gke(accelerator_name):
+                        accelerators_qtys.add(
+                            (accelerator_name, accelerator_count))
+                    else:
+                        count = 1
+                        while count <= accelerator_count:
+                            accelerators_qtys.add((accelerator_name, count))
+                            count *= 2
+                        # Add the accelerator count if it's not already in the
+                        # set (e.g., if there's 12 GPUs, we should have qtys 1,
+                        # 2, 4, 8, 12)
+                        if accelerator_count not in accelerators_qtys:
+                            accelerators_qtys.add(
+                                (accelerator_name, accelerator_count))
+
+                    if accelerator_count >= min_quantity_filter:
+                        quantized_count = (
+                            min_quantity_filter *
+                            (accelerator_count // min_quantity_filter))
+                        if accelerator_name not in total_accelerators_capacity:
+                            total_accelerators_capacity[
+                                accelerator_name] = quantized_count
+                        else:
+                            total_accelerators_capacity[
+                                accelerator_name] += quantized_count
+
+                    if pods is None:
+                        # If we can't get the pods, we can't get the GPU usage
+                        total_accelerators_available[accelerator_name] = -1
+                        continue
+
+                    for pod in pods:
+                        # Get all the pods running on the node
+                        if (pod.spec.node_name == node.metadata.name and
+                                pod.status.phase in ['Running', 'Pending']):
+                            # Iterate over all the containers in the pod and sum
+                            # the GPU requests
+                            for container in pod.spec.containers:
+                                if container.resources.requests:
+                                    allocated_qty += (
+                                        kubernetes_utils.get_node_accelerator_count(
+                                            container.resources.requests))
+
+                    accelerators_available = accelerator_count - allocated_qty
+
+                    if accelerator_name not in total_accelerators_available:
+                        total_accelerators_available[accelerator_name] = 0
+                    if accelerators_available >= min_quantity_filter:
+                        quantized_availability = min_quantity_filter * (
+                            accelerators_available // min_quantity_filter)
+                        total_accelerators_available[
+                            accelerator_name] += quantized_availability
 
     result = []
 
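`get_node_accelerator_count` is applied above to both `node.status.allocatable` and each container's `resources.requests`. A minimal sketch, assuming a node or container advertises at most one of the two resource keys named in the commit message (`nvidia.com/gpu`, `google.com/tpu`):

```python
from typing import Dict

GPU_RESOURCE_KEY = 'nvidia.com/gpu'
TPU_RESOURCE_KEY = 'google.com/tpu'


def get_node_accelerator_count_sketch(resources: Dict[str, str]) -> int:
    """Return the GPU or TPU chip count advertised in a resource dict."""
    assert not (GPU_RESOURCE_KEY in resources and
                TPU_RESOURCE_KEY in resources), resources
    for key in (GPU_RESOURCE_KEY, TPU_RESOURCE_KEY):
        if key in resources:
            return int(resources[key])
    return 0
```

With that in place, the loop above enumerates power-of-two quantities (plus the total) for GPU nodes, but only the single atomic chip count for a GKE TPU slice node.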
6 changes: 5 additions & 1 deletion sky/clouds/utils/gcp_utils.py
@@ -17,6 +17,7 @@
 from sky import sky_logging
 from sky import skypilot_config
 from sky.provision.gcp import constants
+from sky.provision.kubernetes import utils as kubernetes_utils
 from sky.utils import subprocess_utils
 
 if typing.TYPE_CHECKING:
@@ -35,7 +36,10 @@ def is_tpu(resources: Optional['resources_lib.Resources']) -> bool:
 def is_tpu_vm(resources: Optional['resources_lib.Resources']) -> bool:
     if not is_tpu(resources):
         return False
-    assert resources is not None
+    assert (resources is not None and len(resources.accelerators) == 1)
+    acc, _ = list(resources.accelerators.items())[0]
+    if kubernetes_utils.is_tpu_on_gke(acc):
+        return False
     if resources.accelerator_args is None:
         return True
     return resources.accelerator_args.get('tpu_vm', True)
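`is_tpu_on_gke` lets `is_tpu_vm` return False for Kubernetes-backed TPUs, which are not GCP TPU VMs. A hedged sketch: the name set mirrors GKE's `cloud.google.com/gke-tpu-accelerator` label values but is illustrative, and the authoritative check lives in `sky/provision/kubernetes/utils.py`:

```python
# Hypothetical set of GKE TPU slice accelerator names; illustrative only.
_GKE_TPU_ACCELERATOR_NAMES = {
    'tpu-v4-podslice',
    'tpu-v5-lite-device',
    'tpu-v5-lite-podslice',
    'tpu-v5p-slice',
}


def is_tpu_on_gke_sketch(accelerator_name: str) -> bool:
    """True if the accelerator name looks like a GKE TPU slice."""
    return accelerator_name in _GKE_TPU_ACCELERATOR_NAMES
```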
(Diffs for the remaining changed files are not rendered in this view.)