From e6b975d505791ebafe4aec3ffea2a2944c1b906c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 29 Apr 2024 23:27:03 -0700 Subject: [PATCH 01/26] wip --- sky/cli.py | 97 ++++++++++++------- sky/clouds/service_catalog/__init__.py | 32 ++++++ .../service_catalog/kubernetes_catalog.py | 75 ++++++++++++-- sky/provision/kubernetes/utils.py | 12 +++ 4 files changed, 170 insertions(+), 46 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 72667cffc97..7a9167cd055 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -2902,6 +2902,10 @@ def _list_to_str(lst): def _output(): gpu_table = log_utils.create_table( ['COMMON_GPU', 'AVAILABLE_QUANTITIES']) + realtime_gpu_table = log_utils.create_table([ + 'COMMON_GPU', 'REQUESTABLE_QUANTITIES', 'TOTAL_GPUS', + 'AVAILABLE_GPUS' + ]) tpu_table = log_utils.create_table( ['GOOGLE_TPU', 'AVAILABLE_QUANTITIES']) other_table = log_utils.create_table( @@ -2910,49 +2914,68 @@ def _output(): name, quantity = None, None if accelerator_str is None: - result = service_catalog.list_accelerator_counts( - gpus_only=True, - clouds=cloud, - region_filter=region, - ) - if (len(result) == 0 and cloud_obj is not None and - cloud_obj.is_same_cloud(clouds.Kubernetes())): - yield kubernetes_utils.NO_GPU_ERROR_MESSAGE - return - - # "Common" GPUs - # If cloud is kubernetes, we want to show all GPUs here, even if - # they are not listed as common in SkyPilot. + # If cloud is kubernetes, we want to show real-time capacity if (cloud_obj is not None and cloud_obj.is_same_cloud(clouds.Kubernetes())): - for gpu, _ in sorted(result.items()): - gpu_table.add_row([gpu, _list_to_str(result.pop(gpu))]) - else: - for gpu in service_catalog.get_common_gpus(): - if gpu in result: - gpu_table.add_row([gpu, _list_to_str(result.pop(gpu))]) - yield from gpu_table.get_string() - - # Google TPUs - for tpu in service_catalog.get_tpus(): - if tpu in result: - tpu_table.add_row([tpu, _list_to_str(result.pop(tpu))]) - if len(tpu_table.get_string()) > 0: - yield '\n\n' - yield from tpu_table.get_string() - - # Other GPUs - if show_all: - yield '\n\n' - for gpu, qty in sorted(result.items()): - other_table.add_row([gpu, _list_to_str(qty)]) - yield from other_table.get_string() - yield '\n\n' - else: + counts, capacity, available = service_catalog.list_accelerator_realtime( + gpus_only=True, clouds=cloud, region_filter=region) + assert (set(counts.keys()) == set(capacity.keys()) == set( + available.keys())), ('Keys of counts, capacity, ' + 'and available must be same.') + if len(counts) == 0: + yield kubernetes_utils.NO_GPU_ERROR_MESSAGE + return + for gpu, _ in sorted(counts.items()): + realtime_gpu_table.add_row([ + gpu, + _list_to_str(counts.pop(gpu)), capacity[gpu], + available[gpu] + ]) + yield from realtime_gpu_table.get_string() yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') return + else: + result = service_catalog.list_accelerator_counts( + gpus_only=True, + clouds=cloud, + region_filter=region, + ) + + # "Common" GPUs + # If cloud is kubernetes, we want to show all GPUs here, even if + # they are not listed as common in SkyPilot. 
+ if (cloud_obj is not None and + cloud_obj.is_same_cloud(clouds.Kubernetes())): + for gpu, _ in sorted(result.items()): + gpu_table.add_row([gpu, _list_to_str(result.pop(gpu))]) + else: + for gpu in service_catalog.get_common_gpus(): + if gpu in result: + gpu_table.add_row( + [gpu, _list_to_str(result.pop(gpu))]) + yield from gpu_table.get_string() + + # Google TPUs + for tpu in service_catalog.get_tpus(): + if tpu in result: + tpu_table.add_row([tpu, _list_to_str(result.pop(tpu))]) + if len(tpu_table.get_string()) > 0: + yield '\n\n' + yield from tpu_table.get_string() + + # Other GPUs + if show_all: + yield '\n\n' + for gpu, qty in sorted(result.items()): + other_table.add_row([gpu, _list_to_str(qty)]) + yield from other_table.get_string() + yield '\n\n' + else: + yield ('\n\nHint: use -a/--all to see all accelerators ' + '(including non-common ones) and pricing.') + return else: # Parse accelerator string accelerator_split = accelerator_str.split(':') diff --git a/sky/clouds/service_catalog/__init__.py b/sky/clouds/service_catalog/__init__.py index d380cce6757..b40a56bf672 100644 --- a/sky/clouds/service_catalog/__init__.py +++ b/sky/clouds/service_catalog/__init__.py @@ -117,6 +117,38 @@ def list_accelerator_counts( return ret +def list_accelerator_realtime( + gpus_only: bool = True, + name_filter: Optional[str] = None, + region_filter: Optional[str] = None, + quantity_filter: Optional[int] = None, + clouds: CloudFilter = None, +) -> Tuple[Dict[str, List[int]], Dict[str, int], Dict[str, int]]: + """List all accelerators offered by Sky and their realtime availability. + + Useful for fixed size clusters. + + Returns: + """ + qtys_map, total_accelerators_capacity, total_accelerators_available = ( + _map_clouds_catalog( + clouds, + 'list_accelerators_realtime', + gpus_only, + name_filter, + region_filter, + quantity_filter, + all_regions=False, + require_price=False)) + accelerator_counts: Dict[str, List[int]] = collections.defaultdict(list) + for gpu, items in qtys_map.items(): + for item in items: + accelerator_counts[gpu].append(item.accelerator_count) + accelerator_counts[gpu] = sorted(accelerator_counts[gpu]) + return (accelerator_counts, total_accelerators_capacity, + total_accelerators_available) + + def instance_type_exists(instance_type: str, clouds: CloudFilter = None) -> bool: """Check the existence of a instance type.""" diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index bd44847016e..436404369d2 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -46,38 +46,91 @@ def list_accelerators( case_sensitive: bool = True, all_regions: bool = False, require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]: + return list_accelerators_realtime(gpus_only, name_filter, region_filter, + quantity_filter, case_sensitive, + all_regions, require_price)[0] + + +def list_accelerators_realtime( + gpus_only: bool, + name_filter: Optional[str], + region_filter: Optional[str], + quantity_filter: Optional[int], + case_sensitive: bool = True, + all_regions: bool = False, + require_price: bool = True +) -> Tuple[Dict[str, List[common.InstanceTypeInfo]], Dict[str, int], Dict[str, + int]]: del all_regions, require_price # Unused. 
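
The new `list_accelerator_realtime` helper above returns three parallel dictionaries keyed by accelerator name: requestable quantities, total capacity, and current availability. A rough sketch of how the first of these is flattened out of the per-cloud catalog entries, using a minimal stand-in for `common.InstanceTypeInfo` and made-up values:

    import collections
    from typing import Dict, List, NamedTuple

    class _Entry(NamedTuple):  # stand-in for common.InstanceTypeInfo
        accelerator_name: str
        accelerator_count: int

    # One catalog entry per (GPU, count) pair, as returned by the per-cloud
    # list_accelerators_realtime implementation. Values are hypothetical.
    qtys_map: Dict[str, List[_Entry]] = {
        'A100': [_Entry('A100', 4), _Entry('A100', 1), _Entry('A100', 2)],
    }

    accelerator_counts: Dict[str, List[int]] = collections.defaultdict(list)
    for gpu, items in qtys_map.items():
        for item in items:
            accelerator_counts[gpu].append(item.accelerator_count)
        accelerator_counts[gpu] = sorted(accelerator_counts[gpu])

    print(dict(accelerator_counts))  # {'A100': [1, 2, 4]}
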
k8s_cloud = Kubernetes() if not any( map(k8s_cloud.is_same_cloud, sky_check.get_cached_enabled_clouds_or_refresh()) ) or not kubernetes_utils.check_credentials()[0]: - return {} + return {}, {}, {} has_gpu = kubernetes_utils.detect_gpu_resource() if not has_gpu: - return {} + return {}, {}, {} label_formatter, _ = kubernetes_utils.detect_gpu_label_formatter() if not label_formatter: - return {} + return {}, {}, {} - accelerators: Set[Tuple[str, int]] = set() + accelerators_qtys: Set[Tuple[str, int]] = set() key = label_formatter.get_label_key() nodes = kubernetes_utils.get_kubernetes_nodes() + # Get the pods to get the real-time GPU usage + pods = kubernetes_utils.get_kubernetes_pods() + # Total number of GPUs in the cluster + total_accelerators_capacity: Dict[str, int] = {} + # Total number of GPUs currently available in the cluster + total_accelerators_available: Dict[str, int] = {} + for node in nodes: if key in node.metadata.labels: + allocated_qty = 0 accelerator_name = label_formatter.get_accelerator_from_label_value( node.metadata.labels.get(key)) accelerator_count = int( node.status.allocatable.get('nvidia.com/gpu', 0)) + # Generate the GPU quantities for the accelerators if accelerator_name and accelerator_count > 0: for count in range(1, accelerator_count + 1): - accelerators.add((accelerator_name, count)) + accelerators_qtys.add((accelerator_name, count)) + + for pod in pods: + # Get all the pods running on the node + if (pod.spec.node_name == node.metadata.name and + pod.status.phase in ['Running', 'Pending']): + # Iterate over all the containers in the pod and sum the + # GPU requests + for container in pod.spec.containers: + if container.resources.requests: + allocated_qty += int( + container.resources.requests.get( + 'nvidia.com/gpu', 0)) + + accelerators_availabe = accelerator_count - allocated_qty + + if accelerator_name not in total_accelerators_capacity: + total_accelerators_capacity[ + accelerator_name] = accelerator_count + else: + total_accelerators_capacity[ + accelerator_name] += accelerator_count + if accelerator_name not in total_accelerators_available: + total_accelerators_available[ + accelerator_name] = accelerators_availabe + else: + total_accelerators_available[ + accelerator_name] += accelerators_availabe result = [] - for accelerator_name, accelerator_count in accelerators: + + # Generate dataframe for common.list_accelerators_impl + for accelerator_name, accelerator_count in accelerators_qtys: result.append( common.InstanceTypeInfo(cloud='Kubernetes', instance_type=None, @@ -98,9 +151,13 @@ def list_accelerators( ]) df['GpuInfo'] = True - return common.list_accelerators_impl('Kubernetes', df, gpus_only, - name_filter, region_filter, - quantity_filter, case_sensitive) + qtys_map = common.list_accelerators_impl('Kubernetes', df, gpus_only, + name_filter, region_filter, + quantity_filter, case_sensitive) + + # TODO(romilb): Add filtering for total_accelerators_capacity and total_accelerators_available + + return qtys_map, total_accelerators_capacity, total_accelerators_available def validate_region_zone( diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 1cb31328d50..8a9cb2ac379 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -251,6 +251,18 @@ def get_kubernetes_nodes() -> List[Any]: return nodes +def get_kubernetes_pods() -> List[Any]: + try: + ns = get_current_kube_config_context_namespace() + pods = kubernetes.core_api().list_namespaced_pod( + ns, 
_request_timeout=kubernetes.API_TIMEOUT).items + except kubernetes.max_retry_error(): + raise exceptions.ResourcesUnavailableError( + 'Timed out when trying to get pod info from Kubernetes cluster. ' + 'Please check if the cluster is healthy and retry.') from None + return pods + + def check_instance_fits(instance: str) -> Tuple[bool, Optional[str]]: """Checks if the instance fits on the Kubernetes cluster. From a6b5bfc4965f139852c0f540e24445a9d6b13c52 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 30 Apr 2024 12:09:26 -0700 Subject: [PATCH 02/26] filtering support --- sky/cli.py | 230 ++++++++++-------- sky/clouds/service_catalog/__init__.py | 11 +- .../service_catalog/kubernetes_catalog.py | 46 ++-- sky/provision/kubernetes/utils.py | 4 +- 4 files changed, 174 insertions(+), 117 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 7a9167cd055..57f468166bf 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -2863,6 +2863,15 @@ def show_gpus( To show all regions for a specified accelerator, use ``sky show-gpus --all-regions``. + If ``--region`` or ``--all-regions`` is not specified, the price displayed + for each instance type is the lowest across all regions for both on-demand + and spot instances. There may be multiple regions with the same lowest + price. + + If ``--cloud kubernetes`` is specified, it will show the maximum quantities + of the GPU available on a single node and the real-time availability of + the GPU across all nodes in the Kubernetes cluster. + Definitions of certain fields: * ``DEVICE_MEM``: Memory of a single device; does not depend on the device @@ -2870,10 +2879,15 @@ def show_gpus( * ``HOST_MEM``: Memory of the host instance (VM). - If ``--region`` or ``--all-regions`` is not specified, the price displayed - for each instance type is the lowest across all regions for both on-demand - and spot instances. There may be multiple regions with the same lowest - price. + * ``QTY_PER_NODE`` (Kubernetes only): Maximum quantity of the GPU available + on a single node. + + * ``TOTAL_GPUS`` (Kubernetes only): Total number of GPUs available in the + Kubernetes cluster. + + * ``AVAILABLE_GPUS`` (Kubernetes only): Number of currently available GPUs + in the Kubernetes cluster. This is fetched in real-time and may change + when other users are using the cluster. """ # validation for the --region flag if region is not None and cloud is None: @@ -2899,13 +2913,48 @@ def show_gpus( def _list_to_str(lst): return ', '.join([str(e) for e in lst]) - def _output(): - gpu_table = log_utils.create_table( - ['COMMON_GPU', 'AVAILABLE_QUANTITIES']) + def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, + quantity_filter: Optional[int] = None): + if quantity_filter: + qty_header = 'QTY_FILTER' + else: + qty_header = 'QTY_PER_NODE' realtime_gpu_table = log_utils.create_table([ - 'COMMON_GPU', 'REQUESTABLE_QUANTITIES', 'TOTAL_GPUS', + 'GPU', qty_header, 'TOTAL_GPUS', 'AVAILABLE_GPUS' ]) + counts, capacity, available = service_catalog.list_accelerator_realtime( + gpus_only=True, + clouds=cloud, + name_filter=name_filter, + quantity_filter=quantity_filter, + region_filter=region) + assert (set(counts.keys()) == set(capacity.keys()) == set( + available.keys())), ('Keys of counts, capacity, ' + 'and available must be same.') + if len(counts) == 0: + gpu_info_msg = '' + debug_msg = 'To further debug, run: sky check.' 
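
The catalog code added in the first commit derives real-time availability purely from the Kubernetes API: per-node capacity comes from the allocatable `nvidia.com/gpu` count, and usage is the sum of GPU requests of Running/Pending pods scheduled on that node. Below is a condensed sketch of that accounting written against the official `kubernetes` Python client directly; the `realtime_gpu_counts` name and the `skypilot.co/accelerator` label key are assumptions for illustration, whereas the patch resolves the real label key through its GPU label formatters and its own Kubernetes adaptor.

    from collections import defaultdict
    from kubernetes import client, config

    def realtime_gpu_counts(namespace: str = 'default'):
        """Per-GPU-type (capacity, available): node allocatable minus pod requests."""
        config.load_kube_config()
        core = client.CoreV1Api()
        capacity: dict = defaultdict(int)
        available: dict = defaultdict(int)

        nodes = core.list_node().items
        pods = core.list_namespaced_pod(namespace).items

        for node in nodes:
            labels = node.metadata.labels or {}
            gpu_name = labels.get('skypilot.co/accelerator')  # assumed label key
            node_gpus = int(node.status.allocatable.get('nvidia.com/gpu', 0))
            if not gpu_name or node_gpus == 0:
                continue
            allocated = 0
            for pod in pods:
                # Only pods scheduled on this node and holding (or about to
                # hold) resources count against its capacity.
                if (pod.spec.node_name == node.metadata.name and
                        pod.status.phase in ('Running', 'Pending')):
                    for container in pod.spec.containers:
                        requests = container.resources.requests or {}
                        allocated += int(requests.get('nvidia.com/gpu', 0))
            capacity[gpu_name] += node_gpus
            available[gpu_name] += node_gpus - allocated
        return dict(capacity), dict(available)
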
+ if name_filter is not None: + gpu_info_msg = f' matching name {name_filter!r}' + debug_msg = ('To list all available accelerators, ' + 'run: sky show-gpus --cloud kubernetes.') + if quantity_filter is not None: + gpu_info_msg += f' with quantity {quantity_filter}' + err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format(gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) + yield err_msg + return + for gpu, _ in sorted(counts.items()): + realtime_gpu_table.add_row([ + gpu, + _list_to_str(counts.pop(gpu)), capacity[gpu], + available[gpu] + ]) + yield from realtime_gpu_table.get_string() + + def _output(): + gpu_table = log_utils.create_table( + ['COMMON_GPU', 'AVAILABLE_QUANTITIES']) tpu_table = log_utils.create_table( ['GOOGLE_TPU', 'AVAILABLE_QUANTITIES']) other_table = log_utils.create_table( @@ -2914,27 +2963,10 @@ def _output(): name, quantity = None, None if accelerator_str is None: - # If cloud is kubernetes, we want to show real-time capacity if (cloud_obj is not None and cloud_obj.is_same_cloud(clouds.Kubernetes())): - counts, capacity, available = service_catalog.list_accelerator_realtime( - gpus_only=True, clouds=cloud, region_filter=region) - assert (set(counts.keys()) == set(capacity.keys()) == set( - available.keys())), ('Keys of counts, capacity, ' - 'and available must be same.') - if len(counts) == 0: - yield kubernetes_utils.NO_GPU_ERROR_MESSAGE - return - for gpu, _ in sorted(counts.items()): - realtime_gpu_table.add_row([ - gpu, - _list_to_str(counts.pop(gpu)), capacity[gpu], - available[gpu] - ]) - yield from realtime_gpu_table.get_string() - yield ('\n\nHint: use -a/--all to see all accelerators ' - '(including non-common ones) and pricing.') + yield from _kubernetes_realtime_gpu_output() return else: result = service_catalog.list_accelerator_counts( @@ -2998,81 +3030,85 @@ def _output(): else: name, quantity = accelerator_str, None - # Case-sensitive - result = service_catalog.list_accelerators(gpus_only=True, - name_filter=name, - quantity_filter=quantity, - region_filter=region, - clouds=cloud, - case_sensitive=False, - all_regions=all_regions) - - if len(result) == 0: - if cloud == 'kubernetes': - yield kubernetes_utils.NO_GPU_ERROR_MESSAGE - return - - quantity_str = (f' with requested quantity {quantity}' - if quantity else '') - yield f'Resources \'{name}\'{quantity_str} not found. ' - yield 'Try \'sky show-gpus --all\' ' - yield 'to show available accelerators.' + if (cloud_obj is not None and + cloud_obj.is_same_cloud(clouds.Kubernetes())): + # Get real-time availability of GPUs for Kubernetes + yield from _kubernetes_realtime_gpu_output(name_filter=name, + quantity_filter=quantity) return + else: + # For clouds other than Kubernetes, get the accelerator details + # Case-sensitive + result = service_catalog.list_accelerators(gpus_only=True, + name_filter=name, + quantity_filter=quantity, + region_filter=region, + clouds=cloud, + case_sensitive=False, + all_regions=all_regions) + + if len(result) == 0: + quantity_str = (f' with requested quantity {quantity}' + if quantity else '') + yield f'Resources \'{name}\'{quantity_str} not found. ' + yield 'Try \'sky show-gpus --all\' ' + yield 'to show available accelerators.' 
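
One detail of the row-building loop above that is easy to misread: `counts` is mutated with `pop()` while being iterated, which is safe only because `sorted(counts.items())` materializes a separate list first. A small illustration with made-up values:

    counts = {'T4': [1], 'A100': [1, 2, 4]}   # requestable quantities per GPU
    capacity = {'T4': 4, 'A100': 8}           # total GPUs in the cluster
    available = {'T4': 4, 'A100': 6}          # GPUs currently free

    rows = []
    for gpu, _ in sorted(counts.items()):     # sorted() copies, so pop() is safe
        rows.append([gpu, ', '.join(str(c) for c in counts.pop(gpu)),
                     capacity[gpu], available[gpu]])
    print(rows)
    # [['A100', '1, 2, 4', 8, 6], ['T4', '1', 4, 4]]
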
+ return - for i, (gpu, items) in enumerate(result.items()): - accelerator_table_headers = [ - 'GPU', - 'QTY', - 'CLOUD', - 'INSTANCE_TYPE', - 'DEVICE_MEM', - 'vCPUs', - 'HOST_MEM', - 'HOURLY_PRICE', - 'HOURLY_SPOT_PRICE', - ] - if not show_all: - accelerator_table_headers.append('REGION') - accelerator_table = log_utils.create_table( - accelerator_table_headers) - for item in items: - instance_type_str = item.instance_type if not pd.isna( - item.instance_type) else '(attachable)' - cpu_count = item.cpu_count - if pd.isna(cpu_count): - cpu_str = '-' - elif isinstance(cpu_count, (float, int)): - if int(cpu_count) == cpu_count: - cpu_str = str(int(cpu_count)) - else: - cpu_str = f'{cpu_count:.1f}' - device_memory_str = (f'{item.device_memory:.0f}GB' if - not pd.isna(item.device_memory) else '-') - host_memory_str = f'{item.memory:.0f}GB' if not pd.isna( - item.memory) else '-' - price_str = f'$ {item.price:.3f}' if not pd.isna( - item.price) else '-' - spot_price_str = f'$ {item.spot_price:.3f}' if not pd.isna( - item.spot_price) else '-' - region_str = item.region if not pd.isna(item.region) else '-' - accelerator_table_vals = [ - item.accelerator_name, - item.accelerator_count, - item.cloud, - instance_type_str, - device_memory_str, - cpu_str, - host_memory_str, - price_str, - spot_price_str, + for i, (gpu, items) in enumerate(result.items()): + accelerator_table_headers = [ + 'GPU', + 'QTY', + 'CLOUD', + 'INSTANCE_TYPE', + 'DEVICE_MEM', + 'vCPUs', + 'HOST_MEM', + 'HOURLY_PRICE', + 'HOURLY_SPOT_PRICE', ] if not show_all: - accelerator_table_vals.append(region_str) - accelerator_table.add_row(accelerator_table_vals) - - if i != 0: - yield '\n\n' - yield from accelerator_table.get_string() + accelerator_table_headers.append('REGION') + accelerator_table = log_utils.create_table( + accelerator_table_headers) + for item in items: + instance_type_str = item.instance_type if not pd.isna( + item.instance_type) else '(attachable)' + cpu_count = item.cpu_count + if pd.isna(cpu_count): + cpu_str = '-' + elif isinstance(cpu_count, (float, int)): + if int(cpu_count) == cpu_count: + cpu_str = str(int(cpu_count)) + else: + cpu_str = f'{cpu_count:.1f}' + device_memory_str = (f'{item.device_memory:.0f}GB' if + not pd.isna(item.device_memory) else '-') + host_memory_str = f'{item.memory:.0f}GB' if not pd.isna( + item.memory) else '-' + price_str = f'$ {item.price:.3f}' if not pd.isna( + item.price) else '-' + spot_price_str = f'$ {item.spot_price:.3f}' if not pd.isna( + item.spot_price) else '-' + region_str = item.region if not pd.isna(item.region) else '-' + accelerator_table_vals = [ + item.accelerator_name, + item.accelerator_count, + item.cloud, + instance_type_str, + device_memory_str, + cpu_str, + host_memory_str, + price_str, + spot_price_str, + ] + if not show_all: + accelerator_table_vals.append(region_str) + accelerator_table.add_row(accelerator_table_vals) + + if i != 0: + yield '\n\n' + yield from accelerator_table.get_string() if show_all: click.echo_via_pager(_output()) diff --git a/sky/clouds/service_catalog/__init__.py b/sky/clouds/service_catalog/__init__.py index b40a56bf672..11063623f35 100644 --- a/sky/clouds/service_catalog/__init__.py +++ b/sky/clouds/service_catalog/__init__.py @@ -124,11 +124,18 @@ def list_accelerator_realtime( quantity_filter: Optional[int] = None, clouds: CloudFilter = None, ) -> Tuple[Dict[str, List[int]], Dict[str, int], Dict[str, int]]: - """List all accelerators offered by Sky and their realtime availability. 
+ """List all accelerators offered by Sky with their realtime availability. - Useful for fixed size clusters. + Realtime availability is the total number of accelerators in the cluster + and number of accelerators available at the time of the call. + + Used for fixed size cluster settings, such as Kubernetes. Returns: + A tuple of three dictionaries mapping canonical accelerator names to: + - A list of available counts. (e.g., [1, 2, 4]) + - Total number of accelerators in the cluster (capacity). + - Number of accelerators available at the time of call (availability). """ qtys_map, total_accelerators_capacity, total_accelerators_available = ( _map_clouds_catalog( diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index 436404369d2..a7b0a7296ba 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -3,6 +3,7 @@ Kubernetes does not require a catalog of instances, but we need an image catalog mapping SkyPilot image tags to corresponding container image tags. """ +import re import typing from typing import Dict, List, Optional, Set, Tuple @@ -86,12 +87,18 @@ def list_accelerators_realtime( total_accelerators_capacity: Dict[str, int] = {} # Total number of GPUs currently available in the cluster total_accelerators_available: Dict[str, int] = {} + min_quantity_filter = quantity_filter if quantity_filter else 1 for node in nodes: if key in node.metadata.labels: allocated_qty = 0 accelerator_name = label_formatter.get_accelerator_from_label_value( node.metadata.labels.get(key)) + + # Check if name_filter regex matches the accelerator_name + if name_filter and not re.match(name_filter, accelerator_name): + continue + accelerator_count = int( node.status.allocatable.get('nvidia.com/gpu', 0)) @@ -112,20 +119,27 @@ def list_accelerators_realtime( container.resources.requests.get( 'nvidia.com/gpu', 0)) - accelerators_availabe = accelerator_count - allocated_qty - - if accelerator_name not in total_accelerators_capacity: - total_accelerators_capacity[ - accelerator_name] = accelerator_count - else: - total_accelerators_capacity[ - accelerator_name] += accelerator_count - if accelerator_name not in total_accelerators_available: - total_accelerators_available[ - accelerator_name] = accelerators_availabe - else: - total_accelerators_available[ - accelerator_name] += accelerators_availabe + accelerators_available = accelerator_count - allocated_qty + + if accelerator_count >= min_quantity_filter: + quantized_count = (min_quantity_filter * + (accelerator_count//min_quantity_filter)) + if accelerator_name not in total_accelerators_capacity: + total_accelerators_capacity[ + accelerator_name] = quantized_count + else: + total_accelerators_capacity[ + accelerator_name] += quantized_count + + if accelerators_available >= min_quantity_filter: + quantized_availability = min_quantity_filter * ( + accelerators_available // min_quantity_filter) + if accelerator_name not in total_accelerators_available: + total_accelerators_available[ + accelerator_name] = quantized_availability + else: + total_accelerators_available[ + accelerator_name] += quantized_availability result = [] @@ -151,12 +165,12 @@ def list_accelerators_realtime( ]) df['GpuInfo'] = True + # Use common.list_accelerators_impl to get InstanceTypeInfo objects used + # by sky show-gpus when cloud is not specified. 
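
The quantity filtering above rounds each node's contribution down to a multiple of the requested quantity, so the totals only count GPUs that could actually satisfy a request of that size. A small worked example (the `quantize` helper name is only for illustration):

    def quantize(count: int, min_quantity_filter: int) -> int:
        # Mirrors: min_quantity_filter * (count // min_quantity_filter)
        return min_quantity_filter * (count // min_quantity_filter)

    # With a quantity filter of 4, a node with 7 free GPUs can serve only one
    # 4-GPU request, so it contributes 4 (not 7) to the filtered totals, and a
    # node with 3 free GPUs contributes nothing.
    assert quantize(7, 4) == 4
    assert quantize(8, 4) == 8
    assert quantize(3, 4) == 0
    assert quantize(5, 1) == 5   # no filter behaves like a filter of 1
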
qtys_map = common.list_accelerators_impl('Kubernetes', df, gpus_only, name_filter, region_filter, quantity_filter, case_sensitive) - # TODO(romilb): Add filtering for total_accelerators_capacity and total_accelerators_available - return qtys_map, total_accelerators_capacity, total_accelerators_available diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 8a9cb2ac379..2fef2ea0552 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -30,10 +30,10 @@ 'T': 2**40, 'P': 2**50, } -NO_GPU_ERROR_MESSAGE = 'No GPUs found in Kubernetes cluster. \ +NO_GPU_ERROR_MESSAGE = 'No GPUs{gpu_info_msg} found in Kubernetes cluster. \ If your cluster contains GPUs, make sure nvidia.com/gpu resource is available on the nodes and the node labels for identifying GPUs \ (e.g., skypilot.co/accelerator) are setup correctly. \ -To further debug, run: sky check.' +{debug_msg}' # TODO(romilb): Add links to docs for configuration instructions when ready. ENDPOINTS_DEBUG_MESSAGE = ('Additionally, make sure your {endpoint_type} ' From 13461597f38a4594d9f9d0bcae80657cf170c229 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 30 Apr 2024 12:11:30 -0700 Subject: [PATCH 03/26] lint --- sky/cli.py | 20 +++++++++---------- sky/clouds/service_catalog/__init__.py | 17 ++++++++-------- .../service_catalog/kubernetes_catalog.py | 4 ++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 57f468166bf..97833eaa6e2 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -2919,10 +2919,8 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, qty_header = 'QTY_FILTER' else: qty_header = 'QTY_PER_NODE' - realtime_gpu_table = log_utils.create_table([ - 'GPU', qty_header, 'TOTAL_GPUS', - 'AVAILABLE_GPUS' - ]) + realtime_gpu_table = log_utils.create_table( + ['GPU', qty_header, 'TOTAL_GPUS', 'AVAILABLE_GPUS']) counts, capacity, available = service_catalog.list_accelerator_realtime( gpus_only=True, clouds=cloud, @@ -2941,14 +2939,14 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, 'run: sky show-gpus --cloud kubernetes.') if quantity_filter is not None: gpu_info_msg += f' with quantity {quantity_filter}' - err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format(gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) + err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format( + gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) yield err_msg return for gpu, _ in sorted(counts.items()): realtime_gpu_table.add_row([ gpu, - _list_to_str(counts.pop(gpu)), capacity[gpu], - available[gpu] + _list_to_str(counts.pop(gpu)), capacity[gpu], available[gpu] ]) yield from realtime_gpu_table.get_string() @@ -3082,15 +3080,17 @@ def _output(): cpu_str = str(int(cpu_count)) else: cpu_str = f'{cpu_count:.1f}' - device_memory_str = (f'{item.device_memory:.0f}GB' if - not pd.isna(item.device_memory) else '-') + device_memory_str = (f'{item.device_memory:.0f}GB' + if not pd.isna(item.device_memory) else + '-') host_memory_str = f'{item.memory:.0f}GB' if not pd.isna( item.memory) else '-' price_str = f'$ {item.price:.3f}' if not pd.isna( item.price) else '-' spot_price_str = f'$ {item.spot_price:.3f}' if not pd.isna( item.spot_price) else '-' - region_str = item.region if not pd.isna(item.region) else '-' + region_str = item.region if not pd.isna( + item.region) else '-' accelerator_table_vals = [ item.accelerator_name, item.accelerator_count, diff --git a/sky/clouds/service_catalog/__init__.py 
b/sky/clouds/service_catalog/__init__.py index 11063623f35..c654a66aecd 100644 --- a/sky/clouds/service_catalog/__init__.py +++ b/sky/clouds/service_catalog/__init__.py @@ -138,15 +138,14 @@ def list_accelerator_realtime( - Number of accelerators available at the time of call (availability). """ qtys_map, total_accelerators_capacity, total_accelerators_available = ( - _map_clouds_catalog( - clouds, - 'list_accelerators_realtime', - gpus_only, - name_filter, - region_filter, - quantity_filter, - all_regions=False, - require_price=False)) + _map_clouds_catalog(clouds, + 'list_accelerators_realtime', + gpus_only, + name_filter, + region_filter, + quantity_filter, + all_regions=False, + require_price=False)) accelerator_counts: Dict[str, List[int]] = collections.defaultdict(list) for gpu, items in qtys_map.items(): for item in items: diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index a7b0a7296ba..6e43fb62612 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -123,7 +123,7 @@ def list_accelerators_realtime( if accelerator_count >= min_quantity_filter: quantized_count = (min_quantity_filter * - (accelerator_count//min_quantity_filter)) + (accelerator_count // min_quantity_filter)) if accelerator_name not in total_accelerators_capacity: total_accelerators_capacity[ accelerator_name] = quantized_count @@ -133,7 +133,7 @@ def list_accelerators_realtime( if accelerators_available >= min_quantity_filter: quantized_availability = min_quantity_filter * ( - accelerators_available // min_quantity_filter) + accelerators_available // min_quantity_filter) if accelerator_name not in total_accelerators_available: total_accelerators_available[ accelerator_name] = quantized_availability From 6bbbf25a93e584c6b7501b6ca9fdeb7bbf338977 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 30 Apr 2024 12:17:07 -0700 Subject: [PATCH 04/26] update doc --- sky/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index 97833eaa6e2..9254e55f41f 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -2879,7 +2879,7 @@ def show_gpus( * ``HOST_MEM``: Memory of the host instance (VM). - * ``QTY_PER_NODE`` (Kubernetes only): Maximum quantity of the GPU available + * ``QTY_PER_NODE`` (Kubernetes only): GPU quantities that can be requested on a single node. 
* ``TOTAL_GPUS`` (Kubernetes only): Total number of GPUs available in the From a26336522ad8acb9c60ad5832a434404c90b286b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 1 May 2024 19:11:51 -0700 Subject: [PATCH 05/26] rename headers --- sky/cli.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index 9254e55f41f..8a0da4746b7 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -2917,10 +2917,12 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, quantity_filter: Optional[int] = None): if quantity_filter: qty_header = 'QTY_FILTER' + free_header = 'FILTERED_FREE_GPUS' else: qty_header = 'QTY_PER_NODE' + free_header = 'TOTAL_FREE_GPUS' realtime_gpu_table = log_utils.create_table( - ['GPU', qty_header, 'TOTAL_GPUS', 'AVAILABLE_GPUS']) + ['GPU', qty_header, 'TOTAL_GPUS', free_header]) counts, capacity, available = service_catalog.list_accelerator_realtime( gpus_only=True, clouds=cloud, From 0bd06a43c88450789ca941d1045303f14caeea74 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 17 May 2024 13:50:28 -0700 Subject: [PATCH 06/26] comments --- sky/cli.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 6bb00b3d42b..d19e58f2b94 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -2988,7 +2988,7 @@ def show_gpus( * ``TOTAL_GPUS`` (Kubernetes only): Total number of GPUs available in the Kubernetes cluster. - * ``AVAILABLE_GPUS`` (Kubernetes only): Number of currently available GPUs + * ``TOTAL_FREE_GPUS`` (Kubernetes only): Number of currently free GPUs in the Kubernetes cluster. This is fetched in real-time and may change when other users are using the cluster. """ @@ -3047,6 +3047,10 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format( gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) yield err_msg + if kubernetes_utils.get_autoscaler_type() is not None: + # If using autoscaling cluster, show note + yield '\n' + yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return for gpu, _ in sorted(counts.items()): realtime_gpu_table.add_row([ @@ -3082,13 +3086,6 @@ def _output(): region_filter=region, ) - if len(result) == 0 and cloud_is_kubernetes: - yield kubernetes_utils.NO_GPU_ERROR_MESSAGE - if kubernetes_autoscaling: - yield '\n' - yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE - return - # "Common" GPUs for gpu in service_catalog.get_common_gpus(): if gpu in result: @@ -3110,15 +3107,13 @@ def _output(): other_table.add_row([gpu, _list_to_str(qty)]) yield from other_table.get_string() yield '\n\n' - if (cloud_is_kubernetes or - cloud is None) and kubernetes_autoscaling: + if cloud is None and kubernetes_autoscaling: yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE yield '\n\n' else: yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') - if (cloud_is_kubernetes or - cloud is None) and kubernetes_autoscaling: + if cloud is None and kubernetes_autoscaling: yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return else: From 6bf3045ebdb45cc804c5c9a37c1970d05328f824 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 17 May 2024 16:55:05 -0700 Subject: [PATCH 07/26] add TODO --- sky/clouds/service_catalog/kubernetes_catalog.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index 6e43fb62612..b6727d38c02 100644 --- 
a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -47,6 +47,9 @@ def list_accelerators( case_sensitive: bool = True, all_regions: bool = False, require_price: bool = True) -> Dict[str, List[common.InstanceTypeInfo]]: + # TODO(romilb): We should consider putting a lru_cache() with TTL to + # avoid multiple calls to kubernetes API in a short period of time (e.g., + # from the optimizer). return list_accelerators_realtime(gpus_only, name_filter, region_filter, quantity_filter, case_sensitive, all_regions, require_price)[0] From f96032245779820801201c8ed87482b14534e3c1 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 17 May 2024 17:15:05 -0700 Subject: [PATCH 08/26] Add autoscaler note --- sky/cli.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index d19e58f2b94..49c56f94304 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3047,10 +3047,6 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format( gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) yield err_msg - if kubernetes_utils.get_autoscaler_type() is not None: - # If using autoscaling cluster, show note - yield '\n' - yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return for gpu, _ in sorted(counts.items()): realtime_gpu_table.add_row([ @@ -3078,6 +3074,9 @@ def _output(): # If cloud is kubernetes, we want to show real-time capacity if cloud_is_kubernetes: yield from _kubernetes_realtime_gpu_output() + if kubernetes_utils.get_autoscaler_type() is not None: + yield '\n' + yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return result = service_catalog.list_accelerator_counts( From 8878254aa556a996e71eb20886412a1f1d6521f1 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 17:41:24 -0700 Subject: [PATCH 09/26] case sensitive fix --- sky/cli.py | 3 ++- sky/clouds/service_catalog/__init__.py | 2 ++ sky/clouds/service_catalog/kubernetes_catalog.py | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index d07cb3b86b7..622e5c0584d 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3030,8 +3030,9 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, gpus_only=True, clouds=cloud, name_filter=name_filter, + region_filter=region, quantity_filter=quantity_filter, - region_filter=region) + case_sensitive=False) assert (set(counts.keys()) == set(capacity.keys()) == set( available.keys())), ('Keys of counts, capacity, ' 'and available must be same.') diff --git a/sky/clouds/service_catalog/__init__.py b/sky/clouds/service_catalog/__init__.py index c654a66aecd..7479cd77cf7 100644 --- a/sky/clouds/service_catalog/__init__.py +++ b/sky/clouds/service_catalog/__init__.py @@ -123,6 +123,7 @@ def list_accelerator_realtime( region_filter: Optional[str] = None, quantity_filter: Optional[int] = None, clouds: CloudFilter = None, + case_sensitive: bool = True, ) -> Tuple[Dict[str, List[int]], Dict[str, int], Dict[str, int]]: """List all accelerators offered by Sky with their realtime availability. 
@@ -144,6 +145,7 @@ def list_accelerator_realtime( name_filter, region_filter, quantity_filter, + case_sensitive=case_sensitive, all_regions=False, require_price=False)) accelerator_counts: Dict[str, List[int]] = collections.defaultdict(list) diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index b6727d38c02..cba689ae648 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -99,7 +99,8 @@ def list_accelerators_realtime( node.metadata.labels.get(key)) # Check if name_filter regex matches the accelerator_name - if name_filter and not re.match(name_filter, accelerator_name): + regex_flags = 0 if case_sensitive else re.IGNORECASE + if not re.match(name_filter, accelerator_name, flags=regex_flags): continue accelerator_count = int( From 3fe8fc6c579aea9c9cabf6280b727a228bdd62d6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 18:00:38 -0700 Subject: [PATCH 10/26] case sensitive fix --- sky/clouds/service_catalog/kubernetes_catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index cba689ae648..70e1b463460 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -100,7 +100,7 @@ def list_accelerators_realtime( # Check if name_filter regex matches the accelerator_name regex_flags = 0 if case_sensitive else re.IGNORECASE - if not re.match(name_filter, accelerator_name, flags=regex_flags): + if name_filter and not re.match(name_filter, accelerator_name, flags=regex_flags): continue accelerator_count = int( From 2203d6b3ebf68cebf4a4de17aa8f936346acbdbd Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 18:54:29 -0700 Subject: [PATCH 11/26] show kubernetes GPUs in a separate table in sky show-gpus --- sky/cli.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 622e5c0584d..ace46d1f2ce 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3017,7 +3017,10 @@ def _list_to_str(lst): return ', '.join([str(e) for e in lst]) def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, - quantity_filter: Optional[int] = None): + quantity_filter: Optional[int] = None, + gpu_col_name: Optional[str] = None): + if gpu_col_name is None: + gpu_col_name = 'GPU' if quantity_filter: qty_header = 'QTY_FILTER' free_header = 'FILTERED_FREE_GPUS' @@ -3025,10 +3028,10 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, qty_header = 'QTY_PER_NODE' free_header = 'TOTAL_FREE_GPUS' realtime_gpu_table = log_utils.create_table( - ['GPU', qty_header, 'TOTAL_GPUS', free_header]) + [gpu_col_name, qty_header, 'TOTAL_GPUS', free_header]) counts, capacity, available = service_catalog.list_accelerator_realtime( gpus_only=True, - clouds=cloud, + clouds='kubernetes', name_filter=name_filter, region_filter=region, quantity_filter=quantity_filter, @@ -3070,6 +3073,7 @@ def _output(): cloud_is_kubernetes = isinstance(cloud_obj, sky_clouds.Kubernetes) kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type( ) is not None + kubernetes_is_enabled = sky_clouds.cloud_in_iterable(sky_clouds.Kubernetes(), global_user_state.get_cached_enabled_clouds()) if accelerator_str is None: # If cloud is kubernetes, we want to show real-time capacity @@ -3080,9 +3084,15 @@ def _output(): yield 
kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return + # Optimization - do not poll for Kubernetes API for fetching + # common GPUs because that will be fetched later for the table after + # common GPUs. + clouds_to_list = cloud + if cloud is None and not show_all: + clouds_to_list = (c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes') result = service_catalog.list_accelerator_counts( gpus_only=True, - clouds=cloud, + clouds=clouds_to_list, region_filter=region, ) @@ -3092,6 +3102,11 @@ def _output(): gpu_table.add_row([gpu, _list_to_str(result.pop(gpu))]) yield from gpu_table.get_string() + # Kubernetes GPUs with realtime information + if kubernetes_is_enabled: + yield '\n\n' + yield from _kubernetes_realtime_gpu_output(gpu_col_name='KUBERNETES_GPU') + # Google TPUs for tpu in service_catalog.get_tpus(): if tpu in result: @@ -3107,13 +3122,14 @@ def _output(): other_table.add_row([gpu, _list_to_str(qty)]) yield from other_table.get_string() yield '\n\n' - if cloud is None and kubernetes_autoscaling: + if cloud is None and kubernetes_is_enabled and kubernetes_autoscaling: yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE yield '\n\n' else: yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') - if cloud is None and kubernetes_autoscaling: + if cloud is None and kubernetes_is_enabled and kubernetes_autoscaling: + yield '\n' yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return else: From b75e471dd6976412e802232737d1aa981a002a51 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 18:57:25 -0700 Subject: [PATCH 12/26] lint --- sky/cli.py | 16 +++++++++++----- sky/clouds/service_catalog/kubernetes_catalog.py | 3 ++- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index ace46d1f2ce..aa644c51abb 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3073,7 +3073,9 @@ def _output(): cloud_is_kubernetes = isinstance(cloud_obj, sky_clouds.Kubernetes) kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type( ) is not None - kubernetes_is_enabled = sky_clouds.cloud_in_iterable(sky_clouds.Kubernetes(), global_user_state.get_cached_enabled_clouds()) + kubernetes_is_enabled = sky_clouds.cloud_in_iterable( + sky_clouds.Kubernetes(), + global_user_state.get_cached_enabled_clouds()) if accelerator_str is None: # If cloud is kubernetes, we want to show real-time capacity @@ -3089,7 +3091,8 @@ def _output(): # common GPUs. 
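
The name filtering added to the Kubernetes catalog above reduces to a regex match anchored at the start of the accelerator name, case-insensitive unless the caller asks otherwise, and a no-op when no filter is given. Roughly, with `name_matches` as an illustrative helper:

    import re
    from typing import Optional

    def name_matches(name_filter: Optional[str], accelerator_name: str,
                     case_sensitive: bool = True) -> bool:
        # Mirrors: regex_flags = 0 if case_sensitive else re.IGNORECASE
        #          name_filter and not re.match(name_filter, accelerator_name,
        #                                       flags=regex_flags)
        if name_filter is None:
            return True
        flags = 0 if case_sensitive else re.IGNORECASE
        return re.match(name_filter, accelerator_name, flags=flags) is not None

    assert name_matches('a100', 'A100-80GB', case_sensitive=False)  # CLI passes False
    assert not name_matches('a100', 'A100-80GB')                    # catalog default is True
    assert not name_matches('V100', 'A100-80GB', case_sensitive=False)
    assert name_matches(None, 'H100')                               # no filter matches all
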
clouds_to_list = cloud if cloud is None and not show_all: - clouds_to_list = (c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes') + clouds_to_list = ( + c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes') result = service_catalog.list_accelerator_counts( gpus_only=True, clouds=clouds_to_list, @@ -3105,7 +3108,8 @@ def _output(): # Kubernetes GPUs with realtime information if kubernetes_is_enabled: yield '\n\n' - yield from _kubernetes_realtime_gpu_output(gpu_col_name='KUBERNETES_GPU') + yield from _kubernetes_realtime_gpu_output( + gpu_col_name='KUBERNETES_GPU') # Google TPUs for tpu in service_catalog.get_tpus(): @@ -3122,13 +3126,15 @@ def _output(): other_table.add_row([gpu, _list_to_str(qty)]) yield from other_table.get_string() yield '\n\n' - if cloud is None and kubernetes_is_enabled and kubernetes_autoscaling: + if (cloud is None and kubernetes_is_enabled + and kubernetes_autoscaling): yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE yield '\n\n' else: yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') - if cloud is None and kubernetes_is_enabled and kubernetes_autoscaling: + if (cloud is None and kubernetes_is_enabled + and kubernetes_autoscaling): yield '\n' yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index 70e1b463460..602e19b5ff0 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -100,7 +100,8 @@ def list_accelerators_realtime( # Check if name_filter regex matches the accelerator_name regex_flags = 0 if case_sensitive else re.IGNORECASE - if name_filter and not re.match(name_filter, accelerator_name, flags=regex_flags): + if name_filter and not re.match( + name_filter, accelerator_name, flags=regex_flags): continue accelerator_count = int( From ba98957e22918f6d35551fe1c4e1358b29d12470 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 18:58:35 -0700 Subject: [PATCH 13/26] lint --- sky/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index aa644c51abb..e2c1b87f548 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3126,15 +3126,15 @@ def _output(): other_table.add_row([gpu, _list_to_str(qty)]) yield from other_table.get_string() yield '\n\n' - if (cloud is None and kubernetes_is_enabled - and kubernetes_autoscaling): + if (cloud is None and kubernetes_is_enabled and + kubernetes_autoscaling): yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE yield '\n\n' else: yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') - if (cloud is None and kubernetes_is_enabled - and kubernetes_autoscaling): + if (cloud is None and kubernetes_is_enabled and + kubernetes_autoscaling): yield '\n' yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE return From b44b7597f336f9245e9d5fba00b7aad04bad09a5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 19:11:33 -0700 Subject: [PATCH 14/26] fix for non-k8s cloud specified --- sky/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index e2c1b87f548..b26548da412 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3106,7 +3106,7 @@ def _output(): yield from gpu_table.get_string() # Kubernetes GPUs with realtime information - if kubernetes_is_enabled: + if cloud is None and kubernetes_is_enabled: yield '\n\n' yield from 
_kubernetes_realtime_gpu_output( gpu_col_name='KUBERNETES_GPU') From 57cc132ece7ef53208440e3f755bac04d0f24f4f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 19:18:34 -0700 Subject: [PATCH 15/26] fix for region specified with k8s --- sky/cli.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index b26548da412..c7c29586b7e 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3013,6 +3013,15 @@ def show_gpus( if show_all and accelerator_str is not None: raise click.UsageError('--all is only allowed without a GPU name.') + # Kubernetes specific bools + cloud_is_kubernetes = isinstance(cloud_obj, sky_clouds.Kubernetes) + kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type() is not None + kubernetes_is_enabled = sky_clouds.cloud_in_iterable(sky_clouds.Kubernetes(), global_user_state.get_cached_enabled_clouds()) + + if cloud_is_kubernetes and region is not None: + raise click.UsageError( + 'The --region flag cannot be set with --cloud kubernetes.') + def _list_to_str(lst): return ', '.join([str(e) for e in lst]) @@ -3037,8 +3046,10 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, quantity_filter=quantity_filter, case_sensitive=False) assert (set(counts.keys()) == set(capacity.keys()) == set( - available.keys())), ('Keys of counts, capacity, ' - 'and available must be same.') + available.keys())), (f'Keys of counts ({list(counts.keys())}), ' + f'capacity ({list(capacity.keys())}), ' + f'and available ({list(available.keys())}) ' + 'must be same.') if len(counts) == 0: gpu_info_msg = '' debug_msg = 'To further debug, run: sky check.' @@ -3069,14 +3080,6 @@ def _output(): name, quantity = None, None - # Kubernetes specific bools - cloud_is_kubernetes = isinstance(cloud_obj, sky_clouds.Kubernetes) - kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type( - ) is not None - kubernetes_is_enabled = sky_clouds.cloud_in_iterable( - sky_clouds.Kubernetes(), - global_user_state.get_cached_enabled_clouds()) - if accelerator_str is None: # If cloud is kubernetes, we want to show real-time capacity if cloud_is_kubernetes: From 46653864818eda93e8939ee82f5e98e0ed4745ab Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 23 May 2024 19:23:22 -0700 Subject: [PATCH 16/26] lint --- sky/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index c7c29586b7e..e9e0766e36f 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3016,7 +3016,8 @@ def show_gpus( # Kubernetes specific bools cloud_is_kubernetes = isinstance(cloud_obj, sky_clouds.Kubernetes) kubernetes_autoscaling = kubernetes_utils.get_autoscaler_type() is not None - kubernetes_is_enabled = sky_clouds.cloud_in_iterable(sky_clouds.Kubernetes(), global_user_state.get_cached_enabled_clouds()) + kubernetes_is_enabled = sky_clouds.cloud_in_iterable( + sky_clouds.Kubernetes(), global_user_state.get_cached_enabled_clouds()) if cloud_is_kubernetes and region is not None: raise click.UsageError( From 400336fb1437cfe6bd1df092df0a1ae92407281e Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 24 May 2024 12:23:33 -0700 Subject: [PATCH 17/26] show kubernetes in separate section --- sky/cli.py | 66 ++++++++++++++++++++----------- sky/provision/kubernetes/utils.py | 2 +- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index e9e0766e36f..a9b474153ac 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3055,11 +3055,11 @@ def _kubernetes_realtime_gpu_output(name_filter: 
Optional[str] = None, gpu_info_msg = '' debug_msg = 'To further debug, run: sky check.' if name_filter is not None: - gpu_info_msg = f' matching name {name_filter!r}' - debug_msg = ('To list all available accelerators, ' - 'run: sky show-gpus --cloud kubernetes.') + gpu_info_msg = f' {name_filter!r}' if quantity_filter is not None: - gpu_info_msg += f' with quantity {quantity_filter}' + gpu_info_msg += f' with requested quantity {quantity_filter}' + debug_msg = ('To show available accelerators on kubernetes,' + ' run: sky show-gpus --cloud kubernetes') err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format( gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) yield err_msg @@ -3081,40 +3081,49 @@ def _output(): name, quantity = None, None + # Optimization - do not poll for Kubernetes API for fetching + # common GPUs because that will be fetched later for the table after + # common GPUs. + clouds_to_list = cloud + if cloud is None: + clouds_to_list = [ + c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes'] + if accelerator_str is None: # If cloud is kubernetes, we want to show real-time capacity - if cloud_is_kubernetes: + if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes): + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') yield from _kubernetes_realtime_gpu_output() + yield '\n\n' if kubernetes_utils.get_autoscaler_type() is not None: yield '\n' yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + if cloud_is_kubernetes: + # Do not show clouds if --cloud kubernetes is specified + if not kubernetes_is_enabled: + yield ('Kubernetes is not enabled. To fix, run: ' + 'sky check kubernetes ') return - # Optimization - do not poll for Kubernetes API for fetching - # common GPUs because that will be fetched later for the table after - # common GPUs. 
- clouds_to_list = cloud - if cloud is None and not show_all: - clouds_to_list = ( - c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes') result = service_catalog.list_accelerator_counts( gpus_only=True, clouds=clouds_to_list, region_filter=region, ) + if kubernetes_is_enabled and cloud is None: + # Show section headers only if Kubernetes is enabled and + # a cloud is not specified + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Cloud GPUs{colorama.Style.RESET_ALL}\n') + # "Common" GPUs for gpu in service_catalog.get_common_gpus(): if gpu in result: gpu_table.add_row([gpu, _list_to_str(result.pop(gpu))]) yield from gpu_table.get_string() - # Kubernetes GPUs with realtime information - if cloud is None and kubernetes_is_enabled: - yield '\n\n' - yield from _kubernetes_realtime_gpu_output( - gpu_col_name='KUBERNETES_GPU') - # Google TPUs for tpu in service_catalog.get_tpus(): if tpu in result: @@ -3164,18 +3173,22 @@ def _output(): else: name, quantity = accelerator_str, None - if cloud_is_kubernetes: + if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and not show_all: + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') # Get real-time availability of GPUs for Kubernetes yield from _kubernetes_realtime_gpu_output(name_filter=name, quantity_filter=quantity) - return + if cloud_is_kubernetes: + return + yield '\n\n' # For clouds other than Kubernetes, get the accelerator details # Case-sensitive result = service_catalog.list_accelerators(gpus_only=True, name_filter=name, quantity_filter=quantity, region_filter=region, - clouds=cloud, + clouds=clouds_to_list, case_sensitive=False, all_regions=all_regions) # Import here to save module load speed. @@ -3207,14 +3220,19 @@ def _output(): new_result[gpu] = sorted_dataclasses result = new_result + if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and not show_all: + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Cloud GPUs{colorama.Style.RESET_ALL}\n') + if len(result) == 0: quantity_str = (f' with requested quantity {quantity}' if quantity else '') - yield f'Resources \'{name}\'{quantity_str} not found. ' - yield 'Try \'sky show-gpus --all\' ' - yield 'to show available accelerators.' + cloud_str = f' on {cloud_obj}.' if cloud else ' in cloud catalogs.' + yield f'Resources \'{name}\'{quantity_str} not found{cloud_str} ' + yield 'To show available accelerators, run: sky show-gpus --all' return + for i, (gpu, items) in enumerate(result.items()): accelerator_table_headers = [ 'GPU', diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index c306a546e0d..af5cc1cb1aa 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -35,7 +35,7 @@ 'T': 2**40, 'P': 2**50, } -NO_GPU_ERROR_MESSAGE = 'No GPUs{gpu_info_msg} found in Kubernetes cluster. \ +NO_GPU_ERROR_MESSAGE = 'Resources{gpu_info_msg} not found in Kubernetes cluster. \ If your cluster contains GPUs, make sure nvidia.com/gpu resource is available on the nodes and the node labels for identifying GPUs \ (e.g., skypilot.co/accelerator) are setup correctly. 
\ {debug_msg}' From 3d3e1214369d43d8605bdaa9144b38968d8bb41a Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 24 May 2024 12:46:11 -0700 Subject: [PATCH 18/26] wip --- sky/cli.py | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index a9b474153ac..d996040e823 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3028,9 +3028,7 @@ def _list_to_str(lst): def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, quantity_filter: Optional[int] = None, - gpu_col_name: Optional[str] = None): - if gpu_col_name is None: - gpu_col_name = 'GPU' + raise_if_not_found: bool = False): if quantity_filter: qty_header = 'QTY_FILTER' free_header = 'FILTERED_FREE_GPUS' @@ -3038,7 +3036,7 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, qty_header = 'QTY_PER_NODE' free_header = 'TOTAL_FREE_GPUS' realtime_gpu_table = log_utils.create_table( - [gpu_col_name, qty_header, 'TOTAL_GPUS', free_header]) + ['GPU', qty_header, 'TOTAL_GPUS', free_header]) counts, capacity, available = service_catalog.list_accelerator_realtime( gpus_only=True, clouds='kubernetes', @@ -3062,6 +3060,8 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, ' run: sky show-gpus --cloud kubernetes') err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format( gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) + if raise_if_not_found: + raise ValueError(err_msg) yield err_msg return for gpu, _ in sorted(counts.items()): @@ -3090,15 +3090,27 @@ def _output(): c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes'] if accelerator_str is None: + k8s_messages = '' + print_section_titles = False # If cloud is kubernetes, we want to show real-time capacity if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes): - yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' - f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') - yield from _kubernetes_realtime_gpu_output() - yield '\n\n' + try: + # If --cloud kubernetes is not specified, we want to catch + # the case where no GPUs are available on the cluster and + # print the warning at the end. 
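
The restructuring above lets the Kubernetes realtime output raise `ValueError` when nothing matches, and the caller either surfaces that message immediately (when `--cloud kubernetes` was given) or defers it until after the cloud tables, as the follow-up commit refines. A stripped-down sketch of that control flow; the function names and table contents below are placeholders:

    def build_k8s_table(found: bool) -> str:
        if not found:
            raise ValueError('No GPUs found in Kubernetes cluster.')
        return 'GPU  QTY_PER_NODE  TOTAL_GPUS  TOTAL_FREE_GPUS\n...'

    def output(cloud_is_kubernetes: bool, found: bool):
        k8s_messages = ''
        try:
            table = build_k8s_table(found)
        except ValueError as e:
            if cloud_is_kubernetes:
                yield str(e)        # user explicitly asked for Kubernetes
                return
            k8s_messages = f'Note: {e}'
        else:
            yield 'Kubernetes GPUs\n' + table
            if cloud_is_kubernetes:
                return
        yield 'Cloud GPUs\n...'
        if k8s_messages:
            yield k8s_messages      # deferred to the end of the output

    print('\n'.join(output(cloud_is_kubernetes=False, found=False)))
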
+ k8s_output_generator = _kubernetes_realtime_gpu_output( + raise_if_not_found=(cloud is None) + ) + except ValueError as e: + k8s_messages += f'Note: {str(e)}\n' + else: + print_section_titles = True + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') + yield from k8s_output_generator + yield '\n\n' if kubernetes_utils.get_autoscaler_type() is not None: - yield '\n' - yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + k8s_messages += kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE if cloud_is_kubernetes: # Do not show clouds if --cloud kubernetes is specified if not kubernetes_is_enabled: @@ -3112,9 +3124,8 @@ def _output(): region_filter=region, ) - if kubernetes_is_enabled and cloud is None: - # Show section headers only if Kubernetes is enabled and - # a cloud is not specified + if print_section_titles: + # If section titles were printed above, print again here yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Cloud GPUs{colorama.Style.RESET_ALL}\n') From e13ba3d86dbf046e9b26ccb86875cfd54ba6524d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 24 May 2024 15:49:43 -0700 Subject: [PATCH 19/26] move messages to the end --- sky/cli.py | 84 ++++++++++++++++++------------- sky/provision/kubernetes/utils.py | 6 +-- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index d996040e823..6823224b5b3 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3026,9 +3026,9 @@ def show_gpus( def _list_to_str(lst): return ', '.join([str(e) for e in lst]) - def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, - quantity_filter: Optional[int] = None, - raise_if_not_found: bool = False): + def _get_kubernetes_realtime_gpu_table(name_filter: Optional[str] = None, + quantity_filter: Optional[int] = None + ): if quantity_filter: qty_header = 'QTY_FILTER' free_header = 'FILTERED_FREE_GPUS' @@ -3050,26 +3050,23 @@ def _kubernetes_realtime_gpu_output(name_filter: Optional[str] = None, f'and available ({list(available.keys())}) ' 'must be same.') if len(counts) == 0: - gpu_info_msg = '' + err_msg = 'No GPUs found in Kubernetes cluster. ' debug_msg = 'To further debug, run: sky check.' if name_filter is not None: gpu_info_msg = f' {name_filter!r}' if quantity_filter is not None: gpu_info_msg += f' with requested quantity {quantity_filter}' + err_msg = f'Resources{gpu_info_msg} not found in Kubernetes cluster. 
' debug_msg = ('To show available accelerators on kubernetes,' ' run: sky show-gpus --cloud kubernetes') - err_msg = kubernetes_utils.NO_GPU_ERROR_MESSAGE.format( - gpu_info_msg=gpu_info_msg, debug_msg=debug_msg) - if raise_if_not_found: - raise ValueError(err_msg) - yield err_msg - return + full_err_msg = err_msg + kubernetes_utils.NO_GPU_HELP_MESSAGE + debug_msg + raise ValueError(full_err_msg) for gpu, _ in sorted(counts.items()): realtime_gpu_table.add_row([ gpu, _list_to_str(counts.pop(gpu)), capacity[gpu], available[gpu] ]) - yield from realtime_gpu_table.get_string() + return realtime_gpu_table def _output(): gpu_table = log_utils.create_table( @@ -3089,8 +3086,9 @@ def _output(): clouds_to_list = [ c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes'] + k8s_messages = '' if accelerator_str is None: - k8s_messages = '' + # Collect k8s related messages in k8s_messages and print them at end print_section_titles = False # If cloud is kubernetes, we want to show real-time capacity if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes): @@ -3098,24 +3096,27 @@ def _output(): # If --cloud kubernetes is not specified, we want to catch # the case where no GPUs are available on the cluster and # print the warning at the end. - k8s_output_generator = _kubernetes_realtime_gpu_output( - raise_if_not_found=(cloud is None) - ) + k8s_realtime_table = _get_kubernetes_realtime_gpu_table() except ValueError as e: - k8s_messages += f'Note: {str(e)}\n' + if cloud_is_kubernetes: + # Immediately show the error msg if --cloud kubernetes + yield str(e) + else: + # Show the error message at the end if not specified + k8s_messages += f'Note: {str(e)}' else: print_section_titles = True yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') - yield from k8s_output_generator - yield '\n\n' + yield from k8s_realtime_table.get_string() if kubernetes_utils.get_autoscaler_type() is not None: - k8s_messages += kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + k8s_messages += '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE if cloud_is_kubernetes: # Do not show clouds if --cloud kubernetes is specified if not kubernetes_is_enabled: yield ('Kubernetes is not enabled. 
To fix, run: ' 'sky check kubernetes ') + yield k8s_messages return result = service_catalog.list_accelerator_counts( @@ -3126,6 +3127,7 @@ def _output(): if print_section_titles: # If section titles were printed above, print again here + yield '\n\n' yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Cloud GPUs{colorama.Style.RESET_ALL}\n') @@ -3150,17 +3152,15 @@ def _output(): other_table.add_row([gpu, _list_to_str(qty)]) yield from other_table.get_string() yield '\n\n' - if (cloud is None and kubernetes_is_enabled and - kubernetes_autoscaling): - yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + if k8s_messages: + yield k8s_messages yield '\n\n' else: yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') - if (cloud is None and kubernetes_is_enabled and - kubernetes_autoscaling): - yield '\n' - yield kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + if k8s_messages: + yield '\n\n' + yield k8s_messages return else: # Parse accelerator string @@ -3184,15 +3184,30 @@ def _output(): else: name, quantity = accelerator_str, None + print_section_titles = False if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and not show_all: - yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' - f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') - # Get real-time availability of GPUs for Kubernetes - yield from _kubernetes_realtime_gpu_output(name_filter=name, + try: + k8s_realtime_table = _get_kubernetes_realtime_gpu_table(name_filter=name, quantity_filter=quantity) - if cloud_is_kubernetes: - return - yield '\n\n' + except ValueError as e: + if cloud_is_kubernetes: + yield str(e) + else: + k8s_messages += f'Note: {str(e)}' + else: + print_section_titles = True + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Kubernetes GPUs{colorama.Style.RESET_ALL}') + yield from k8s_realtime_table.get_string() + if kubernetes_utils.get_autoscaler_type() is not None: + k8s_messages += '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + if cloud_is_kubernetes: + # Do not show clouds if --cloud kubernetes is specified + if not kubernetes_is_enabled: + yield ('Kubernetes is not enabled. To fix, run: ' + 'sky check kubernetes ') + yield k8s_messages + return # For clouds other than Kubernetes, get the accelerator details # Case-sensitive result = service_catalog.list_accelerators(gpus_only=True, @@ -3231,7 +3246,8 @@ def _output(): new_result[gpu] = sorted_dataclasses result = new_result - if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and not show_all: + if print_section_titles and not show_all: + yield '\n\n' yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Cloud GPUs{colorama.Style.RESET_ALL}\n') diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index af5cc1cb1aa..5e0564f9707 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -35,10 +35,8 @@ 'T': 2**40, 'P': 2**50, } -NO_GPU_ERROR_MESSAGE = 'Resources{gpu_info_msg} not found in Kubernetes cluster. \ -If your cluster contains GPUs, make sure nvidia.com/gpu resource is available on the nodes and the node labels for identifying GPUs \ -(e.g., skypilot.co/accelerator) are setup correctly. \ -{debug_msg}' +NO_GPU_HELP_MESSAGE = 'If your cluster contains GPUs, make sure nvidia.com/gpu resource is available on the nodes and the node labels for identifying GPUs \ +(e.g., skypilot.co/accelerator) are setup correctly. ' KUBERNETES_AUTOSCALER_NOTE = ( 'Note: Kubernetes cluster autoscaling is enabled. 
' From 9e308e08a3642cff60cb9935994ab4c24aa9ba2a Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 24 May 2024 15:53:43 -0700 Subject: [PATCH 20/26] lint --- sky/cli.py | 39 +++++++++++++++++++------------ sky/provision/kubernetes/utils.py | 6 +++-- 2 files changed, 28 insertions(+), 17 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 6823224b5b3..e11f890ced5 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3026,9 +3026,9 @@ def show_gpus( def _list_to_str(lst): return ', '.join([str(e) for e in lst]) - def _get_kubernetes_realtime_gpu_table(name_filter: Optional[str] = None, - quantity_filter: Optional[int] = None - ): + def _get_kubernetes_realtime_gpu_table( + name_filter: Optional[str] = None, + quantity_filter: Optional[int] = None): if quantity_filter: qty_header = 'QTY_FILTER' free_header = 'FILTERED_FREE_GPUS' @@ -3055,11 +3055,15 @@ def _get_kubernetes_realtime_gpu_table(name_filter: Optional[str] = None, if name_filter is not None: gpu_info_msg = f' {name_filter!r}' if quantity_filter is not None: - gpu_info_msg += f' with requested quantity {quantity_filter}' - err_msg = f'Resources{gpu_info_msg} not found in Kubernetes cluster. ' + gpu_info_msg += (' with requested quantity' + f' {quantity_filter}') + err_msg = (f'Resources{gpu_info_msg} not found ' + 'in Kubernetes cluster. ') debug_msg = ('To show available accelerators on kubernetes,' ' run: sky show-gpus --cloud kubernetes') - full_err_msg = err_msg + kubernetes_utils.NO_GPU_HELP_MESSAGE + debug_msg + full_err_msg = (err_msg + + kubernetes_utils.NO_GPU_HELP_MESSAGE + + debug_msg) raise ValueError(full_err_msg) for gpu, _ in sorted(counts.items()): realtime_gpu_table.add_row([ @@ -3084,7 +3088,8 @@ def _output(): clouds_to_list = cloud if cloud is None: clouds_to_list = [ - c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes'] + c for c in service_catalog.ALL_CLOUDS if c != 'kubernetes' + ] k8s_messages = '' if accelerator_str is None: @@ -3109,8 +3114,10 @@ def _output(): yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') yield from k8s_realtime_table.get_string() - if kubernetes_utils.get_autoscaler_type() is not None: - k8s_messages += '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + if kubernetes_autoscaling: + k8s_messages += ('\n' + + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + ) if cloud_is_kubernetes: # Do not show clouds if --cloud kubernetes is specified if not kubernetes_is_enabled: @@ -3185,10 +3192,11 @@ def _output(): name, quantity = accelerator_str, None print_section_titles = False - if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and not show_all: + if kubernetes_is_enabled and (cloud is None or + cloud_is_kubernetes) and not show_all: try: - k8s_realtime_table = _get_kubernetes_realtime_gpu_table(name_filter=name, - quantity_filter=quantity) + k8s_realtime_table = _get_kubernetes_realtime_gpu_table( + name_filter=name, quantity_filter=quantity) except ValueError as e: if cloud_is_kubernetes: yield str(e) @@ -3199,8 +3207,9 @@ def _output(): yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' f'Kubernetes GPUs{colorama.Style.RESET_ALL}') yield from k8s_realtime_table.get_string() - if kubernetes_utils.get_autoscaler_type() is not None: - k8s_messages += '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE + if kubernetes_autoscaling: + k8s_messages += ('\n' + + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE) if cloud_is_kubernetes: # Do not show clouds if --cloud kubernetes is specified if not kubernetes_is_enabled: @@ 
-3208,6 +3217,7 @@ def _output(): 'sky check kubernetes ') yield k8s_messages return + # For clouds other than Kubernetes, get the accelerator details # Case-sensitive result = service_catalog.list_accelerators(gpus_only=True, @@ -3259,7 +3269,6 @@ def _output(): yield 'To show available accelerators, run: sky show-gpus --all' return - for i, (gpu, items) in enumerate(result.items()): accelerator_table_headers = [ 'GPU', diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index 5e0564f9707..d5f91f639f6 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -35,8 +35,10 @@ 'T': 2**40, 'P': 2**50, } -NO_GPU_HELP_MESSAGE = 'If your cluster contains GPUs, make sure nvidia.com/gpu resource is available on the nodes and the node labels for identifying GPUs \ -(e.g., skypilot.co/accelerator) are setup correctly. ' +NO_GPU_HELP_MESSAGE = ('If your cluster contains GPUs, make sure ' + 'nvidia.com/gpu resource is available on the nodes and ' + 'the node labels for identifying GPUs ' + '(e.g., skypilot.co/accelerator) are setup correctly. ') KUBERNETES_AUTOSCALER_NOTE = ( 'Note: Kubernetes cluster autoscaling is enabled. ' From 8a36851f79850363a805b4ac3968a7c9e5a61350 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 24 May 2024 15:55:55 -0700 Subject: [PATCH 21/26] lint --- sky/cli.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index e11f890ced5..07fd39792ff 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3061,8 +3061,7 @@ def _get_kubernetes_realtime_gpu_table( 'in Kubernetes cluster. ') debug_msg = ('To show available accelerators on kubernetes,' ' run: sky show-gpus --cloud kubernetes') - full_err_msg = (err_msg + - kubernetes_utils.NO_GPU_HELP_MESSAGE + + full_err_msg = (err_msg + kubernetes_utils.NO_GPU_HELP_MESSAGE + debug_msg) raise ValueError(full_err_msg) for gpu, _ in sorted(counts.items()): @@ -3115,9 +3114,8 @@ def _output(): f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') yield from k8s_realtime_table.get_string() if kubernetes_autoscaling: - k8s_messages += ('\n' + - kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE - ) + k8s_messages += ( + '\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE) if cloud_is_kubernetes: # Do not show clouds if --cloud kubernetes is specified if not kubernetes_is_enabled: From db958959f7fe8f61532f1876e7c6dff56aba20f8 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 24 May 2024 16:19:02 -0700 Subject: [PATCH 22/26] show sections if name is specified --- sky/cli.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 07fd39792ff..39c29d9ce36 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3192,19 +3192,19 @@ def _output(): print_section_titles = False if kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and not show_all: + # Print section title if not showing all and instead a specific + # accelerator is requested + print_section_titles = True + yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' + f'Kubernetes GPUs{colorama.Style.RESET_ALL}\n') try: k8s_realtime_table = _get_kubernetes_realtime_gpu_table( name_filter=name, quantity_filter=quantity) - except ValueError as e: - if cloud_is_kubernetes: - yield str(e) - else: - k8s_messages += f'Note: {str(e)}' - else: - print_section_titles = True - yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' - f'Kubernetes GPUs{colorama.Style.RESET_ALL}') yield from k8s_realtime_table.get_string() + except 
ValueError as e: + # In the case of a specific accelerator, show the error message + # immediately (e.g., "Resources H100 not found ...") + yield str(e) if kubernetes_autoscaling: k8s_messages += ('\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE) @@ -3265,6 +3265,10 @@ def _output(): cloud_str = f' on {cloud_obj}.' if cloud else ' in cloud catalogs.' yield f'Resources \'{name}\'{quantity_str} not found{cloud_str} ' yield 'To show available accelerators, run: sky show-gpus --all' + + if k8s_messages: + yield '\n' + yield k8s_messages return for i, (gpu, items) in enumerate(result.items()): @@ -3322,6 +3326,9 @@ def _output(): if i != 0: yield '\n\n' yield from accelerator_table.get_string() + if k8s_messages: + yield '\n' + yield k8s_messages if show_all: click.echo_via_pager(_output()) From 91a43565ce63e108201f07b1b481d136dc189959 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 27 May 2024 10:30:43 -0700 Subject: [PATCH 23/26] comments --- sky/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 39c29d9ce36..2cd243e86b6 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3190,8 +3190,8 @@ def _output(): name, quantity = accelerator_str, None print_section_titles = False - if kubernetes_is_enabled and (cloud is None or - cloud_is_kubernetes) and not show_all: + if (kubernetes_is_enabled and (cloud is None or + cloud_is_kubernetes) and not show_all): # Print section title if not showing all and instead a specific # accelerator is requested print_section_titles = True From 8e48e683c4a215e1cfd3b8cce02250926b54ba89 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 27 May 2024 11:22:46 -0700 Subject: [PATCH 24/26] lint --- sky/cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 2cd243e86b6..aa118c3af01 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3190,8 +3190,8 @@ def _output(): name, quantity = accelerator_str, None print_section_titles = False - if (kubernetes_is_enabled and (cloud is None or - cloud_is_kubernetes) and not show_all): + if (kubernetes_is_enabled and (cloud is None or cloud_is_kubernetes) and + not show_all): # Print section title if not showing all and instead a specific # accelerator is requested print_section_titles = True From 997bec180eb0452f785b06aeb461d4054eb0d834 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 27 May 2024 12:59:53 -0700 Subject: [PATCH 25/26] fix bugs and move warning for show_all to the top --- sky/cli.py | 34 ++++++++++++++-------------------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index aa118c3af01..74b6d38545a 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3051,7 +3051,7 @@ def _get_kubernetes_realtime_gpu_table( 'must be same.') if len(counts) == 0: err_msg = 'No GPUs found in Kubernetes cluster. ' - debug_msg = 'To further debug, run: sky check.' + debug_msg = 'To further debug, run: sky check ' if name_filter is not None: gpu_info_msg = f' {name_filter!r}' if quantity_filter is not None: @@ -3060,7 +3060,7 @@ def _get_kubernetes_realtime_gpu_table( err_msg = (f'Resources{gpu_info_msg} not found ' 'in Kubernetes cluster. ') debug_msg = ('To show available accelerators on kubernetes,' - ' run: sky show-gpus --cloud kubernetes') + ' run: sky show-gpus --cloud kubernetes ') full_err_msg = (err_msg + kubernetes_utils.NO_GPU_HELP_MESSAGE + debug_msg) raise ValueError(full_err_msg) @@ -3102,12 +3102,10 @@ def _output(): # print the warning at the end. 
k8s_realtime_table = _get_kubernetes_realtime_gpu_table() except ValueError as e: - if cloud_is_kubernetes: - # Immediately show the error msg if --cloud kubernetes - yield str(e) - else: - # Show the error message at the end if not specified - k8s_messages += f'Note: {str(e)}' + if not cloud_is_kubernetes: + # Make it a note if cloud is not kubernetes + k8s_messages += f'Note: ' + k8s_messages += str(e) else: print_section_titles = True yield (f'{colorama.Fore.CYAN}{colorama.Style.BRIGHT}' @@ -3124,6 +3122,12 @@ def _output(): yield k8s_messages return + # For show_all, show the k8s message at the start since output is + # long and the user may not scroll to the end. + if show_all and k8s_messages: + yield k8s_messages + yield '\n\n' + result = service_catalog.list_accelerator_counts( gpus_only=True, clouds=clouds_to_list, @@ -3157,14 +3161,11 @@ def _output(): other_table.add_row([gpu, _list_to_str(qty)]) yield from other_table.get_string() yield '\n\n' - if k8s_messages: - yield k8s_messages - yield '\n\n' else: yield ('\n\nHint: use -a/--all to see all accelerators ' '(including non-common ones) and pricing.') if k8s_messages: - yield '\n\n' + yield '\n' yield k8s_messages return else: @@ -3208,12 +3209,12 @@ def _output(): if kubernetes_autoscaling: k8s_messages += ('\n' + kubernetes_utils.KUBERNETES_AUTOSCALER_NOTE) + yield k8s_messages if cloud_is_kubernetes: # Do not show clouds if --cloud kubernetes is specified if not kubernetes_is_enabled: yield ('Kubernetes is not enabled. To fix, run: ' 'sky check kubernetes ') - yield k8s_messages return # For clouds other than Kubernetes, get the accelerator details @@ -3265,10 +3266,6 @@ def _output(): cloud_str = f' on {cloud_obj}.' if cloud else ' in cloud catalogs.' yield f'Resources \'{name}\'{quantity_str} not found{cloud_str} ' yield 'To show available accelerators, run: sky show-gpus --all' - - if k8s_messages: - yield '\n' - yield k8s_messages return for i, (gpu, items) in enumerate(result.items()): @@ -3326,9 +3323,6 @@ def _output(): if i != 0: yield '\n\n' yield from accelerator_table.get_string() - if k8s_messages: - yield '\n' - yield k8s_messages if show_all: click.echo_via_pager(_output()) From 72f08d91c03386dc55b13ceecc113505017c30f1 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 27 May 2024 13:02:15 -0700 Subject: [PATCH 26/26] lint --- sky/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index 74b6d38545a..0bcec3d2f4b 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3104,7 +3104,7 @@ def _output(): except ValueError as e: if not cloud_is_kubernetes: # Make it a note if cloud is not kubernetes - k8s_messages += f'Note: ' + k8s_messages += 'Note: ' k8s_messages += str(e) else: print_section_titles = True
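
A minimal, self-contained sketch of the control flow these patches converge on in `show_gpus`/`_output()` (not taken from sky/cli.py; `fetch_k8s_realtime_gpus` and the inline table strings below are simplified stand-ins for `service_catalog.list_accelerator_realtime` and `log_utils.create_table`): the table builder raises `ValueError` when no matching GPUs exist, and the caller either surfaces that error immediately (`--cloud kubernetes`) or downgrades it to a trailing `Note:` message collected in `k8s_messages`, so Kubernetes warnings never interrupt the realtime table output.

from typing import Dict, Iterator, List, Optional, Tuple


def fetch_k8s_realtime_gpus(
) -> Tuple[Dict[str, List[int]], Dict[str, int], Dict[str, int]]:
    # Stand-in for service_catalog.list_accelerator_realtime(clouds='kubernetes'):
    # per-GPU requestable counts, total capacity, and currently free GPUs.
    return {'A100': [1, 2, 4]}, {'A100': 8}, {'A100': 3}


def get_k8s_realtime_gpu_table(name_filter: Optional[str] = None) -> str:
    # Mirrors _get_kubernetes_realtime_gpu_table: build the table, or raise
    # ValueError so the caller decides whether the error is fatal or a note.
    counts, capacity, available = fetch_k8s_realtime_gpus()
    if name_filter is not None:
        counts = {
            g: q for g, q in counts.items() if name_filter.lower() in g.lower()
        }
    if not counts:
        if name_filter is None:
            raise ValueError('No GPUs found in Kubernetes cluster. ')
        raise ValueError(
            f'Resources {name_filter!r} not found in Kubernetes cluster. ')
    rows = [
        f'{g}  {", ".join(map(str, q))}  {capacity[g]}  {available[g]}'
        for g, q in sorted(counts.items())
    ]
    return 'GPU  QTY_PER_NODE  TOTAL_GPUS  TOTAL_FREE_GPUS\n' + '\n'.join(rows)


def output(cloud: Optional[str], autoscaling_enabled: bool) -> Iterator[str]:
    # Mirrors _output(): emit tables eagerly, collect Kubernetes notes in
    # k8s_messages, and flush them at the end of the stream.
    k8s_messages = ''
    cloud_is_kubernetes = (cloud == 'kubernetes')
    if cloud is None or cloud_is_kubernetes:
        try:
            table = get_k8s_realtime_gpu_table()
        except ValueError as e:
            # Fatal only when Kubernetes was explicitly requested.
            if cloud_is_kubernetes:
                yield str(e)
                return
            k8s_messages += f'Note: {e}'
        else:
            yield 'Kubernetes GPUs\n'
            yield table
        if autoscaling_enabled:
            k8s_messages += '\nNote: Kubernetes cluster autoscaling is enabled.'
        if cloud_is_kubernetes:
            yield k8s_messages
            return
    yield '\n\nCloud GPUs\n'
    yield '(per-cloud catalog tables would be emitted here)'
    if k8s_messages:
        yield '\n' + k8s_messages


if __name__ == '__main__':
    print(''.join(output(cloud=None, autoscaling_enabled=True)))

Run as-is, this prints the Kubernetes realtime table first and the autoscaler note last, matching the ordering the series settles on: messages moved to the end of the output, except under --all, where the long catalog dump follows and the Kubernetes notes are emitted up front instead.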