diff --git a/sky/provision/kubernetes/instance.py b/sky/provision/kubernetes/instance.py
index 7c07ea0fb86..41b4aaa6236 100644
--- a/sky/provision/kubernetes/instance.py
+++ b/sky/provision/kubernetes/instance.py
@@ -180,7 +180,8 @@ def _raise_pod_scheduling_errors(namespace, context, new_nodes):
                         # case we will need to update this logic.
                         # TODO(Doyoung): Update the error message raised
                         # with the multi-host TPU support.
-                        gpu_resource_key = kubernetes_utils.get_gpu_resource_key()
+                        gpu_resource_key = kubernetes_utils.get_gpu_resource_key(
+                        )
                         if 'Insufficient google.com/tpu' in event_message:
                             extra_msg = (
                                 f'Verify if '
@@ -199,8 +200,10 @@ def _raise_pod_scheduling_errors(namespace, context, new_nodes):
                                in event_message)):
                             extra_msg = (
                                 f'Verify if any node matching label '
-                                f'{pod.spec.node_selector[label_key]} and sufficient '
-                                f'resource {gpu_resource_key} is available in the cluster.')
+                                f'{pod.spec.node_selector[label_key]} and '
+                                f'sufficient resource {gpu_resource_key} is '
+                                f'available in the cluster.'
+                            )
                             raise config_lib.KubernetesError(
                                 _lack_resource_msg('GPU', pod,
diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py
index b7a10d9c173..2df199de8e1 100644
--- a/sky/provision/kubernetes/utils.py
+++ b/sky/provision/kubernetes/utils.py
@@ -2427,6 +2427,7 @@ def get_gpu_resource_key():
     Returns:
         str: The selected GPU resource name.
     """
-    # Retrieve GPU resource name from environment variable, if set. Else use default.
+    # Retrieve GPU resource name from environment variable, if set.
+    # Else use default.
     # E.g., can be nvidia.com/gpu-h100, amd.com/gpu etc.
-    return os.getenv('CUSTOM_GPU_RESOURCE_KEY', default = GPU_RESOURCE_KEY)
+    return os.getenv('CUSTOM_GPU_RESOURCE_KEY', default=GPU_RESOURCE_KEY)
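For reference, the utils.py hunk has get_gpu_resource_key() honor a CUSTOM_GPU_RESOURCE_KEY environment variable and fall back to the module's default otherwise. Below is a minimal standalone sketch of that lookup behavior; the GPU_RESOURCE_KEY value ('nvidia.com/gpu') and the __main__ usage are illustrative assumptions, not part of this change.

import os

# Assumed default GPU resource name; in SkyPilot the real constant lives in
# sky/provision/kubernetes/utils.py.
GPU_RESOURCE_KEY = 'nvidia.com/gpu'


def get_gpu_resource_key() -> str:
    """Return the GPU resource key, preferring CUSTOM_GPU_RESOURCE_KEY if set."""
    # Retrieve GPU resource name from environment variable, if set.
    # Else use default. E.g., can be nvidia.com/gpu-h100, amd.com/gpu etc.
    return os.getenv('CUSTOM_GPU_RESOURCE_KEY', default=GPU_RESOURCE_KEY)


if __name__ == '__main__':
    print(get_gpu_resource_key())  # -> nvidia.com/gpu (default)
    os.environ['CUSTOM_GPU_RESOURCE_KEY'] = 'amd.com/gpu'
    print(get_gpu_resource_key())  # -> amd.com/gpu (overridden)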