From 47d3dc0067e92b3676268353dae67cbda247dfc5 Mon Sep 17 00:00:00 2001 From: Andrew Aikawa Date: Tue, 2 Jul 2024 11:09:32 -0700 Subject: [PATCH] map gke h100 megas to 'H100' (#3691) * map gke h100 megas to 'H100' * patch comment about H100 vs H100-mega * format --- sky/provision/kubernetes/utils.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/sky/provision/kubernetes/utils.py b/sky/provision/kubernetes/utils.py index fbf79130424..cfa3581fb02 100644 --- a/sky/provision/kubernetes/utils.py +++ b/sky/provision/kubernetes/utils.py @@ -193,8 +193,13 @@ def get_accelerator_from_label_value(cls, value: str) -> str: return value.replace('nvidia-tesla-', '').upper() elif value.startswith('nvidia-'): acc = value.replace('nvidia-', '').upper() - if acc == 'H100-80GB': - # H100 is named as H100-80GB in GKE. + if acc in ['H100-80GB', 'H100-MEGA-80GB']: + # H100 is named H100-80GB or H100-MEGA-80GB in GKE, + # where the latter has improved bandwidth. + # See a3-mega instances on GCP. + # TODO: we do not distinguish the two GPUs for simplicity, + # but we can evaluate whether we should distinguish + # them based on users' requests. return 'H100' return acc else: