From 68723df97c7c981887ba9100c510aca953f45c11 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 27 Nov 2024 17:13:32 -0800 Subject: [PATCH] [k8s] Fix resources.image_id backward compatibility (#4425) * Fix back compat * Fix back compat for image_id + regions * lint * comments --- sky/clouds/kubernetes.py | 5 +++-- sky/resources.py | 23 ++++++++++++++++++++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index ace14bc0c51..471639626eb 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -41,6 +41,8 @@ class Kubernetes(clouds.Cloud): SKY_SSH_KEY_SECRET_NAME = 'sky-ssh-keys' SKY_SSH_JUMP_NAME = 'sky-ssh-jump-pod' + LEGACY_SINGLETON_REGION = 'kubernetes' + # Limit the length of the cluster name to avoid exceeding the limit of 63 # characters for Kubernetes resources. We limit to 42 characters (63-21) to # allow additional characters for creating ingress services to expose ports. @@ -54,7 +56,6 @@ class Kubernetes(clouds.Cloud): _DEFAULT_MEMORY_CPU_RATIO = 1 _DEFAULT_MEMORY_CPU_RATIO_WITH_GPU = 4 # Allocate more memory for GPU tasks _REPR = 'Kubernetes' - _LEGACY_SINGLETON_REGION = 'kubernetes' _CLOUD_UNSUPPORTED_FEATURES = { # TODO(romilb): Stopping might be possible to implement with # container checkpointing introduced in Kubernetes v1.25. See: @@ -630,7 +631,7 @@ def instance_type_exists(self, instance_type: str) -> bool: instance_type) def validate_region_zone(self, region: Optional[str], zone: Optional[str]): - if region == self._LEGACY_SINGLETON_REGION: + if region == self.LEGACY_SINGLETON_REGION: # For backward compatibility, we allow the region to be set to the # legacy singleton region. # TODO: Remove this after 0.9.0. diff --git a/sky/resources.py b/sky/resources.py index 729b9d62a28..5184278e02e 100644 --- a/sky/resources.py +++ b/sky/resources.py @@ -45,7 +45,7 @@ class Resources: """ # If any fields changed, increment the version. For backward compatibility, # modify the __setstate__ method to handle the old version. - _VERSION = 19 + _VERSION = 20 def __init__( self, @@ -1607,4 +1607,25 @@ def __setstate__(self, state): self._cluster_config_overrides = state.pop( '_cluster_config_overrides', None) + if version < 20: + # Pre-0.7.0, we used 'kubernetes' as the default region for + # Kubernetes clusters. With the introduction of support for + # multiple contexts, we now set the region to the context name. + # Since we do not have information on which context the cluster + # was run in, we default it to the current active context. + legacy_region = clouds.Kubernetes().LEGACY_SINGLETON_REGION + original_cloud = state.get('_cloud', None) + original_region = state.get('_region', None) + if (isinstance(original_cloud, clouds.Kubernetes) and + original_region == legacy_region): + current_context = ( + kubernetes_utils.get_current_kube_config_context_name()) + state['_region'] = current_context + # Also update the image_id dict if it contains the old region + if isinstance(state['_image_id'], dict): + if legacy_region in state['_image_id']: + state['_image_id'][current_context] = ( + state['_image_id'][legacy_region]) + del state['_image_id'][legacy_region] + self.__dict__.update(state)