From 142cfdc86747a57793875186001fce698438724f Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Mon, 29 Jul 2024 01:21:16 +0000 Subject: [PATCH 01/10] add subscription id hash on default storage account name --- sky/adaptors/azure.py | 10 ++++++++++ sky/data/storage.py | 9 +++++++-- tests/test_smoke.py | 26 ++++++++++++++++++-------- 3 files changed, 35 insertions(+), 10 deletions(-) diff --git a/sky/adaptors/azure.py b/sky/adaptors/azure.py index 0cadb0b2bd8..b71643d5bee 100644 --- a/sky/adaptors/azure.py +++ b/sky/adaptors/azure.py @@ -4,6 +4,7 @@ import asyncio import datetime import functools +import hashlib import logging import threading import time @@ -28,6 +29,7 @@ _session_creation_lock = threading.RLock() _MAX_RETRY_FOR_GET_SUBSCRIPTION_ID = 5 +_SUBSCRIPTION_HASH_LENGTH = 4 @common.load_lazy_modules(modules=_LAZY_MODULES) @@ -459,3 +461,11 @@ def deployment_mode(): """Azure deployment mode.""" from azure.mgmt.resource.resources.models import DeploymentMode return DeploymentMode + + +def get_subscription_hash() -> str: + """Returns user's subsciprion ID specific hash.""" + subscription_id = get_subscription_id() + hash_obj = hashlib.md5(subscription_id.encode('utf-8')) + subscription_hash = hash_obj.hexdigest()[:_SUBSCRIPTION_HASH_LENGTH] + return subscription_hash \ No newline at end of file diff --git a/sky/data/storage.py b/sky/data/storage.py index 0caeef2bc7a..97961f67c63 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -1942,7 +1942,7 @@ class AzureBlobStore(AbstractStore): """Represents the backend for Azure Blob Storage Container.""" _ACCESS_DENIED_MESSAGE = 'Access Denied' - DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}' + DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}{subscription_hash}' DEFAULT_RESOURCE_GROUP_NAME = 'sky{user_hash}' class AzureBlobStoreMetadata(AbstractStore.StoreMetadata): @@ -2239,10 +2239,15 @@ def _get_storage_account_and_resource_group( else: # If storage account name is not provided from config, then # use default resource group and storage account names. + # To ensure uniqueness, we must take the subscription ID into + # account. This is necessary because the user may switch + # between different subscription IDs while using the same + # machine. storage_account_name = ( self.DEFAULT_STORAGE_ACCOUNT_NAME.format( region=self.region, - user_hash=common_utils.get_user_hash())) + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) resource_group_name = (self.DEFAULT_RESOURCE_GROUP_NAME.format( user_hash=common_utils.get_user_hash())) try: diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 5752a5fa067..dd319f5dbf7 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -48,6 +48,7 @@ from sky import jobs from sky import serve from sky import skypilot_config +from sky.adaptors import azure from sky.adaptors import cloudflare from sky.adaptors import ibm from sky.clouds import AWS @@ -1105,7 +1106,9 @@ def test_azure_storage_mounts_with_stop(): default_region = 'eastus' storage_account_name = ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, user_hash=common_utils.get_user_hash())) + region=default_region, + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) template_str = pathlib.Path( @@ -4262,7 +4265,8 @@ def cli_delete_cmd(store_type, storage_account_name = ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( region=default_region, - user_hash=common_utils.get_user_hash())) + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) return ('az storage container delete ' @@ -4301,7 +4305,8 @@ def cli_ls_cmd(store_type, bucket_name, suffix=''): ) else ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( region=default_region, - user_hash=common_utils.get_user_hash())) + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) list_cmd = ('az storage blob list ' @@ -4352,7 +4357,8 @@ def cli_count_name_in_bucket(store_type, bucket_name, file_name, suffix=''): storage_account_name = ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( region=default_region, - user_hash=common_utils.get_user_hash())) + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) return ('az storage blob list ' @@ -4380,7 +4386,8 @@ def cli_count_file_in_bucket(store_type, bucket_name): storage_account_name = ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( region=default_region, - user_hash=common_utils.get_user_hash())) + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) return ('az storage blob list ' @@ -4583,7 +4590,9 @@ def tmp_az_bucket(self, tmp_bucket_name): default_region = 'eastus' storage_account_name = ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, user_hash=common_utils.get_user_hash())) + region=default_region, + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) bucket_uri = data_utils.AZURE_CONTAINER_URL.format( @@ -4821,7 +4830,8 @@ def test_nonexistent_bucket(self, nonexist_bucket_url): storage_account_name = ( storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME. format(region=default_region, - user_hash=common_utils.get_user_hash())) + user_hash=common_utils.get_user_hash(), + subscription_hash=azure.get_subscription_hash())) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) command = f'az storage container exists --account-name {storage_account_name} --account-key {storage_account_key} --name {nonexist_bucket_name}' @@ -4901,7 +4911,7 @@ def test_private_bucket(self, private_bucket): if store_type == 'https': # Azure blob uses a different error string since container may # not exist even though the bucket name is ok. - match_str = 'Attempted to fetch a non-existent public container' + match_str = 'Attempted to fetch a non-existent container' with pytest.raises(sky.exceptions.StorageBucketGetError, match=match_str): storage_obj = storage_lib.Storage(source=private_bucket) From cfb8b7b980b6bfebb776f4c45a8eccc6c09825ac Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Mon, 29 Jul 2024 02:15:22 +0000 Subject: [PATCH 02/10] format --- sky/adaptors/azure.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/adaptors/azure.py b/sky/adaptors/azure.py index b71643d5bee..9bff3288813 100644 --- a/sky/adaptors/azure.py +++ b/sky/adaptors/azure.py @@ -468,4 +468,4 @@ def get_subscription_hash() -> str: subscription_id = get_subscription_id() hash_obj = hashlib.md5(subscription_id.encode('utf-8')) subscription_hash = hash_obj.hexdigest()[:_SUBSCRIPTION_HASH_LENGTH] - return subscription_hash \ No newline at end of file + return subscription_hash From 37be9b9f4d0220269590f35737a394ac72c250a4 Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Mon, 29 Jul 2024 02:21:00 +0000 Subject: [PATCH 03/10] add comment --- sky/data/storage.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index 97961f67c63..6c28cd5d4d9 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -1942,8 +1942,12 @@ class AzureBlobStore(AbstractStore): """Represents the backend for Azure Blob Storage Container.""" _ACCESS_DENIED_MESSAGE = 'Access Denied' - DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}{subscription_hash}' DEFAULT_RESOURCE_GROUP_NAME = 'sky{user_hash}' + # Unlike resource group names, which only need to be unique within the + # subscription, storage account names must be globally unique across all of + # Azure users. Hence, the storage account name includes the subscription + # hash as well to ensure its uniqueness. + DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}{subscription_hash}' class AzureBlobStoreMetadata(AbstractStore.StoreMetadata): """A pickle-able representation of Azure Blob Store. From 889a0dd7189e495b9fadc8530b7af1cbe12c4973 Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Mon, 29 Jul 2024 02:22:09 +0000 Subject: [PATCH 04/10] nit --- tests/test_smoke.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index dd319f5dbf7..e1ae480deb8 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -4911,7 +4911,7 @@ def test_private_bucket(self, private_bucket): if store_type == 'https': # Azure blob uses a different error string since container may # not exist even though the bucket name is ok. - match_str = 'Attempted to fetch a non-existent container' + match_str = 'Attempted to fetch a non-existent public container' with pytest.raises(sky.exceptions.StorageBucketGetError, match=match_str): storage_obj = storage_lib.Storage(source=private_bucket) From b80b755c67147b6b33d39621ce0b14fd9ee559cc Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Sat, 17 Aug 2024 01:39:00 +0000 Subject: [PATCH 05/10] update default storage account name with hashed region value --- sky/adaptors/azure.py | 9 --------- sky/data/storage.py | 44 ++++++++++++++++++++++++++++++++++++------ tests/test_smoke.py | 45 +++++++++++++++---------------------------- 3 files changed, 53 insertions(+), 45 deletions(-) diff --git a/sky/adaptors/azure.py b/sky/adaptors/azure.py index 9bff3288813..3bd9f2fc630 100644 --- a/sky/adaptors/azure.py +++ b/sky/adaptors/azure.py @@ -4,7 +4,6 @@ import asyncio import datetime import functools -import hashlib import logging import threading import time @@ -461,11 +460,3 @@ def deployment_mode(): """Azure deployment mode.""" from azure.mgmt.resource.resources.models import DeploymentMode return DeploymentMode - - -def get_subscription_hash() -> str: - """Returns user's subsciprion ID specific hash.""" - subscription_id = get_subscription_id() - hash_obj = hashlib.md5(subscription_id.encode('utf-8')) - subscription_hash = hash_obj.hexdigest()[:_SUBSCRIPTION_HASH_LENGTH] - return subscription_hash diff --git a/sky/data/storage.py b/sky/data/storage.py index 6c28cd5d4d9..e6588d7a38d 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -1,5 +1,6 @@ """Storage and Store Classes for Sky Data.""" import enum +import hashlib import os import re import shlex @@ -1947,7 +1948,10 @@ class AzureBlobStore(AbstractStore): # subscription, storage account names must be globally unique across all of # Azure users. Hence, the storage account name includes the subscription # hash as well to ensure its uniqueness. - DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}{subscription_hash}' + DEFAULT_STORAGE_ACCOUNT_NAME = ( + 'sky{region_hash}{user_hash}{subscription_hash}') + _SUBSCRIPTION_HASH_LENGTH = 4 + _REGION_HASH_LENGTH = 4 class AzureBlobStoreMetadata(AbstractStore.StoreMetadata): """A pickle-able representation of Azure Blob Store. @@ -2160,6 +2164,37 @@ def initialize(self): # If is_sky_managed is specified, then we take no action. self.is_sky_managed = is_new_bucket + @staticmethod + def get_default_storage_account_name(region) -> str: + """Generates a default storage account name. + + The subscription ID is included to avoid conflicts when user switches + subscriptions. The region value is hashed to ensure the storage account + name adheres to the 24-character limit, as some region names can be + very long. Using a 4-character hash for the region helps keep the name + concise and prevents potential conflicts. + Reference: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules#microsoftstorage # pylint: disable=line-too-long + + Args: + region: Name of the region to create the storage account/container. + + Returns: + Name of the default storage account. + """ + subscription_id = azure.get_subscription_id() + subscription_hash_obj = hashlib.md5(subscription_id.encode('utf-8')) + subscription_hash = subscription_hash_obj.hexdigest()[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH] + region_hash_obj = hashlib.md5(region.encode('utf-8')) + region_hash = region_hash_obj.hexdigest()[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH] + + storage_account_name = ( + AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( + region_hash=region_hash, + user_hash=common_utils.get_user_hash(), + subscription_hash=subscription_hash)) + + return storage_account_name + def _get_storage_account_and_resource_group( self) -> Tuple[str, Optional[str]]: """Get storage account and resource group to be used for AzureBlobStore @@ -2247,11 +2282,8 @@ def _get_storage_account_and_resource_group( # account. This is necessary because the user may switch # between different subscription IDs while using the same # machine. - storage_account_name = ( - self.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=self.region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_account_name = self.get_default_storage_account_name( + self.region) resource_group_name = (self.DEFAULT_RESOURCE_GROUP_NAME.format( user_hash=common_utils.get_user_hash())) try: diff --git a/tests/test_smoke.py b/tests/test_smoke.py index b15f639d073..33e6db52912 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1105,10 +1105,7 @@ def test_azure_storage_mounts_with_stop(): storage_name = f'sky-test-{int(time.time())}' default_region = 'eastus' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name(default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) template_str = pathlib.Path( @@ -2976,8 +2973,8 @@ def test_managed_jobs_storage(generic_cloud: str): region = 'westus2' region_flag = f' --region {region}' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=region, user_hash=common_utils.get_user_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + region)) region_cmd = TestStorageWithCredentials.cli_region_cmd( storage_lib.StoreType.AZURE, storage_account_name=storage_account_name) @@ -4273,10 +4270,8 @@ def cli_delete_cmd(store_type, if store_type == storage_lib.StoreType.AZURE: default_region = 'eastus' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) return ('az storage container delete ' @@ -4313,10 +4308,8 @@ def cli_ls_cmd(store_type, bucket_name, suffix=''): storage_account_name = config_storage_account if ( config_storage_account is not None ) else ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) list_cmd = ('az storage blob list ' @@ -4378,10 +4371,8 @@ def cli_count_name_in_bucket(store_type, if storage_account_name is None: default_region = 'eastus' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME. - format(region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) return ('az storage blob list ' @@ -4407,10 +4398,8 @@ def cli_count_file_in_bucket(store_type, bucket_name): elif store_type == storage_lib.StoreType.AZURE: default_region = 'eastus' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) return ('az storage blob list ' @@ -4612,10 +4601,8 @@ def tmp_az_bucket(self, tmp_bucket_name): # Creates a temporary bucket using gsutil default_region = 'eastus' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( - region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) bucket_uri = data_utils.AZURE_CONTAINER_URL.format( @@ -4851,10 +4838,8 @@ def test_nonexistent_bucket(self, nonexist_bucket_url): elif nonexist_bucket_url.startswith('https'): default_region = 'eastus' storage_account_name = ( - storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME. - format(region=default_region, - user_hash=common_utils.get_user_hash(), - subscription_hash=azure.get_subscription_hash())) + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) command = f'az storage container exists --account-name {storage_account_name} --account-key {storage_account_key} --name {nonexist_bucket_name}' From 916143b0655bd09ca14147a37326c8f9c8578e22 Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Sat, 17 Aug 2024 01:39:41 +0000 Subject: [PATCH 06/10] nit --- sky/data/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index e6588d7a38d..53aaf67b40d 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -2165,7 +2165,7 @@ def initialize(self): self.is_sky_managed = is_new_bucket @staticmethod - def get_default_storage_account_name(region) -> str: + def get_default_storage_account_name(region: str) -> str: """Generates a default storage account name. The subscription ID is included to avoid conflicts when user switches From 9d3a41426b331baad237f5127c4ec038f914eb60 Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Sat, 17 Aug 2024 01:55:03 +0000 Subject: [PATCH 07/10] format --- sky/data/storage.py | 17 ++++++++++------- tests/test_smoke.py | 14 ++++++-------- 2 files changed, 16 insertions(+), 15 deletions(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index 53aaf67b40d..beca625f874 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -1985,7 +1985,7 @@ def __init__(self, name: str, source: str, storage_account_name: str = '', - region: Optional[str] = None, + region: Optional[str] = 'eastus', is_sky_managed: Optional[bool] = None, sync_on_reconstruction: bool = True): self.storage_client: 'storage.Client' @@ -2165,9 +2165,9 @@ def initialize(self): self.is_sky_managed = is_new_bucket @staticmethod - def get_default_storage_account_name(region: str) -> str: + def get_default_storage_account_name(region: Optional[str]) -> str: """Generates a default storage account name. - + The subscription ID is included to avoid conflicts when user switches subscriptions. The region value is hashed to ensure the storage account name adheres to the 24-character limit, as some region names can be @@ -2181,18 +2181,21 @@ def get_default_storage_account_name(region: str) -> str: Returns: Name of the default storage account. """ + assert region is not None subscription_id = azure.get_subscription_id() subscription_hash_obj = hashlib.md5(subscription_id.encode('utf-8')) - subscription_hash = subscription_hash_obj.hexdigest()[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH] + subscription_hash = subscription_hash_obj.hexdigest( + )[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH] region_hash_obj = hashlib.md5(region.encode('utf-8')) - region_hash = region_hash_obj.hexdigest()[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH] - + region_hash = region_hash_obj.hexdigest()[:AzureBlobStore. + _SUBSCRIPTION_HASH_LENGTH] + storage_account_name = ( AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( region_hash=region_hash, user_hash=common_utils.get_user_hash(), subscription_hash=subscription_hash)) - + return storage_account_name def _get_storage_account_and_resource_group( diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 33e6db52912..d5fcf159111 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1104,8 +1104,8 @@ def test_azure_storage_mounts_with_stop(): cloud = 'azure' storage_name = f'sky-test-{int(time.time())}' default_region = 'eastus' - storage_account_name = ( - storage_lib.AzureBlobStore.get_default_storage_account_name(default_region)) + storage_account_name = (storage_lib.AzureBlobStore. + get_default_storage_account_name(default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) template_str = pathlib.Path( @@ -2973,8 +2973,7 @@ def test_managed_jobs_storage(generic_cloud: str): region = 'westus2' region_flag = f' --region {region}' storage_account_name = ( - storage_lib.AzureBlobStore.get_default_storage_account_name( - region)) + storage_lib.AzureBlobStore.get_default_storage_account_name(region)) region_cmd = TestStorageWithCredentials.cli_region_cmd( storage_lib.StoreType.AZURE, storage_account_name=storage_account_name) @@ -4306,10 +4305,9 @@ def cli_ls_cmd(store_type, bucket_name, suffix=''): config_storage_account = skypilot_config.get_nested( ('azure', 'storage_account'), None) storage_account_name = config_storage_account if ( - config_storage_account is not None - ) else ( - storage_lib.AzureBlobStore.get_default_storage_account_name( - default_region)) + config_storage_account is not None) else ( + storage_lib.AzureBlobStore.get_default_storage_account_name( + default_region)) storage_account_key = data_utils.get_az_storage_account_key( storage_account_name) list_cmd = ('az storage blob list ' From de0ee07505fd0f3e65a92e9bea81ae4fa3e0baf4 Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Sat, 17 Aug 2024 02:10:09 +0000 Subject: [PATCH 08/10] nit --- sky/data/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index beca625f874..842f8a3a093 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -2188,7 +2188,7 @@ def get_default_storage_account_name(region: Optional[str]) -> str: )[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH] region_hash_obj = hashlib.md5(region.encode('utf-8')) region_hash = region_hash_obj.hexdigest()[:AzureBlobStore. - _SUBSCRIPTION_HASH_LENGTH] + _REGION_HASH_LENGTH] storage_account_name = ( AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format( From a8390873507926bf76417ce73496f4ea15f527d8 Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Sat, 17 Aug 2024 17:35:54 +0000 Subject: [PATCH 09/10] comment update --- sky/data/storage.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index 842f8a3a093..3bdab6fd152 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -2166,12 +2166,13 @@ def initialize(self): @staticmethod def get_default_storage_account_name(region: Optional[str]) -> str: - """Generates a default storage account name. + """Generates a unique default storage account name. The subscription ID is included to avoid conflicts when user switches - subscriptions. The region value is hashed to ensure the storage account - name adheres to the 24-character limit, as some region names can be - very long. Using a 4-character hash for the region helps keep the name + subscriptions. The length of region_hash, user_hash, and + subscription_hash are adjusted to ensure the storage account name + adheres to the 24-character limit, as some region names can be very + long. Using a 4-character hash for the region helps keep the name concise and prevents potential conflicts. Reference: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules#microsoftstorage # pylint: disable=line-too-long @@ -2281,10 +2282,6 @@ def _get_storage_account_and_resource_group( else: # If storage account name is not provided from config, then # use default resource group and storage account names. - # To ensure uniqueness, we must take the subscription ID into - # account. This is necessary because the user may switch - # between different subscription IDs while using the same - # machine. storage_account_name = self.get_default_storage_account_name( self.region) resource_group_name = (self.DEFAULT_RESOURCE_GROUP_NAME.format( From 3fd5387fb64bde6c0d63013513f21e3b9210a7dd Mon Sep 17 00:00:00 2001 From: Doyoung Kim Date: Sat, 17 Aug 2024 17:36:04 +0000 Subject: [PATCH 10/10] nit --- sky/adaptors/azure.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sky/adaptors/azure.py b/sky/adaptors/azure.py index 3bd9f2fc630..0cadb0b2bd8 100644 --- a/sky/adaptors/azure.py +++ b/sky/adaptors/azure.py @@ -28,7 +28,6 @@ _session_creation_lock = threading.RLock() _MAX_RETRY_FOR_GET_SUBSCRIPTION_ID = 5 -_SUBSCRIPTION_HASH_LENGTH = 4 @common.load_lazy_modules(modules=_LAZY_MODULES)