Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Azure][Storage] Update default storage account naming with subscription id hash #3796

53 changes: 47 additions & 6 deletions sky/data/storage.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Storage and Store Classes for Sky Data."""
import enum
import hashlib
import os
import re
import shlex
Expand Down Expand Up @@ -1942,8 +1943,15 @@ class AzureBlobStore(AbstractStore):
"""Represents the backend for Azure Blob Storage Container."""

_ACCESS_DENIED_MESSAGE = 'Access Denied'
DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}'
DEFAULT_RESOURCE_GROUP_NAME = 'sky{user_hash}'
# Unlike resource group names, which only need to be unique within the
# subscription, storage account names must be globally unique across all of
# Azure users. Hence, the storage account name includes the subscription
# hash as well to ensure its uniqueness.
DEFAULT_STORAGE_ACCOUNT_NAME = (
'sky{region_hash}{user_hash}{subscription_hash}')
_SUBSCRIPTION_HASH_LENGTH = 4
_REGION_HASH_LENGTH = 4

class AzureBlobStoreMetadata(AbstractStore.StoreMetadata):
"""A pickle-able representation of Azure Blob Store.
Expand Down Expand Up @@ -1977,7 +1985,7 @@ def __init__(self,
name: str,
source: str,
storage_account_name: str = '',
region: Optional[str] = None,
region: Optional[str] = 'eastus',
is_sky_managed: Optional[bool] = None,
sync_on_reconstruction: bool = True):
self.storage_client: 'storage.Client'
Expand Down Expand Up @@ -2156,6 +2164,41 @@ def initialize(self):
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket

@staticmethod
def get_default_storage_account_name(region: Optional[str]) -> str:
"""Generates a unique default storage account name.

The subscription ID is included to avoid conflicts when user switches
subscriptions. The length of region_hash, user_hash, and
subscription_hash are adjusted to ensure the storage account name
adheres to the 24-character limit, as some region names can be very
long. Using a 4-character hash for the region helps keep the name
concise and prevents potential conflicts.
Reference: https://learn.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules#microsoftstorage # pylint: disable=line-too-long

Args:
region: Name of the region to create the storage account/container.

Returns:
Name of the default storage account.
"""
assert region is not None
subscription_id = azure.get_subscription_id()
subscription_hash_obj = hashlib.md5(subscription_id.encode('utf-8'))
subscription_hash = subscription_hash_obj.hexdigest(
)[:AzureBlobStore._SUBSCRIPTION_HASH_LENGTH]
region_hash_obj = hashlib.md5(region.encode('utf-8'))
region_hash = region_hash_obj.hexdigest()[:AzureBlobStore.
_REGION_HASH_LENGTH]

storage_account_name = (
AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region_hash=region_hash,
user_hash=common_utils.get_user_hash(),
subscription_hash=subscription_hash))

return storage_account_name

def _get_storage_account_and_resource_group(
self) -> Tuple[str, Optional[str]]:
"""Get storage account and resource group to be used for AzureBlobStore
Expand Down Expand Up @@ -2239,10 +2282,8 @@ def _get_storage_account_and_resource_group(
else:
# If storage account name is not provided from config, then
# use default resource group and storage account names.
storage_account_name = (
self.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=self.region,
user_hash=common_utils.get_user_hash()))
storage_account_name = self.get_default_storage_account_name(
self.region)
resource_group_name = (self.DEFAULT_RESOURCE_GROUP_NAME.format(
user_hash=common_utils.get_user_hash()))
try:
Expand Down
41 changes: 17 additions & 24 deletions tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from sky import jobs
from sky import serve
from sky import skypilot_config
from sky.adaptors import azure
from sky.adaptors import cloudflare
from sky.adaptors import ibm
from sky.clouds import AWS
Expand Down Expand Up @@ -1103,9 +1104,8 @@ def test_azure_storage_mounts_with_stop():
cloud = 'azure'
storage_name = f'sky-test-{int(time.time())}'
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region, user_hash=common_utils.get_user_hash()))
storage_account_name = (storage_lib.AzureBlobStore.
get_default_storage_account_name(default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
template_str = pathlib.Path(
Expand Down Expand Up @@ -2973,8 +2973,7 @@ def test_managed_jobs_storage(generic_cloud: str):
region = 'westus2'
region_flag = f' --region {region}'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=region, user_hash=common_utils.get_user_hash()))
storage_lib.AzureBlobStore.get_default_storage_account_name(region))
region_cmd = TestStorageWithCredentials.cli_region_cmd(
storage_lib.StoreType.AZURE,
storage_account_name=storage_account_name)
Expand Down Expand Up @@ -4270,9 +4269,8 @@ def cli_delete_cmd(store_type,
if store_type == storage_lib.StoreType.AZURE:
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region,
user_hash=common_utils.get_user_hash()))
storage_lib.AzureBlobStore.get_default_storage_account_name(
default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
return ('az storage container delete '
Expand Down Expand Up @@ -4307,11 +4305,9 @@ def cli_ls_cmd(store_type, bucket_name, suffix=''):
config_storage_account = skypilot_config.get_nested(
('azure', 'storage_account'), None)
storage_account_name = config_storage_account if (
config_storage_account is not None
) else (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region,
user_hash=common_utils.get_user_hash()))
config_storage_account is not None) else (
storage_lib.AzureBlobStore.get_default_storage_account_name(
default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
list_cmd = ('az storage blob list '
Expand Down Expand Up @@ -4373,9 +4369,8 @@ def cli_count_name_in_bucket(store_type,
if storage_account_name is None:
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.
format(region=default_region,
user_hash=common_utils.get_user_hash()))
storage_lib.AzureBlobStore.get_default_storage_account_name(
default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
return ('az storage blob list '
Expand All @@ -4401,9 +4396,8 @@ def cli_count_file_in_bucket(store_type, bucket_name):
elif store_type == storage_lib.StoreType.AZURE:
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region,
user_hash=common_utils.get_user_hash()))
storage_lib.AzureBlobStore.get_default_storage_account_name(
default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
return ('az storage blob list '
Expand Down Expand Up @@ -4605,8 +4599,8 @@ def tmp_az_bucket(self, tmp_bucket_name):
# Creates a temporary bucket using gsutil
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region, user_hash=common_utils.get_user_hash()))
storage_lib.AzureBlobStore.get_default_storage_account_name(
default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
bucket_uri = data_utils.AZURE_CONTAINER_URL.format(
Expand Down Expand Up @@ -4842,9 +4836,8 @@ def test_nonexistent_bucket(self, nonexist_bucket_url):
elif nonexist_bucket_url.startswith('https'):
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.
format(region=default_region,
user_hash=common_utils.get_user_hash()))
storage_lib.AzureBlobStore.get_default_storage_account_name(
default_region))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
command = f'az storage container exists --account-name {storage_account_name} --account-key {storage_account_key} --name {nonexist_bucket_name}'
Expand Down
Loading