Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Azure][Storage] Update default storage account naming with subscription id hash #3796

10 changes: 10 additions & 0 deletions sky/adaptors/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import asyncio
import datetime
import functools
import hashlib
import logging
import threading
import time
Expand All @@ -28,6 +29,7 @@

_session_creation_lock = threading.RLock()
_MAX_RETRY_FOR_GET_SUBSCRIPTION_ID = 5
_SUBSCRIPTION_HASH_LENGTH = 4


@common.load_lazy_modules(modules=_LAZY_MODULES)
Expand Down Expand Up @@ -459,3 +461,11 @@ def deployment_mode():
"""Azure deployment mode."""
from azure.mgmt.resource.resources.models import DeploymentMode
return DeploymentMode


def get_subscription_hash() -> str:
"""Returns user's subsciprion ID specific hash."""
landscapepainter marked this conversation as resolved.
Show resolved Hide resolved
subscription_id = get_subscription_id()
hash_obj = hashlib.md5(subscription_id.encode('utf-8'))
subscription_hash = hash_obj.hexdigest()[:_SUBSCRIPTION_HASH_LENGTH]
return subscription_hash
13 changes: 11 additions & 2 deletions sky/data/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1942,8 +1942,12 @@ class AzureBlobStore(AbstractStore):
"""Represents the backend for Azure Blob Storage Container."""

_ACCESS_DENIED_MESSAGE = 'Access Denied'
DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}'
DEFAULT_RESOURCE_GROUP_NAME = 'sky{user_hash}'
# Unlike resource group names, which only need to be unique within the
# subscription, storage account names must be globally unique across all of
# Azure users. Hence, the storage account name includes the subscription
# hash as well to ensure its uniqueness.
DEFAULT_STORAGE_ACCOUNT_NAME = 'sky{region}{user_hash}{subscription_hash}'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks like azure allows upto 24 characters for account name. With a 4 char for subscription hash, 8 chars for user hash and 3 chars for sky, we are getting dangerously to the limit (9 characters left). E.g., using region southcentralus (14 chars) may cause Azure to return an error.

Can you check if we need to shorten it? If so, perhaps we can truncate the user hash and subscription has to 2 characters each?

Copy link
Collaborator Author

@landscapepainter landscapepainter Aug 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for catching this. We indeed have to shorten this. It seems like the longest region name is australiasoutheast with 18 chars. So keeping subscription hash and user hash to 2 characters each would not work as the length becomes 25 with the prefix, sky. Also using 1 character for either subscription hash or user hash seemed to be concerning as well. So I hashed the region and kept it to have length of 4. So this default storage account name will have a fixed length of 19 characters now.


class AzureBlobStoreMetadata(AbstractStore.StoreMetadata):
"""A pickle-able representation of Azure Blob Store.
Expand Down Expand Up @@ -2239,10 +2243,15 @@ def _get_storage_account_and_resource_group(
else:
# If storage account name is not provided from config, then
# use default resource group and storage account names.
# To ensure uniqueness, we must take the subscription ID into
# account. This is necessary because the user may switch
# between different subscription IDs while using the same
# machine.
storage_account_name = (
self.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=self.region,
user_hash=common_utils.get_user_hash()))
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
resource_group_name = (self.DEFAULT_RESOURCE_GROUP_NAME.format(
user_hash=common_utils.get_user_hash()))
try:
Expand Down
24 changes: 17 additions & 7 deletions tests/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from sky import jobs
from sky import serve
from sky import skypilot_config
from sky.adaptors import azure
from sky.adaptors import cloudflare
from sky.adaptors import ibm
from sky.clouds import AWS
Expand Down Expand Up @@ -1105,7 +1106,9 @@ def test_azure_storage_mounts_with_stop():
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region, user_hash=common_utils.get_user_hash()))
region=default_region,
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
template_str = pathlib.Path(
Expand Down Expand Up @@ -4272,7 +4275,8 @@ def cli_delete_cmd(store_type,
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region,
user_hash=common_utils.get_user_hash()))
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
return ('az storage container delete '
Expand Down Expand Up @@ -4311,7 +4315,8 @@ def cli_ls_cmd(store_type, bucket_name, suffix=''):
) else (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region,
user_hash=common_utils.get_user_hash()))
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
list_cmd = ('az storage blob list '
Expand Down Expand Up @@ -4375,7 +4380,8 @@ def cli_count_name_in_bucket(store_type,
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.
format(region=default_region,
user_hash=common_utils.get_user_hash()))
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
return ('az storage blob list '
Expand Down Expand Up @@ -4403,7 +4409,8 @@ def cli_count_file_in_bucket(store_type, bucket_name):
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region,
user_hash=common_utils.get_user_hash()))
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
return ('az storage blob list '
Expand Down Expand Up @@ -4606,7 +4613,9 @@ def tmp_az_bucket(self, tmp_bucket_name):
default_region = 'eastus'
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.format(
region=default_region, user_hash=common_utils.get_user_hash()))
region=default_region,
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
bucket_uri = data_utils.AZURE_CONTAINER_URL.format(
Expand Down Expand Up @@ -4844,7 +4853,8 @@ def test_nonexistent_bucket(self, nonexist_bucket_url):
storage_account_name = (
storage_lib.AzureBlobStore.DEFAULT_STORAGE_ACCOUNT_NAME.
format(region=default_region,
user_hash=common_utils.get_user_hash()))
user_hash=common_utils.get_user_hash(),
subscription_hash=azure.get_subscription_hash()))
storage_account_key = data_utils.get_az_storage_account_key(
storage_account_name)
command = f'az storage container exists --account-name {storage_account_name} --account-key {storage_account_key} --name {nonexist_bucket_name}'
Expand Down
Loading