Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

remove field _is_sky_managed for intermediate bucket #4545

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 23 additions & 26 deletions sky/data/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@
_STORAGE_LOG_FILE_NAME = 'storage_sync.log'


def _is_sky_managed_intermediate_bucket(bucket_name: str) -> bool:
return re.match(r'skypilot-filemounts-.+-[a-f0-9]{8}',
bucket_name) is not None
Comment on lines +81 to +83
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Basing on bucket name is not a robust solution. Can't we manually set the force_delete accordingly in maybe_translate...?

Copy link
Collaborator Author

@zpoint zpoint Jan 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_force_delete is already True in our cases. The controller will call delete anyway; it just doesn’t know whether the bucket is sky_managed. Should it delete the entire bucket or just the sub_path



def get_cached_enabled_storage_clouds_or_refresh(
raise_if_no_cloud_access: bool = False) -> List[str]:
# This is a temporary solution until https://github.com/skypilot-org/skypilot/issues/1943 # pylint: disable=line-too-long
Expand Down Expand Up @@ -550,8 +555,6 @@ def __init__(
mode: StorageMode = StorageMode.MOUNT,
sync_on_reconstruction: bool = True,
# pylint: disable=invalid-name
_is_sky_managed: Optional[bool] = None,
# pylint: disable=invalid-name
_bucket_sub_path: Optional[str] = None
) -> None:
"""Initializes a Storage object.
Expand Down Expand Up @@ -591,16 +594,6 @@ def __init__(
there. This is set to false when the Storage object is created not
for direct use, e.g. for 'sky storage delete', or the storage is
being re-used, e.g., for `sky start` on a stopped cluster.
_is_sky_managed: Optional[bool]; Indicates if the storage is managed
by Sky. Without this argument, the controller's behavior differs
from the local machine. For example, if a bucket does not exist:
Local Machine (is_sky_managed=True) →
Controller (is_sky_managed=False).
With this argument, the controller aligns with the local machine,
ensuring it retains the is_sky_managed information from the YAML.
During teardown, if is_sky_managed is True, the controller should
delete the bucket. Otherwise, it might mistakenly delete only the
sub-path, assuming is_sky_managed is False.
_bucket_sub_path: Optional[str]; The subdirectory to use for the
storage object.
"""
Expand All @@ -610,7 +603,6 @@ def __init__(
self.mode = mode
assert mode in StorageMode
self.sync_on_reconstruction = sync_on_reconstruction
self._is_sky_managed = _is_sky_managed
self._bucket_sub_path = _bucket_sub_path

# TODO(romilb, zhwu): This is a workaround to support storage deletion
Expand Down Expand Up @@ -1024,7 +1016,6 @@ def add_store(self,
source=self.source,
region=region,
sync_on_reconstruction=self.sync_on_reconstruction,
is_sky_managed=self._is_sky_managed,
_bucket_sub_path=self._bucket_sub_path)
except exceptions.StorageBucketCreateError:
# Creation failed, so this must be sky managed store. Add failure
Expand Down Expand Up @@ -1157,8 +1148,6 @@ def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
mode_str = config.pop('mode', None)
force_delete = config.pop('_force_delete', None)
# pylint: disable=invalid-name
_is_sky_managed = config.pop('_is_sky_managed', None)
# pylint: disable=invalid-name
_bucket_sub_path = config.pop('_bucket_sub_path', None)
if force_delete is None:
force_delete = False
Expand All @@ -1180,7 +1169,6 @@ def from_yaml_config(cls, config: Dict[str, Any]) -> 'Storage':
source=source,
persistent=persistent,
mode=mode,
_is_sky_managed=_is_sky_managed,
_bucket_sub_path=_bucket_sub_path)
if store is not None:
storage_obj.add_store(StoreType(store.upper()))
Expand All @@ -1205,12 +1193,9 @@ def add_if_not_none(key: str, value: Optional[Any]):
add_if_not_none('source', self.source)

stores = None
is_sky_managed = self._is_sky_managed
if self.stores:
stores = ','.join([store.value for store in self.stores])
is_sky_managed = list(self.stores.values())[0].is_sky_managed
add_if_not_none('store', stores)
add_if_not_none('_is_sky_managed', is_sky_managed)
add_if_not_none('persistent', self.persistent)
add_if_not_none('mode', self.mode.value)
if self.force_delete:
Expand Down Expand Up @@ -1384,7 +1369,9 @@ def initialize(self):
# object (i.e., did not exist in global_user_state) and we should
# set the is_sky_managed property.
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket
self.is_sky_managed = (is_new_bucket or
_is_sky_managed_intermediate_bucket(
self.name))
Comment on lines +1373 to +1374
Copy link
Collaborator

@romilbhardwaj romilbhardwaj Jan 8, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_sky_managed should not be inferred on bucket name, it depends on whether the bucket is created by sky or not. Can we instead pass this information to the controller using force_delete, since this field is used mainly for deletion logic?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_force_delete is already True in our cases. The controller will call delete anyway; it just doesn’t know whether the bucket is sky_managed. Should it delete the entire bucket or just the sub_path


def upload(self):
"""Uploads source to store bucket.
Expand Down Expand Up @@ -1867,7 +1854,9 @@ def initialize(self):
# object (i.e., did not exist in global_user_state) and we should
# set the is_sky_managed property.
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket
self.is_sky_managed = (is_new_bucket or
_is_sky_managed_intermediate_bucket(
self.name))

def upload(self):
"""Uploads source to store bucket.
Expand Down Expand Up @@ -2442,7 +2431,9 @@ def initialize(self):
# object (i.e., did not exist in global_user_state) and we should
# set the is_sky_managed property.
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket
self.is_sky_managed = (is_new_bucket or
_is_sky_managed_intermediate_bucket(
self.name))

def _update_storage_account_name_and_resource(self):
self.storage_account_name, self.resource_group_name = (
Expand Down Expand Up @@ -3152,7 +3143,9 @@ def initialize(self):
# object (i.e., did not exist in global_user_state) and we should
# set the is_sky_managed property.
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket
self.is_sky_managed = (is_new_bucket or
_is_sky_managed_intermediate_bucket(
self.name))

def upload(self):
"""Uploads source to store bucket.
Expand Down Expand Up @@ -3631,7 +3624,9 @@ def initialize(self):
# object (i.e., did not exist in global_user_state) and we should
# set the is_sky_managed property.
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket
self.is_sky_managed = (is_new_bucket or
_is_sky_managed_intermediate_bucket(
self.name))

def upload(self):
"""Uploads files from local machine to bucket.
Expand Down Expand Up @@ -4075,7 +4070,9 @@ def initialize(self):
# object (i.e., did not exist in global_user_state) and we should
# set the is_sky_managed property.
# If is_sky_managed is specified, then we take no action.
self.is_sky_managed = is_new_bucket
self.is_sky_managed = (is_new_bucket or
_is_sky_managed_intermediate_bucket(
self.name))

def upload(self):
"""Uploads source to store bucket.
Expand Down
3 changes: 0 additions & 3 deletions sky/utils/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,6 @@ def get_storage_schema():
mode.value for mode in storage.StorageMode
]
},
'_is_sky_managed': {
'type': 'boolean',
},
'_bucket_sub_path': {
'type': 'string',
},
Expand Down
Loading