From 51bc021a27b188cd22bd1901d4ab69b2a942b3b5 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Tue, 10 Dec 2024 16:36:36 +0100 Subject: [PATCH 01/22] fix file deletion --- .../simcore_service_storage/simcore_s3_dsm.py | 47 ++++++++++++++----- .../storage/tests/unit/test_handlers_files.py | 21 +++++++++ 2 files changed, 56 insertions(+), 12 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index b6e7e57f1cc..298b7e7d7ab 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -499,6 +499,18 @@ async def __get_link( return link + def find_enclosing(self, file_id: str, kwnown_file_ids: list[str]) -> str | None: + current_path = Path(file_id) + kwnon_file_paths = [Path(file_id) for file_id in kwnown_file_ids] + + while current_path and current_path != Path(current_path).parent: + if current_path in kwnon_file_paths: + return f"{current_path}" + + current_path = Path(current_path).parent + + return None + async def delete_file( self, user_id: UserID, @@ -523,22 +535,33 @@ async def delete_file( if not can.delete: raise FileAccessRightError(access_right="delete", file_id=file_id) - with suppress(FileMetaDataNotFoundError): - # NOTE: deleting might be slow, so better ensure we release the connection - async with self.engine.acquire() as conn: - file: FileMetaDataAtDB = await db_file_meta_data.get( - conn, TypeAdapter(SimcoreS3FileID).validate_python(file_id) - ) + # NOTE: deleting might be slow, so better ensure we release the connection + async with self.engine.acquire() as conn: + enclosing_file_id = self.find_enclosing( + file_id, + [ + known_file.file_id + for known_file in await db_file_meta_data.list_fmds( + conn, user_id=user_id + ) + ], + ) + + enclosing_file = await db_file_meta_data.get( + conn, TypeAdapter(SimcoreS3FileID).validate_python(enclosing_file_id) + ) + + if await db_file_meta_data.exists(conn, file_id): + await db_file_meta_data.delete(conn, [file_id]) + await get_s3_client(self.app).delete_objects_recursively( - bucket=file.bucket_name, + bucket=enclosing_file.bucket_name, prefix=( - ensure_ends_with(file.file_id, "/") - if file.is_directory - else file.file_id + ensure_ends_with(enclosing_file.file_id, "/") + if enclosing_file.is_directory and enclosing_file_id == file_id + else file_id ), ) - async with self.engine.acquire() as conn: - await db_file_meta_data.delete(conn, [file.file_id]) async def delete_project_simcore_s3( self, user_id: UserID, project_id: ProjectID, node_id: NodeID | None = None diff --git a/services/storage/tests/unit/test_handlers_files.py b/services/storage/tests/unit/test_handlers_files.py index f9fc415d86a..ced8e4bc8a4 100644 --- a/services/storage/tests/unit/test_handlers_files.py +++ b/services/storage/tests/unit/test_handlers_files.py @@ -1386,6 +1386,27 @@ async def test_upload_file_is_directory_and_remove_content( ) assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT + # DELETE ONE FILE FROM THE DIRECTORY + + assert client.app + delete_url = ( + client.app.router["delete_file"] + .url_for( + location_id=f"{location_id}", + file_id=urllib.parse.quote(list_of_files[0].file_id, safe=""), + ) + .with_query(user_id=user_id) + ) + response = await client.delete(f"{delete_url}") + _, error = await assert_status(response, status.HTTP_204_NO_CONTENT) + assert error is None + + list_of_files: list[FileMetaDataGet] = await _list_files_legacy( + client, user_id, location_id, directory_file_upload + ) + + assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - 1 + # DIRECTORY REMOVAL await delete_directory(directory_file_upload=directory_file_upload) From d612a58b7e85e0f1e5e547a8200f56a14870745b Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 11 Dec 2024 10:29:00 +0100 Subject: [PATCH 02/22] update implementation --- .../simcore_service_storage/simcore_s3_dsm.py | 45 +++++++------------ 1 file changed, 15 insertions(+), 30 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 298b7e7d7ab..0deb0686bc4 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -499,13 +499,17 @@ async def __get_link( return link - def find_enclosing(self, file_id: str, kwnown_file_ids: list[str]) -> str | None: + async def __find_enclosing_file( + self, conn: SAConnection, user_id: UserID, file_id: SimcoreS3FileID + ) -> FileMetaDataAtDB | None: + kwnon_files = { + Path(known_file.file_id): known_file + for known_file in await db_file_meta_data.list_fmds(conn, user_id=user_id) + } current_path = Path(file_id) - kwnon_file_paths = [Path(file_id) for file_id in kwnown_file_ids] - while current_path and current_path != Path(current_path).parent: - if current_path in kwnon_file_paths: - return f"{current_path}" + if current_path in kwnon_files: + return kwnon_files.get(current_path) current_path = Path(current_path).parent @@ -535,33 +539,14 @@ async def delete_file( if not can.delete: raise FileAccessRightError(access_right="delete", file_id=file_id) - # NOTE: deleting might be slow, so better ensure we release the connection - async with self.engine.acquire() as conn: - enclosing_file_id = self.find_enclosing( - file_id, - [ - known_file.file_id - for known_file in await db_file_meta_data.list_fmds( - conn, user_id=user_id - ) - ], - ) - - enclosing_file = await db_file_meta_data.get( - conn, TypeAdapter(SimcoreS3FileID).validate_python(enclosing_file_id) - ) - - if await db_file_meta_data.exists(conn, file_id): + enclosing_file = await self.__find_enclosing_file(conn, user_id, file_id) + if enclosing_file and enclosing_file.file_id == file_id: await db_file_meta_data.delete(conn, [file_id]) - await get_s3_client(self.app).delete_objects_recursively( - bucket=enclosing_file.bucket_name, - prefix=( - ensure_ends_with(enclosing_file.file_id, "/") - if enclosing_file.is_directory and enclosing_file_id == file_id - else file_id - ), - ) + await get_s3_client(self.app).delete_objects_recursively( + bucket=self.simcore_bucket_name, + prefix=file_id, + ) async def delete_project_simcore_s3( self, user_id: UserID, project_id: ProjectID, node_id: NodeID | None = None From 325ae15f309c9d5ce5339767bc2186b6abaaa991 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 11 Dec 2024 11:26:06 +0100 Subject: [PATCH 03/22] fix pylint --- .../simcore_service_storage/simcore_s3_dsm.py | 24 +++++-------------- .../simcore_s3_dsm_utils.py | 19 ++++++++++++++- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 0deb0686bc4..fab5b233477 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -79,7 +79,11 @@ from .s3 import get_s3_client from .s3_utils import S3TransferDataCB, update_task_progress from .settings import Settings -from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id +from .simcore_s3_dsm_utils import ( + expand_directory, + find_enclosing_file, + get_directory_file_id, +) from .utils import ( convert_db_to_model, download_to_file_or_raise, @@ -499,22 +503,6 @@ async def __get_link( return link - async def __find_enclosing_file( - self, conn: SAConnection, user_id: UserID, file_id: SimcoreS3FileID - ) -> FileMetaDataAtDB | None: - kwnon_files = { - Path(known_file.file_id): known_file - for known_file in await db_file_meta_data.list_fmds(conn, user_id=user_id) - } - current_path = Path(file_id) - while current_path and current_path != Path(current_path).parent: - if current_path in kwnon_files: - return kwnon_files.get(current_path) - - current_path = Path(current_path).parent - - return None - async def delete_file( self, user_id: UserID, @@ -539,7 +527,7 @@ async def delete_file( if not can.delete: raise FileAccessRightError(access_right="delete", file_id=file_id) - enclosing_file = await self.__find_enclosing_file(conn, user_id, file_id) + enclosing_file = await find_enclosing_file(conn, user_id, file_id) if enclosing_file and enclosing_file.file_id == file_id: await db_file_meta_data.delete(conn, [file_id]) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index e4a58549e31..e88d2931b27 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -14,7 +14,7 @@ from . import db_file_meta_data from .exceptions import FileMetaDataNotFoundError -from .models import FileMetaData, FileMetaDataAtDB +from .models import FileMetaData, FileMetaDataAtDB, UserID from .utils import convert_db_to_model @@ -119,3 +119,20 @@ async def _get_fmd( directory_file_id_fmd = await _get_fmd(conn, directory_file_id) return directory_file_id if directory_file_id_fmd else None + + +async def find_enclosing_file( + conn: SAConnection, user_id: UserID, file_id: SimcoreS3FileID +) -> FileMetaDataAtDB | None: + kwnon_files = { + Path(known_file.file_id): known_file + for known_file in await db_file_meta_data.list_fmds(conn, user_id=user_id) + } + current_path = Path(file_id) + while current_path and current_path != Path(current_path).parent: + if current_path in kwnon_files: + return kwnon_files.get(current_path) + + current_path = Path(current_path).parent + + return None From e6852f284ce91c2dfcba4b3be00927b90977e671 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 11 Dec 2024 16:55:26 +0100 Subject: [PATCH 04/22] update enclosing --- .../src/simcore_service_storage/simcore_s3_dsm.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index fab5b233477..0bc0511bbc8 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -528,8 +528,13 @@ async def delete_file( raise FileAccessRightError(access_right="delete", file_id=file_id) enclosing_file = await find_enclosing_file(conn, user_id, file_id) - if enclosing_file and enclosing_file.file_id == file_id: - await db_file_meta_data.delete(conn, [file_id]) + if enclosing_file: + if enclosing_file.file_id == file_id: + await db_file_meta_data.delete(conn, [file_id]) + else: + await db_file_meta_data.upsert( + conn, self._update_database_from_storage(enclosing_file) + ) await get_s3_client(self.app).delete_objects_recursively( bucket=self.simcore_bucket_name, From cac2c8e60bd47ddbaa543426c509905b9f03f221 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 11 Dec 2024 17:04:53 +0100 Subject: [PATCH 05/22] fix await --- services/storage/src/simcore_service_storage/simcore_s3_dsm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 0bc0511bbc8..9d170662eb1 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -533,7 +533,7 @@ async def delete_file( await db_file_meta_data.delete(conn, [file_id]) else: await db_file_meta_data.upsert( - conn, self._update_database_from_storage(enclosing_file) + conn, await self._update_database_from_storage(enclosing_file) ) await get_s3_client(self.app).delete_objects_recursively( From 21cf5a57d60bf29d3e64349fda70dbe061c226f3 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 11 Dec 2024 18:48:46 +0100 Subject: [PATCH 06/22] fix typo --- .../src/simcore_service_storage/simcore_s3_dsm_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index e88d2931b27..5b229927f42 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -124,14 +124,14 @@ async def _get_fmd( async def find_enclosing_file( conn: SAConnection, user_id: UserID, file_id: SimcoreS3FileID ) -> FileMetaDataAtDB | None: - kwnon_files = { + known_files = { Path(known_file.file_id): known_file for known_file in await db_file_meta_data.list_fmds(conn, user_id=user_id) } current_path = Path(file_id) while current_path and current_path != Path(current_path).parent: - if current_path in kwnon_files: - return kwnon_files.get(current_path) + if current_path in known_files: + return known_files.get(current_path) current_path = Path(current_path).parent From 551c68d79d056f68aed8abbd015e0187c40f632d Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Wed, 11 Dec 2024 18:50:36 +0100 Subject: [PATCH 07/22] set undefined file_size --- .../storage/src/simcore_service_storage/simcore_s3_dsm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 9d170662eb1..5f20d707459 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -532,9 +532,8 @@ async def delete_file( if enclosing_file.file_id == file_id: await db_file_meta_data.delete(conn, [file_id]) else: - await db_file_meta_data.upsert( - conn, await self._update_database_from_storage(enclosing_file) - ) + enclosing_file.file_size = UNDEFINED_SIZE_TYPE + await db_file_meta_data.upsert(conn, enclosing_file) await get_s3_client(self.app).delete_objects_recursively( bucket=self.simcore_bucket_name, From f5de42ba36f8197b1bfe96e0e6df354f93966830 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 10:21:55 +0100 Subject: [PATCH 08/22] enhance implementation --- .../simcore_service_storage/simcore_s3_dsm.py | 26 ++++++------ .../simcore_s3_dsm_utils.py | 42 ++++++------------- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 5f20d707459..7ae27bca8cd 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -79,11 +79,7 @@ from .s3 import get_s3_client from .s3_utils import S3TransferDataCB, update_task_progress from .settings import Settings -from .simcore_s3_dsm_utils import ( - expand_directory, - find_enclosing_file, - get_directory_file_id, -) +from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id, get_fmd from .utils import ( convert_db_to_model, download_to_file_or_raise, @@ -527,19 +523,23 @@ async def delete_file( if not can.delete: raise FileAccessRightError(access_right="delete", file_id=file_id) - enclosing_file = await find_enclosing_file(conn, user_id, file_id) - if enclosing_file: - if enclosing_file.file_id == file_id: - await db_file_meta_data.delete(conn, [file_id]) - else: - enclosing_file.file_size = UNDEFINED_SIZE_TYPE - await db_file_meta_data.upsert(conn, enclosing_file) - await get_s3_client(self.app).delete_objects_recursively( bucket=self.simcore_bucket_name, prefix=file_id, ) + async with self.engine.acquire() as conn: + file_fmd = await get_fmd(conn, file_id) + if file_fmd and not file_fmd.is_directory: + enclosing_dir_file_id = await get_directory_file_id(conn, file_id) + if enclosing_dir_file_id: + enclosing_dir_fmd = await db_file_meta_data.get( + conn, enclosing_dir_file_id + ) + enclosing_dir_fmd.file_size = -1 + await db_file_meta_data.upsert(conn, enclosing_dir_fmd) + await db_file_meta_data.delete(conn, [file_id]) + async def delete_project_simcore_s3( self, user_id: UserID, project_id: ProjectID, node_id: NodeID | None = None ) -> None: diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index 5b229927f42..969367905bc 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -14,7 +14,7 @@ from . import db_file_meta_data from .exceptions import FileMetaDataNotFoundError -from .models import FileMetaData, FileMetaDataAtDB, UserID +from .models import FileMetaData, FileMetaDataAtDB from .utils import convert_db_to_model @@ -78,6 +78,16 @@ async def expand_directory( return result +async def get_fmd( + conn: SAConnection, s3_file_id: StorageFileID +) -> FileMetaDataAtDB | None: + with suppress(FileMetaDataNotFoundError): + return await db_file_meta_data.get( + conn, TypeAdapter(SimcoreS3FileID).validate_python(s3_file_id) + ) + return None + + def get_simcore_directory(file_id: SimcoreS3FileID) -> str: try: directory_id = SimcoreS3DirectoryID.from_simcore_s3_object(file_id) @@ -94,16 +104,7 @@ async def get_directory_file_id( in the `file_meta_data` table """ - async def _get_fmd( - conn: SAConnection, s3_file_id: StorageFileID - ) -> FileMetaDataAtDB | None: - with suppress(FileMetaDataNotFoundError): - return await db_file_meta_data.get( - conn, TypeAdapter(SimcoreS3FileID).validate_python(s3_file_id) - ) - return None - - provided_file_id_fmd = await _get_fmd(conn, file_id) + provided_file_id_fmd = await get_fmd(conn, file_id) if provided_file_id_fmd: # file_meta_data exists it is not a directory return None @@ -116,23 +117,6 @@ async def _get_fmd( directory_file_id = TypeAdapter(SimcoreS3FileID).validate_python( directory_file_id_str ) - directory_file_id_fmd = await _get_fmd(conn, directory_file_id) + directory_file_id_fmd = await get_fmd(conn, directory_file_id) return directory_file_id if directory_file_id_fmd else None - - -async def find_enclosing_file( - conn: SAConnection, user_id: UserID, file_id: SimcoreS3FileID -) -> FileMetaDataAtDB | None: - known_files = { - Path(known_file.file_id): known_file - for known_file in await db_file_meta_data.list_fmds(conn, user_id=user_id) - } - current_path = Path(file_id) - while current_path and current_path != Path(current_path).parent: - if current_path in known_files: - return known_files.get(current_path) - - current_path = Path(current_path).parent - - return None From 1e0c7d9ad05f3bb81e7a0a8be8abe081668cab4a Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 10:44:43 +0100 Subject: [PATCH 09/22] fix --- .../simcore_service_storage/simcore_s3_dsm.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 7ae27bca8cd..bd5f84149c0 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -531,14 +531,15 @@ async def delete_file( async with self.engine.acquire() as conn: file_fmd = await get_fmd(conn, file_id) if file_fmd and not file_fmd.is_directory: - enclosing_dir_file_id = await get_directory_file_id(conn, file_id) - if enclosing_dir_file_id: - enclosing_dir_fmd = await db_file_meta_data.get( - conn, enclosing_dir_file_id - ) - enclosing_dir_fmd.file_size = -1 - await db_file_meta_data.upsert(conn, enclosing_dir_fmd) - await db_file_meta_data.delete(conn, [file_id]) + await db_file_meta_data.delete(conn, [file_id]) + + enclosing_dir_file_id = await get_directory_file_id(conn, file_id) + if enclosing_dir_file_id: + enclosing_dir_fmd = await db_file_meta_data.get( + conn, enclosing_dir_file_id + ) + enclosing_dir_fmd.file_size = -1 + await db_file_meta_data.upsert(conn, enclosing_dir_fmd) async def delete_project_simcore_s3( self, user_id: UserID, project_id: ProjectID, node_id: NodeID | None = None From c7d0946d44732a6c6bf1720d5a69ba2c4ee079d1 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 10:50:32 +0100 Subject: [PATCH 10/22] fix delete dirs --- services/storage/src/simcore_service_storage/simcore_s3_dsm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index bd5f84149c0..3da4463ae6a 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -530,7 +530,7 @@ async def delete_file( async with self.engine.acquire() as conn: file_fmd = await get_fmd(conn, file_id) - if file_fmd and not file_fmd.is_directory: + if file_fmd: await db_file_meta_data.delete(conn, [file_id]) enclosing_dir_file_id = await get_directory_file_id(conn, file_id) From 5c710dc5f41c6fbe04cdbe20dd65d9035f7b02a5 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 11:05:56 +0100 Subject: [PATCH 11/22] add constant --- services/storage/src/simcore_service_storage/simcore_s3_dsm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 3da4463ae6a..80257af388f 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -21,6 +21,7 @@ UploadedBytesTransferredCallback, ) from models_library.api_schemas_storage import ( + UNDEFINED_SIZE, UNDEFINED_SIZE_TYPE, LinkType, S3BucketName, @@ -538,7 +539,7 @@ async def delete_file( enclosing_dir_fmd = await db_file_meta_data.get( conn, enclosing_dir_file_id ) - enclosing_dir_fmd.file_size = -1 + enclosing_dir_fmd.file_size = UNDEFINED_SIZE await db_file_meta_data.upsert(conn, enclosing_dir_fmd) async def delete_project_simcore_s3( From 419005319b74958f779e1d633cd1b12cce1d9822 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 12:37:58 +0100 Subject: [PATCH 12/22] refine --- .../simcore_service_storage/simcore_s3_dsm.py | 17 ++++++----------- .../simcore_s3_dsm_utils.py | 6 +++--- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 80257af388f..34e9520eda6 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -80,7 +80,7 @@ from .s3 import get_s3_client from .s3_utils import S3TransferDataCB, update_task_progress from .settings import Settings -from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id, get_fmd +from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id, try_get_fmd from .utils import ( convert_db_to_model, download_to_file_or_raise, @@ -530,17 +530,12 @@ async def delete_file( ) async with self.engine.acquire() as conn: - file_fmd = await get_fmd(conn, file_id) - if file_fmd: + if await try_get_fmd(conn, file_id): await db_file_meta_data.delete(conn, [file_id]) - - enclosing_dir_file_id = await get_directory_file_id(conn, file_id) - if enclosing_dir_file_id: - enclosing_dir_fmd = await db_file_meta_data.get( - conn, enclosing_dir_file_id - ) - enclosing_dir_fmd.file_size = UNDEFINED_SIZE - await db_file_meta_data.upsert(conn, enclosing_dir_fmd) + if parent_dir_file_id := await get_directory_file_id(conn, file_id): + parent_dir_fmd = await db_file_meta_data.get(conn, parent_dir_file_id) + parent_dir_fmd.file_size = UNDEFINED_SIZE + await db_file_meta_data.upsert(conn, parent_dir_fmd) async def delete_project_simcore_s3( self, user_id: UserID, project_id: ProjectID, node_id: NodeID | None = None diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index 969367905bc..b5dd2cd5787 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -78,7 +78,7 @@ async def expand_directory( return result -async def get_fmd( +async def try_get_fmd( conn: SAConnection, s3_file_id: StorageFileID ) -> FileMetaDataAtDB | None: with suppress(FileMetaDataNotFoundError): @@ -104,7 +104,7 @@ async def get_directory_file_id( in the `file_meta_data` table """ - provided_file_id_fmd = await get_fmd(conn, file_id) + provided_file_id_fmd = await try_get_fmd(conn, file_id) if provided_file_id_fmd: # file_meta_data exists it is not a directory return None @@ -117,6 +117,6 @@ async def get_directory_file_id( directory_file_id = TypeAdapter(SimcoreS3FileID).validate_python( directory_file_id_str ) - directory_file_id_fmd = await get_fmd(conn, directory_file_id) + directory_file_id_fmd = await try_get_fmd(conn, directory_file_id) return directory_file_id if directory_file_id_fmd else None From 755a1a16ccd8694cd345e4831b15396ef3ce04e9 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 15:31:21 +0100 Subject: [PATCH 13/22] enhance impl --- .../db_file_meta_data.py | 10 +++---- .../simcore_service_storage/simcore_s3_dsm.py | 29 +++++++++++++++---- .../simcore_s3_dsm_utils.py | 23 +++++++-------- .../tests/unit/test_db_file_meta_data.py | 11 ++++--- .../storage/tests/unit/test_handlers_files.py | 21 ++++++++++++++ 5 files changed, 65 insertions(+), 29 deletions(-) diff --git a/services/storage/src/simcore_service_storage/db_file_meta_data.py b/services/storage/src/simcore_service_storage/db_file_meta_data.py index b742449ee00..8c2938d85bd 100644 --- a/services/storage/src/simcore_service_storage/db_file_meta_data.py +++ b/services/storage/src/simcore_service_storage/db_file_meta_data.py @@ -72,7 +72,7 @@ def _list_filter_with_partial_file_id_stmt( file_id_prefix: str | None, partial_file_id: str | None, sha256_checksum: SHA256Str | None, - only_files: bool, + is_directory: bool | None = None, limit: int | None = None, offset: int | None = None, ): @@ -98,8 +98,8 @@ def _list_filter_with_partial_file_id_stmt( conditions.append(file_meta_data.c.file_id.startswith(file_id_prefix)) if partial_file_id: conditions.append(file_meta_data.c.file_id.ilike(f"%{partial_file_id}%")) - if only_files: - conditions.append(file_meta_data.c.is_directory.is_(False)) + if is_directory is not None: + conditions.append(file_meta_data.c.is_directory.is_(is_directory)) if sha256_checksum: conditions.append(file_meta_data.c.sha256_checksum == sha256_checksum) @@ -119,7 +119,7 @@ async def list_filter_with_partial_file_id( file_id_prefix: str | None, partial_file_id: str | None, sha256_checksum: SHA256Str | None, - only_files: bool, + is_directory: bool | None = None, limit: int | None = None, offset: int | None = None, ) -> list[FileMetaDataAtDB]: @@ -129,7 +129,7 @@ async def list_filter_with_partial_file_id( file_id_prefix=file_id_prefix, partial_file_id=partial_file_id, sha256_checksum=sha256_checksum, - only_files=only_files, + is_directory=is_directory, limit=limit, offset=offset, ) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 34e9520eda6..791f0a1467c 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -80,7 +80,7 @@ from .s3 import get_s3_client from .s3_utils import S3TransferDataCB, update_task_progress from .settings import Settings -from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id, try_get_fmd +from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id from .utils import ( convert_db_to_model, download_to_file_or_raise, @@ -182,7 +182,6 @@ async def list_files( # noqa C901 ), file_id_prefix=None, partial_file_id=uuid_filter, - only_files=False, sha256_checksum=None, ) @@ -518,6 +517,11 @@ async def delete_file( # Only use this in those circumstances where a collaborator requires to delete a file (the current # permissions model will not allow him to do so, even though this is a legitimate action) # SEE https://github.com/ITISFoundation/osparc-simcore/issues/5159 + def _get_subpath(path: str, levels: int): + components = path.strip("/").split("/") + subpath = "/".join(components[:levels]) + return "/" + subpath if subpath else "/" + async with self.engine.acquire() as conn: if enforce_access_rights: can: AccessRights = await get_file_access_rights(conn, user_id, file_id) @@ -530,10 +534,23 @@ async def delete_file( ) async with self.engine.acquire() as conn: - if await try_get_fmd(conn, file_id): - await db_file_meta_data.delete(conn, [file_id]) - if parent_dir_file_id := await get_directory_file_id(conn, file_id): - parent_dir_fmd = await db_file_meta_data.get(conn, parent_dir_file_id) + try: + if await db_file_meta_data.get(conn, file_id): + await db_file_meta_data.delete(conn, [file_id]) + except FileMetaDataNotFoundError: + _logger.warning("File %s not found in database", file_id) + + if parent_dir_fmds := await db_file_meta_data.list_filter_with_partial_file_id( + conn, + user_or_project_filter=UserOrProjectFilter( + user_id=user_id, project_ids=[] + ), + file_id_prefix=_get_subpath(file_id, 2), + partial_file_id=None, + is_directory=True, + sha256_checksum=None, + ): + parent_dir_fmd = parent_dir_fmds[0] parent_dir_fmd.file_size = UNDEFINED_SIZE await db_file_meta_data.upsert(conn, parent_dir_fmd) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index b5dd2cd5787..e4a58549e31 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -78,16 +78,6 @@ async def expand_directory( return result -async def try_get_fmd( - conn: SAConnection, s3_file_id: StorageFileID -) -> FileMetaDataAtDB | None: - with suppress(FileMetaDataNotFoundError): - return await db_file_meta_data.get( - conn, TypeAdapter(SimcoreS3FileID).validate_python(s3_file_id) - ) - return None - - def get_simcore_directory(file_id: SimcoreS3FileID) -> str: try: directory_id = SimcoreS3DirectoryID.from_simcore_s3_object(file_id) @@ -104,7 +94,16 @@ async def get_directory_file_id( in the `file_meta_data` table """ - provided_file_id_fmd = await try_get_fmd(conn, file_id) + async def _get_fmd( + conn: SAConnection, s3_file_id: StorageFileID + ) -> FileMetaDataAtDB | None: + with suppress(FileMetaDataNotFoundError): + return await db_file_meta_data.get( + conn, TypeAdapter(SimcoreS3FileID).validate_python(s3_file_id) + ) + return None + + provided_file_id_fmd = await _get_fmd(conn, file_id) if provided_file_id_fmd: # file_meta_data exists it is not a directory return None @@ -117,6 +116,6 @@ async def get_directory_file_id( directory_file_id = TypeAdapter(SimcoreS3FileID).validate_python( directory_file_id_str ) - directory_file_id_fmd = await try_get_fmd(conn, directory_file_id) + directory_file_id_fmd = await _get_fmd(conn, directory_file_id) return directory_file_id if directory_file_id_fmd else None diff --git a/services/storage/tests/unit/test_db_file_meta_data.py b/services/storage/tests/unit/test_db_file_meta_data.py index da94c6a5eeb..d6c2684328f 100644 --- a/services/storage/tests/unit/test_db_file_meta_data.py +++ b/services/storage/tests/unit/test_db_file_meta_data.py @@ -31,7 +31,7 @@ def _check(func_smt, **kwargs): file_id_prefix=None, partial_file_id=None, sha256_checksum=None, - only_files=True, + is_directory=False, ) # WHERE file_meta_data.is_directory IS false ORDER BY file_meta_data.created_at ASC @@ -41,7 +41,7 @@ def _check(func_smt, **kwargs): file_id_prefix=None, partial_file_id=None, sha256_checksum=None, - only_files=True, + is_directory=False, ) # WHERE file_meta_data.user_id = '42' AND file_meta_data.is_directory IS false ORDER BY file_meta_data.created_at ASC @@ -53,7 +53,7 @@ def _check(func_smt, **kwargs): file_id_prefix=None, partial_file_id=None, sha256_checksum=None, - only_files=True, + is_directory=False, ) # WHERE (file_meta_data.user_id = '42' OR file_meta_data.project_id IN ('18d5'..., )) AND file_meta_data.is_directory IS false ORDER BY file_meta_data.created_at ASC @@ -65,7 +65,7 @@ def _check(func_smt, **kwargs): file_id_prefix=None, partial_file_id=None, sha256_checksum=None, - only_files=True, + is_directory=False, limit=10, offset=1, ) @@ -78,7 +78,6 @@ def _check(func_smt, **kwargs): file_id_prefix=None, partial_file_id="{project_id}/", sha256_checksum=None, - only_files=False, ) # As used in SimcoreS3DataManager.search_owned_files @@ -88,7 +87,7 @@ def _check(func_smt, **kwargs): file_id_prefix="api/", partial_file_id=None, sha256_checksum=faker.sha256(), - only_files=True, + is_directory=False, limit=10, offset=0, ) diff --git a/services/storage/tests/unit/test_handlers_files.py b/services/storage/tests/unit/test_handlers_files.py index ced8e4bc8a4..79893690e0d 100644 --- a/services/storage/tests/unit/test_handlers_files.py +++ b/services/storage/tests/unit/test_handlers_files.py @@ -1345,6 +1345,7 @@ async def test_upload_file_is_directory_and_remove_content( client: TestClient, location_id: LocationID, user_id: UserID, + faker: Faker, ): FILE_SIZE_IN_DIR = TypeAdapter(ByteSize).validate_python("1Mib") DIR_NAME = "some-dir" @@ -1407,6 +1408,26 @@ async def test_upload_file_is_directory_and_remove_content( assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - 1 + # DELETE NOT EXISTING + delete_url = ( + client.app.router["delete_file"] + .url_for( + location_id=f"{location_id}", + file_id=urllib.parse.quote( + "/".join(list_of_files[0].file_id.split("/")[:2]) + "/does_not_exist", + safe="", + ), + ) + .with_query(user_id=user_id) + ) + response = await client.delete(f"{delete_url}") + _, error = await assert_status(response, status.HTTP_204_NO_CONTENT) + assert error is None + + list_of_files: list[FileMetaDataGet] = await _list_files_legacy( + client, user_id, location_id, directory_file_upload + ) + # DIRECTORY REMOVAL await delete_directory(directory_file_upload=directory_file_upload) From cab5b553fee869a32ba8a27cdfda7591a9ce8b82 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 15:35:05 +0100 Subject: [PATCH 14/22] fix test --- .../simcore_service_storage/simcore_s3_dsm.py | 2 +- .../storage/tests/unit/test_handlers_files.py | 21 ++++++++++++------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 791f0a1467c..8470940d051 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -752,7 +752,7 @@ async def search_owned_files( ), file_id_prefix=file_id_prefix, partial_file_id=None, - only_files=True, + is_directory=False, sha256_checksum=sha256_checksum, limit=limit, offset=offset, diff --git a/services/storage/tests/unit/test_handlers_files.py b/services/storage/tests/unit/test_handlers_files.py index 79893690e0d..40d4b72f15e 100644 --- a/services/storage/tests/unit/test_handlers_files.py +++ b/services/storage/tests/unit/test_handlers_files.py @@ -1387,14 +1387,18 @@ async def test_upload_file_is_directory_and_remove_content( ) assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - # DELETE ONE FILE FROM THE DIRECTORY + # DELETE NOT EXISTING assert client.app + delete_url = ( client.app.router["delete_file"] .url_for( location_id=f"{location_id}", - file_id=urllib.parse.quote(list_of_files[0].file_id, safe=""), + file_id=urllib.parse.quote( + "/".join(list_of_files[0].file_id.split("/")[:2]) + "/does_not_exist", + safe="", + ), ) .with_query(user_id=user_id) ) @@ -1406,17 +1410,16 @@ async def test_upload_file_is_directory_and_remove_content( client, user_id, location_id, directory_file_upload ) - assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - 1 + assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - # DELETE NOT EXISTING + # DELETE ONE FILE FROM THE DIRECTORY + + assert client.app delete_url = ( client.app.router["delete_file"] .url_for( location_id=f"{location_id}", - file_id=urllib.parse.quote( - "/".join(list_of_files[0].file_id.split("/")[:2]) + "/does_not_exist", - safe="", - ), + file_id=urllib.parse.quote(list_of_files[0].file_id, safe=""), ) .with_query(user_id=user_id) ) @@ -1428,6 +1431,8 @@ async def test_upload_file_is_directory_and_remove_content( client, user_id, location_id, directory_file_upload ) + assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - 1 + # DIRECTORY REMOVAL await delete_directory(directory_file_upload=directory_file_upload) From a0901094154ea9919773c59be4db07649c42af78 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 15:39:21 +0100 Subject: [PATCH 15/22] add exception handling --- .../src/simcore_service_storage/simcore_s3_dsm.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 8470940d051..51a32895ec2 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -528,10 +528,13 @@ def _get_subpath(path: str, levels: int): if not can.delete: raise FileAccessRightError(access_right="delete", file_id=file_id) - await get_s3_client(self.app).delete_objects_recursively( - bucket=self.simcore_bucket_name, - prefix=file_id, - ) + try: + await get_s3_client(self.app).delete_objects_recursively( + bucket=self.simcore_bucket_name, + prefix=file_id, + ) + except S3KeyNotFoundError: + _logger.warning("File %s not found in S3", file_id) async with self.engine.acquire() as conn: try: From 6f0c61687840e8664d8081130d573d460266456f Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Thu, 12 Dec 2024 15:47:18 +0100 Subject: [PATCH 16/22] get best match --- services/storage/src/simcore_service_storage/simcore_s3_dsm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 51a32895ec2..aab8c90949b 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -553,7 +553,7 @@ def _get_subpath(path: str, levels: int): is_directory=True, sha256_checksum=None, ): - parent_dir_fmd = parent_dir_fmds[0] + parent_dir_fmd = max(parent_dir_fmds, key=lambda fmd: len(fmd.file_id)) parent_dir_fmd.file_size = UNDEFINED_SIZE await db_file_meta_data.upsert(conn, parent_dir_fmd) From 4e07227f23d8d89101097f3f6e458b247a109e01 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 13 Dec 2024 09:20:21 +0100 Subject: [PATCH 17/22] direct delete --- .../storage/src/simcore_service_storage/simcore_s3_dsm.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index aab8c90949b..ce045fb0151 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -537,11 +537,7 @@ def _get_subpath(path: str, levels: int): _logger.warning("File %s not found in S3", file_id) async with self.engine.acquire() as conn: - try: - if await db_file_meta_data.get(conn, file_id): - await db_file_meta_data.delete(conn, [file_id]) - except FileMetaDataNotFoundError: - _logger.warning("File %s not found in database", file_id) + await db_file_meta_data.delete(conn, [file_id]) if parent_dir_fmds := await db_file_meta_data.list_filter_with_partial_file_id( conn, From 15581b78123f212ea762942122c498426b41c1fc Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 13 Dec 2024 09:40:02 +0100 Subject: [PATCH 18/22] extract util --- .../simcore_service_storage/simcore_s3_dsm.py | 13 ++++++------ .../simcore_s3_dsm_utils.py | 6 ++++++ .../tests/unit/test_simcore_s3_dsm_utils.py | 21 +++++++++++++++++++ 3 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 services/storage/tests/unit/test_simcore_s3_dsm_utils.py diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index ce045fb0151..5ebc31120e7 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -80,7 +80,11 @@ from .s3 import get_s3_client from .s3_utils import S3TransferDataCB, update_task_progress from .settings import Settings -from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id +from .simcore_s3_dsm_utils import ( + compute_file_id_prefix, + expand_directory, + get_directory_file_id, +) from .utils import ( convert_db_to_model, download_to_file_or_raise, @@ -517,11 +521,6 @@ async def delete_file( # Only use this in those circumstances where a collaborator requires to delete a file (the current # permissions model will not allow him to do so, even though this is a legitimate action) # SEE https://github.com/ITISFoundation/osparc-simcore/issues/5159 - def _get_subpath(path: str, levels: int): - components = path.strip("/").split("/") - subpath = "/".join(components[:levels]) - return "/" + subpath if subpath else "/" - async with self.engine.acquire() as conn: if enforce_access_rights: can: AccessRights = await get_file_access_rights(conn, user_id, file_id) @@ -544,7 +543,7 @@ def _get_subpath(path: str, levels: int): user_or_project_filter=UserOrProjectFilter( user_id=user_id, project_ids=[] ), - file_id_prefix=_get_subpath(file_id, 2), + file_id_prefix=compute_file_id_prefix(file_id, 2), partial_file_id=None, is_directory=True, sha256_checksum=None, diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index e4a58549e31..dae3e2b48dd 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -119,3 +119,9 @@ async def _get_fmd( directory_file_id_fmd = await _get_fmd(conn, directory_file_id) return directory_file_id if directory_file_id_fmd else None + + +def compute_file_id_prefix(file_id: str, levels: int): + components = file_id.strip("/").split("/") + subpath = "/".join(components[:levels]) + return "/" + subpath if subpath else "/" diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py new file mode 100644 index 00000000000..1db057ba6c9 --- /dev/null +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -0,0 +1,21 @@ +import pytest +from simcore_service_storage.simcore_s3_dsm_utils import compute_file_id_prefix + + +@pytest.mark.parametrize( + "file_id, levels, expected", + [ + ( + "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin", + 2, + "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my", + ), + ( + "api/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin", + 2, + "api/42b9cc07-60f5-4d29-a063-176d1467901c/my", + ), + ], +) +def test_compute_file_id_prefix(file_id, levels, expected): + assert compute_file_id_prefix(file_id, levels) == expected From a0d4011e7c44cb66951759f0c3fe6be0f5c28ed5 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 13 Dec 2024 09:47:08 +0100 Subject: [PATCH 19/22] fix test --- services/storage/tests/unit/test_simcore_s3_dsm_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py index 1db057ba6c9..01869537c08 100644 --- a/services/storage/tests/unit/test_simcore_s3_dsm_utils.py +++ b/services/storage/tests/unit/test_simcore_s3_dsm_utils.py @@ -7,12 +7,12 @@ [ ( "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin", - 2, + 3, "b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my", ), ( "api/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin", - 2, + 3, "api/42b9cc07-60f5-4d29-a063-176d1467901c/my", ), ], From 0c664fad84f54685fe400fb9a06b83aff6467f50 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 13 Dec 2024 09:48:01 +0100 Subject: [PATCH 20/22] is_directory is required --- .../storage/src/simcore_service_storage/db_file_meta_data.py | 4 ++-- .../storage/src/simcore_service_storage/simcore_s3_dsm.py | 1 + .../src/simcore_service_storage/simcore_s3_dsm_utils.py | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/services/storage/src/simcore_service_storage/db_file_meta_data.py b/services/storage/src/simcore_service_storage/db_file_meta_data.py index 8c2938d85bd..593a48f72b2 100644 --- a/services/storage/src/simcore_service_storage/db_file_meta_data.py +++ b/services/storage/src/simcore_service_storage/db_file_meta_data.py @@ -72,7 +72,7 @@ def _list_filter_with_partial_file_id_stmt( file_id_prefix: str | None, partial_file_id: str | None, sha256_checksum: SHA256Str | None, - is_directory: bool | None = None, + is_directory: bool | None, limit: int | None = None, offset: int | None = None, ): @@ -119,7 +119,7 @@ async def list_filter_with_partial_file_id( file_id_prefix: str | None, partial_file_id: str | None, sha256_checksum: SHA256Str | None, - is_directory: bool | None = None, + is_directory: bool | None, limit: int | None = None, offset: int | None = None, ) -> list[FileMetaDataAtDB]: diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 5ebc31120e7..89b229d424e 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -185,6 +185,7 @@ async def list_files( # noqa C901 user_id=uid, project_ids=accessible_projects_ids ), file_id_prefix=None, + is_directory=None, partial_file_id=uuid_filter, sha256_checksum=None, ) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py index dae3e2b48dd..eb5f2f1240c 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm_utils.py @@ -123,5 +123,4 @@ async def _get_fmd( def compute_file_id_prefix(file_id: str, levels: int): components = file_id.strip("/").split("/") - subpath = "/".join(components[:levels]) - return "/" + subpath if subpath else "/" + return "/".join(components[:levels]) From 12a936dc45cd56af668407b35dce6e2d50dfd6ab Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 13 Dec 2024 09:52:58 +0100 Subject: [PATCH 21/22] fix missing --- services/storage/tests/unit/test_db_file_meta_data.py | 1 + 1 file changed, 1 insertion(+) diff --git a/services/storage/tests/unit/test_db_file_meta_data.py b/services/storage/tests/unit/test_db_file_meta_data.py index d6c2684328f..c362fabe82f 100644 --- a/services/storage/tests/unit/test_db_file_meta_data.py +++ b/services/storage/tests/unit/test_db_file_meta_data.py @@ -76,6 +76,7 @@ def _check(func_smt, **kwargs): _list_filter_with_partial_file_id_stmt, user_or_project_filter=UserOrProjectFilter(user_id=42, project_ids=[]), file_id_prefix=None, + is_directory=None, partial_file_id="{project_id}/", sha256_checksum=None, ) From a71c718769218c29048eee7330c83e2d41963436 Mon Sep 17 00:00:00 2001 From: Giancarlo Romeo Date: Fri, 13 Dec 2024 11:24:37 +0100 Subject: [PATCH 22/22] add comment --- services/storage/src/simcore_service_storage/simcore_s3_dsm.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py index 89b229d424e..d41630b5230 100644 --- a/services/storage/src/simcore_service_storage/simcore_s3_dsm.py +++ b/services/storage/src/simcore_service_storage/simcore_s3_dsm.py @@ -535,6 +535,8 @@ async def delete_file( ) except S3KeyNotFoundError: _logger.warning("File %s not found in S3", file_id) + # we still need to clean up the database entry (it exists) + # and to invalidate the size of the parent directory async with self.engine.acquire() as conn: await db_file_meta_data.delete(conn, [file_id])