Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🐛 Fix deletion of files in folders #6935

Merged
merged 39 commits into from
Dec 13, 2024
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
51bc021
fix file deletion
giancarloromeo Dec 10, 2024
27c9462
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
d612a58
update implementation
giancarloromeo Dec 11, 2024
c6a0f82
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
325ae15
fix pylint
giancarloromeo Dec 11, 2024
3d1699f
Merge branch 'fix-file-deletion' of github.com:giancarloromeo/osparc-…
giancarloromeo Dec 11, 2024
85d1577
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
a361b74
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
e83f183
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
e6852f2
update enclosing
giancarloromeo Dec 11, 2024
f12a49d
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
cac2c8e
fix await
giancarloromeo Dec 11, 2024
11c45fb
Merge branch 'fix-file-deletion' of github.com:giancarloromeo/osparc-…
giancarloromeo Dec 11, 2024
21cf5a5
fix typo
giancarloromeo Dec 11, 2024
551c68d
set undefined file_size
giancarloromeo Dec 11, 2024
77f548a
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
8f9231f
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 11, 2024
f5de42b
enhance implementation
giancarloromeo Dec 12, 2024
2b31008
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 12, 2024
1e0c7d9
fix
giancarloromeo Dec 12, 2024
c7d0946
fix delete dirs
giancarloromeo Dec 12, 2024
5c710dc
add constant
giancarloromeo Dec 12, 2024
4190053
refine
giancarloromeo Dec 12, 2024
37c18d4
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 12, 2024
0795cc9
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 12, 2024
755a1a1
enhance impl
giancarloromeo Dec 12, 2024
1926198
Merge branch 'fix-file-deletion' of github.com:giancarloromeo/osparc-…
giancarloromeo Dec 12, 2024
cab5b55
fix test
giancarloromeo Dec 12, 2024
a090109
add exception handling
giancarloromeo Dec 12, 2024
6f0c616
get best match
giancarloromeo Dec 12, 2024
25ac01c
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 12, 2024
4e07227
direct delete
giancarloromeo Dec 13, 2024
35ec289
Merge branch 'fix-file-deletion' of github.com:giancarloromeo/osparc-…
giancarloromeo Dec 13, 2024
15581b7
extract util
giancarloromeo Dec 13, 2024
a0d4011
fix test
giancarloromeo Dec 13, 2024
0c664fa
is_directory is required
giancarloromeo Dec 13, 2024
12a936d
fix missing
giancarloromeo Dec 13, 2024
b282d0e
Merge branch 'master' into fix-file-deletion
giancarloromeo Dec 13, 2024
a71c718
add comment
giancarloromeo Dec 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def _list_filter_with_partial_file_id_stmt(
file_id_prefix: str | None,
partial_file_id: str | None,
sha256_checksum: SHA256Str | None,
only_files: bool,
is_directory: bool | None,
limit: int | None = None,
offset: int | None = None,
):
Expand All @@ -98,8 +98,8 @@ def _list_filter_with_partial_file_id_stmt(
conditions.append(file_meta_data.c.file_id.startswith(file_id_prefix))
if partial_file_id:
conditions.append(file_meta_data.c.file_id.ilike(f"%{partial_file_id}%"))
if only_files:
conditions.append(file_meta_data.c.is_directory.is_(False))
if is_directory is not None:
conditions.append(file_meta_data.c.is_directory.is_(is_directory))
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved
if sha256_checksum:
conditions.append(file_meta_data.c.sha256_checksum == sha256_checksum)

Expand All @@ -119,7 +119,7 @@ async def list_filter_with_partial_file_id(
file_id_prefix: str | None,
partial_file_id: str | None,
sha256_checksum: SHA256Str | None,
only_files: bool,
is_directory: bool | None,
limit: int | None = None,
offset: int | None = None,
) -> list[FileMetaDataAtDB]:
Expand All @@ -129,7 +129,7 @@ async def list_filter_with_partial_file_id(
file_id_prefix=file_id_prefix,
partial_file_id=partial_file_id,
sha256_checksum=sha256_checksum,
only_files=only_files,
is_directory=is_directory,
limit=limit,
offset=offset,
)
Expand Down
47 changes: 30 additions & 17 deletions services/storage/src/simcore_service_storage/simcore_s3_dsm.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
UploadedBytesTransferredCallback,
)
from models_library.api_schemas_storage import (
UNDEFINED_SIZE,
UNDEFINED_SIZE_TYPE,
LinkType,
S3BucketName,
Expand Down Expand Up @@ -79,7 +80,11 @@
from .s3 import get_s3_client
from .s3_utils import S3TransferDataCB, update_task_progress
from .settings import Settings
from .simcore_s3_dsm_utils import expand_directory, get_directory_file_id
from .simcore_s3_dsm_utils import (
compute_file_id_prefix,
expand_directory,
get_directory_file_id,
)
from .utils import (
convert_db_to_model,
download_to_file_or_raise,
Expand Down Expand Up @@ -180,8 +185,8 @@ async def list_files( # noqa C901
user_id=uid, project_ids=accessible_projects_ids
),
file_id_prefix=None,
is_directory=None,
partial_file_id=uuid_filter,
only_files=False,
sha256_checksum=None,
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved
)

Expand Down Expand Up @@ -523,22 +528,30 @@ async def delete_file(
if not can.delete:
raise FileAccessRightError(access_right="delete", file_id=file_id)

with suppress(FileMetaDataNotFoundError):
# NOTE: deleting might be slow, so better ensure we release the connection
async with self.engine.acquire() as conn:
file: FileMetaDataAtDB = await db_file_meta_data.get(
conn, TypeAdapter(SimcoreS3FileID).validate_python(file_id)
)
try:
await get_s3_client(self.app).delete_objects_recursively(
bucket=file.bucket_name,
prefix=(
ensure_ends_with(file.file_id, "/")
if file.is_directory
else file.file_id
),
bucket=self.simcore_bucket_name,
prefix=file_id,
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved
)
async with self.engine.acquire() as conn:
await db_file_meta_data.delete(conn, [file.file_id])
except S3KeyNotFoundError:
_logger.warning("File %s not found in S3", file_id)
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved

async with self.engine.acquire() as conn:
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved
await db_file_meta_data.delete(conn, [file_id])

if parent_dir_fmds := await db_file_meta_data.list_filter_with_partial_file_id(
conn,
user_or_project_filter=UserOrProjectFilter(
user_id=user_id, project_ids=[]
),
file_id_prefix=compute_file_id_prefix(file_id, 2),
partial_file_id=None,
is_directory=True,
sha256_checksum=None,
):
parent_dir_fmd = max(parent_dir_fmds, key=lambda fmd: len(fmd.file_id))
parent_dir_fmd.file_size = UNDEFINED_SIZE
await db_file_meta_data.upsert(conn, parent_dir_fmd)

async def delete_project_simcore_s3(
self, user_id: UserID, project_id: ProjectID, node_id: NodeID | None = None
Expand Down Expand Up @@ -738,7 +751,7 @@ async def search_owned_files(
),
file_id_prefix=file_id_prefix,
partial_file_id=None,
only_files=True,
is_directory=False,
sha256_checksum=sha256_checksum,
limit=limit,
offset=offset,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,3 +119,8 @@ async def _get_fmd(
directory_file_id_fmd = await _get_fmd(conn, directory_file_id)

return directory_file_id if directory_file_id_fmd else None


def compute_file_id_prefix(file_id: str, levels: int):
components = file_id.strip("/").split("/")
return "/".join(components[:levels])
12 changes: 6 additions & 6 deletions services/storage/tests/unit/test_db_file_meta_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _check(func_smt, **kwargs):
file_id_prefix=None,
partial_file_id=None,
sha256_checksum=None,
only_files=True,
is_directory=False,
)
# WHERE file_meta_data.is_directory IS false ORDER BY file_meta_data.created_at ASC

Expand All @@ -41,7 +41,7 @@ def _check(func_smt, **kwargs):
file_id_prefix=None,
partial_file_id=None,
sha256_checksum=None,
only_files=True,
is_directory=False,
)
# WHERE file_meta_data.user_id = '42' AND file_meta_data.is_directory IS false ORDER BY file_meta_data.created_at ASC

Expand All @@ -53,7 +53,7 @@ def _check(func_smt, **kwargs):
file_id_prefix=None,
partial_file_id=None,
sha256_checksum=None,
only_files=True,
is_directory=False,
)
# WHERE (file_meta_data.user_id = '42' OR file_meta_data.project_id IN ('18d5'..., )) AND file_meta_data.is_directory IS false ORDER BY file_meta_data.created_at ASC

Expand All @@ -65,7 +65,7 @@ def _check(func_smt, **kwargs):
file_id_prefix=None,
partial_file_id=None,
sha256_checksum=None,
only_files=True,
is_directory=False,
limit=10,
offset=1,
)
Expand All @@ -76,9 +76,9 @@ def _check(func_smt, **kwargs):
_list_filter_with_partial_file_id_stmt,
user_or_project_filter=UserOrProjectFilter(user_id=42, project_ids=[]),
file_id_prefix=None,
is_directory=None,
partial_file_id="{project_id}/",
sha256_checksum=None,
only_files=False,
)

# As used in SimcoreS3DataManager.search_owned_files
Expand All @@ -88,7 +88,7 @@ def _check(func_smt, **kwargs):
file_id_prefix="api/",
partial_file_id=None,
sha256_checksum=faker.sha256(),
only_files=True,
is_directory=False,
limit=10,
offset=0,
)
47 changes: 47 additions & 0 deletions services/storage/tests/unit/test_handlers_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -1345,6 +1345,7 @@ async def test_upload_file_is_directory_and_remove_content(
client: TestClient,
location_id: LocationID,
user_id: UserID,
faker: Faker,
):
FILE_SIZE_IN_DIR = TypeAdapter(ByteSize).validate_python("1Mib")
DIR_NAME = "some-dir"
Expand Down Expand Up @@ -1386,6 +1387,52 @@ async def test_upload_file_is_directory_and_remove_content(
)
assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT

# DELETE NOT EXISTING

assert client.app

delete_url = (
client.app.router["delete_file"]
.url_for(
location_id=f"{location_id}",
file_id=urllib.parse.quote(
"/".join(list_of_files[0].file_id.split("/")[:2]) + "/does_not_exist",
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved
safe="",
),
)
.with_query(user_id=user_id)
)
response = await client.delete(f"{delete_url}")
_, error = await assert_status(response, status.HTTP_204_NO_CONTENT)
assert error is None

list_of_files: list[FileMetaDataGet] = await _list_files_legacy(
client, user_id, location_id, directory_file_upload
)

assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT

# DELETE ONE FILE FROM THE DIRECTORY

assert client.app
delete_url = (
client.app.router["delete_file"]
.url_for(
location_id=f"{location_id}",
file_id=urllib.parse.quote(list_of_files[0].file_id, safe=""),
)
.with_query(user_id=user_id)
)
response = await client.delete(f"{delete_url}")
_, error = await assert_status(response, status.HTTP_204_NO_CONTENT)
assert error is None

list_of_files: list[FileMetaDataGet] = await _list_files_legacy(
giancarloromeo marked this conversation as resolved.
Show resolved Hide resolved
client, user_id, location_id, directory_file_upload
)

assert len(list_of_files) == SUBDIR_COUNT * FILE_COUNT - 1

# DIRECTORY REMOVAL

await delete_directory(directory_file_upload=directory_file_upload)
Expand Down
21 changes: 21 additions & 0 deletions services/storage/tests/unit/test_simcore_s3_dsm_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import pytest
from simcore_service_storage.simcore_s3_dsm_utils import compute_file_id_prefix


@pytest.mark.parametrize(
"file_id, levels, expected",
[
(
"b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin",
3,
"b21a3b80-d578-4b33-a224-e24ee2e4966a/42b9cc07-60f5-4d29-a063-176d1467901c/my",
),
(
"api/42b9cc07-60f5-4d29-a063-176d1467901c/my/amazing/sub/folder/with/a/file.bin",
3,
"api/42b9cc07-60f5-4d29-a063-176d1467901c/my",
),
],
)
def test_compute_file_id_prefix(file_id, levels, expected):
assert compute_file_id_prefix(file_id, levels) == expected
Loading