From 2feb807e1eee2271141c96ed37f184296dead659 Mon Sep 17 00:00:00 2001
From: IlyaFaer
Date: Wed, 26 Jun 2024 11:24:45 +0400
Subject: [PATCH] feat(filesystem): use only netloc and scheme for fingerprint

---
 dlt/common/storages/configuration.py            | 20 ++++++++++++++++++--
 tests/load/filesystem/test_filesystem_client.py | 18 +++++++++++++-----
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py
index 09beb0015e..b2bdb3a7b6 100644
--- a/dlt/common/storages/configuration.py
+++ b/dlt/common/storages/configuration.py
@@ -111,8 +111,24 @@ def resolve_credentials_type(self) -> Type[CredentialsConfiguration]:
         return self.PROTOCOL_CREDENTIALS.get(self.protocol) or Optional[CredentialsConfiguration]  # type: ignore[return-value]
 
     def fingerprint(self) -> str:
-        """Returns a fingerprint of bucket_url"""
-        return digest128(self.bucket_url) if self.bucket_url else ""
+        """Returns a fingerprint of the bucket URL scheme and netloc.
+
+        Everything after the netloc (path, query, fragment) is left out, so
+        all locations that share scheme and netloc share a fingerprint.
+
+        Returns:
+            str: Fingerprint.
+        """
+        if not self.bucket_url:
+            return ""
+
+        if self.is_local_path(self.bucket_url):
+            return digest128("")
+
+        uri = urlparse(self.bucket_url)
+        # Build the value from the parsed parts: str.replace(uri.path, "") drops
+        # every occurrence of the path text, e.g. "s3://cool/cool" -> "s3:/".
+        return digest128(f"{uri.scheme}://{uri.netloc}")
 
     def __str__(self) -> str:
         """Return displayable destination location"""
diff --git a/tests/load/filesystem/test_filesystem_client.py b/tests/load/filesystem/test_filesystem_client.py
index fbfd08271b..53e54c2f06 100644
--- a/tests/load/filesystem/test_filesystem_client.py
+++ b/tests/load/filesystem/test_filesystem_client.py
@@ -39,11 +39,19 @@ def logger_autouse() -> None:
 ]
 
 
-def test_filesystem_destination_configuration() -> None:
-    assert FilesystemDestinationClientConfiguration().fingerprint() == ""
-    assert FilesystemDestinationClientConfiguration(
-        bucket_url="s3://cool"
-    ).fingerprint() == digest128("s3://cool")
+@pytest.mark.parametrize(
+    "url, exp",
+    (
+        (None, ""),
+        ("/path/path2", digest128("")),
+        ("s3://cool", digest128("s3://cool")),
+        ("s3://cool.domain/path/path2", digest128("s3://cool.domain")),
+        ("s3://cool/", digest128("s3://cool")),
+        ("s3://cool/cool", digest128("s3://cool")),
+    ),
+)
+def test_filesystem_destination_configuration(url, exp) -> None:
+    assert FilesystemDestinationClientConfiguration(bucket_url=url).fingerprint() == exp
 
 
 @pytest.mark.parametrize("write_disposition", ("replace", "append", "merge"))