From 03e8de56396506628bf684286c10f0f134cf8377 Mon Sep 17 00:00:00 2001 From: Marcel Coetzee Date: Tue, 19 Dec 2023 16:58:29 +0200 Subject: [PATCH] Add preliminary tests (#814) Signed-off-by: Marcel Coetzee --- dlt/common/storages/configuration.py | 8 ++--- dlt/common/storages/fsspec_filesystem.py | 12 ++++---- .../load/filesystem/test_filesystem_common.py | 29 +++++++++++++++++++ 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/dlt/common/storages/configuration.py b/dlt/common/storages/configuration.py index 83e7e88189..3e44948995 100644 --- a/dlt/common/storages/configuration.py +++ b/dlt/common/storages/configuration.py @@ -92,7 +92,7 @@ class FilesystemConfiguration(BaseConfiguration): } bucket_url: str = None - # should be an union of all possible credentials as found in PROTOCOL_CREDENTIALS + # should be a union of all possible credentials as found in PROTOCOL_CREDENTIALS credentials: FileSystemCredentials @property @@ -112,7 +112,7 @@ def on_resolved(self) -> None: "File path or netloc missing. Field bucket_url of FilesystemClientConfiguration" " must contain valid url with a path or host:password component." ) - # this is just a path in local file system + # this is just a path in a local file system if url.path == self.bucket_url: url = url._replace(scheme="file") self.bucket_url = url.geturl() @@ -124,9 +124,7 @@ def resolve_credentials_type(self) -> Type[CredentialsConfiguration]: def fingerprint(self) -> str: """Returns a fingerprint of bucket_url""" - if self.bucket_url: - return digest128(self.bucket_url) - return "" + return digest128(self.bucket_url) if self.bucket_url else "" def __str__(self) -> str: """Return displayable destination location""" diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index 18c1837e00..0ae7d221f5 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -56,7 +56,8 @@ def fsspec_filesystem( ) -> Tuple[AbstractFileSystem, str]: """Instantiates an authenticated fsspec `FileSystem` for a given `protocol` and credentials. - Please supply credentials instance corresponding to the protocol. The `protocol` is just the code name of the filesystem ie: + Please supply credentials instance corresponding to the protocol. + The `protocol` is just the code name of the filesystem i.e.: * s3 * az, abfs * gcs, gs @@ -69,7 +70,7 @@ def fsspec_filesystem( def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSystem, str]: """Instantiates an authenticated fsspec `FileSystem` from `config` argument. - Authenticates following filesystems: + Authenticates the following filesystems: * s3 * az, abfs * gcs, gs @@ -97,7 +98,8 @@ def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSys fs_kwargs["token"] = dict(config.credentials) fs_kwargs["project"] = config.credentials.project_id try: - return url_to_fs(config.bucket_url, use_listings_cache=False, **fs_kwargs) # type: ignore[no-any-return] + fs_kwargs["use_listings_cache"] = False + return url_to_fs(config.bucket_url, **fs_kwargs) except ModuleNotFoundError as e: raise MissingDependencyException("filesystem", [f"{version.DLT_PKG_NAME}[{proto}]"]) from e @@ -122,7 +124,7 @@ def __init__( @property def fsspec(self) -> AbstractFileSystem: - """The filesystem client based on the given credentials. + """The filesystem client is based on the given credentials. Returns: AbstractFileSystem: The fsspec client. @@ -202,7 +204,7 @@ def glob_files( import os bucket_url_parsed = urlparse(bucket_url) - # if this is file path without scheme + # if this is a file path without a scheme if not bucket_url_parsed.scheme or (os.path.isabs(bucket_url) and "\\" in bucket_url): # this is a file so create a proper file url bucket_url = pathlib.Path(bucket_url).absolute().as_uri() diff --git a/tests/load/filesystem/test_filesystem_common.py b/tests/load/filesystem/test_filesystem_common.py index 92cce62160..560a6fb278 100644 --- a/tests/load/filesystem/test_filesystem_common.py +++ b/tests/load/filesystem/test_filesystem_common.py @@ -94,3 +94,32 @@ def test_filesystem_instance_from_s3_endpoint(environment: Dict[str, str]) -> No assert bucket_name == "dummy-bucket" assert filesystem.key == "fake-access-key" assert filesystem.secret == "fake-secret-key" + + +def test_filesystem_configuration_with_additional_arguments() -> None: + config = FilesystemConfiguration(bucket_url="az://root", additional_args={'use_ssl': True}, + client_kwargs={'verify': 'public.crt'}) + assert dict(config) == {"bucket_url": "az://root", "credentials": None, + "additional_args": {'use_ssl': True}, "client_kwargs": {'verify': 'public.crt'}} + + +def test_filesystem_instance_from_s3_endpoint_with_additional_arguments(environment: Dict[str, str]) -> None: + """Test that fsspec instance is correctly configured when using endpoint URL, along with additional arguments.""" + from s3fs import S3FileSystem + + config = FilesystemConfiguration(bucket_url="s3://dummy-bucket", additional_args={'use_ssl': True}, + client_kwargs={'verify': 'public.crt'}) + filesystem, bucket_name = fsspec_from_config(config) + + assert isinstance(filesystem, S3FileSystem) + + assert hasattr(filesystem, 'use_ssl'), "use_ssl additional property does not exist in filesystem instance" + assert filesystem.use_ssl, "use_ssl property does not match expected value" + + assert hasattr(filesystem, 'client_kwargs'), "client_kwargs property does not exist in filesystem instance" + assert filesystem.client_kwargs == {'verify': 'public.crt'}, "client_kwargs property does not match expected value" + + +def test_s3_wrong_certificate(environment: Dict[str, str]) -> None: + """Test that an exception is raised when the wrong certificate is provided.""" + pytest.skip("Not implemented yet")