Skip to content

Commit

Permalink
Add preliminary tests (#814)
Browse files Browse the repository at this point in the history
Signed-off-by: Marcel Coetzee <[email protected]>
  • Loading branch information
Pipboyguy committed Dec 19, 2023
1 parent c360820 commit 03e8de5
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 10 deletions.
8 changes: 3 additions & 5 deletions dlt/common/storages/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ class FilesystemConfiguration(BaseConfiguration):
}

bucket_url: str = None
# should be an union of all possible credentials as found in PROTOCOL_CREDENTIALS
# should be a union of all possible credentials as found in PROTOCOL_CREDENTIALS
credentials: FileSystemCredentials

@property
Expand All @@ -112,7 +112,7 @@ def on_resolved(self) -> None:
"File path or netloc missing. Field bucket_url of FilesystemClientConfiguration"
" must contain valid url with a path or host:password component."
)
# this is just a path in local file system
# this is just a path in a local file system
if url.path == self.bucket_url:
url = url._replace(scheme="file")
self.bucket_url = url.geturl()
Expand All @@ -124,9 +124,7 @@ def resolve_credentials_type(self) -> Type[CredentialsConfiguration]:

def fingerprint(self) -> str:
"""Returns a fingerprint of bucket_url"""
if self.bucket_url:
return digest128(self.bucket_url)
return ""
return digest128(self.bucket_url) if self.bucket_url else ""

def __str__(self) -> str:
"""Return displayable destination location"""
Expand Down
12 changes: 7 additions & 5 deletions dlt/common/storages/fsspec_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def fsspec_filesystem(
) -> Tuple[AbstractFileSystem, str]:
"""Instantiates an authenticated fsspec `FileSystem` for a given `protocol` and credentials.
Please supply credentials instance corresponding to the protocol. The `protocol` is just the code name of the filesystem ie:
Please supply credentials instance corresponding to the protocol.
The `protocol` is just the code name of the filesystem i.e.:
* s3
* az, abfs
* gcs, gs
Expand All @@ -69,7 +70,7 @@ def fsspec_filesystem(
def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSystem, str]:
"""Instantiates an authenticated fsspec `FileSystem` from `config` argument.
Authenticates following filesystems:
Authenticates the following filesystems:
* s3
* az, abfs
* gcs, gs
Expand Down Expand Up @@ -97,7 +98,8 @@ def fsspec_from_config(config: FilesystemConfiguration) -> Tuple[AbstractFileSys
fs_kwargs["token"] = dict(config.credentials)
fs_kwargs["project"] = config.credentials.project_id
try:
return url_to_fs(config.bucket_url, use_listings_cache=False, **fs_kwargs) # type: ignore[no-any-return]
fs_kwargs["use_listings_cache"] = False
return url_to_fs(config.bucket_url, **fs_kwargs)
except ModuleNotFoundError as e:
raise MissingDependencyException("filesystem", [f"{version.DLT_PKG_NAME}[{proto}]"]) from e

Expand All @@ -122,7 +124,7 @@ def __init__(

@property
def fsspec(self) -> AbstractFileSystem:
"""The filesystem client based on the given credentials.
"""The filesystem client is based on the given credentials.
Returns:
AbstractFileSystem: The fsspec client.
Expand Down Expand Up @@ -202,7 +204,7 @@ def glob_files(
import os

bucket_url_parsed = urlparse(bucket_url)
# if this is file path without scheme
# if this is a file path without a scheme
if not bucket_url_parsed.scheme or (os.path.isabs(bucket_url) and "\\" in bucket_url):
# this is a file so create a proper file url
bucket_url = pathlib.Path(bucket_url).absolute().as_uri()
Expand Down
29 changes: 29 additions & 0 deletions tests/load/filesystem/test_filesystem_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,32 @@ def test_filesystem_instance_from_s3_endpoint(environment: Dict[str, str]) -> No
assert bucket_name == "dummy-bucket"
assert filesystem.key == "fake-access-key"
assert filesystem.secret == "fake-secret-key"


def test_filesystem_configuration_with_additional_arguments() -> None:
config = FilesystemConfiguration(bucket_url="az://root", additional_args={'use_ssl': True},
client_kwargs={'verify': 'public.crt'})
assert dict(config) == {"bucket_url": "az://root", "credentials": None,
"additional_args": {'use_ssl': True}, "client_kwargs": {'verify': 'public.crt'}}


def test_filesystem_instance_from_s3_endpoint_with_additional_arguments(environment: Dict[str, str]) -> None:
"""Test that fsspec instance is correctly configured when using endpoint URL, along with additional arguments."""
from s3fs import S3FileSystem

config = FilesystemConfiguration(bucket_url="s3://dummy-bucket", additional_args={'use_ssl': True},
client_kwargs={'verify': 'public.crt'})
filesystem, bucket_name = fsspec_from_config(config)

assert isinstance(filesystem, S3FileSystem)

assert hasattr(filesystem, 'use_ssl'), "use_ssl additional property does not exist in filesystem instance"
assert filesystem.use_ssl, "use_ssl property does not match expected value"

assert hasattr(filesystem, 'client_kwargs'), "client_kwargs property does not exist in filesystem instance"
assert filesystem.client_kwargs == {'verify': 'public.crt'}, "client_kwargs property does not match expected value"


def test_s3_wrong_certificate(environment: Dict[str, str]) -> None:
"""Test that an exception is raised when the wrong certificate is provided."""
pytest.skip("Not implemented yet")

0 comments on commit 03e8de5

Please sign in to comment.