Skip to content

Commit

Permalink
fixes globbing on windows
Browse files Browse the repository at this point in the history
  • Loading branch information
rudolfix committed Oct 9, 2023
1 parent b9cb2f2 commit 321be27
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 8 deletions.
7 changes: 6 additions & 1 deletion dlt/common/storages/configuration.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from urllib.parse import urlparse
from typing import TYPE_CHECKING, Any, Literal, Optional, Type, get_args, ClassVar, Dict, Union

Expand Down Expand Up @@ -74,7 +75,11 @@ class FilesystemConfiguration(BaseConfiguration):
def protocol(self) -> str:
"""`bucket_url` protocol"""
url = urlparse(self.bucket_url)
return url.scheme or "file"
# this prevents Windows absolute paths (e.g. "C:\\dir") from being recognized as URL schemes
if not url.scheme or (os.path.isabs(self.bucket_url) and "\\" in self.bucket_url):
return "file"
else:
return url.scheme

def on_resolved(self) -> None:
url = urlparse(self.bucket_url)
Expand Down
7 changes: 6 additions & 1 deletion dlt/common/storages/fsspec_filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,10 @@ def glob_files(
Returns:
Iterable[FileItem]: The list of files.
"""
import os
bucket_url_parsed = urlparse(bucket_url)
if not bucket_url_parsed.scheme:
# if this is a file path without a scheme (or a Windows absolute path)
if not bucket_url_parsed.scheme or (os.path.isabs(bucket_url) and "\\" in bucket_url):
# this is a file so create a proper file url
bucket_url = pathlib.Path(bucket_url).absolute().as_uri()
bucket_url_parsed = urlparse(bucket_url)
Expand All @@ -207,6 +209,9 @@ def glob_files(
for file, md in glob_result.items():
if md["type"] != "file":
continue
# make the path absolute (leading "/") so it forms a valid file:// URL
if bucket_url_parsed.scheme == "file" and not file.startswith("/"):
file = "/" + file
file_name = posixpath.relpath(file, bucket_path)
file_url = bucket_url_parsed.scheme + "://" + file
yield FileItem(
Expand Down
13 changes: 7 additions & 6 deletions tests/common/storages/test_local_filesystem.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import os
import itertools
import pytest


import pathlib

from dlt.common.storages import fsspec_from_config, FilesystemConfiguration
from dlt.common.storages.fsspec_filesystem import glob_files
Expand All @@ -14,13 +13,15 @@

@pytest.mark.parametrize("bucket_url,load_content", itertools.product(["file:///", "/", ""], [True, False]))
def test_filesystem_dict_local(bucket_url: str, load_content: bool) -> None:
if bucket_url in ["file://", ""]:
if bucket_url in [""]:
# relative paths
bucket_url += TEST_SAMPLE_FILES
bucket_url = TEST_SAMPLE_FILES
else:
bucket_url += os.path.abspath(TEST_SAMPLE_FILES)[1:]
if bucket_url == "/":
bucket_url = os.path.abspath(TEST_SAMPLE_FILES)
else:
bucket_url = pathlib.Path(TEST_SAMPLE_FILES).absolute().as_uri()

print(bucket_url)
config = FilesystemConfiguration(bucket_url=bucket_url)
filesystem, _ = fsspec_from_config(config)
# use glob to get data
Expand Down

0 comments on commit 321be27

Please sign in to comment.