From 9c10c40d5c2c4cb3a53f0a9d5e20ea8428eb82b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Josu=C3=A9=20Sehnem?= Date: Sat, 11 Nov 2023 18:23:48 -0300 Subject: [PATCH] add support for url query --- dlt/common/storages/fsspec_filesystem.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/dlt/common/storages/fsspec_filesystem.py b/dlt/common/storages/fsspec_filesystem.py index a1babca559..dba4f86d3b 100644 --- a/dlt/common/storages/fsspec_filesystem.py +++ b/dlt/common/storages/fsspec_filesystem.py @@ -199,12 +199,10 @@ def glob_files( bucket_url = pathlib.Path(bucket_url).absolute().as_uri() bucket_url_parsed = urlparse(bucket_url) - # not all filesystems use the host in list functions - if bucket_url_parsed.scheme in ["gdrive"]: - bucket_path = bucket_url_parsed.path - else: - bucket_path = bucket_url_parsed._replace(scheme='').geturl() - bucket_path = bucket_path[2:] if bucket_path.startswith("//") else bucket_path + bucket_path = bucket_url_parsed._replace(scheme='').geturl() + bucket_path = bucket_path[2:] if bucket_path.startswith("//") else bucket_path + bucket_path = bucket_path.split("?", 1)[0] + query = bucket_url_parsed.query filter_url = posixpath.join(bucket_path, file_glob) @@ -219,10 +217,7 @@ def glob_files( if bucket_url_parsed.scheme == "file" and not file.startswith("/"): file = "/" + file file_name = posixpath.relpath(file, bucket_path) - if bucket_url_parsed.scheme in ["gdrive"]: - file_url = bucket_url_parsed.scheme + "://" + bucket_url_parsed.netloc + "/" + file.lstrip("/") - else: - file_url = bucket_url_parsed.scheme + "://" + file + file_url = bucket_url_parsed.scheme + "://" + file + ("?" + query if query else "") yield FileItem( file_name=file_name, file_url=file_url,