From 6b74c7d0e80d79f990dec88d685d73188cdd35cf Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Wed, 21 Jun 2023 12:53:34 -0400 Subject: [PATCH 1/4] allow the base folder to not be a bucket --- waterbutler/providers/s3/provider.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/waterbutler/providers/s3/provider.py b/waterbutler/providers/s3/provider.py index 93655d2dc..e3a4dae22 100644 --- a/waterbutler/providers/s3/provider.py +++ b/waterbutler/providers/s3/provider.py @@ -685,7 +685,11 @@ async def _metadata_file(self, path, revision=None): async def _metadata_folder(self, path): await self._check_region() - params = {'prefix': path.path, 'delimiter': '/'} + if path._orig_path == '/' and self.settings.get('id'): + params = {'prefix': self.settings['id'], 'delimiter': '/'} + else: + params = {'prefix': path.path, 'delimiter': '/'} + resp = await self.make_request( 'GET', functools.partial(self.bucket.generate_url, settings.TEMP_URL_SECS, 'GET', query_parameters=params), @@ -721,13 +725,17 @@ async def _metadata_folder(self, path): items = [ S3FolderMetadata(item) - for item in prefixes + for item in prefixes if item['Prefix'] != path.path ] for content in contents: if content['Key'] == path.path: continue + base_folder = self.settings.get('id') + if base_folder and content['Key'] == base_folder: + continue + if content['Key'].endswith('/'): items.append(S3FolderKeyMetadata(content)) else: From b3feadb75e40048c6797d8827435a69db121d822 Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Tue, 11 Jul 2023 14:39:05 -0400 Subject: [PATCH 2/4] use colon delineation for S3 buckets --- waterbutler/providers/s3/provider.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/waterbutler/providers/s3/provider.py b/waterbutler/providers/s3/provider.py index e3a4dae22..21edd52da 100644 --- a/waterbutler/providers/s3/provider.py +++ b/waterbutler/providers/s3/provider.py @@ -686,8 +686,10 @@ async def _metadata_folder(self, 
path): await self._check_region() if path._orig_path == '/' and self.settings.get('id'): - params = {'prefix': self.settings['id'], 'delimiter': '/'} + base_folder = self.settings['id'].split(':/')[1] + params = {'prefix': base_folder, 'delimiter': '/'} else: + base_folder = None params = {'prefix': path.path, 'delimiter': '/'} resp = await self.make_request( @@ -732,7 +734,6 @@ async def _metadata_folder(self, path): if content['Key'] == path.path: continue - base_folder = self.settings.get('id') if base_folder and content['Key'] == base_folder: continue From 5b7ca195affc8d489913ac5bc3ed6678e4f1b296 Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Tue, 25 Jul 2023 09:35:41 -0400 Subject: [PATCH 3/4] fix folder paths --- waterbutler/providers/s3/provider.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/waterbutler/providers/s3/provider.py b/waterbutler/providers/s3/provider.py index 21edd52da..1285854b7 100644 --- a/waterbutler/providers/s3/provider.py +++ b/waterbutler/providers/s3/provider.py @@ -69,7 +69,10 @@ async def validate_v1_path(self, path, **kwargs): await self._check_region() if path == '/': - return WaterButlerPath(path) + # adjust path using base folder to include the bucket's root, so just the `/` translates to just + # the `/base_folder/` path + base_folder = self.settings.get('id', ':/').split(':/')[1] + return WaterButlerPath(f'/{base_folder}') implicit_folder = path.endswith('/') @@ -685,12 +688,10 @@ async def _metadata_file(self, path, revision=None): async def _metadata_folder(self, path): await self._check_region() - if path._orig_path == '/' and self.settings.get('id'): - base_folder = self.settings['id'].split(':/')[1] - params = {'prefix': base_folder, 'delimiter': '/'} - else: - base_folder = None - params = {'prefix': path.path, 'delimiter': '/'} + # The user-selected base folder, the root of where that user's node is connected.
+ prefix = self.settings['id'].split(':/')[1] if path == '/' and self.settings.get('id') else path.path + + params = {'prefix': prefix, 'delimiter': '/'} resp = await self.make_request( 'GET', @@ -731,10 +732,7 @@ async def _metadata_folder(self, path): ] for content in contents: - if content['Key'] == path.path: - continue - - if base_folder and content['Key'] == base_folder: + if content['Key'] == params['prefix']: continue if content['Key'].endswith('/'): From e627806d9bfc16f854a667d7126cbe4d70e97c3d Mon Sep 17 00:00:00 2001 From: John Tordoff <> Date: Tue, 1 Aug 2023 08:49:08 -0400 Subject: [PATCH 4/4] add unittests for s3 improvements --- tests/providers/s3/test_provider.py | 44 +++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/tests/providers/s3/test_provider.py b/tests/providers/s3/test_provider.py index cdfb36cda..ffc58d39c 100644 --- a/tests/providers/s3/test_provider.py +++ b/tests/providers/s3/test_provider.py @@ -232,6 +232,21 @@ async def test_validate_v1_path_file(self, provider, file_header_metadata, mock_ assert wb_path_v1 == wb_path_v0 + @pytest.mark.asyncio + @pytest.mark.aiohttpretty + async def test_validate_v1_path_file_with_subfolder(self, provider, file_header_metadata, mock_time): + file_path = '/my-subfolder/foobah' + provider.settings['id'] = 'the-bucket:/my-subfolder/' + + good_metadata_url = provider.bucket.new_key(file_path).generate_url(100, 'HEAD') + aiohttpretty.register_uri('HEAD', good_metadata_url, headers=file_header_metadata) + + assert WaterButlerPath('/my-subfolder/') == await provider.validate_v1_path('/') + wb_path_v1 = await provider.validate_v1_path(file_path) + wb_path_v0 = await provider.validate_path(file_path) + + assert wb_path_v1 == wb_path_v0 + @pytest.mark.asyncio @pytest.mark.aiohttpretty async def test_validate_v1_path_folder(self, provider, folder_metadata, mock_time): @@ -383,6 +398,35 @@ async def test_download_folder_400s(self, provider, mock_time): await 
provider.download(WaterButlerPath('/cool/folder/mom/')) assert e.value.code == 400 + @pytest.mark.asyncio + @pytest.mark.aiohttpretty + async def test_upload_to_subfolder_as_root(self, + provider, + file_content, + file_stream, + file_header_metadata, + mock_time + ): + + provider.settings['id'] = 'the-bucket:/my-subfolder/' + path = WaterButlerPath('/my-subfolder/foobah') + + content_md5 = hashlib.md5(file_content).hexdigest() + + url = provider.bucket.new_key(path.path).generate_url(100, 'PUT') + metadata_url = provider.bucket.new_key(path.path).generate_url(100, 'HEAD') + aiohttpretty.register_uri('HEAD', metadata_url, headers=file_header_metadata) + header = {'ETag': f'"{content_md5}"'} + aiohttpretty.register_uri('PUT', url, status=201, headers=header) + + metadata, created = await provider.upload(file_stream, path) + + assert metadata.kind == 'file' + assert metadata.path == '/my-subfolder/foobah' + assert not created + assert aiohttpretty.has_call(method='PUT', uri=url) + assert aiohttpretty.has_call(method='HEAD', uri=metadata_url) + @pytest.mark.asyncio @pytest.mark.aiohttpretty async def test_upload_update(self,