From 518d86a7554b1351f839ee7cd57ed3fc607ed9e5 Mon Sep 17 00:00:00 2001 From: Jimmy Schleicher Date: Mon, 17 May 2021 12:32:28 -0400 Subject: [PATCH 1/3] Persist S3 client through Bucket __init__ --- tc_aws/aws/bucket.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tc_aws/aws/bucket.py b/tc_aws/aws/bucket.py index 0da6cf0..c253f1d 100644 --- a/tc_aws/aws/bucket.py +++ b/tc_aws/aws/bucket.py @@ -11,6 +11,7 @@ class Bucket(object): + _client = None _instances = {} @staticmethod @@ -35,8 +36,6 @@ def __init__(self, bucket, region, endpoint, max_retry=None): """ self._bucket = bucket - session = aiobotocore.get_session() - config = None if max_retry is not None: config = Config( @@ -45,12 +44,13 @@ def __init__(self, bucket, region, endpoint, max_retry=None): ) ) - self._client = session.create_client( - 's3', - region_name=region, - endpoint_url=endpoint, - config=config - ) + if self._client is None: + self._client = aiobotocore.get_session().create_client( + 's3', + region_name=region, + endpoint_url=endpoint, + config=config + ) async def get(self, path): """ From 152863e83ac5646312fffd120d550f0411d4c998 Mon Sep 17 00:00:00 2001 From: Jimmy Schleicher Date: Wed, 14 Jul 2021 13:49:27 -0400 Subject: [PATCH 2/3] Modify Bucket.exists to call HeadObject and return boolean --- tc_aws/aws/bucket.py | 14 ++++++++++++++ tc_aws/storages/s3_storage.py | 10 +--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/tc_aws/aws/bucket.py b/tc_aws/aws/bucket.py index c253f1d..d8ef50e 100644 --- a/tc_aws/aws/bucket.py +++ b/tc_aws/aws/bucket.py @@ -52,6 +52,20 @@ def __init__(self, bucket, region, endpoint, max_retry=None): config=config ) + async def exists(self, path): + """ + Checks if an object exists at a given path + :param string path: Path or 'key' to retrieve AWS object + """ + try: + await self._client.head_object( + Bucket=self._bucket, + Key=self._clean_key(path), + ) + except Exception: + return False + return True + async def get(self, path): """ Returns object at given path diff --git a/tc_aws/storages/s3_storage.py b/tc_aws/storages/s3_storage.py index 19a46d4..14ed488 100644 --- a/tc_aws/storages/s3_storage.py +++ b/tc_aws/storages/s3_storage.py @@ -150,15 +150,7 @@ async def exists(self, path): :param string path: Path to check """ file_abspath = self._normalize_path(path) - - try: - await self.storage.get(file_abspath) - except ClientError as err: - if err.response['Error']['Code'] == 'NoSuchKey': - return False - raise - - return True + return await self.storage.exists(file_abspath) async def remove(self, path): """ From 490941e6c346ac5f37b9c4009c3678ef4230bd65 Mon Sep 17 00:00:00 2001 From: Jimmy Schleicher Date: Wed, 14 Jul 2021 13:50:02 -0400 Subject: [PATCH 3/3] Use try/with block when reading response['Body'] --- tc_aws/loaders/s3_loader.py | 3 ++- tc_aws/result_storages/s3_storage.py | 3 ++- tc_aws/storages/s3_storage.py | 9 ++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/tc_aws/loaders/s3_loader.py b/tc_aws/loaders/s3_loader.py index 9a70a38..bd08247 100644 --- a/tc_aws/loaders/s3_loader.py +++ b/tc_aws/loaders/s3_loader.py @@ -63,7 +63,8 @@ async def load(context, url): return result result.successful = True - result.buffer = await file_key['Body'].read() + async with file_key['Body'] as stream: + result.buffer = await stream.read() result.metadata.update( size=file_key['ContentLength'], diff --git a/tc_aws/result_storages/s3_storage.py b/tc_aws/result_storages/s3_storage.py index a801a6e..601f7df 100644 --- a/tc_aws/result_storages/s3_storage.py +++ b/tc_aws/result_storages/s3_storage.py @@ -60,7 +60,8 @@ async def get(self, path = None): return None result = ResultStorageResult() - result.buffer = await key['Body'].read() + async with key['Body'] as stream: + result.buffer = await stream.read() result.successful = True result.metadata = { diff --git a/tc_aws/storages/s3_storage.py b/tc_aws/storages/s3_storage.py index 14ed488..f258c28 100644 --- a/tc_aws/storages/s3_storage.py +++ b/tc_aws/storages/s3_storage.py @@ -109,7 +109,8 @@ async def get_crypto(self, path): logger.warn("[STORAGE] s3 key not found at %s" % crypto_path) return None - file_key = await file_key['Body'].read() + async with file_key['Body'] as stream: + file_key = await stream.read() return file_key.decode('utf-8') @@ -129,7 +130,8 @@ async def get_detector_data(self, path): if not file_key or self.is_expired(file_key) or 'Body' not in file_key: return None - return loads(await file_key['Body'].read()) + async with file_key['Body'] as stream: + return loads(await stream.read()) async def get(self, path): """ @@ -142,7 +144,8 @@ async def get(self, path): except BotoCoreError: return None - return await file['Body'].read() + async with file['Body'] as stream: + return await stream.read() async def exists(self, path): """