From 8b8d04b35d29c05434e64d9e546aacd59b467b71 Mon Sep 17 00:00:00 2001 From: Jay Luker Date: Mon, 15 Jul 2024 13:07:53 -0400 Subject: [PATCH] Add retries to the individual streaming of files from zoom to s3 - added tenacity (retry library) to function requirements - failed files will be retried up to 5 times with a 2m wait in between - set `tcp_keepalive=True` for the boto3 s3 client; this seems like a smart thing to do anyway, even though the connection issues that are motivating the retries are on the zoom side. See: https://www.miketheman.net/2022/10/04/reduce-aws-lambda-latencies-with-keep-alive-in-python/ --- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 5 ++ function_requirements/common-requirements.in | 1 + function_requirements/common-requirements.txt | 24 +++++----- functions/zoom-downloader.py | 32 +++++++++++-- requirements/base.in | 2 + requirements/base.txt | 16 ++++--- requirements/dev.txt | 47 +++++++++++++++---- requirements/tox.txt | 4 +- tests/test_downloader.py | 34 ++++++++++++++ 10 files changed, 137 insertions(+), 30 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1194bc1..081612c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,6 +8,6 @@ repos: types: [python] - id: black name: black - entry: black + entry: black -v language: system types: [python] diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c73ea6..d7164dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [unreleased] +## [4.2.2 - 2024-08-02] + +- added internal retry mechanism when downloading from zoom +- added `tcp_keepalive=True` to the boto3 s3 client + ## [4.2.0 - 2023-10-10] ### Added diff --git a/function_requirements/common-requirements.in b/function_requirements/common-requirements.in index 5d7ca98..7fa5505 100644 --- a/function_requirements/common-requirements.in +++ b/function_requirements/common-requirements.in @@ -1,6 +1,7 @@ 
boto3 aws-lambda-logging requests +tenacity PyJWT pytz python-dotenv diff --git a/function_requirements/common-requirements.txt b/function_requirements/common-requirements.txt index af244b3..98b64f7 100644 --- a/function_requirements/common-requirements.txt +++ b/function_requirements/common-requirements.txt @@ -2,12 +2,12 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --resolver=backtracking common-requirements.in +# pip-compile function_requirements/common-requirements.in # aws-lambda-logging==0.1.1 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in boto3==1.26.142 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in botocore==1.29.142 # via # boto3 @@ -21,10 +21,10 @@ charset-normalizer==3.1.0 google-api-core==2.11.0 # via google-api-python-client google-api-python-client==2.87.0 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in google-auth==2.19.0 # via - # -r common-requirements.in + # -r function_requirements/common-requirements.in # google-api-core # google-api-python-client # google-auth-httplib2 @@ -32,7 +32,7 @@ google-auth==2.19.0 google-auth-httplib2==0.1.0 # via google-api-python-client google-auth-oauthlib==1.0.0 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in googleapis-common-protos==1.59.0 # via google-api-core httplib2==0.22.0 @@ -58,25 +58,25 @@ pyasn1==0.5.0 pyasn1-modules==0.3.0 # via google-auth pyjwt==2.7.0 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in pyparsing==3.0.9 # via httplib2 python-dateutil==2.8.2 # via botocore python-dotenv==1.0.0 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in pytz==2023.3 - # via -r common-requirements.in + # via -r function_requirements/common-requirements.in requests==2.31.0 # via - # -r common-requirements.in 
+ # -r function_requirements/common-requirements.in # google-api-core # requests-oauthlib requests-oauthlib==1.3.1 # via google-auth-oauthlib rsa==4.7.2 # via - # -r common-requirements.in + # -r function_requirements/common-requirements.in # google-auth s3transfer==0.6.1 # via boto3 @@ -85,6 +85,8 @@ six==1.16.0 # google-auth # google-auth-httplib2 # python-dateutil +tenacity==8.5.0 + # via -r function_requirements/common-requirements.in uritemplate==4.1.1 # via google-api-python-client urllib3==1.26.16 diff --git a/functions/zoom-downloader.py b/functions/zoom-downloader.py index 8eb4231..91e13d8 100644 --- a/functions/zoom-downloader.py +++ b/functions/zoom-downloader.py @@ -17,6 +17,14 @@ import logging import concurrent.futures from copy import deepcopy +from botocore.config import Config +from tenacity import ( + Retrying, + RetryError, + stop_after_attempt, + wait_fixed, + retry_if_exception_type, +) logger = logging.getLogger() @@ -33,6 +41,8 @@ DOWNLOAD_MESSAGES_PER_INVOCATION = env("DOWNLOAD_MESSAGES_PER_INVOCATION") # Ignore recordings that are less than MIN_DURATION (in minutes) MINIMUM_DURATION = int(env("MINIMUM_DURATION", 2)) +STREAM_FROM_ZOOM_TO_S3_RETRIES = 5 +STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT = 120 class PermanentDownloadError(Exception): @@ -48,7 +58,7 @@ class ZoomDownloadLinkError(Exception): sqs = boto3.resource("sqs") -s3 = boto3.client("s3") +s3 = boto3.client("s3", config=Config(tcp_keepalive=True)) @setup_logging @@ -358,8 +368,24 @@ def upload_to_s3(self): self.downloaded_files = [] for file in self.recording_files: try: - file.stream_file_to_s3() + retry_attempts = Retrying( + reraise=True, + stop=stop_after_attempt(STREAM_FROM_ZOOM_TO_S3_RETRIES), + wait=wait_fixed(STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT), + retry=retry_if_exception_type(RetryableDownloadError), + ) + for attempt in retry_attempts: + with attempt: + logger.info({"retry state": attempt.retry_state}) + file.stream_file_to_s3() self.downloaded_files.append(file) + # RetryError 
means we've exhausted all retries + except RetryError: + logger.exception( + {"Giving up trying to download file": file.file_data} + ) + # raise this so the whole lambda call will be retried + raise RetryableDownloadError() except ZoomDownloadLinkError: logger.warning( {"Error accessing possibly deleted file": file.file_data} @@ -663,7 +689,7 @@ def stream_file_to_s3(self): Key=self.s3_filename, UploadId=mpu["UploadId"], ) - raise + raise RetryableDownloadError() if self.file_extension == "mp4": if not self.valid_mp4_file(): diff --git a/requirements/base.in b/requirements/base.in index 81f0950..14ad70e 100644 --- a/requirements/base.in +++ b/requirements/base.in @@ -7,5 +7,7 @@ requests tabulate aws-cdk-lib constructs +tenacity +pyyaml!=6.0.0,!=5.4.0,!=5.4.1, # pyyaml later versions are broken with cython 3 -r ../function_requirements/common-requirements.txt diff --git a/requirements/base.txt b/requirements/base.txt index ee1497f..d9ee562 100644 --- a/requirements/base.txt +++ b/requirements/base.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --resolver=backtracking requirements/base.in +# pip-compile requirements/base.in # attrs==23.1.0 # via @@ -146,8 +146,10 @@ python-dotenv==1.0.0 # -r requirements/base.in pytz==2023.3 # via -r requirements/../function_requirements/common-requirements.txt -pyyaml==5.4.1 - # via awscli +pyyaml==5.3.1 + # via + # -r requirements/base.in + # awscli requests==2.31.0 # via # -r requirements/../function_requirements/common-requirements.txt @@ -176,6 +178,10 @@ six==1.16.0 # python-dateutil tabulate==0.9.0 # via -r requirements/base.in +tenacity==8.5.0 + # via + # -r requirements/../function_requirements/common-requirements.txt + # -r requirements/base.in typeguard==2.13.3 # via # aws-cdk-asset-awscli-v1 @@ -196,5 +202,3 @@ urllib3==1.26.16 # botocore # google-auth # requests -zipp==3.15.0 - # 
via importlib-resources diff --git a/requirements/dev.txt b/requirements/dev.txt index abdb0ea..febdd13 100644 --- a/requirements/dev.txt +++ b/requirements/dev.txt @@ -1,8 +1,8 @@ # -# This file is autogenerated by pip-compile with Python 3.8 +# This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --resolver=backtracking requirements/dev.in +# pip-compile requirements/dev.in # attrs==23.1.0 # via @@ -27,6 +27,7 @@ aws-lambda-logging==0.1.1 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt awscli==1.27.142 # via -r requirements/base.txt black==23.3.0 @@ -35,10 +36,12 @@ boto3==1.26.142 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt botocore==1.29.142 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # awscli # boto3 # s3transfer @@ -46,6 +49,7 @@ cachetools==5.3.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-auth cattrs==22.2.0 # via @@ -55,6 +59,7 @@ certifi==2023.5.7 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # requests cfgv==3.3.1 # via pre-commit @@ -62,6 +67,7 @@ charset-normalizer==3.1.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # requests click==8.1.3 # via black @@ -101,15 +107,18 @@ google-api-core==2.11.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-python-client google-api-python-client==2.87.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt 
google-auth==2.19.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-core # google-api-python-client # google-auth-httplib2 @@ -118,20 +127,24 @@ google-auth-httplib2==0.1.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-python-client google-auth-oauthlib==1.0.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt googleapis-common-protos==1.59.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-core httplib2==0.22.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-python-client # google-auth-httplib2 identify==2.5.24 @@ -140,6 +153,7 @@ idna==3.4 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # requests importlib-resources==5.12.0 # via @@ -155,6 +169,7 @@ jmespath==1.0.1 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # boto3 # botocore jsii==1.82.0 @@ -177,6 +192,7 @@ oauthlib==3.2.2 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # requests-oauthlib packaging==23.1 # via @@ -203,6 +219,7 @@ protobuf==4.23.2 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-core # googleapis-common-protos publication==0.0.3 @@ -220,12 +237,14 @@ pyasn1==0.5.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # pyasn1-modules # rsa 
pyasn1-modules==0.3.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-auth pycodestyle==2.10.0 # via flake8 @@ -235,10 +254,12 @@ pyjwt==2.7.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt pyparsing==3.0.9 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # httplib2 pytest==7.3.1 # via @@ -256,6 +277,7 @@ python-dateutil==2.8.2 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # botocore # freezegun # jsii @@ -263,11 +285,13 @@ python-dotenv==1.0.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt pytz==2023.3 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt -pyyaml==5.4.1 + # -r requirements/tox.txt +pyyaml==5.3.1 # via # -r requirements/base.txt # awscli @@ -276,6 +300,7 @@ requests==2.31.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-core # requests-mock # requests-oauthlib @@ -285,23 +310,27 @@ requests-oauthlib==1.3.1 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-auth-oauthlib rsa==4.7.2 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # awscli # google-auth s3transfer==0.6.1 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # awscli # boto3 six==1.16.0 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r 
requirements/tox.txt # google-auth # google-auth-httplib2 # python-dateutil @@ -309,6 +338,11 @@ six==1.16.0 # tox tabulate==0.9.0 # via -r requirements/base.txt +tenacity==8.5.0 + # via + # -r requirements/../function_requirements/common-requirements.txt + # -r requirements/base.txt + # -r requirements/tox.txt tomli==2.0.1 # via # -r requirements/tox.txt @@ -330,17 +364,18 @@ typeguard==2.13.3 typing-extensions==4.6.2 # via # -r requirements/base.txt - # black # jsii uritemplate==4.1.1 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # google-api-python-client urllib3==1.26.16 # via # -r requirements/../function_requirements/common-requirements.txt # -r requirements/base.txt + # -r requirements/tox.txt # botocore # google-auth # requests @@ -348,10 +383,6 @@ virtualenv==20.23.0 # via # pre-commit # tox -zipp==3.15.0 - # via - # -r requirements/base.txt - # importlib-resources # The following packages are considered to be unsafe in a requirements file: # setuptools diff --git a/requirements/tox.txt b/requirements/tox.txt index ea89e9e..15c6871 100644 --- a/requirements/tox.txt +++ b/requirements/tox.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --resolver=backtracking requirements/tox.in +# pip-compile requirements/tox.in # aws-lambda-logging==0.1.1 # via -r requirements/../function_requirements/common-requirements.txt @@ -148,6 +148,8 @@ six==1.16.0 # google-auth-httplib2 # python-dateutil # requests-mock +tenacity==8.5.0 + # via -r requirements/../function_requirements/common-requirements.txt tomli==2.0.1 # via # coverage diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 78f54b4..36bbda7 100755 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -612,6 +612,40 @@ def mock_stream_file_to_s3(*args): dl.upload_to_s3() assert len(dl.downloaded_files) == expected_count + def 
test_upload_to_s3_retries(self): + now = datetime.strftime(datetime.now(), TIMESTAMP_FORMAT) + dl = downloader.Download( + None, + { + "uuid": "abc", + "zoom_series_id": "02334", + "start_time": now, + "recording_files": [ + {"recording_start": now, "recording_type": "view_type_1"}, + ], + }, + ) + + downloader.STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT = 0.1 + + downloader.STREAM_FROM_ZOOM_TO_S3_RETRIES = 3 + + num_retries = 0 + + def count_retries(*args): + nonlocal num_retries + num_retries += 1 + raise downloader.RetryableDownloadError("connection reset, loser!") + + self.mocker.patch.object( + downloader.ZoomFile, "stream_file_to_s3", side_effect=count_retries + ) + + with pytest.raises(Exception): + dl.upload_to_s3() + + assert num_retries == 3 + """ Tests for class ZoomFile