Skip to content

Commit

Permalink
Add retries to the individual streaming of files from zoom to s3
Browse files Browse the repository at this point in the history
- added tenacity (retry library) to function requirements
- each file is attempted up to 5 times (i.e. up to 4 retries), with a 2-minute wait between attempts

set `tcp_keepalive=True` for the boto3 s3 client

this seems like a smart thing to do anyway, even though the connection
issues that are motivating the retries are on the zoom side.

See: https://www.miketheman.net/2022/10/04/reduce-aws-lambda-latencies-with-keep-alive-in-python/
  • Loading branch information
lbjay committed Aug 2, 2024
1 parent f586e22 commit 8b8d04b
Show file tree
Hide file tree
Showing 10 changed files with 137 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ repos:
types: [python]
- id: black
name: black
entry: black
entry: black -v
language: system
types: [python]
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [unreleased]

## [4.2.2 - 2024-08-02]

- added internal retry mechanism when downloading from zoom
- added `tcp_keepalive=True` to the boto3 s3 client

## [4.2.0 - 2023-10-10]

### Added
Expand Down
1 change: 1 addition & 0 deletions function_requirements/common-requirements.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
boto3
aws-lambda-logging
requests
tenacity
PyJWT
pytz
python-dotenv
Expand Down
24 changes: 13 additions & 11 deletions function_requirements/common-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --resolver=backtracking common-requirements.in
# pip-compile function_requirements/common-requirements.in
#
aws-lambda-logging==0.1.1
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
boto3==1.26.142
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
botocore==1.29.142
# via
# boto3
Expand All @@ -21,18 +21,18 @@ charset-normalizer==3.1.0
google-api-core==2.11.0
# via google-api-python-client
google-api-python-client==2.87.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
google-auth==2.19.0
# via
# -r common-requirements.in
# -r function_requirements/common-requirements.in
# google-api-core
# google-api-python-client
# google-auth-httplib2
# google-auth-oauthlib
google-auth-httplib2==0.1.0
# via google-api-python-client
google-auth-oauthlib==1.0.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
googleapis-common-protos==1.59.0
# via google-api-core
httplib2==0.22.0
Expand All @@ -58,25 +58,25 @@ pyasn1==0.5.0
pyasn1-modules==0.3.0
# via google-auth
pyjwt==2.7.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
pyparsing==3.0.9
# via httplib2
python-dateutil==2.8.2
# via botocore
python-dotenv==1.0.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
pytz==2023.3
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
requests==2.31.0
# via
# -r common-requirements.in
# -r function_requirements/common-requirements.in
# google-api-core
# requests-oauthlib
requests-oauthlib==1.3.1
# via google-auth-oauthlib
rsa==4.7.2
# via
# -r common-requirements.in
# -r function_requirements/common-requirements.in
# google-auth
s3transfer==0.6.1
# via boto3
Expand All @@ -85,6 +85,8 @@ six==1.16.0
# google-auth
# google-auth-httplib2
# python-dateutil
tenacity==8.5.0
# via -r function_requirements/common-requirements.in
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.16
Expand Down
32 changes: 29 additions & 3 deletions functions/zoom-downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
import logging
import concurrent.futures
from copy import deepcopy
from botocore.config import Config
from tenacity import (
Retrying,
RetryError,
stop_after_attempt,
wait_fixed,
retry_if_exception_type,
)

logger = logging.getLogger()

Expand All @@ -33,6 +41,8 @@
DOWNLOAD_MESSAGES_PER_INVOCATION = env("DOWNLOAD_MESSAGES_PER_INVOCATION")
# Ignore recordings that are shorter than MINIMUM_DURATION (in minutes)
MINIMUM_DURATION = int(env("MINIMUM_DURATION", 2))
STREAM_FROM_ZOOM_TO_S3_RETRIES = 5
STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT = 120


class PermanentDownloadError(Exception):
Expand All @@ -48,7 +58,7 @@ class ZoomDownloadLinkError(Exception):


sqs = boto3.resource("sqs")
s3 = boto3.client("s3")
s3 = boto3.client("s3", config=Config(tcp_keepalive=True))


@setup_logging
Expand Down Expand Up @@ -358,8 +368,24 @@ def upload_to_s3(self):
self.downloaded_files = []
for file in self.recording_files:
try:
file.stream_file_to_s3()
retry_attempts = Retrying(
reraise=True,
stop=stop_after_attempt(STREAM_FROM_ZOOM_TO_S3_RETRIES),
wait=wait_fixed(STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT),
retry=retry_if_exception_type(RetryableDownloadError),
)
for attempt in retry_attempts:
with attempt:
logger.info({"retry state": attempt.retry_state})
file.stream_file_to_s3()
self.downloaded_files.append(file)
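# RetryError would mean we've exhausted all retries. NOTE(review): with reraise=True above, tenacity re-raises the last RetryableDownloadError rather than RetryError, so this branch may never run — confirm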
except RetryError:
logger.exception(
{"Giving up trying to download file": file.file_data}
)
# raise this so the whole lambda call will be retried
raise RetryableDownloadError()
except ZoomDownloadLinkError:
logger.warning(
{"Error accessing possibly deleted file": file.file_data}
Expand Down Expand Up @@ -663,7 +689,7 @@ def stream_file_to_s3(self):
Key=self.s3_filename,
UploadId=mpu["UploadId"],
)
raise
raise RetryableDownloadError()

if self.file_extension == "mp4":
if not self.valid_mp4_file():
Expand Down
2 changes: 2 additions & 0 deletions requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ requests
tabulate
aws-cdk-lib
constructs
tenacity
pyyaml!=6.0.0,!=5.4.0,!=5.4.1, # exclude pyyaml releases that are broken with Cython 3

-r ../function_requirements/common-requirements.txt
16 changes: 10 additions & 6 deletions requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.8
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --resolver=backtracking requirements/base.in
# pip-compile requirements/base.in
#
attrs==23.1.0
# via
Expand Down Expand Up @@ -146,8 +146,10 @@ python-dotenv==1.0.0
# -r requirements/base.in
pytz==2023.3
# via -r requirements/../function_requirements/common-requirements.txt
pyyaml==5.4.1
# via awscli
pyyaml==5.3.1
# via
# -r requirements/base.in
# awscli
requests==2.31.0
# via
# -r requirements/../function_requirements/common-requirements.txt
Expand Down Expand Up @@ -176,6 +178,10 @@ six==1.16.0
# python-dateutil
tabulate==0.9.0
# via -r requirements/base.in
tenacity==8.5.0
# via
# -r requirements/../function_requirements/common-requirements.txt
# -r requirements/base.in
typeguard==2.13.3
# via
# aws-cdk-asset-awscli-v1
Expand All @@ -196,5 +202,3 @@ urllib3==1.26.16
# botocore
# google-auth
# requests
zipp==3.15.0
# via importlib-resources
Loading

0 comments on commit 8b8d04b

Please sign in to comment.