Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add retries to the individual streaming of files from zoom to s3 #163

Merged
merged 1 commit into from
Aug 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ repos:
types: [python]
- id: black
name: black
entry: black
entry: black -v
language: system
types: [python]
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [unreleased]

## [4.2.2 - 2024-08-02]

- Added an internal retry mechanism for streaming individual files from Zoom to S3
- Added `tcp_keepalive=True` to the boto3 S3 client

## [4.2.0 - 2023-10-10]

### Added
Expand Down
1 change: 1 addition & 0 deletions function_requirements/common-requirements.in
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
boto3
aws-lambda-logging
requests
tenacity
PyJWT
pytz
python-dotenv
Expand Down
24 changes: 13 additions & 11 deletions function_requirements/common-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --resolver=backtracking common-requirements.in
# pip-compile function_requirements/common-requirements.in
#
aws-lambda-logging==0.1.1
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
boto3==1.26.142
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
botocore==1.29.142
# via
# boto3
Expand All @@ -21,18 +21,18 @@ charset-normalizer==3.1.0
google-api-core==2.11.0
# via google-api-python-client
google-api-python-client==2.87.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
google-auth==2.19.0
# via
# -r common-requirements.in
# -r function_requirements/common-requirements.in
# google-api-core
# google-api-python-client
# google-auth-httplib2
# google-auth-oauthlib
google-auth-httplib2==0.1.0
# via google-api-python-client
google-auth-oauthlib==1.0.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
googleapis-common-protos==1.59.0
# via google-api-core
httplib2==0.22.0
Expand All @@ -58,25 +58,25 @@ pyasn1==0.5.0
pyasn1-modules==0.3.0
# via google-auth
pyjwt==2.7.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
pyparsing==3.0.9
# via httplib2
python-dateutil==2.8.2
# via botocore
python-dotenv==1.0.0
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
pytz==2023.3
# via -r common-requirements.in
# via -r function_requirements/common-requirements.in
requests==2.31.0
# via
# -r common-requirements.in
# -r function_requirements/common-requirements.in
# google-api-core
# requests-oauthlib
requests-oauthlib==1.3.1
# via google-auth-oauthlib
rsa==4.7.2
# via
# -r common-requirements.in
# -r function_requirements/common-requirements.in
# google-auth
s3transfer==0.6.1
# via boto3
Expand All @@ -85,6 +85,8 @@ six==1.16.0
# google-auth
# google-auth-httplib2
# python-dateutil
tenacity==8.5.0
# via -r function_requirements/common-requirements.in
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.16
Expand Down
32 changes: 29 additions & 3 deletions functions/zoom-downloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,14 @@
import logging
import concurrent.futures
from copy import deepcopy
from botocore.config import Config
from tenacity import (
Retrying,
RetryError,
stop_after_attempt,
wait_fixed,
retry_if_exception_type,
)

logger = logging.getLogger()

Expand All @@ -33,6 +41,8 @@
DOWNLOAD_MESSAGES_PER_INVOCATION = env("DOWNLOAD_MESSAGES_PER_INVOCATION")
# Ignore recordings shorter than MINIMUM_DURATION (in minutes)
MINIMUM_DURATION = int(env("MINIMUM_DURATION", 2))
STREAM_FROM_ZOOM_TO_S3_RETRIES = 5
STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT = 120


class PermanentDownloadError(Exception):
Expand All @@ -48,7 +58,7 @@ class ZoomDownloadLinkError(Exception):


sqs = boto3.resource("sqs")
s3 = boto3.client("s3")
s3 = boto3.client("s3", config=Config(tcp_keepalive=True))


@setup_logging
Expand Down Expand Up @@ -358,8 +368,24 @@ def upload_to_s3(self):
self.downloaded_files = []
for file in self.recording_files:
try:
file.stream_file_to_s3()
retry_attempts = Retrying(
reraise=True,
stop=stop_after_attempt(STREAM_FROM_ZOOM_TO_S3_RETRIES),
wait=wait_fixed(STREAM_FROM_ZOOM_TO_S3_RETRY_WAIT),
retry=retry_if_exception_type(RetryableDownloadError),
)
for attempt in retry_attempts:
with attempt:
lbjay marked this conversation as resolved.
Show resolved Hide resolved
logger.info({"retry state": attempt.retry_state})
file.stream_file_to_s3()
self.downloaded_files.append(file)
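# RetryError means we've exhausted all retries.
# NOTE(review): Retrying is configured with reraise=True, in which case
# tenacity re-raises the last RetryableDownloadError rather than RetryError;
# confirm this handler is actually reachable.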
except RetryError:
logger.exception(
{"Giving up trying to download file": file.file_data}
)
# raise this so the whole lambda call will be retried
raise RetryableDownloadError()
except ZoomDownloadLinkError:
logger.warning(
{"Error accessing possibly deleted file": file.file_data}
Expand Down Expand Up @@ -663,7 +689,7 @@ def stream_file_to_s3(self):
Key=self.s3_filename,
UploadId=mpu["UploadId"],
)
raise
raise RetryableDownloadError()

if self.file_extension == "mp4":
if not self.valid_mp4_file():
Expand Down
2 changes: 2 additions & 0 deletions requirements/base.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,7 @@ requests
tabulate
aws-cdk-lib
constructs
tenacity
pyyaml!=6.0.0,!=5.4.0,!=5.4.1, # these pyyaml releases fail to build with Cython 3

-r ../function_requirements/common-requirements.txt
16 changes: 10 additions & 6 deletions requirements/base.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
#
# This file is autogenerated by pip-compile with Python 3.8
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --resolver=backtracking requirements/base.in
# pip-compile requirements/base.in
#
attrs==23.1.0
# via
Expand Down Expand Up @@ -146,8 +146,10 @@ python-dotenv==1.0.0
# -r requirements/base.in
pytz==2023.3
# via -r requirements/../function_requirements/common-requirements.txt
pyyaml==5.4.1
# via awscli
pyyaml==5.3.1
# via
# -r requirements/base.in
# awscli
requests==2.31.0
# via
# -r requirements/../function_requirements/common-requirements.txt
Expand Down Expand Up @@ -176,6 +178,10 @@ six==1.16.0
# python-dateutil
tabulate==0.9.0
# via -r requirements/base.in
tenacity==8.5.0
# via
# -r requirements/../function_requirements/common-requirements.txt
# -r requirements/base.in
typeguard==2.13.3
# via
# aws-cdk-asset-awscli-v1
Expand All @@ -196,5 +202,3 @@ urllib3==1.26.16
# botocore
# google-auth
# requests
zipp==3.15.0
# via importlib-resources
Loading
Loading