diff --git a/api/api/utils/asyncio.py b/api/api/utils/asyncio.py new file mode 100644 index 00000000000..3b41a1f1446 --- /dev/null +++ b/api/api/utils/asyncio.py @@ -0,0 +1,29 @@ +import asyncio +import logging +from collections.abc import Awaitable + + +parent_logger = logging.getLogger(__name__) + + +_do_not_wait_for_logger = parent_logger.getChild("do_not_wait_for") + + +def do_not_wait_for(awaitable: Awaitable) -> None: + """ + Consume an awaitable without waiting for it to finish. + + This allows us to call an async function that we don't care about + the result of, without needing to wait for it to complete. This is + useful, for example, if some operation creates a side effect that + isn't necessary for the response we're processing (e.g., Redis tallying). + """ + try: + loop = asyncio.get_running_loop() + except RuntimeError as exc: + _do_not_wait_for_logger.error( + "`do_not_wait_for` must be called inside a running event loop." + ) + raise exc + + loop.create_task(awaitable) diff --git a/api/api/utils/image_proxy/__init__.py b/api/api/utils/image_proxy/__init__.py index bcfb44db6aa..e37226fcac0 100644 --- a/api/api/utils/image_proxy/__init__.py +++ b/api/api/utils/image_proxy/__init__.py @@ -7,11 +7,15 @@ from django.http import HttpResponse from rest_framework.exceptions import UnsupportedMediaType +import aiohttp import django_redis import requests import sentry_sdk +from asgiref.sync import sync_to_async from sentry_sdk import push_scope, set_context +from api.utils.aiohttp import get_aiohttp_session +from api.utils.asyncio import do_not_wait_for from api.utils.image_proxy.exception import UpstreamThumbnailException from api.utils.image_proxy.extension import get_image_extension from api.utils.image_proxy.photon import get_photon_request_params @@ -75,11 +79,40 @@ def get_request_params_for_extension( ) -def get( +@sync_to_async +def _tally_response( + tallies, + media_info: MediaInfo, + month: str, + domain: str, + response: aiohttp.ClientResponse, +): + """ + Tally image proxy response without waiting for Redis to respond. + + Pulled into a separate function to help reduce overload when skimming + the `get` function, which is complex enough as is. + """ + tallies.incr(f"thumbnail_response_code:{month}:{response.status}"), + tallies.incr( + f"thumbnail_response_code_by_domain:{domain}:" f"{month}:{response.status}" + ) + tallies.incr( + f"thumbnail_response_code_by_provider:{media_info.media_provider}:" + f"{month}:{response.status}" + ) + + +_UPSTREAM_TIMEOUT = aiohttp.ClientTimeout(15) + + +async def get( media_info: MediaInfo, request_config: RequestConfig = RequestConfig(), ) -> HttpResponse: """ + Retrieve the proxied image. + Proxy an image through Photon if its file type is supported, else return the original image if the file type is SVG. Otherwise, raise an exception. """ @@ -88,9 +121,10 @@ def get( logger = parent_logger.getChild("get") tallies = django_redis.get_redis_connection("tallies") + tallies_incr = sync_to_async(tallies.incr) month = get_monthly_timestamp() - image_extension = get_image_extension(image_url, media_identifier) + image_extension = await get_image_extension(image_url, media_identifier) headers = {"Accept": request_config.accept_header} | HEADERS @@ -106,26 +140,35 @@ def get( ) try: - upstream_response = requests.get( + session = await get_aiohttp_session() + + upstream_response = await session.get( upstream_url, - timeout=15, + timeout=_UPSTREAM_TIMEOUT, params=params, headers=headers, ) - tallies.incr(f"thumbnail_response_code:{month}:{upstream_response.status_code}") - tallies.incr( - f"thumbnail_response_code_by_domain:{domain}:" - f"{month}:{upstream_response.status_code}" - ) - tallies.incr( - f"thumbnail_response_code_by_provider:{media_info.media_provider}:" - f"{month}:{upstream_response.status_code}" + do_not_wait_for( + _tally_response(tallies, media_info, month, domain, upstream_response) ) upstream_response.raise_for_status() + status_code = upstream_response.status + content_type = upstream_response.headers.get("Content-Type") + logger.debug( + "Image proxy response status: %s, content-type: %s", + status_code, + content_type, + ) + content = await upstream_response.content.read() + return HttpResponse( + content, + status=status_code, + content_type=content_type, + ) except Exception as exc: exception_name = f"{exc.__class__.__module__}.{exc.__class__.__name__}" key = f"thumbnail_error:{exception_name}:{domain}:{month}" - count = tallies.incr(key) + count = await tallies_incr(key) if count <= settings.THUMBNAIL_ERROR_INITIAL_ALERT_THRESHOLD or ( count % settings.THUMBNAIL_ERROR_REPEATED_ALERT_FREQUENCY == 0 ): @@ -144,8 +187,10 @@ def get( sentry_sdk.capture_exception(exc) if isinstance(exc, requests.exceptions.HTTPError): code = exc.response.status_code - tallies.incr( - f"thumbnail_http_error:{domain}:{month}:{code}:{exc.response.text}" + do_not_wait_for( + tallies_incr( + f"thumbnail_http_error:{domain}:{month}:{code}:{exc.response.text}" + ) ) logger.warning( f"Failed to render thumbnail " @@ -153,15 +198,3 @@ def get( f"{media_info.media_provider=}" ) raise UpstreamThumbnailException(f"Failed to render thumbnail. {exc}") - - res_status = upstream_response.status_code - content_type = upstream_response.headers.get("Content-Type") - logger.debug( - f"Image proxy response status: {res_status}, content-type: {content_type}" - ) - - return HttpResponse( - upstream_response.content, - status=res_status, - content_type=content_type, - ) diff --git a/api/api/utils/image_proxy/extension.py b/api/api/utils/image_proxy/extension.py index ae74d7f0963..2f2612022c3 100644 --- a/api/api/utils/image_proxy/extension.py +++ b/api/api/utils/image_proxy/extension.py @@ -1,14 +1,20 @@ from os.path import splitext from urllib.parse import urlparse +import aiohttp import django_redis -import requests import sentry_sdk +from asgiref.sync import sync_to_async +from api.utils.aiohttp import get_aiohttp_session +from api.utils.asyncio import do_not_wait_for from api.utils.image_proxy.exception import UpstreamThumbnailException -def get_image_extension(image_url: str, media_identifier: str) -> str | None: +_HEAD_TIMEOUT = aiohttp.ClientTimeout(10) + + +async def get_image_extension(image_url: str, media_identifier) -> str | None: cache = django_redis.get_redis_connection("default") key = f"media:{media_identifier}:thumb_type" @@ -16,28 +22,29 @@ def get_image_extension(image_url: str, media_identifier: str) -> str | None: if not ext: # If the extension is not present in the URL, try to get it from the redis cache - ext = cache.get(key) + ext = await sync_to_async(cache.get)(key) ext = ext.decode("utf-8") if ext else None if not ext: # If the extension is still not present, try getting it from the content type try: - response = requests.head(image_url, timeout=10) + session = await get_aiohttp_session() + response = await session.head(image_url, timeout=_HEAD_TIMEOUT) response.raise_for_status() + if response.headers and "Content-Type" in response.headers: + content_type = response.headers["Content-Type"] + ext = _get_file_extension_from_content_type(content_type) + else: + ext = None + + do_not_wait_for(sync_to_async(cache.set)(key, ext if ext else "unknown")) except Exception as exc: sentry_sdk.capture_exception(exc) raise UpstreamThumbnailException( "Failed to render thumbnail due to inability to check media " f"type. {exc}" ) - else: - if response.headers and "Content-Type" in response.headers: - content_type = response.headers["Content-Type"] - ext = _get_file_extension_from_content_type(content_type) - else: - ext = None - cache.set(key, ext if ext else "unknown") return ext diff --git a/api/api/views/media_views.py b/api/api/views/media_views.py index d003feec9fd..c490e95d45a 100644 --- a/api/api/views/media_views.py +++ b/api/api/views/media_views.py @@ -40,9 +40,6 @@ class InvalidSource(APIException): default_code = "invalid_source" -image_proxy_aget = sync_to_async(image_proxy.get) - - class MediaViewSet(AsyncViewSetMixin, AsyncAPIView, ReadOnlyModelViewSet): view_is_async = True @@ -296,7 +293,7 @@ async def thumbnail(self, request, *_, **__): media_info = await self.get_image_proxy_media_info() - return await image_proxy_aget( + return await image_proxy.get( media_info, request_config=image_proxy.RequestConfig( accept_header=request.headers.get("Accept", "image/*"), diff --git a/api/test/conftest.py b/api/test/conftest.py index e06e86b862b..5277a134a29 100644 --- a/api/test/conftest.py +++ b/api/test/conftest.py @@ -1,11 +1,34 @@ +import asyncio + import pytest -from asgiref.sync import async_to_sync from conf.asgi import application +@pytest.fixture +def get_new_loop(): + loops: list[asyncio.AbstractEventLoop] = [] + + def _get_new_loop() -> asyncio.AbstractEventLoop: + loop = asyncio.new_event_loop() + loops.append(loop) + return loop + + yield _get_new_loop + + for loop in loops: + loop.close() + + +@pytest.fixture(scope="session") +def session_loop() -> asyncio.AbstractEventLoop: + loop = asyncio.new_event_loop() + yield loop + loop.close() + + @pytest.fixture(scope="session", autouse=True) -def ensure_asgi_lifecycle(): +def ensure_asgi_lifecycle(session_loop: asyncio.AbstractEventLoop): """ Call application shutdown lifecycle event. @@ -27,4 +50,4 @@ async def shutdown(): return {"type": "lifespan.shutdown"} yield - async_to_sync(application)(scope, shutdown, noop) + session_loop.run_until_complete(application(scope, shutdown, noop)) diff --git a/api/test/unit/utils/test_aiohttp.py b/api/test/unit/utils/test_aiohttp.py index 126ba56fc13..ec3cbdc1538 100644 --- a/api/test/unit/utils/test_aiohttp.py +++ b/api/test/unit/utils/test_aiohttp.py @@ -1,25 +1,6 @@ -import asyncio - -import pytest - from api.utils.aiohttp import get_aiohttp_session -@pytest.fixture(autouse=True) -def get_new_loop(): - loops: list[asyncio.AbstractEventLoop] = [] - - def _get_new_loop(): - loop = asyncio.new_event_loop() - loops.append(loop) - return loop - - yield _get_new_loop - - for loop in loops: - loop.close() - - def test_reuses_session_within_same_loop(get_new_loop): loop = get_new_loop() diff --git a/api/test/unit/utils/test_image_proxy.py b/api/test/unit/utils/test_image_proxy.py index 5f2aa58f15a..657dbe211b4 100644 --- a/api/test/unit/utils/test_image_proxy.py +++ b/api/test/unit/utils/test_image_proxy.py @@ -1,3 +1,4 @@ +import asyncio from dataclasses import replace from test.factory.models.image import ImageFactory from unittest.mock import MagicMock @@ -6,6 +7,7 @@ from django.conf import settings from rest_framework.exceptions import UnsupportedMediaType +import aiohttp import pook import pytest import requests @@ -17,7 +19,7 @@ UpstreamThumbnailException, extension, ) -from api.utils.image_proxy import get as photon_get +from api.utils.image_proxy import get as _photon_get from api.utils.tallies import get_monthly_timestamp @@ -56,8 +58,26 @@ def auth_key(): settings.PHOTON_AUTH_KEY = None +@pytest.fixture +def photon_get(session_loop): + """ + Run ``image_proxy.get`` and wait for all tasks to finish. + """ + + def do(*args, **kwargs): + try: + res = session_loop.run_until_complete(_photon_get(*args, **kwargs)) + return res + finally: + tasks = asyncio.all_tasks(session_loop) + for task in tasks: + session_loop.run_until_complete(task) + + yield do + + @pook.on -def test_get_successful_no_auth_key_default_args(mock_image_data): +def test_get_successful_no_auth_key_default_args(photon_get, mock_image_data): mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( @@ -81,7 +101,9 @@ def test_get_successful_no_auth_key_default_args(mock_image_data): @pook.on -def test_get_successful_original_svg_no_auth_key_default_args(mock_image_data): +def test_get_successful_original_svg_no_auth_key_default_args( + photon_get, mock_image_data +): mock_get: pook.Mock = ( pook.get(TEST_IMAGE_URL.replace(".jpg", ".svg")) .header("User-Agent", UA_HEADER) @@ -103,7 +125,9 @@ def test_get_successful_original_svg_no_auth_key_default_args(mock_image_data): @pook.on -def test_get_successful_with_auth_key_default_args(mock_image_data, auth_key): +def test_get_successful_with_auth_key_default_args( + photon_get, mock_image_data, auth_key +): mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( @@ -128,7 +152,7 @@ def test_get_successful_with_auth_key_default_args(mock_image_data, auth_key): @pook.on -def test_get_successful_no_auth_key_not_compressed(mock_image_data): +def test_get_successful_no_auth_key_not_compressed(photon_get, mock_image_data): mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( @@ -151,7 +175,7 @@ def test_get_successful_no_auth_key_not_compressed(mock_image_data): @pook.on -def test_get_successful_no_auth_key_full_size(mock_image_data): +def test_get_successful_no_auth_key_full_size(photon_get, mock_image_data): mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( @@ -174,7 +198,9 @@ def test_get_successful_no_auth_key_full_size(mock_image_data): @pook.on -def test_get_successful_no_auth_key_full_size_not_compressed(mock_image_data): +def test_get_successful_no_auth_key_full_size_not_compressed( + photon_get, mock_image_data +): mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .header("User-Agent", UA_HEADER) @@ -195,7 +221,7 @@ def test_get_successful_no_auth_key_full_size_not_compressed(mock_image_data): @pook.on -def test_get_successful_no_auth_key_png_only(mock_image_data): +def test_get_successful_no_auth_key_png_only(photon_get, mock_image_data): mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( @@ -219,7 +245,7 @@ def test_get_successful_no_auth_key_png_only(mock_image_data): @pook.on -def test_get_successful_forward_query_params(mock_image_data): +def test_get_successful_forward_query_params(photon_get, mock_image_data): params = urlencode({"hello": "world", 1: 2, "beep": "boop"}) mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) @@ -249,18 +275,18 @@ def test_get_successful_forward_query_params(mock_image_data): @pytest.fixture -def setup_requests_get_exception(monkeypatch): +def setup_request_exception(monkeypatch): def do(exc): - def raise_exc(*args, **kwargs): + async def raise_exc(*args, **kwargs): raise exc - monkeypatch.setattr("requests.get", raise_exc) + monkeypatch.setattr(aiohttp.ClientSession, "get", raise_exc) yield do @pook.on -def test_get_successful_records_response_code(mock_image_data, redis): +def test_get_successful_records_response_code(photon_get, mock_image_data, redis): ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) .params( @@ -317,15 +343,16 @@ def test_get_successful_records_response_code(mock_image_data, redis): ) @alert_count_params def test_get_exception_handles_error( + photon_get, exc, exc_name, count_start, should_alert, capture_exception, - setup_requests_get_exception, + setup_request_exception, redis, ): - setup_requests_get_exception(exc) + setup_request_exception(exc) month = get_monthly_timestamp() key = f"thumbnail_error:{exc_name}:subdomain.example.com:{month}" redis.set(key, count_start) @@ -353,19 +380,20 @@ def test_get_exception_handles_error( ], ) def test_get_http_exception_handles_error( + photon_get, status_code, text, count_start, should_alert, capture_exception, - setup_requests_get_exception, + setup_request_exception, redis, ): mock_response = MagicMock(spec=requests.Response) mock_response.status_code = status_code mock_response.text = text exc = requests.HTTPError(response=mock_response) - setup_requests_get_exception(exc) + setup_request_exception(exc) month = get_monthly_timestamp() key = f"thumbnail_error:requests.exceptions.HTTPError:subdomain.example.com:{month}" @@ -392,7 +420,9 @@ def test_get_http_exception_handles_error( @pook.on -def test_get_successful_https_image_url_sends_ssl_parameter(mock_image_data): +def test_get_successful_https_image_url_sends_ssl_parameter( + photon_get, mock_image_data +): https_url = TEST_IMAGE_URL.replace("http://", "https://") mock_get: pook.Mock = ( pook.get(PHOTON_URL_FOR_TEST_IMAGE) @@ -420,7 +450,7 @@ def test_get_successful_https_image_url_sends_ssl_parameter(mock_image_data): @pook.on -def test_get_unsuccessful_request_raises_custom_exception(): +def test_get_unsuccessful_request_raises_custom_exception(photon_get): mock_get: pook.Mock = pook.get(PHOTON_URL_FOR_TEST_IMAGE).reply(404).mock with pytest.raises( @@ -452,7 +482,7 @@ def test__get_extension_from_url(image_url, expected_ext): @pytest.mark.django_db @pytest.mark.parametrize("image_type", ["apng", "tiff", "bmp"]) -def test_photon_get_raises_by_not_allowed_types(image_type): +def test_photon_get_raises_by_not_allowed_types(photon_get, image_type): image_url = TEST_IMAGE_URL.replace(".jpg", f".{image_type}") image = ImageFactory.create(url=image_url) media_info = MediaInfo( @@ -473,7 +503,9 @@ def test_photon_get_raises_by_not_allowed_types(image_type): ({"Content-Type": "unknown"}, b"unknown"), ], ) -def test_photon_get_saves_image_type_to_cache(redis, headers, expected_cache_val): +def test_photon_get_saves_image_type_to_cache( + photon_get, redis, headers, expected_cache_val +): image_url = TEST_IMAGE_URL.replace(".jpg", "") image = ImageFactory.create(url=image_url) media_info = MediaInfo(