From 9e432d6cd877983c02b423f9c11985336f3fef18 Mon Sep 17 00:00:00 2001 From: Rohan McGovern Date: Mon, 29 Jul 2024 10:53:44 +1000 Subject: [PATCH] Support compressed configuration [RHELDST-25461] Config has until now always been stored as a String attribute on DynamoDB items. Let's support also loading it as a gzip-compressed Binary attribute. The motivation is to allow more room for the config to grow. The max DynamoDB item size is 400KB, and a realistic string-based config has already reached 70KB (about 18% of the limit). As the config is only ever expected to grow over time, we may eventually hit that limit without warning; this discourages us from adding more fields into the config. The same config compressed uses about 5KB (about 1% of the limit), which seems small enough that we'll never have to worry about hitting the limit and we can freely add new fields into the config. This change should be fully deployed to all exodus-cdn environments before deploying the corresponding change to exodus-gw. --- exodus_lambda/functions/origin_request.py | 11 ++++++++++- tests/functions/test_origin_request.py | 16 ++++++++++++++-- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/exodus_lambda/functions/origin_request.py b/exodus_lambda/functions/origin_request.py index 28f8c255..daa75801 100755 --- a/exodus_lambda/functions/origin_request.py +++ b/exodus_lambda/functions/origin_request.py @@ -1,5 +1,6 @@ import binascii import functools +import gzip import json import os import time @@ -65,7 +66,15 @@ def definitions(self): ) if query_result["Items"]: item = query_result["Items"][0] - out = json.loads(item["config"]["S"]) + if item_encoded := item["config"].get("B"): + # new-style: config is compressed and stored as bytes + item_bytes = b64decode(item_encoded) + item_json = gzip.decompress(item_bytes).decode() + else: + # old-style, config was stored as JSON string. 
+ # Consider deleting this code path in 2025 + item_json = item["config"]["S"] + out = json.loads(item_json) else: # Provide dict with expected keys when no config is found. out = { diff --git a/tests/functions/test_origin_request.py b/tests/functions/test_origin_request.py index 0f2de2ee..02ad523e 100644 --- a/tests/functions/test_origin_request.py +++ b/tests/functions/test_origin_request.py @@ -1,5 +1,7 @@ +import gzip import json import logging +from base64 import b64encode from urllib.parse import unquote, urlencode import mock @@ -342,15 +344,25 @@ def test_origin_request_invalid_item( ) +@pytest.mark.parametrize("binary_config", (True, False)) @mock.patch("boto3.client") -def test_origin_request_definitions(mocked_boto3_client): +def test_origin_request_definitions(mocked_boto3_client, binary_config: bool): mocked_defs = mock_definitions() + json_defs = json.dumps(mocked_defs) + + if binary_config: + # Config in the style exodus-gw writes from late 2024 onwards + config = {"B": b64encode(gzip.compress(json_defs.encode())).decode()} + else: + # Older-style config + config = {"S": json_defs} + mocked_boto3_client().query.return_value = { "Items": [ { "from_date": {"S": "2020-02-17T00:00:00.000+00:00"}, "config_id": {"S": "exodus-config"}, - "config": {"S": json.dumps(mocked_defs)}, + "config": config, } ] }