Skip to content

Commit

Permalink
Fix TTL extraction [RHELDST-20510]
Browse files Browse the repository at this point in the history
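Previously the TTL extraction was implemented in such a way that
it could extract the wrong value if the URL path contained
a component that also matched the regex: the pattern was wrapped in
greedy ".*" and applied with re.match(), so the last matching
component was captured instead of the first.
This commit removes the surrounding ".*" from the regex and changes
re.match() to re.search(), which ensures that the first occurrence
of the match is found.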
  • Loading branch information
drepelov committed Oct 12, 2023
1 parent 84a8967 commit 3580fc5
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 3 deletions.
4 changes: 2 additions & 2 deletions pubtools/_pulp/cdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class CdnClient(object):
_EXPONENT = float(os.environ.get("CDN_RETRY_EXPONENT", "3.0"))
_MAX_SLEEP = float(os.environ.get("CDN_RETRY_MAX_SLEEP", "120.0"))

TTL_REGEX = re.compile(r".*/(\d+[smhd])/.*")
TTL_REGEX = re.compile(r"/(\d+[smhd])/")
CACHE_KEY_HEADER = HeaderPair("akamai-x-get-cache-key", "X-Cache-Key")

def __init__(self, url, max_retry_sleep=_MAX_SLEEP, **kwargs):
Expand Down Expand Up @@ -104,7 +104,7 @@ def _get_ttl(self, path):
out = self._get_headers_for_path(path, headers)

def _parse_ttl(value):
parsed = re.match(
parsed = re.search(
self.TTL_REGEX, value.get(self.CACHE_KEY_HEADER.response) or ""
)
return parsed.group(1) if parsed else None
Expand Down
8 changes: 7 additions & 1 deletion tests/cdn/test_cdn_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,14 @@ def test_format_arl_template(requests_mock, caplog):
("https://cdn.example.com/content/foo/test-path-2/other-file.xml", "30m"),
]

# ARLs are generated from the template using the {ttl} placeholder, which is replaced with
# the real TTL value. The real TTL value is extracted from the cache key header of the real
# request for the given path using the '/(\d+[smhd])/' regex.
# The trailing '/1h/foo' in the mocked header checks that when the path contains a later
# component that also matches the TTL regex ('/1h/'), the first match, i.e. the correct
# value ('/10h/' or '/30m/'), is still the one that is extracted.
for url, ttl in url_ttl:
headers = {"X-Cache-Key": f"/fake/cache-key/{ttl}/something"}
headers = {"X-Cache-Key": f"/fake/cache-key/{ttl}/something/1h/foo"}
requests_mock.register_uri("HEAD", url, headers=headers)

fts = []
Expand Down

0 comments on commit 3580fc5

Please sign in to comment.