From 9983c37313ff30c37daa711ce977c553eca9d7ee Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Thu, 19 Dec 2024 00:14:16 +0100 Subject: [PATCH 1/5] Fix lru cache copying Walk the circularly linked list instead of using (deep-)copy on its attributes --- sentry_sdk/_lru_cache.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index 825c773529..10ed11231c 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -92,11 +92,24 @@ def __init__(self, max_size): self.hits = self.misses = 0 def __copy__(self): - cache = LRUCache(self.max_size) - cache.full = self.full - cache.cache = copy(self.cache) - cache.root = deepcopy(self.root) - return cache + # walk around the circle and fill the new root / cache + cache = {} + node_old = root_old = self.root + node_new = root_new = [None] * 4 + while (node_old := node_old[NEXT]) is not root_old: + _, _, key, val = node_old + cache[node_old[KEY]] = node_new[NEXT] = [node_new, None, key, val] + node_new = node_new[NEXT] + + # close the circle + node_new[NEXT] = root_new + root_new[PREV] = node_new + + lru_cache = LRUCache(self.max_size) + lru_cache.full = self.full + lru_cache.cache = cache + lru_cache.root = root_new + return lru_cache def set(self, key, value): link = self.cache.get(key, SENTINEL) From f956a20184adbd3d51eea4dc692a885d7217ded8 Mon Sep 17 00:00:00 2001 From: ffelixg <142172984+ffelixg@users.noreply.github.com> Date: Thu, 19 Dec 2024 00:35:27 +0100 Subject: [PATCH 2/5] Fix tox complaints --- sentry_sdk/_lru_cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index 10ed11231c..0eaed8f57c 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -62,7 +62,7 @@ """ -from copy import copy, deepcopy +from typing import cast, Any SENTINEL = object() @@ -95,7 +95,7 @@ def __copy__(self): # walk around the circle and fill the new root / cache cache = {} node_old = root_old = self.root - node_new = root_new = [None] * 4 + node_new = root_new = [cast(Any, None)] * 4 while (node_old := node_old[NEXT]) is not root_old: _, _, key, val = node_old cache[node_old[KEY]] = node_new[NEXT] = [node_new, None, key, val] From c559c8ad2338628572cd906b9ac772c69f3c985d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 19 Dec 2024 13:25:23 +0100 Subject: [PATCH 3/5] Some cleanup and some tests --- sentry_sdk/_lru_cache.py | 10 ++-------- tests/test_lru_cache.py | 37 ++++++++++++++++++++++++++++++++++++- tests/test_scope.py | 22 ++++++++++++++++++++++ 3 files changed, 60 insertions(+), 9 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index 0eaed8f57c..381c8a746d 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -181,14 +181,8 @@ def get_all(self): nodes = [] node = self.root[NEXT] - # To ensure the loop always terminates we iterate to the maximum - # size of the LRU cache. - for _ in range(self.max_size): - # The cache may not be full. We exit early if we've wrapped - # around to the head. 
- if node is self.root: - break + while node is not self.root: nodes.append((node[KEY], node[VALUE])) node = node[NEXT] - return nodes + return nodes \ No newline at end of file diff --git a/tests/test_lru_cache.py b/tests/test_lru_cache.py index cab9bbc7eb..1a54ed83d3 100644 --- a/tests/test_lru_cache.py +++ b/tests/test_lru_cache.py @@ -1,5 +1,5 @@ import pytest -from copy import copy +from copy import copy, deepcopy from sentry_sdk._lru_cache import LRUCache @@ -76,3 +76,38 @@ def test_cache_copy(): cache.get(1) assert copied.get_all() == [(1, 1), (2, 2), (3, 3)] assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] + + +def test_cache_deepcopy(): + cache = LRUCache(3) + cache.set(0, 0) + cache.set(1, 1) + + copied = deepcopy(cache) + cache.set(2, 2) + cache.set(3, 3) + assert copied.get_all() == [(0, 0), (1, 1)] + assert cache.get_all() == [(1, 1), (2, 2), (3, 3)] + + copied = deepcopy(cache) + cache.get(1) + assert copied.get_all() == [(1, 1), (2, 2), (3, 3)] + assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] + + +def test_cache_pollution(): + cache1 = LRUCache(max_size=2) + cache1.set(1, True) + cache2 = copy(cache1) + cache2.set(1, False) + assert cache1.get(1) is True + assert cache2.get(1) is False + + +def test_cache_pollution_deepcopy(): + cache1 = LRUCache(max_size=2) + cache1.set(1, True) + cache2 = deepcopy(cache1) + cache2.set(1, False) + assert cache1.get(1) is True + assert cache2.get(1) is False diff --git a/tests/test_scope.py b/tests/test_scope.py index a03eb07a99..9b16dc4344 100644 --- a/tests/test_scope.py +++ b/tests/test_scope.py @@ -43,6 +43,28 @@ def test_all_slots_copied(): assert getattr(scope_copy, attr) == getattr(scope, attr) +def test_scope_flags_copy(): + # Assert forking creates a deepcopy of the flag buffer. The new + # scope is free to mutate without consequence to the old scope. The + # old scope is free to mutate without consequence to the new scope. + old_scope = Scope() + old_scope.flags.set("a", True) + + new_scope = old_scope.fork() + new_scope.flags.set("a", False) + old_scope.flags.set("b", True) + new_scope.flags.set("c", True) + + assert old_scope.flags.get() == [ + {"flag": "a", "result": True}, + {"flag": "b", "result": True}, + ] + assert new_scope.flags.get() == [ + {"flag": "a", "result": False}, + {"flag": "c", "result": True}, + ] + + def test_merging(sentry_init, capture_events): sentry_init() From fe3f23ebedf22af0475efffb50697e69ace326a6 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 20 Dec 2024 12:02:14 +0100 Subject: [PATCH 4/5] Simpler LRU cache implementation. --- sentry_sdk/_lru_cache.py | 199 ++++++--------------------------------- 1 file changed, 28 insertions(+), 171 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index 381c8a746d..1fe76c6877 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -1,188 +1,45 @@ -""" -A fork of Python 3.6's stdlib lru_cache (found in Python's 'cpython/Lib/functools.py') -adapted into a data structure for single threaded uses. - -https://github.com/python/cpython/blob/v3.6.12/Lib/functools.py - - -Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation; - -All Rights Reserved - - -PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 --------------------------------------------- - -1. 
This LICENSE AGREEMENT is between the Python Software Foundation -("PSF"), and the Individual or Organization ("Licensee") accessing and -otherwise using this software ("Python") in source or binary form and -its associated documentation. - -2. Subject to the terms and conditions of this License Agreement, PSF hereby -grants Licensee a nonexclusive, royalty-free, world-wide license to reproduce, -analyze, test, perform and/or display publicly, prepare derivative works, -distribute, and otherwise use Python alone or in any derivative version, -provided, however, that PSF's License Agreement and PSF's notice of copyright, -i.e., "Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, -2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software Foundation; -All Rights Reserved" are retained in Python alone or in any derivative version -prepared by Licensee. - -3. In the event Licensee prepares a derivative work that is based on -or incorporates Python or any part thereof, and wants to make -the derivative work available to others as provided herein, then -Licensee hereby agrees to include in any such work a brief summary of -the changes made to Python. - -4. PSF is making Python available to Licensee on an "AS IS" -basis. PSF MAKES NO REPRESENTATIONS OR WARRANTIES, EXPRESS OR -IMPLIED. BY WAY OF EXAMPLE, BUT NOT LIMITATION, PSF MAKES NO AND -DISCLAIMS ANY REPRESENTATION OR WARRANTY OF MERCHANTABILITY OR FITNESS -FOR ANY PARTICULAR PURPOSE OR THAT THE USE OF PYTHON WILL NOT -INFRINGE ANY THIRD PARTY RIGHTS. - -5. PSF SHALL NOT BE LIABLE TO LICENSEE OR ANY OTHER USERS OF PYTHON -FOR ANY INCIDENTAL, SPECIAL, OR CONSEQUENTIAL DAMAGES OR LOSS AS -A RESULT OF MODIFYING, DISTRIBUTING, OR OTHERWISE USING PYTHON, -OR ANY DERIVATIVE THEREOF, EVEN IF ADVISED OF THE POSSIBILITY THEREOF. - -6. This License Agreement will automatically terminate upon a material -breach of its terms and conditions. - -7. Nothing in this License Agreement shall be deemed to create any -relationship of agency, partnership, or joint venture between PSF and -Licensee. This License Agreement does not grant permission to use PSF -trademarks or trade name in a trademark sense to endorse or promote -products or services of Licensee, or any third party. - -8. By copying, installing or otherwise using Python, Licensee -agrees to be bound by the terms and conditions of this License -Agreement. 
- -""" - -from typing import cast, Any - -SENTINEL = object() - - -# aliases to the entries in a node -PREV = 0 -NEXT = 1 -KEY = 2 -VALUE = 3 +_SENTINEL = object() class LRUCache: - def __init__(self, max_size): - assert max_size > 0 - + def __init__(self, max_size: int): + if max_size <= 0: + raise AssertionError(f"invalid max_size: {max_size}") self.max_size = max_size - self.full = False - - self.cache = {} - - # root of the circularly linked list to keep track of - # the least recently used key - self.root = [] # type: ignore - # the node looks like [PREV, NEXT, KEY, VALUE] - self.root[:] = [self.root, self.root, None, None] - + self._data = {} self.hits = self.misses = 0 + self.full = False def __copy__(self): - # walk around the circle and fill the new root / cache - cache = {} - node_old = root_old = self.root - node_new = root_new = [cast(Any, None)] * 4 - while (node_old := node_old[NEXT]) is not root_old: - _, _, key, val = node_old - cache[node_old[KEY]] = node_new[NEXT] = [node_new, None, key, val] - node_new = node_new[NEXT] - - # close the circle - node_new[NEXT] = root_new - root_new[PREV] = node_new - - lru_cache = LRUCache(self.max_size) - lru_cache.full = self.full - lru_cache.cache = cache - lru_cache.root = root_new - return lru_cache + new = LRUCache(max_size=self.max_size) + new.hits = self.hits + new.misses = self.misses + new.full = self.full + new._data = self._data.copy() + return new def set(self, key, value): - link = self.cache.get(key, SENTINEL) - - if link is not SENTINEL: - # have to move the node to the front of the linked list - link_prev, link_next, _key, _value = link - - # first remove the node from the lsnked list - link_prev[NEXT] = link_next - link_next[PREV] = link_prev - - # insert the node between the root and the last - last = self.root[PREV] - last[NEXT] = self.root[PREV] = link - link[PREV] = last - link[NEXT] = self.root - - # update the value - link[VALUE] = value - + current = self._data.pop(key, _SENTINEL) + if current is not _SENTINEL: + self._data[key] = value elif self.full: - # reuse the root node, so update its key/value - old_root = self.root - old_root[KEY] = key - old_root[VALUE] = value - - self.root = old_root[NEXT] - old_key = self.root[KEY] - - self.root[KEY] = self.root[VALUE] = None - - del self.cache[old_key] - - self.cache[key] = old_root - + self._data.pop(next(iter(self._data))) + self._data[key] = value else: - # insert new node after last - last = self.root[PREV] - link = [last, self.root, key, value] - last[NEXT] = self.root[PREV] = self.cache[key] = link - self.full = len(self.cache) >= self.max_size + self._data[key] = value + self.full = len(self._data) >= self.max_size def get(self, key, default=None): - link = self.cache.get(key, SENTINEL) - - if link is SENTINEL: + try: + ret = self._data.pop(key) + except KeyError: self.misses += 1 - return default - - # have to move the node to the front of the linked list - link_prev, link_next, _key, _value = link - - # first remove the node from the lsnked list - link_prev[NEXT] = link_next - link_next[PREV] = link_prev - - # insert the node between the root and the last - last = self.root[PREV] - last[NEXT] = self.root[PREV] = link - link[PREV] = last - link[NEXT] = self.root - - self.hits += 1 + ret = default + else: + self.hits += 1 + self._data[key] = ret - return link[VALUE] + return ret def get_all(self): - nodes = [] - node = self.root[NEXT] - - while node is not self.root: - nodes.append((node[KEY], node[VALUE])) - node = node[NEXT] - - return nodes \ No newline at 
end of file + return list(self._data.items()) From 13c9c85ba072cc10c75eb2cd7f19eed6dd95401c Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Fri, 20 Dec 2024 12:17:04 +0100 Subject: [PATCH 5/5] Typing --- sentry_sdk/_lru_cache.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index 1fe76c6877..09eae27df2 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -1,16 +1,24 @@ +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + + _SENTINEL = object() class LRUCache: - def __init__(self, max_size: int): + def __init__(self, max_size): + # type: (int) -> None if max_size <= 0: raise AssertionError(f"invalid max_size: {max_size}") self.max_size = max_size - self._data = {} + self._data = {} # type: dict[Any, Any] self.hits = self.misses = 0 self.full = False def __copy__(self): + # type: () -> LRUCache new = LRUCache(max_size=self.max_size) new.hits = self.hits new.misses = self.misses @@ -19,6 +27,7 @@ def __copy__(self): return new def set(self, key, value): + # type: (Any, Any) -> None current = self._data.pop(key, _SENTINEL) if current is not _SENTINEL: self._data[key] = value @@ -30,6 +39,7 @@ def set(self, key, value): self.full = len(self._data) >= self.max_size def get(self, key, default=None): + # type: (Any, Any) -> Any try: ret = self._data.pop(key) except KeyError: @@ -42,4 +52,5 @@ def get(self, key, default=None): return ret def get_all(self): + # type: () -> list[tuple[Any, Any]] return list(self._data.items())
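
Note on the simplified implementation (patches 4 and 5): the LRU bookkeeping now piggybacks on the insertion order of Python 3.7+ dicts. get() re-inserts a key to mark it most recently used, set() evicts the oldest key via next(iter(self._data)) once the cache is full, and __copy__ copies the underlying dict so a copied cache cannot pollute the original. A small usage sketch of that behaviour (module path as in the patch; the keys and values are illustrative only):

    from copy import copy

    from sentry_sdk._lru_cache import LRUCache

    cache = LRUCache(max_size=3)
    cache.set(1, "one")
    cache.set(2, "two")
    cache.set(3, "three")

    cache.get(1)          # a hit; key 1 becomes the most recently used
    cache.set(4, "four")  # cache is full, so the oldest key (2) is evicted

    assert cache.get_all() == [(3, "three"), (1, "one"), (4, "four")]
    assert cache.get(2) is None                  # counted as a miss
    assert (cache.hits, cache.misses) == (1, 1)

    # __copy__ copies the underlying dict, so mutating the copy does not
    # affect the original (the behaviour checked by test_cache_pollution).
    snapshot = copy(cache)
    snapshot.set(1, "uno")
    assert cache.get(1) == "one"
    assert snapshot.get(1) == "uno"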