From 50ad148803e372bdaea4815884788c28a4897974 Mon Sep 17 00:00:00 2001 From: Florian Dellekart <60044734+fdellekart@users.noreply.github.com> Date: Thu, 5 Dec 2024 12:57:09 +0100 Subject: [PATCH 01/14] =?UTF-8?q?fix(grpc):=20Return=20proper=20metadata?= =?UTF-8?q?=20object=20instead=20of=20list=20in=E2=80=A6=20(#3205)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(grpc): Return propagate proper metadata object instead of list in client interceptor Fixes #2509 * fix(grpc): Transform metadata into Metadata object in case it's a tuple Up until version 1.65.0 of grpcio, the metadata was not guaranteed to arrive as the type specified in annotations but could be a tuple. To support versions before that we check and transform it here. * docs(grpc): Add comment about workaround --------- Co-authored-by: Anton Pirker Co-authored-by: Daniel Szoke <7881302+szokeasaurusrex@users.noreply.github.com> --- sentry_sdk/integrations/grpc/aio/client.py | 23 ++++++++++------------ 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/sentry_sdk/integrations/grpc/aio/client.py b/sentry_sdk/integrations/grpc/aio/client.py index e8adeba05e..ff3c213176 100644 --- a/sentry_sdk/integrations/grpc/aio/client.py +++ b/sentry_sdk/integrations/grpc/aio/client.py @@ -6,6 +6,7 @@ ClientCallDetails, UnaryUnaryCall, UnaryStreamCall, + Metadata, ) from google.protobuf.message import Message @@ -19,23 +20,19 @@ class ClientInterceptor: def _update_client_call_details_metadata_from_scope( client_call_details: ClientCallDetails, ) -> ClientCallDetails: - metadata = ( - list(client_call_details.metadata) if client_call_details.metadata else [] - ) + if client_call_details.metadata is None: + client_call_details = client_call_details._replace(metadata=Metadata()) + elif not isinstance(client_call_details.metadata, Metadata): + # This is a workaround for a GRPC bug, which was fixed in grpcio v1.60.0 + # See 
https://github.com/grpc/grpc/issues/34298. + client_call_details = client_call_details._replace( + metadata=Metadata.from_tuple(client_call_details.metadata) + ) for ( key, value, ) in sentry_sdk.get_current_scope().iter_trace_propagation_headers(): - metadata.append((key, value)) - - client_call_details = ClientCallDetails( - method=client_call_details.method, - timeout=client_call_details.timeout, - metadata=metadata, - credentials=client_call_details.credentials, - wait_for_ready=client_call_details.wait_for_ready, - ) - + client_call_details.metadata.add(key, value) return client_call_details From cda51274de6b11c59a496d610907e4656fa99fd7 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Dec 2024 14:29:06 +0100 Subject: [PATCH 02/14] Add missing stack frames (#3673) Add a new `init()` option `add_full_stack` (default `False`), when set to `True` it will add all the missing frames from the beginning of the execution to the stack trace sent to Sentry. Also adds another option `max_stack_frames` (default `100`) to limit the number of frames sent. The limitation is only enforced when `add_full_stack=True` to not change behavior for existing users. 
Fixes #3646 --- sentry_sdk/consts.py | 5 ++ sentry_sdk/utils.py | 82 +++++++++++++++++++++++-- tests/test_full_stack_frames.py | 103 ++++++++++++++++++++++++++++++++ 3 files changed, 185 insertions(+), 5 deletions(-) create mode 100644 tests/test_full_stack_frames.py diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 488743b579..6750e85f99 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -6,6 +6,9 @@ # up top to prevent circular import due to integration import DEFAULT_MAX_VALUE_LENGTH = 1024 +DEFAULT_MAX_STACK_FRAMES = 100 +DEFAULT_ADD_FULL_STACK = False + # Also needs to be at the top to prevent circular import class EndpointType(Enum): @@ -551,6 +554,8 @@ def __init__( cert_file=None, # type: Optional[str] key_file=None, # type: Optional[str] custom_repr=None, # type: Optional[Callable[..., Optional[str]]] + add_full_stack=DEFAULT_ADD_FULL_STACK, # type: bool + max_stack_frames=DEFAULT_MAX_STACK_FRAMES, # type: Optional[int] ): # type: (...) -> None pass diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 4d07974809..ae6e7538ac 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -26,7 +26,12 @@ import sentry_sdk from sentry_sdk._compat import PY37 -from sentry_sdk.consts import DEFAULT_MAX_VALUE_LENGTH, EndpointType +from sentry_sdk.consts import ( + DEFAULT_ADD_FULL_STACK, + DEFAULT_MAX_STACK_FRAMES, + DEFAULT_MAX_VALUE_LENGTH, + EndpointType, +) from typing import TYPE_CHECKING @@ -737,6 +742,7 @@ def single_exception_from_error_tuple( exception_id=None, # type: Optional[int] parent_id=None, # type: Optional[int] source=None, # type: Optional[str] + full_stack=None, # type: Optional[list[dict[str, Any]]] ): # type: (...) 
-> Dict[str, Any] """ @@ -804,10 +810,15 @@ def single_exception_from_error_tuple( custom_repr=custom_repr, ) for tb in iter_stacks(tb) - ] + ] # type: List[Dict[str, Any]] if frames: - exception_value["stacktrace"] = {"frames": frames} + if not full_stack: + new_frames = frames + else: + new_frames = merge_stack_frames(frames, full_stack, client_options) + + exception_value["stacktrace"] = {"frames": new_frames} return exception_value @@ -862,6 +873,7 @@ def exceptions_from_error( exception_id=0, # type: int parent_id=0, # type: int source=None, # type: Optional[str] + full_stack=None, # type: Optional[list[dict[str, Any]]] ): # type: (...) -> Tuple[int, List[Dict[str, Any]]] """ @@ -881,6 +893,7 @@ def exceptions_from_error( exception_id=exception_id, parent_id=parent_id, source=source, + full_stack=full_stack, ) exceptions = [parent] @@ -906,6 +919,7 @@ def exceptions_from_error( mechanism=mechanism, exception_id=exception_id, source="__cause__", + full_stack=full_stack, ) exceptions.extend(child_exceptions) @@ -927,6 +941,7 @@ def exceptions_from_error( mechanism=mechanism, exception_id=exception_id, source="__context__", + full_stack=full_stack, ) exceptions.extend(child_exceptions) @@ -943,6 +958,7 @@ def exceptions_from_error( exception_id=exception_id, parent_id=parent_id, source="exceptions[%s]" % idx, + full_stack=full_stack, ) exceptions.extend(child_exceptions) @@ -953,6 +969,7 @@ def exceptions_from_error_tuple( exc_info, # type: ExcInfo client_options=None, # type: Optional[Dict[str, Any]] mechanism=None, # type: Optional[Dict[str, Any]] + full_stack=None, # type: Optional[list[dict[str, Any]]] ): # type: (...) 
-> List[Dict[str, Any]] exc_type, exc_value, tb = exc_info @@ -970,6 +987,7 @@ def exceptions_from_error_tuple( mechanism=mechanism, exception_id=0, parent_id=0, + full_stack=full_stack, ) else: @@ -977,7 +995,12 @@ def exceptions_from_error_tuple( for exc_type, exc_value, tb in walk_exception_chain(exc_info): exceptions.append( single_exception_from_error_tuple( - exc_type, exc_value, tb, client_options, mechanism + exc_type=exc_type, + exc_value=exc_value, + tb=tb, + client_options=client_options, + mechanism=mechanism, + full_stack=full_stack, ) ) @@ -1096,6 +1119,46 @@ def exc_info_from_error(error): return exc_info +def merge_stack_frames(frames, full_stack, client_options): + # type: (List[Dict[str, Any]], List[Dict[str, Any]], Optional[Dict[str, Any]]) -> List[Dict[str, Any]] + """ + Add the missing frames from full_stack to frames and return the merged list. + """ + frame_ids = { + ( + frame["abs_path"], + frame["context_line"], + frame["lineno"], + frame["function"], + ) + for frame in frames + } + + new_frames = [ + stackframe + for stackframe in full_stack + if ( + stackframe["abs_path"], + stackframe["context_line"], + stackframe["lineno"], + stackframe["function"], + ) + not in frame_ids + ] + new_frames.extend(frames) + + # Limit the number of frames + max_stack_frames = ( + client_options.get("max_stack_frames", DEFAULT_MAX_STACK_FRAMES) + if client_options + else None + ) + if max_stack_frames is not None: + new_frames = new_frames[len(new_frames) - max_stack_frames :] + + return new_frames + + def event_from_exception( exc_info, # type: Union[BaseException, ExcInfo] client_options=None, # type: Optional[Dict[str, Any]] @@ -1104,12 +1167,21 @@ def event_from_exception( # type: (...) 
-> Tuple[Event, Dict[str, Any]] exc_info = exc_info_from_error(exc_info) hint = event_hint_with_exc_info(exc_info) + + if client_options and client_options.get("add_full_stack", DEFAULT_ADD_FULL_STACK): + full_stack = current_stacktrace( + include_local_variables=client_options["include_local_variables"], + max_value_length=client_options["max_value_length"], + )["frames"] + else: + full_stack = None + return ( { "level": "error", "exception": { "values": exceptions_from_error_tuple( - exc_info, client_options, mechanism + exc_info, client_options, mechanism, full_stack ) }, }, diff --git a/tests/test_full_stack_frames.py b/tests/test_full_stack_frames.py new file mode 100644 index 0000000000..ad0826cd10 --- /dev/null +++ b/tests/test_full_stack_frames.py @@ -0,0 +1,103 @@ +import sentry_sdk + + +def test_full_stack_frames_default(sentry_init, capture_events): + sentry_init() + events = capture_events() + + def foo(): + try: + bar() + except Exception as e: + sentry_sdk.capture_exception(e) + + def bar(): + raise Exception("This is a test exception") + + foo() + + (event,) = events + frames = event["exception"]["values"][0]["stacktrace"]["frames"] + + assert len(frames) == 2 + assert frames[-1]["function"] == "bar" + assert frames[-2]["function"] == "foo" + + +def test_full_stack_frames_enabled(sentry_init, capture_events): + sentry_init( + add_full_stack=True, + ) + events = capture_events() + + def foo(): + try: + bar() + except Exception as e: + sentry_sdk.capture_exception(e) + + def bar(): + raise Exception("This is a test exception") + + foo() + + (event,) = events + frames = event["exception"]["values"][0]["stacktrace"]["frames"] + + assert len(frames) > 2 + assert frames[-1]["function"] == "bar" + assert frames[-2]["function"] == "foo" + assert frames[-3]["function"] == "foo" + assert frames[-4]["function"] == "test_full_stack_frames_enabled" + + +def test_full_stack_frames_enabled_truncated(sentry_init, capture_events): + sentry_init( + 
add_full_stack=True, + max_stack_frames=3, + ) + events = capture_events() + + def foo(): + try: + bar() + except Exception as e: + sentry_sdk.capture_exception(e) + + def bar(): + raise Exception("This is a test exception") + + foo() + + (event,) = events + frames = event["exception"]["values"][0]["stacktrace"]["frames"] + + assert len(frames) == 3 + assert frames[-1]["function"] == "bar" + assert frames[-2]["function"] == "foo" + assert frames[-3]["function"] == "foo" + + +def test_full_stack_frames_default_no_truncation_happening(sentry_init, capture_events): + sentry_init( + max_stack_frames=1, # this is ignored if add_full_stack=False (which is the default) + ) + events = capture_events() + + def foo(): + try: + bar() + except Exception as e: + sentry_sdk.capture_exception(e) + + def bar(): + raise Exception("This is a test exception") + + foo() + + (event,) = events + frames = event["exception"]["values"][0]["stacktrace"]["frames"] + + assert len(frames) == 2 + assert frames[-1]["function"] == "bar" + assert frames[-2]["function"] == "foo" From 5891717b1470f0aa29193a9eb6cf0d899f8ba776 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Dec 2024 14:29:42 +0100 Subject: [PATCH 03/14] Script for checking if our instrumented libs are python 3.13 compatible (#3425) A simple script that parses all libraries we test against from our `tox.ini` and then checks PyPI if this library already supports the newest Python version (currently 3.13) --- scripts/ready_yet/main.py | 124 +++++++++++++++++++++++++++++ scripts/ready_yet/requirements.txt | 3 + scripts/ready_yet/run.sh | 16 ++++ 3 files changed, 143 insertions(+) create mode 100644 scripts/ready_yet/main.py create mode 100644 scripts/ready_yet/requirements.txt create mode 100755 scripts/ready_yet/run.sh diff --git a/scripts/ready_yet/main.py b/scripts/ready_yet/main.py new file mode 100644 index 0000000000..bba97d0c98 --- /dev/null +++ b/scripts/ready_yet/main.py @@ -0,0 +1,124 @@ +import time +import re +import 
sys + +import requests + +from collections import defaultdict + +from pathlib import Path + +from tox.config.cli.parse import get_options +from tox.session.state import State +from tox.config.sets import CoreConfigSet +from tox.config.source.tox_ini import ToxIni + +PYTHON_VERSION = "3.13" + +MATCH_LIB_SENTRY_REGEX = r"py[\d\.]*-(.*)-.*" + +PYPI_PROJECT_URL = "https://pypi.python.org/pypi/{project}/json" +PYPI_VERSION_URL = "https://pypi.python.org/pypi/{project}/{version}/json" + + +def get_tox_envs(tox_ini_path: Path) -> list: + tox_ini = ToxIni(tox_ini_path) + conf = State(get_options(), []).conf + tox_section = next(tox_ini.sections()) + core_config_set = CoreConfigSet( + conf, tox_section, tox_ini_path.parent, tox_ini_path + ) + ( + core_config_set.loaders.extend( + tox_ini.get_loaders( + tox_section, + base=[], + override_map=defaultdict(list, {}), + conf=core_config_set, + ) + ) + ) + return core_config_set.load("env_list") + + +def get_libs(tox_ini: Path, regex: str) -> set: + libs = set() + for env in get_tox_envs(tox_ini): + match = re.match(regex, env) + if match: + libs.add(match.group(1)) + + return sorted(libs) + + +def main(): + """ + Check if libraries in our tox.ini are ready for Python version defined in `PYTHON_VERSION`. 
+ """ + print(f"Checking libs from tox.ini for Python {PYTHON_VERSION} compatibility:") + + ready = set() + not_ready = set() + not_found = set() + + tox_ini = Path(__file__).parent.parent.parent.joinpath("tox.ini") + + libs = get_libs(tox_ini, MATCH_LIB_SENTRY_REGEX) + + for lib in libs: + print(".", end="") + sys.stdout.flush() + + # Get latest version of lib + url = PYPI_PROJECT_URL.format(project=lib) + pypi_data = requests.get(url) + + if pypi_data.status_code != 200: + not_found.add(lib) + continue + + latest_version = pypi_data.json()["info"]["version"] + + # Get supported Python version of latest version of lib + url = PYPI_PROJECT_URL.format(project=lib, version=latest_version) + pypi_data = requests.get(url) + + if pypi_data.status_code != 200: + continue + + classifiers = pypi_data.json()["info"]["classifiers"] + + if f"Programming Language :: Python :: {PYTHON_VERSION}" in classifiers: + ready.add(lib) + else: + not_ready.add(lib) + + # cut pypi some slack + time.sleep(0.1) + + # Print report + print("\n") + print(f"\nReady for Python {PYTHON_VERSION}:") + if len(ready) == 0: + print("- None ") + + for x in sorted(ready): + print(f"- {x}") + + print(f"\nNOT ready for Python {PYTHON_VERSION}:") + if len(not_ready) == 0: + print("- None ") + + for x in sorted(not_ready): + print(f"- {x}") + + print("\nNot found on PyPI:") + if len(not_found) == 0: + print("- None ") + + for x in sorted(not_found): + print(f"- {x}") + + +if __name__ == "__main__": + main() diff --git a/scripts/ready_yet/requirements.txt b/scripts/ready_yet/requirements.txt new file mode 100644 index 0000000000..e0590b89c6 --- /dev/null +++ b/scripts/ready_yet/requirements.txt @@ -0,0 +1,3 @@ +requests +pathlib +tox \ No newline at end of file diff --git a/scripts/ready_yet/run.sh b/scripts/ready_yet/run.sh new file mode 100755 index 0000000000..f32bd7bdda --- /dev/null +++ b/scripts/ready_yet/run.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +# exit on first error +set -xe + +reset + +# 
create and activate virtual environment +python -m venv .venv +source .venv/bin/activate + +# Install (or update) requirements +python -m pip install -r requirements.txt + +# Run the script +python main.py \ No newline at end of file From 31fdcfaee7e871802f8ffef72847884e28472969 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 5 Dec 2024 13:58:22 +0000 Subject: [PATCH 04/14] fix(django): Fix errors when instrumenting Django cache (#3855) I was testing Spotlight with Sentry and realized things started to get slow and crashy. It looks like sometimes `args` is just an empty array on cache's `_instruments_call` causing lots of exceptions being thrown. This patch fixes that with explicit length checks and also adds a note for the missing instrumentation for `get_or_set` method. This might be related to #2122 and #3300. --- sentry_sdk/integrations/django/caching.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/integrations/django/caching.py b/sentry_sdk/integrations/django/caching.py index 39d1679183..7985611761 100644 --- a/sentry_sdk/integrations/django/caching.py +++ b/sentry_sdk/integrations/django/caching.py @@ -75,11 +75,12 @@ def _instrument_call( span.set_data(SPANDATA.CACHE_HIT, True) else: span.set_data(SPANDATA.CACHE_HIT, False) - else: - try: + else: # TODO: We don't handle `get_or_set` which we should + arg_count = len(args) + if arg_count >= 2: # 'set' command item_size = len(str(args[1])) - except IndexError: + elif arg_count == 1: # 'set_many' command item_size = len(str(args[0])) From 5a097705411842c48358b5a797fd92723a853019 Mon Sep 17 00:00:00 2001 From: Burak Yigit Kaya Date: Thu, 5 Dec 2024 14:06:41 +0000 Subject: [PATCH 05/14] fix(spotlight): Don't give up on Spotlight on 3 errors (#3856) Current Spotlight error handling logic gives up sending events to Spotlight after 3 errors. This doesn't make much sense because: 1. 
Since there is no back off or retry mechanism, even a very brief server hiccup or restart turns off Spotlight reporting 2. Once this shut off kicks in, there is no way to turn it back on except for a server restart I added a note for future work for retries and some short buffer. --- sentry_sdk/spotlight.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sentry_sdk/spotlight.py b/sentry_sdk/spotlight.py index 806ba5a09e..a94c691723 100644 --- a/sentry_sdk/spotlight.py +++ b/sentry_sdk/spotlight.py @@ -42,11 +42,6 @@ def __init__(self, url): def capture_envelope(self, envelope): # type: (Envelope) -> None - if self.tries > 3: - sentry_logger.warning( - "Too many errors sending to Spotlight, stop sending events there." - ) - return body = io.BytesIO() envelope.serialize_into(body) try: @@ -60,7 +55,7 @@ def capture_envelope(self, envelope): ) req.close() except Exception as e: - self.tries += 1 + # TODO: Implement buffering and retrying with exponential backoff sentry_logger.warning(str(e)) From 7a6d460bd14433c3d3f03efa6a4b3f924105adc6 Mon Sep 17 00:00:00 2001 From: Neel Shah Date: Thu, 5 Dec 2024 15:49:17 +0100 Subject: [PATCH 06/14] Copy scope.client reference as well (#3857) --- sentry_sdk/scope.py | 1 + tests/test_scope.py | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index 34ccc7f940..bb45143c48 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -225,6 +225,7 @@ def __copy__(self): rv = object.__new__(self.__class__) # type: Scope rv._type = self._type + rv.client = self.client rv._level = self._level rv._name = self._name rv._fingerprint = self._fingerprint diff --git a/tests/test_scope.py b/tests/test_scope.py index 374a354446..a03eb07a99 100644 --- a/tests/test_scope.py +++ b/tests/test_scope.py @@ -19,10 +19,6 @@ ) -SLOTS_NOT_COPIED = {"client"} -"""__slots__ that are not copied when copying a Scope object.""" - - def test_copying(): s1 = Scope() 
s1.fingerprint = {} @@ -43,7 +39,7 @@ def test_all_slots_copied(): scope_copy = copy.copy(scope) # Check all attributes are copied - for attr in set(Scope.__slots__) - SLOTS_NOT_COPIED: + for attr in set(Scope.__slots__): assert getattr(scope_copy, attr) == getattr(scope, attr) From c591b64d5075628d5fa5351ed4307182981e9bd5 Mon Sep 17 00:00:00 2001 From: getsentry-bot Date: Thu, 5 Dec 2024 14:51:42 +0000 Subject: [PATCH 07/14] release: 2.19.1 --- CHANGELOG.md | 20 ++++++++++++++++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbb35eb1eb..d1d0a78ce8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,25 @@ # Changelog +## 2.19.1 + +### Various fixes & improvements + +- Copy scope.client reference as well (#3857) by @sl0thentr0py +- fix(spotlight): Don't give up on Spotlight on 3 errors (#3856) by @BYK +- fix(django): Fix errors when instrumenting Django cache (#3855) by @BYK +- Script for checking if our instrumented libs are python 3.13 compatible (#3425) by @antonpirker +- Add missing stack frames (#3673) by @antonpirker +- fix(grpc): Return proper metadata object instead of list in… (#3205) by @fdellekart +- Improve ray tests (#3846) by @antonpirker +- Test with celery 5.5.0rc3 (#3842) by @sentrivana +- Revert "Fix spans for streaming responses in WSGI based frameworks (#3798)" (#3836) by @antonpirker +- Fix asyncio testing setup (#3832) by @sl0thentr0py +- build(deps): bump codecov/codecov-action from 5.0.2 to 5.0.7 (#3821) by @dependabot +- Fix CI (#3834) by @sentrivana +- ref(flags): rename launch darkly hook to match JS SDK (#3743) by @aliu39 +- Use new clickhouse gh action (#3826) by @antonpirker +- Fix spans for streaming responses in WSGI based frameworks (#3798) by @antonpirker + ## 2.19.0 ### Various fixes & improvements diff --git a/docs/conf.py b/docs/conf.py index 55d5295381..4f5c210322 100644 --- a/docs/conf.py +++ b/docs/conf.py 
@@ -31,7 +31,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "2.19.0" +release = "2.19.1" version = ".".join(release.split(".")[:2]) # The short X.Y version. diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index 6750e85f99..f338543dee 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -581,4 +581,4 @@ def _get_default_options(): del _get_default_options -VERSION = "2.19.0" +VERSION = "2.19.1" diff --git a/setup.py b/setup.py index fda3daa229..7782d57a36 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_file_text(file_name): setup( name="sentry-sdk", - version="2.19.0", + version="2.19.1", author="Sentry Team and Contributors", author_email="hello@sentry.io", url="https://github.com/getsentry/sentry-python", From 231a6a1d5eb5026415542ef2c2355e468bc69f66 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Thu, 5 Dec 2024 15:53:50 +0100 Subject: [PATCH 08/14] Update CHANGELOG.md --- CHANGELOG.md | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d1d0a78ce8..eb45f28c7e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,21 +4,19 @@ ### Various fixes & improvements -- Copy scope.client reference as well (#3857) by @sl0thentr0py -- fix(spotlight): Don't give up on Spotlight on 3 errors (#3856) by @BYK -- fix(django): Fix errors when instrumenting Django cache (#3855) by @BYK -- Script for checking if our instrumented libs are python 3.13 compatible (#3425) by @antonpirker +- Fix errors when instrumenting Django cache (#3855) by @BYK +- Copy `scope.client` reference as well (#3857) by @sl0thentr0py +- Don't give up on Spotlight on 3 errors (#3856) by @BYK - Add missing stack frames (#3673) by @antonpirker -- fix(grpc): Return proper metadata object instead of list in… (#3205) by @fdellekart -- Improve ray tests (#3846) by @antonpirker -- Test with celery 5.5.0rc3 (#3842) by 
@sentrivana -- Revert "Fix spans for streaming responses in WSGI based frameworks (#3798)" (#3836) by @antonpirker +- Fix wrong metadata type in async gRPC interceptor (#3205) by @fdellekart +- Rename launch darkly hook to match JS SDK (#3743) by @aliu39 +- Script for checking if our instrumented libs are Python 3.13 compatible (#3425) by @antonpirker +- Improve Ray tests (#3846) by @antonpirker +- Test with Celery `5.5.0rc3` (#3842) by @sentrivana - Fix asyncio testing setup (#3832) by @sl0thentr0py -- build(deps): bump codecov/codecov-action from 5.0.2 to 5.0.7 (#3821) by @dependabot +- Bump `codecov/codecov-action` from `5.0.2` to `5.0.7` (#3821) by @dependabot - Fix CI (#3834) by @sentrivana -- ref(flags): rename launch darkly hook to match JS SDK (#3743) by @aliu39 -- Use new clickhouse gh action (#3826) by @antonpirker -- Fix spans for streaming responses in WSGI based frameworks (#3798) by @antonpirker +- Use new ClickHouse GH action (#3826) by @antonpirker ## 2.19.0 From 7ab7fe67496fce2396edcb5bc8a64645601a1218 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 5 Dec 2024 16:16:49 +0100 Subject: [PATCH 09/14] Cleanup chalice test environment (#3858) --- tox.ini | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tox.ini b/tox.ini index 8c6f9eda86..d3bd83cb03 100644 --- a/tox.ini +++ b/tox.ini @@ -391,11 +391,9 @@ deps = {py3.7}-celery: importlib-metadata<5.0 # Chalice + chalice: pytest-chalice==0.0.5 chalice-v1.16: chalice~=1.16.0 chalice-latest: chalice - chalice: pytest-chalice==0.0.5 - - {py3.7,py3.8}-chalice: botocore~=1.31 # Clickhouse Driver clickhouse_driver-v0.2.0: clickhouse_driver~=0.2.0 From 7c70b9c1455917c3e35416e53712ebf25b7d6236 Mon Sep 17 00:00:00 2001 From: Neel Shah Date: Thu, 5 Dec 2024 17:35:07 +0100 Subject: [PATCH 10/14] Fix leftover scope test --- tests/test_scope.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_scope.py b/tests/test_scope.py index 858295536c..e7194e6caf 100644 
--- a/tests/test_scope.py +++ b/tests/test_scope.py @@ -21,6 +21,7 @@ use_isolation_scope, setup_scope_context_management, ) +from tests.conftest import ApproxDict @pytest.fixture(autouse=True) @@ -800,8 +801,8 @@ def test_nested_scopes_with_tags(sentry_init, capture_envelopes): transaction = envelope.items[0].get_transaction_event() assert transaction["tags"] == {"isolation_scope1": 1, "current_scope2": 1, "trx": 1} - assert transaction["spans"][0]["tags"] == {"a": 1} - assert transaction["spans"][1]["tags"] == {"b": 1} + assert transaction["spans"][0]["tags"] == ApproxDict({"a": 1}) + assert transaction["spans"][1]["tags"] == ApproxDict({"b": 1}) def test_should_send_default_pii_true(sentry_init): From bcadb616b09793e852f96b987956dff7bbb0e122 Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Thu, 5 Dec 2024 17:36:34 +0100 Subject: [PATCH 11/14] Sampling context improvements (#3847) --- MIGRATION_GUIDE.md | 199 +++++++++++---------- sentry_sdk/integrations/_wsgi_common.py | 16 +- sentry_sdk/integrations/aiohttp.py | 7 +- sentry_sdk/integrations/asgi.py | 11 +- sentry_sdk/integrations/aws_lambda.py | 15 +- sentry_sdk/integrations/celery/__init__.py | 13 +- sentry_sdk/integrations/gcp.py | 10 +- sentry_sdk/integrations/rq.py | 16 +- sentry_sdk/integrations/tornado.py | 9 +- sentry_sdk/integrations/wsgi.py | 11 +- tests/integrations/aiohttp/test_aiohttp.py | 5 +- tests/integrations/asgi/test_asgi.py | 3 +- tests/integrations/aws_lambda/test_aws.py | 3 +- tests/integrations/celery/test_celery.py | 11 +- tests/integrations/gcp/test_gcp.py | 15 +- tests/integrations/rq/test_rq.py | 16 +- tests/integrations/tornado/test_tornado.py | 3 +- tests/integrations/wsgi/test_wsgi.py | 9 +- 18 files changed, 221 insertions(+), 151 deletions(-) diff --git a/MIGRATION_GUIDE.md b/MIGRATION_GUIDE.md index 6f0aeb4510..d78abe14c5 100644 --- a/MIGRATION_GUIDE.md +++ b/MIGRATION_GUIDE.md @@ -20,102 +20,109 @@ Looking to upgrade from Sentry SDK 2.x to 3.x? 
Here's a comprehensive list of wh - Redis integration: In Redis pipeline spans there is no `span["data"]["redis.commands"]` that contains a dict `{"count": 3, "first_ten": ["cmd1", "cmd2", ...]}` but instead `span["data"]["redis.commands.count"]` (containing `3`) and `span["data"]["redis.commands.first_ten"]` (containing `["cmd1", "cmd2", ...]`). - clickhouse-driver integration: The query is now available under the `db.query.text` span attribute (only if `send_default_pii` is `True`). - `sentry_sdk.init` now returns `None` instead of a context manager. -- The `sampling_context` argument of `traces_sampler` now additionally contains all span attributes known at span start. -- If you're using the Celery integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `celery_job` dictionary anymore. Instead, the individual keys are now available as: - - | Dictionary keys | Sampling context key | - | ---------------------- | -------------------- | - | `celery_job["args"]` | `celery.job.args` | - | `celery_job["kwargs"]` | `celery.job.kwargs` | - | `celery_job["task"]` | `celery.job.task` | - - Note that all of these are serialized, i.e., not the original `args` and `kwargs` but rather OpenTelemetry-friendly span attributes. - -- If you're using the AIOHTTP integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `aiohttp_request` object anymore. Instead, some of the individual properties of the request are accessible, if available, as follows: - - | Request property | Sampling context key(s) | - | ---------------- | ------------------------------- | - | `path` | `url.path` | - | `query_string` | `url.query` | - | `method` | `http.request.method` | - | `host` | `server.address`, `server.port` | - | `scheme` | `url.scheme` | - | full URL | `url.full` | - -- If you're using the Tornado integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `tornado_request` object anymore. 
Instead, some of the individual properties of the request are accessible, if available, as follows: - - | Request property | Sampling context key(s) | - | ---------------- | --------------------------------------------------- | - | `path` | `url.path` | - | `query` | `url.query` | - | `protocol` | `url.scheme` | - | `method` | `http.request.method` | - | `host` | `server.address`, `server.port` | - | `version` | `network.protocol.name`, `network.protocol.version` | - | full URL | `url.full` | - -- If you're using the generic WSGI integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `wsgi_environ` object anymore. Instead, the individual properties of the environment are accessible, if available, as follows: - - | Env property | Sampling context key(s) | - | ----------------- | ------------------------------------------------- | - | `PATH_INFO` | `url.path` | - | `QUERY_STRING` | `url.query` | - | `REQUEST_METHOD` | `http.request.method` | - | `SERVER_NAME` | `server.address` | - | `SERVER_PORT` | `server.port` | - | `SERVER_PROTOCOL` | `server.protocol.name`, `server.protocol.version` | - | `wsgi.url_scheme` | `url.scheme` | - | full URL | `url.full` | - -- If you're using the generic ASGI integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `asgi_scope` object anymore. 
Instead, the individual properties of the scope, if available, are accessible as follows: - - | Scope property | Sampling context key(s) | - | -------------- | ------------------------------- | - | `type` | `network.protocol.name` | - | `scheme` | `url.scheme` | - | `path` | `url.path` | - | `query` | `url.query` | - | `http_version` | `network.protocol.version` | - | `method` | `http.request.method` | - | `server` | `server.address`, `server.port` | - | `client` | `client.address`, `client.port` | - | full URL | `url.full` | - -- If you're using the RQ integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `rq_job` object anymore. Instead, the individual properties of the job and the queue, if available, are accessible as follows: - - | RQ property | Sampling context key(s) | - | --------------- | ---------------------------- | - | `rq_job.args` | `rq.job.args` | - | `rq_job.kwargs` | `rq.job.kwargs` | - | `rq_job.func` | `rq.job.func` | - | `queue.name` | `messaging.destination.name` | - | `rq_job.id` | `messaging.message.id` | - - Note that `rq.job.args`, `rq.job.kwargs`, and `rq.job.func` are serialized and not the actual objects on the job. - -- If you're using the AWS Lambda integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `aws_event` and `aws_context` objects anymore. 
Instead, the following, if available, is accessible: - - | AWS property | Sampling context key(s) | - | ------------------------------------------- | ----------------------- | - | `aws_event["httpMethod"]` | `http.request.method` | - | `aws_event["queryStringParameters"]` | `url.query` | - | `aws_event["path"]` | `url.path` | - | full URL | `url.full` | - | `aws_event["headers"]["X-Forwarded-Proto"]` | `network.protocol.name` | - | `aws_event["headers"]["Host"]` | `server.address` | - | `aws_context["function_name"]` | `faas.name` | - -- If you're using the GCP integration, the `sampling_context` argument of `traces_sampler` doesn't contain the `gcp_env` and `gcp_event` keys anymore. Instead, the following, if available, is accessible: - - | Old sampling context key | New sampling context key | - | --------------------------------- | -------------------------- | - | `gcp_env["function_name"]` | `faas.name` | - | `gcp_env["function_region"]` | `faas.region` | - | `gcp_env["function_project"]` | `gcp.function.project` | - | `gcp_env["function_identity"]` | `gcp.function.identity` | - | `gcp_env["function_entry_point"]` | `gcp.function.entry_point` | - | `gcp_event.method` | `http.request.method` | - | `gcp_event.query_string` | `url.query` | +- The `sampling_context` argument of `traces_sampler` and `profiles_sampler` now additionally contains all span attributes known at span start. +- The integration-specific content of the `sampling_context` argument of `traces_sampler` and `profiles_sampler` now looks different. + - The Celery integration doesn't add the `celery_job` dictionary anymore. 
Instead, the individual keys are now available as: + + | Dictionary keys | Sampling context key | Example | + | ---------------------- | --------------------------- | ------------------------------ | + | `celery_job["args"]` | `celery.job.args.{index}` | `celery.job.args.0` | + | `celery_job["kwargs"]` | `celery.job.kwargs.{kwarg}` | `celery.job.kwargs.kwarg_name` | + | `celery_job["task"]` | `celery.job.task` | | + + Note that all of these are serialized, i.e., not the original `args` and `kwargs` but rather OpenTelemetry-friendly span attributes. + + - The AIOHTTP integration doesn't add the `aiohttp_request` object anymore. Instead, some of the individual properties of the request are accessible, if available, as follows: + + | Request property | Sampling context key(s) | + | ----------------- | ------------------------------- | + | `path` | `url.path` | + | `query_string` | `url.query` | + | `method` | `http.request.method` | + | `host` | `server.address`, `server.port` | + | `scheme` | `url.scheme` | + | full URL | `url.full` | + | `request.headers` | `http.request.header.{header}` | + + - The Tornado integration doesn't add the `tornado_request` object anymore. Instead, some of the individual properties of the request are accessible, if available, as follows: + + | Request property | Sampling context key(s) | + | ----------------- | --------------------------------------------------- | + | `path` | `url.path` | + | `query` | `url.query` | + | `protocol` | `url.scheme` | + | `method` | `http.request.method` | + | `host` | `server.address`, `server.port` | + | `version` | `network.protocol.name`, `network.protocol.version` | + | full URL | `url.full` | + | `request.headers` | `http.request.header.{header}` | + + - The WSGI integration doesn't add the `wsgi_environ` object anymore. 
Instead, the individual properties of the environment are accessible, if available, as follows: + + | Env property | Sampling context key(s) | + | ----------------- | ------------------------------------------------- | + | `PATH_INFO` | `url.path` | + | `QUERY_STRING` | `url.query` | + | `REQUEST_METHOD` | `http.request.method` | + | `SERVER_NAME` | `server.address` | + | `SERVER_PORT` | `server.port` | + | `SERVER_PROTOCOL` | `server.protocol.name`, `server.protocol.version` | + | `wsgi.url_scheme` | `url.scheme` | + | full URL | `url.full` | + | `HTTP_*` | `http.request.header.{header}` | + + - The ASGI integration doesn't add the `asgi_scope` object anymore. Instead, the individual properties of the scope, if available, are accessible as follows: + + | Scope property | Sampling context key(s) | + | -------------- | ------------------------------- | + | `type` | `network.protocol.name` | + | `scheme` | `url.scheme` | + | `path` | `url.path` | + | `query` | `url.query` | + | `http_version` | `network.protocol.version` | + | `method` | `http.request.method` | + | `server` | `server.address`, `server.port` | + | `client` | `client.address`, `client.port` | + | full URL | `url.full` | + | `headers` | `http.request.header.{header}` | + + - The RQ integration doesn't add the `rq_job` object anymore. Instead, the individual properties of the job and the queue, if available, are accessible as follows: + + | RQ property | Sampling context key | Example | + | --------------- | ---------------------------- | ------------------------ | + | `rq_job.args` | `rq.job.args.{index}` | `rq.job.args.0` | + | `rq_job.kwargs` | `rq.job.kwargs.{kwarg}` | `rq.job.kwargs.my_kwarg` | + | `rq_job.func` | `rq.job.func` | | + | `queue.name` | `messaging.destination.name` | | + | `rq_job.id` | `messaging.message.id` | | + + Note that `rq.job.args`, `rq.job.kwargs`, and `rq.job.func` are serialized and not the actual objects on the job.
+ + - The AWS Lambda integration doesn't add the `aws_event` and `aws_context` objects anymore. Instead, the following, if available, is accessible: + + | AWS property | Sampling context key(s) | + | ------------------------------------------- | ------------------------------- | + | `aws_event["httpMethod"]` | `http.request.method` | + | `aws_event["queryStringParameters"]` | `url.query` | + | `aws_event["path"]` | `url.path` | + | full URL | `url.full` | + | `aws_event["headers"]["X-Forwarded-Proto"]` | `network.protocol.name` | + | `aws_event["headers"]["Host"]` | `server.address` | + | `aws_context["function_name"]` | `faas.name` | + | `aws_event["headers"]` | `http.request.header.{header}` | + + - The GCP integration doesn't add the `gcp_env` and `gcp_event` keys anymore. Instead, the following, if available, is accessible: + + | Old sampling context key | New sampling context key | + | --------------------------------- | ------------------------------ | + | `gcp_env["function_name"]` | `faas.name` | + | `gcp_env["function_region"]` | `faas.region` | + | `gcp_env["function_project"]` | `gcp.function.project` | + | `gcp_env["function_identity"]` | `gcp.function.identity` | + | `gcp_env["function_entry_point"]` | `gcp.function.entry_point` | + | `gcp_event.method` | `http.request.method` | + | `gcp_event.query_string` | `url.query` | + | `gcp_event.headers` | `http.request.header.{header}` | ### Removed diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 072a102b7c..8adbe47224 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -3,7 +3,7 @@ import sentry_sdk from sentry_sdk.scope import should_send_default_pii -from sentry_sdk.utils import AnnotatedValue, logger +from sentry_sdk.utils import AnnotatedValue, logger, SENSITIVE_DATA_SUBSTITUTE try: from django.http.request import RawPostDataException @@ -221,6 +221,20 @@ def _filter_headers(headers): } +def
_request_headers_to_span_attributes(headers): + # type: (dict[str, str]) -> dict[str, str] + attributes = {} + + headers = _filter_headers(headers) + + for header, value in headers.items(): + if isinstance(value, AnnotatedValue): + value = SENSITIVE_DATA_SUBSTITUTE + attributes[f"http.request.header.{header.lower()}"] = value + + return attributes + + def _in_http_status_code_range(code, code_ranges): # type: (object, list[HttpStatusCodeRange]) -> bool for target in code_ranges: diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index ccc4593606..59bc70e5d4 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -13,6 +13,7 @@ from sentry_sdk.sessions import track_session from sentry_sdk.integrations._wsgi_common import ( _filter_headers, + _request_headers_to_span_attributes, request_body_within_bounds, ) from sentry_sdk.tracing import ( @@ -389,11 +390,11 @@ def _prepopulate_attributes(request): except ValueError: attributes["server.address"] = request.host - try: + with capture_internal_exceptions(): url = f"{request.scheme}://{request.host}{request.path}" # noqa: E231 if request.query_string: attributes["url.full"] = f"{url}?{request.query_string}" - except Exception: - pass + + attributes.update(_request_headers_to_span_attributes(dict(request.headers))) return attributes diff --git a/sentry_sdk/integrations/asgi.py b/sentry_sdk/integrations/asgi.py index 80c24b8cb6..4a3fe830eb 100644 --- a/sentry_sdk/integrations/asgi.py +++ b/sentry_sdk/integrations/asgi.py @@ -21,6 +21,7 @@ ) from sentry_sdk.integrations._wsgi_common import ( DEFAULT_HTTP_METHODS_TO_CAPTURE, + _request_headers_to_span_attributes, ) from sentry_sdk.sessions import track_session from sentry_sdk.tracing import ( @@ -32,6 +33,7 @@ ) from sentry_sdk.utils import ( ContextVar, + capture_internal_exceptions, event_from_exception, HAS_REAL_CONTEXTVARS, CONTEXTVARS_ERROR_MESSAGE, @@ -348,11 +350,12 @@ def 
_prepopulate_attributes(scope): try: host, port = scope[attr] attributes[f"{attr}.address"] = host - attributes[f"{attr}.port"] = port + if port is not None: + attributes[f"{attr}.port"] = port except Exception: pass - try: + with capture_internal_exceptions(): full_url = _get_url(scope) query = _get_query(scope) if query: @@ -360,7 +363,7 @@ def _prepopulate_attributes(scope): full_url = f"{full_url}?{query}" attributes["url.full"] = full_url - except Exception: - pass + + attributes.update(_request_headers_to_span_attributes(_get_headers(scope))) return attributes diff --git a/sentry_sdk/integrations/aws_lambda.py b/sentry_sdk/integrations/aws_lambda.py index 177d73a638..cd2b3cc417 100644 --- a/sentry_sdk/integrations/aws_lambda.py +++ b/sentry_sdk/integrations/aws_lambda.py @@ -20,7 +20,10 @@ reraise, ) from sentry_sdk.integrations import Integration -from sentry_sdk.integrations._wsgi_common import _filter_headers +from sentry_sdk.integrations._wsgi_common import ( + _filter_headers, + _request_headers_to_span_attributes, +) from typing import TYPE_CHECKING @@ -162,7 +165,7 @@ def sentry_handler(aws_event, aws_context, *args, **kwargs): name=aws_context.function_name, source=TRANSACTION_SOURCE_COMPONENT, origin=AwsLambdaIntegration.origin, - attributes=_prepopulate_attributes(aws_event, aws_context), + attributes=_prepopulate_attributes(request_data, aws_context), ): try: return handler(aws_event, aws_context, *args, **kwargs) @@ -468,6 +471,7 @@ def _event_from_error_json(error_json): def _prepopulate_attributes(aws_event, aws_context): + # type: (Any, Any) -> dict[str, Any] attributes = { "cloud.provider": "aws", } @@ -486,10 +490,15 @@ def _prepopulate_attributes(aws_event, aws_context): url += f"?{aws_event['queryStringParameters']}" attributes["url.full"] = url - headers = aws_event.get("headers") or {} + headers = {} + if aws_event.get("headers") and isinstance(aws_event["headers"], dict): + headers = aws_event["headers"] + if 
headers.get("X-Forwarded-Proto"): attributes["network.protocol.name"] = headers["X-Forwarded-Proto"] if headers.get("Host"): attributes["server.address"] = headers["Host"] + attributes.update(_request_headers_to_span_attributes(headers)) + return attributes diff --git a/sentry_sdk/integrations/celery/__init__.py b/sentry_sdk/integrations/celery/__init__.py index 0b66bbf05c..a943871335 100644 --- a/sentry_sdk/integrations/celery/__init__.py +++ b/sentry_sdk/integrations/celery/__init__.py @@ -20,7 +20,6 @@ ensure_integration_enabled, event_from_exception, reraise, - _serialize_span_attribute, ) from typing import TYPE_CHECKING @@ -514,9 +513,17 @@ def sentry_publish(self, *args, **kwargs): def _prepopulate_attributes(task, args, kwargs): + # type: (Any, *Any, **Any) -> dict[str, str] attributes = { "celery.job.task": task.name, - "celery.job.args": _serialize_span_attribute(args), - "celery.job.kwargs": _serialize_span_attribute(kwargs), } + + for i, arg in enumerate(args): + with capture_internal_exceptions(): + attributes[f"celery.job.args.{i}"] = str(arg) + + for kwarg, value in kwargs.items(): + with capture_internal_exceptions(): + attributes[f"celery.job.kwargs.{kwarg}"] = str(value) + return attributes diff --git a/sentry_sdk/integrations/gcp.py b/sentry_sdk/integrations/gcp.py index 2f17464f70..dd23ad1e0a 100644 --- a/sentry_sdk/integrations/gcp.py +++ b/sentry_sdk/integrations/gcp.py @@ -7,7 +7,10 @@ import sentry_sdk from sentry_sdk.consts import OP from sentry_sdk.integrations import Integration -from sentry_sdk.integrations._wsgi_common import _filter_headers +from sentry_sdk.integrations._wsgi_common import ( + _filter_headers, + _request_headers_to_span_attributes, +) from sentry_sdk.scope import should_send_default_pii from sentry_sdk.tracing import TRANSACTION_SOURCE_COMPONENT from sentry_sdk.utils import ( @@ -236,6 +239,7 @@ def _get_google_cloud_logs_url(final_time): def _prepopulate_attributes(gcp_event): + # type: (Any) -> dict[str, Any] 
attributes = { "cloud.provider": "gcp", } @@ -248,4 +252,8 @@ def _prepopulate_attributes(gcp_event): if getattr(gcp_event, key, None): attributes[attr] = getattr(gcp_event, key) + if hasattr(gcp_event, "headers"): + headers = gcp_event.headers + attributes.update(_request_headers_to_span_attributes(headers)) + return attributes diff --git a/sentry_sdk/integrations/rq.py b/sentry_sdk/integrations/rq.py index b097b253ce..fb99fc1b89 100644 --- a/sentry_sdk/integrations/rq.py +++ b/sentry_sdk/integrations/rq.py @@ -6,7 +6,6 @@ from sentry_sdk.integrations.logging import ignore_logger from sentry_sdk.tracing import TRANSACTION_SOURCE_TASK from sentry_sdk.utils import ( - _serialize_span_attribute, capture_internal_exceptions, ensure_integration_enabled, event_from_exception, @@ -183,6 +182,7 @@ def _prepopulate_attributes(job, queue): # type: (Job, Queue) -> dict[str, Any] attributes = { "messaging.system": "rq", + "rq.job.id": job.id, } for prop, attr in JOB_PROPERTY_TO_ATTRIBUTE.items(): @@ -193,14 +193,20 @@ def _prepopulate_attributes(job, queue): if getattr(queue, prop, None) is not None: attributes[attr] = getattr(queue, prop) - for key in ("args", "kwargs"): - if getattr(job, key, None): - attributes[f"rq.job.{key}"] = _serialize_span_attribute(getattr(job, key)) + if getattr(job, "args", None): + for i, arg in enumerate(job.args): + with capture_internal_exceptions(): + attributes[f"rq.job.args.{i}"] = str(arg) + + if getattr(job, "kwargs", None): + for kwarg, value in job.kwargs.items(): + with capture_internal_exceptions(): + attributes[f"rq.job.kwargs.{kwarg}"] = str(value) func = job.func if callable(func): func = func.__name__ - attributes["rq.job.func"] = _serialize_span_attribute(func) + attributes["rq.job.func"] = str(func) return attributes diff --git a/sentry_sdk/integrations/tornado.py b/sentry_sdk/integrations/tornado.py index 591f59ec03..bb40fbf625 100644 --- a/sentry_sdk/integrations/tornado.py +++ b/sentry_sdk/integrations/tornado.py @@ -22,6 
+22,7 @@ RequestExtractor, _filter_headers, _is_json_content_type, + _request_headers_to_span_attributes, ) from sentry_sdk.integrations.logging import ignore_logger @@ -246,7 +247,7 @@ def _prepopulate_attributes(request): except ValueError: attributes["network.protocol.name"] = request.version - if getattr(request, "host", None) is not None: + if getattr(request, "host", None): try: address, port = request.host.split(":") attributes["server.address"] = address @@ -254,9 +255,9 @@ def _prepopulate_attributes(request): except ValueError: attributes["server.address"] = request.host - try: + with capture_internal_exceptions(): attributes["url.full"] = request.full_url() - except Exception: - pass + + attributes.update(_request_headers_to_span_attributes(request.headers)) return attributes diff --git a/sentry_sdk/integrations/wsgi.py b/sentry_sdk/integrations/wsgi.py index 726a310482..7f7360a341 100644 --- a/sentry_sdk/integrations/wsgi.py +++ b/sentry_sdk/integrations/wsgi.py @@ -9,9 +9,9 @@ from sentry_sdk.integrations._wsgi_common import ( DEFAULT_HTTP_METHODS_TO_CAPTURE, _filter_headers, + _request_headers_to_span_attributes, ) from sentry_sdk.sessions import track_session -from sentry_sdk.scope import use_isolation_scope from sentry_sdk.tracing import Transaction, TRANSACTION_SOURCE_ROUTE from sentry_sdk.utils import ( ContextVar, @@ -324,6 +324,7 @@ def event_processor(event, hint): def _prepopulate_attributes(wsgi_environ, use_x_forwarded_for=False): + # type: (dict[str, str], bool) -> dict[str, str] """Extract span attributes from the WSGI environment.""" attributes = {} @@ -339,11 +340,13 @@ def _prepopulate_attributes(wsgi_environ, use_x_forwarded_for=False): except Exception: attributes["network.protocol.name"] = wsgi_environ["SERVER_PROTOCOL"] - try: + with capture_internal_exceptions(): url = get_request_url(wsgi_environ, use_x_forwarded_for) query = wsgi_environ.get("QUERY_STRING") attributes["url.full"] = f"{url}?{query}" - except Exception: - pass + + 
attributes.update( + _request_headers_to_span_attributes(dict(_get_headers(wsgi_environ))) + ) return attributes diff --git a/tests/integrations/aiohttp/test_aiohttp.py b/tests/integrations/aiohttp/test_aiohttp.py index 8327832acc..4b491b152e 100644 --- a/tests/integrations/aiohttp/test_aiohttp.py +++ b/tests/integrations/aiohttp/test_aiohttp.py @@ -309,7 +309,9 @@ async def kangaroo_handler(request): app.router.add_get("/tricks/kangaroo", kangaroo_handler) client = await aiohttp_client(app) - await client.get("/tricks/kangaroo?jump=high") + await client.get( + "/tricks/kangaroo?jump=high", headers={"Custom-Header": "Custom Value"} + ) assert traces_sampler.call_count == 1 sampling_context = traces_sampler.call_args_list[0][0][0] @@ -324,6 +326,7 @@ async def kangaroo_handler(request): assert sampling_context["http.request.method"] == "GET" assert sampling_context["server.address"] == "127.0.0.1" assert sampling_context["server.port"].isnumeric() + assert sampling_context["http.request.header.custom-header"] == "Custom Value" @pytest.mark.asyncio diff --git a/tests/integrations/asgi/test_asgi.py b/tests/integrations/asgi/test_asgi.py index adfd798c72..153117f8ee 100644 --- a/tests/integrations/asgi/test_asgi.py +++ b/tests/integrations/asgi/test_asgi.py @@ -733,6 +733,7 @@ def dummy_traces_sampler(sampling_context): assert sampling_context["http.request.method"] == "GET" assert sampling_context["network.protocol.version"] == "1.1" assert sampling_context["network.protocol.name"] == "http" + assert sampling_context["http.request.header.custom-header"] == "Custom Value" sentry_init( traces_sampler=dummy_traces_sampler, @@ -742,4 +743,4 @@ def dummy_traces_sampler(sampling_context): app = SentryAsgiMiddleware(asgi3_app) async with TestClient(app) as client: - await client.get("/test?hello=there") + await client.get("/test?hello=there", headers={"Custom-Header": "Custom Value"}) diff --git a/tests/integrations/aws_lambda/test_aws.py 
b/tests/integrations/aws_lambda/test_aws.py index c1235ae0a0..e58fab292d 100644 --- a/tests/integrations/aws_lambda/test_aws.py +++ b/tests/integrations/aws_lambda/test_aws.py @@ -625,6 +625,7 @@ def test_handler(event, context): "url.full": "http://x.io/sit/stay/rollover?repeat=twice", "network.protocol.name": "http", "server.address": "x.io", + "http.request.header.custom-header": "Custom Value", } ) ) @@ -643,7 +644,7 @@ def test_handler(event, context): ) """ ), - b'{"httpMethod": "GET", "path": "/sit/stay/rollover", "query_string": {"repeat": "again"}, "headers": {"Host": "x.io", "X-Forwarded-Proto": "http"}}', + b'{"httpMethod": "GET", "path": "/sit/stay/rollover", "query_string": {"repeat": "again"}, "headers": {"Host": "x.io", "X-Forwarded-Proto": "http", "Custom-Header": "Custom Value"}}', ) assert response["Payload"]["AssertionError raised"] is False diff --git a/tests/integrations/celery/test_celery.py b/tests/integrations/celery/test_celery.py index 119e0d0e39..1011429098 100644 --- a/tests/integrations/celery/test_celery.py +++ b/tests/integrations/celery/test_celery.py @@ -13,7 +13,6 @@ _wrap_task_run, ) from sentry_sdk.integrations.celery.beat import _get_headers -from sentry_sdk.utils import _serialize_span_attribute from tests.conftest import ApproxDict @@ -448,12 +447,10 @@ def walk_dogs(x, y): sampling_context = traces_sampler.call_args_list[1][0][0] assert sampling_context["celery.job.task"] == "dog_walk" - assert sampling_context["celery.job.args"] == _serialize_span_attribute( - args_kwargs["args"] - ) - assert sampling_context["celery.job.kwargs"] == _serialize_span_attribute( - args_kwargs["kwargs"] - ) + for i, arg in enumerate(args_kwargs["args"]): + assert sampling_context[f"celery.job.args.{i}"] == str(arg) + for kwarg, value in args_kwargs["kwargs"].items(): + assert sampling_context[f"celery.job.kwargs.{kwarg}"] == str(value) def test_abstract_task(capture_events, celery, celery_invocation): diff --git 
a/tests/integrations/gcp/test_gcp.py b/tests/integrations/gcp/test_gcp.py index f33c1b35d7..3ea97cf0e6 100644 --- a/tests/integrations/gcp/test_gcp.py +++ b/tests/integrations/gcp/test_gcp.py @@ -293,11 +293,11 @@ def test_traces_sampler_gets_correct_values_in_sampling_context( dedent( """ functionhandler = None - event = { - "type": "chase", - "chasers": ["Maisey", "Charlie"], - "num_squirrels": 2, - } + + from collections import namedtuple + GCPEvent = namedtuple("GCPEvent", ["headers"]) + event = GCPEvent(headers={"Custom-Header": "Custom Value"}) + def cloud_function(functionhandler, event): # this runs after the transaction has started, which means we # can make assertions about traces_sampler @@ -310,14 +310,15 @@ def cloud_function(functionhandler, event): "gcp.function.entry_point": "cloud_function", "gcp.function.project": "SquirrelChasing", "cloud.provider": "gcp", + "http.request.header.custom-header": "Custom Value", }) ) except AssertionError: # catch the error and return it because the error itself will # get swallowed by the SDK as an "internal exception" - return {"AssertionError raised": True,} + return {"AssertionError raised": True} - return {"AssertionError raised": False,} + return {"AssertionError raised": False} """ ) + FUNCTIONS_PRELUDE diff --git a/tests/integrations/rq/test_rq.py b/tests/integrations/rq/test_rq.py index fbe5a521d3..c7eeb377e6 100644 --- a/tests/integrations/rq/test_rq.py +++ b/tests/integrations/rq/test_rq.py @@ -227,13 +227,23 @@ def test_traces_sampler_gets_correct_values_in_sampling_context(sentry_init): queue = rq.Queue(connection=FakeStrictRedis()) worker = rq.SimpleWorker([queue], connection=queue.connection) - queue.enqueue(do_trick, "Bodhi", trick="roll over") + queue.enqueue( + do_trick, + "Bodhi", + {"age": 5}, + trick="roll over", + times=2, + followup=["fetch", "give paw"], + ) worker.work(burst=True) sampling_context = traces_sampler.call_args_list[0][0][0] assert sampling_context["messaging.system"] == "rq" - 
assert sampling_context["rq.job.args"] == ["Bodhi"] - assert sampling_context["rq.job.kwargs"] == '{"trick": "roll over"}' + assert sampling_context["rq.job.args.0"] == "Bodhi" + assert sampling_context["rq.job.args.1"] == "{'age': 5}" + assert sampling_context["rq.job.kwargs.trick"] == "roll over" + assert sampling_context["rq.job.kwargs.times"] == "2" + assert sampling_context["rq.job.kwargs.followup"] == "['fetch', 'give paw']" assert sampling_context["rq.job.func"] == "do_trick" assert sampling_context["messaging.message.id"] assert sampling_context["messaging.destination.name"] == "default" diff --git a/tests/integrations/tornado/test_tornado.py b/tests/integrations/tornado/test_tornado.py index 7ad974c535..837da07434 100644 --- a/tests/integrations/tornado/test_tornado.py +++ b/tests/integrations/tornado/test_tornado.py @@ -467,6 +467,7 @@ def traces_sampler(sampling_context): assert sampling_context["server.port"].isnumeric() assert sampling_context["network.protocol.name"] == "HTTP" assert sampling_context["network.protocol.version"] == "1.1" + assert sampling_context["http.request.header.custom-header"] == "Custom Value" return True @@ -476,4 +477,4 @@ def traces_sampler(sampling_context): ) client = tornado_testcase(Application([(r"/hi", HelloHandler)])) - client.fetch("/hi?foo=bar") + client.fetch("/hi?foo=bar", headers={"Custom-Header": "Custom Value"}) diff --git a/tests/integrations/wsgi/test_wsgi.py b/tests/integrations/wsgi/test_wsgi.py index 5aad355277..487ccbfd69 100644 --- a/tests/integrations/wsgi/test_wsgi.py +++ b/tests/integrations/wsgi/test_wsgi.py @@ -1,5 +1,4 @@ from collections import Counter -from datetime import datetime from unittest import mock import pytest @@ -327,10 +326,7 @@ def dogpark(environ, start_response): assert error_event["contexts"]["trace"]["trace_id"] == trace_id -def test_traces_sampler_gets_correct_values_in_sampling_context( - sentry_init, - DictionaryContaining, # noqa:N803 -): +def 
test_traces_sampler_gets_correct_values_in_sampling_context(sentry_init): def app(environ, start_response): start_response("200 OK", []) return ["Go get the ball! Good dog!"] @@ -343,13 +339,14 @@ def traces_sampler(sampling_context): assert ( sampling_context["url.full"] == "http://localhost/dogs/are/great/?cats=too" ) + assert sampling_context["http.request.header.custom-header"] == "Custom Value" return True sentry_init(send_default_pii=True, traces_sampler=traces_sampler) app = SentryWsgiMiddleware(app) client = Client(app) - client.get("/dogs/are/great/?cats=too") + client.get("/dogs/are/great/?cats=too", headers={"Custom-Header": "Custom Value"}) def test_session_mode_defaults_to_request_mode_in_wsgi_handler( From 8f9461e1a0bc497e6333b4d955561a904beb9dae Mon Sep 17 00:00:00 2001 From: Colton Allen Date: Fri, 6 Dec 2024 02:11:03 -0600 Subject: [PATCH 12/14] Deepcopy and ensure get_all function always terminates (#3861) @aliu39 discovered that under certain circumstances a process can get stuck in an infinite loop. Andrew fixed this by using `deepcopy` which prevents the infinite loop and fixes a bug where the LRU returns incorrect results. Additionally, I've added a terminating loop in case there are any future bugs we've missed. Closes: https://github.com/getsentry/sentry-python/issues/3862 Out of precaution, we disabled flagpole evaluation tracking Sentry while we wait for this to be merged. 
--- sentry_sdk/_lru_cache.py | 14 +++++++++++--- tests/test_lru_cache.py | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) diff --git a/sentry_sdk/_lru_cache.py b/sentry_sdk/_lru_cache.py index ec557b1093..825c773529 100644 --- a/sentry_sdk/_lru_cache.py +++ b/sentry_sdk/_lru_cache.py @@ -62,7 +62,7 @@ """ -from copy import copy +from copy import copy, deepcopy SENTINEL = object() @@ -95,7 +95,7 @@ def __copy__(self): cache = LRUCache(self.max_size) cache.full = self.full cache.cache = copy(self.cache) - cache.root = copy(self.root) + cache.root = deepcopy(self.root) return cache def set(self, key, value): @@ -167,7 +167,15 @@ def get(self, key, default=None): def get_all(self): nodes = [] node = self.root[NEXT] - while node is not self.root: + + # To ensure the loop always terminates we iterate to the maximum + # size of the LRU cache. + for _ in range(self.max_size): + # The cache may not be full. We exit early if we've wrapped + # around to the head. + if node is self.root: + break nodes.append((node[KEY], node[VALUE])) node = node[NEXT] + return nodes diff --git a/tests/test_lru_cache.py b/tests/test_lru_cache.py index 3e9c0ac964..cab9bbc7eb 100644 --- a/tests/test_lru_cache.py +++ b/tests/test_lru_cache.py @@ -1,4 +1,5 @@ import pytest +from copy import copy from sentry_sdk._lru_cache import LRUCache @@ -58,3 +59,20 @@ def test_cache_get_all(): assert cache.get_all() == [(1, 1), (2, 2), (3, 3)] cache.get(1) assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] + + +def test_cache_copy(): + cache = LRUCache(3) + cache.set(0, 0) + cache.set(1, 1) + + copied = copy(cache) + cache.set(2, 2) + cache.set(3, 3) + assert copied.get_all() == [(0, 0), (1, 1)] + assert cache.get_all() == [(1, 1), (2, 2), (3, 3)] + + copied = copy(cache) + cache.get(1) + assert copied.get_all() == [(1, 1), (2, 2), (3, 3)] + assert cache.get_all() == [(2, 2), (3, 3), (1, 1)] From 163762f107710cdd1c36040a54806418f3ec4c8c Mon Sep 17 00:00:00 2001 From: getsentry-bot 
Date: Fri, 6 Dec 2024 08:12:00 +0000 Subject: [PATCH 13/14] release: 2.19.2 --- CHANGELOG.md | 7 +++++++ docs/conf.py | 2 +- sentry_sdk/consts.py | 2 +- setup.py | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eb45f28c7e..af4eb04fef 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 2.19.2 + +### Various fixes & improvements + +- Deepcopy and ensure get_all function always terminates (#3861) by @cmanallen +- Cleanup chalice test environment (#3858) by @antonpirker + ## 2.19.1 ### Various fixes & improvements diff --git a/docs/conf.py b/docs/conf.py index 4f5c210322..3ecdbe2e68 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,7 +31,7 @@ copyright = "2019-{}, Sentry Team and Contributors".format(datetime.now().year) author = "Sentry Team and Contributors" -release = "2.19.1" +release = "2.19.2" version = ".".join(release.split(".")[:2]) # The short X.Y version. diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index f338543dee..0bb71cb98d 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -581,4 +581,4 @@ def _get_default_options(): del _get_default_options -VERSION = "2.19.1" +VERSION = "2.19.2" diff --git a/setup.py b/setup.py index 7782d57a36..da3adcab42 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ def get_file_text(file_name): setup( name="sentry-sdk", - version="2.19.1", + version="2.19.2", author="Sentry Team and Contributors", author_email="hello@sentry.io", url="https://github.com/getsentry/sentry-python", From 26479b22d51cc9544e4c1bf515fc8590f83589bc Mon Sep 17 00:00:00 2001 From: Ivana Kellyer Date: Fri, 6 Dec 2024 10:04:31 +0100 Subject: [PATCH 14/14] Use stdlib pathlib in ready-yet script (#3863) --- scripts/ready_yet/requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/scripts/ready_yet/requirements.txt b/scripts/ready_yet/requirements.txt index e0590b89c6..69f9472fa5 100644 --- 
a/scripts/ready_yet/requirements.txt +++ b/scripts/ready_yet/requirements.txt @@ -1,3 +1,2 @@ requests -pathlib -tox \ No newline at end of file +tox