From aa6e8fd05ca5812213c96cdaf125ab3ae23726f8 Mon Sep 17 00:00:00 2001
From: Daniel Szoke <7881302+szokeasaurusrex@users.noreply.github.com>
Date: Thu, 21 Nov 2024 11:32:32 +0100
Subject: [PATCH] fix(falcon): Don't exhaust request body stream (#3768)

Only read the cached `request._media`, since reading `request.media` will exhaust the `request.bounded_stream` if it has not been read before.

Note that this means that we will now only send the JSON request body to Sentry if the Falcon request handler reads the JSON data.

Fixes #3761

Co-authored-by: Anton Pirker
---
 sentry_sdk/integrations/falcon.py        | 44 ++++++++++++-----------
 tests/integrations/falcon/test_falcon.py | 45 ++++++++++++++++++++++++
 2 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/sentry_sdk/integrations/falcon.py b/sentry_sdk/integrations/falcon.py
index 00ac106e15..ce771d16e7 100644
--- a/sentry_sdk/integrations/falcon.py
+++ b/sentry_sdk/integrations/falcon.py
@@ -43,6 +43,12 @@
     FALCON3 = False
 
 
+_FALCON_UNSET = None  # type: Optional[object]
+if FALCON3:  # falcon.request._UNSET is only available in Falcon 3.0+
+    with capture_internal_exceptions():
+        from falcon.request import _UNSET as _FALCON_UNSET  # type: ignore[import-not-found, no-redef]
+
+
 class FalconRequestExtractor(RequestExtractor):
     def env(self):
         # type: () -> Dict[str, Any]
@@ -73,27 +79,23 @@ def raw_data(self):
         else:
             return None
 
-    if FALCON3:
-
-        def json(self):
-            # type: () -> Optional[Dict[str, Any]]
-            try:
-                return self.request.media
-            except falcon.errors.HTTPBadRequest:
-                return None
-
-    else:
-
-        def json(self):
-            # type: () -> Optional[Dict[str, Any]]
-            try:
-                return self.request.media
-            except falcon.errors.HTTPBadRequest:
-                # NOTE(jmagnusson): We return `falcon.Request._media` here because
-                # falcon 1.4 doesn't do proper type checking in
-                # `falcon.Request.media`. This has been fixed in 2.0.
-                # Relevant code: https://github.com/falconry/falcon/blob/1.4.1/falcon/request.py#L953
-                return self.request._media
+    def json(self):
+        # type: () -> Optional[Dict[str, Any]]
+        # fallback to cached_media = None if self.request._media is not available
+        cached_media = None
+        with capture_internal_exceptions():
+            # self.request._media is the cached self.request.media
+            # value. It is only available if self.request.media
+            # has already been accessed. Therefore, reading
+            # self.request._media will not exhaust the raw request
+            # stream (self.request.bounded_stream) because it has
+            # already been read if self.request._media is set.
+            cached_media = self.request._media
+
+        if cached_media is not _FALCON_UNSET:
+            return cached_media
+
+        return None
 
 
 class SentryFalconMiddleware:
diff --git a/tests/integrations/falcon/test_falcon.py b/tests/integrations/falcon/test_falcon.py
index 0607d3fdeb..51a1d94334 100644
--- a/tests/integrations/falcon/test_falcon.py
+++ b/tests/integrations/falcon/test_falcon.py
@@ -460,3 +460,48 @@ def test_span_origin(sentry_init, capture_events, make_client):
     (_, event) = events
 
     assert event["contexts"]["trace"]["origin"] == "auto.http.falcon"
+
+
+def test_falcon_request_media(sentry_init):
+    # test_passed stores whether the test has passed.
+    test_passed = False
+
+    # test_failure_reason stores the reason why the test failed
+    # if test_passed is False. The value is meaningless when
+    # test_passed is True.
+    test_failure_reason = "test endpoint did not get called"
+
+    class SentryCaptureMiddleware:
+        def process_request(self, _req, _resp):
+            # This capture message forces Falcon event processors to run
+            # before the request handler runs
+            sentry_sdk.capture_message("Processing request")
+
+    class RequestMediaResource:
+        def on_post(self, req, _):
+            nonlocal test_passed, test_failure_reason
+            raw_data = req.bounded_stream.read()
+
+            # If the raw_data is empty, the request body stream
+            # has been exhausted by the SDK. Test should fail in
+            # this case.
+            test_passed = raw_data != b""
+            test_failure_reason = "request body has been read"
+
+    sentry_init(integrations=[FalconIntegration()])
+
+    try:
+        app_class = falcon.App  # Falcon ≥3.0
+    except AttributeError:
+        app_class = falcon.API  # Falcon <3.0
+
+    app = app_class(middleware=[SentryCaptureMiddleware()])
+    app.add_route("/read_body", RequestMediaResource())
+
+    client = falcon.testing.TestClient(app)
+
+    client.simulate_post("/read_body", json={"foo": "bar"})
+
+    # Check that simulate_post actually calls the resource, and
+    # that the SDK does not exhaust the request body stream.
+    assert test_passed, test_failure_reason