diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 22cdc9b415..b6702797e9 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -420,6 +420,10 @@ def update_request(self, request: Request) -> None: request.url = self._next_reference + # Clear the query parameters from the previous request otherwise they + # will be appended to the next URL in Session.prepare_request + request.params = None + class HeaderLinkPaginator(BaseNextUrlPaginator): """A paginator that uses the 'Link' header in HTTP responses diff --git a/tests/sources/helpers/rest_client/test_paginators.py b/tests/sources/helpers/rest_client/test_paginators.py index 9ca54e814c..e5d31c52d2 100644 --- a/tests/sources/helpers/rest_client/test_paginators.py +++ b/tests/sources/helpers/rest_client/test_paginators.py @@ -3,6 +3,7 @@ import pytest from requests.models import Response, Request +from requests import Session from dlt.sources.helpers.rest_client.paginators import ( SinglePagePaginator, @@ -157,6 +158,30 @@ def test_update_request(self, test_case): paginator.update_request(request) assert request.url == test_case["expected"] + def test_no_duplicate_params_on_update_request(self): + paginator = JSONResponsePaginator() + + request = Request( + method="GET", + url="http://example.com/api/resource", + params={"param1": "value1"}, + ) + + session = Session() + + response = Mock(Response, json=lambda: {"next": "/api/resource?page=2&param1=value1"}) + paginator.update_state(response) + paginator.update_request(request) + + assert request.url == "http://example.com/api/resource?page=2&param1=value1" + + # RESTClient._send_request() calls Session.prepare_request() which + # updates the URL with the query parameters from the request object. + prepared_request = session.prepare_request(request) + + # The next request should just use the "next" URL without any duplicate parameters. 
+ assert prepared_request.url == "http://example.com/api/resource?page=2&param1=value1" + class TestSinglePagePaginator: def test_update_state(self):