From a65c1c06a1cc226cf68b33034b465855fa8d5565 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Wed, 1 May 2024 19:49:48 +0300 Subject: [PATCH 1/9] Add PageNumberPaginator class; refactor OffsetPaginator; add tests --- dlt/sources/helpers/rest_client/paginators.py | 151 ++++++++++++++---- .../helpers/rest_client/test_paginators.py | 49 +++++- 2 files changed, 165 insertions(+), 35 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index cf06284c61..a1786e851a 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -1,5 +1,5 @@ from abc import ABC, abstractmethod -from typing import Optional +from typing import Optional, Dict, Any from urllib.parse import urlparse, urljoin from requests import Response, Request @@ -64,7 +64,116 @@ def update_request(self, request: Request) -> None: return -class OffsetPaginator(BasePaginator): +class BaseNumericPaginator(BasePaginator): + def __init__( + self, + param_name: str, + initial_value: int, + total_path: jsonpath.TJsonPath, + value_step: int, + error_message_items: str = "items", + ): + super().__init__() + self.param_name = param_name + self.current_value = initial_value + self.total_path = jsonpath.compile_path(total_path) + self.value_step = value_step + self.error_message_items = error_message_items + + def update_state(self, response: Response) -> None: + response_json = response.json() + values = jsonpath.find_values(self.total_path, response_json) + total = values[0] if values else None + if total is None: + self._handle_missing_total(response_json) + + try: + total = int(total) + except ValueError: + self._handle_invalid_total(total) + + self.current_value += self.value_step + + if self.current_value >= total: + self._has_next_page = False + + def _handle_missing_total(self, response_json: Dict[str, Any]) -> None: + raise ValueError( + f"Total {self.error_message_items} is not found in 
the response in" + f" {self.__class__.__name__}. Expected a response with a '{self.total_path}' key, got" + f" {response_json}" + ) + + def _handle_invalid_total(self, total: Any) -> None: + raise ValueError( + f"'{self.total_path}' is not an integer in the response in {self.__class__.__name__}." + f" Expected an integer, got {total}" + ) + + def update_request(self, request: Request) -> None: + if request.params is None: + request.params = {} + request.params[self.param_name] = self.current_value + + +class PageNumberPaginator(BaseNumericPaginator): + """A paginator that uses page number-based pagination strategy. + + For example, consider an API located at `https://api.example.com/items` + that supports pagination through page number and page size query parameters, + and provides the total number of pages in its responses, as shown below: + + { + "items": [...], + "total_pages": 10 + } + + To use `PageNumberPaginator` with such an API, you can instantiate `RESTClient` + as follows: + + from dlt.sources.helpers.rest_client import RESTClient + + client = RESTClient( + base_url="https://api.example.com", + paginator=PageNumberPaginator( + total_pages_path="total_pages" + ) + ) + + @dlt.resource + def get_items(): + for page in client.paginate("/items", params={"size": 100}): + yield page + + Note that we pass the `size` parameter in the initial request to the API. + The `PageNumberPaginator` will automatically increment the page number for + each subsequent request until all items are fetched. + """ + + def __init__( + self, + initial_page: int = 1, + page_param: str = "page", + total_pages_path: jsonpath.TJsonPath = "total", + ): + """ + Args: + initial_page (int): The initial page number. + page_param (str): The query parameter name for the page number. + Defaults to 'page'. + total_pages_path (jsonpath.TJsonPath): The JSONPath expression for + the total number of pages. Defaults to 'total'. 
+ """ + super().__init__( + param_name=page_param, + initial_value=initial_page, + total_path=total_pages_path, + value_step=1, + error_message_items="pages", + ) + + +class OffsetPaginator(BaseNumericPaginator): """A paginator that uses offset-based pagination strategy. This paginator is useful for APIs where pagination is controlled @@ -123,41 +232,17 @@ def __init__( total_path (jsonpath.TJsonPath): The JSONPath expression for the total number of items. """ - super().__init__() - self.offset_param = offset_param + super().__init__( + param_name=offset_param, + initial_value=initial_offset, + total_path=total_path, + value_step=initial_limit, + ) self.limit_param = limit_param - self.total_path = jsonpath.compile_path(total_path) - - self.offset = initial_offset self.limit = initial_limit - def update_state(self, response: Response) -> None: - """Extracts the total count from the response and updates the offset.""" - values = jsonpath.find_values(self.total_path, response.json()) - total = values[0] if values else None - - if total is None: - raise ValueError(f"Total count not found in response for {self.__class__.__name__}") - - try: - total = int(total) - except ValueError: - raise ValueError( - f"Total count is not an integer in response for {self.__class__.__name__}. 
" - f"Expected an integer, got {total}" - ) - - self.offset += self.limit - - if self.offset >= total: - self._has_next_page = False - def update_request(self, request: Request) -> None: - """Updates the request with the offset and limit query parameters.""" - if request.params is None: - request.params = {} - - request.params[self.offset_param] = self.offset + super().update_request(request) request.params[self.limit_param] = self.limit diff --git a/tests/sources/helpers/rest_client/test_paginators.py b/tests/sources/helpers/rest_client/test_paginators.py index 042eb6839b..be7adfbf23 100644 --- a/tests/sources/helpers/rest_client/test_paginators.py +++ b/tests/sources/helpers/rest_client/test_paginators.py @@ -7,6 +7,7 @@ from dlt.sources.helpers.rest_client.paginators import ( SinglePagePaginator, OffsetPaginator, + PageNumberPaginator, HeaderLinkPaginator, JSONResponsePaginator, ) @@ -177,7 +178,7 @@ def test_update_state(self): paginator = OffsetPaginator(initial_offset=0, initial_limit=10) response = Mock(Response, json=lambda: {"total": 20}) paginator.update_state(response) - assert paginator.offset == 10 + assert paginator.current_value == 10 assert paginator.has_next_page is True # Test for reaching the end @@ -188,7 +189,7 @@ def test_update_state_with_string_total(self): paginator = OffsetPaginator(0, 10) response = Mock(Response, json=lambda: {"total": "20"}) paginator.update_state(response) - assert paginator.offset == 10 + assert paginator.current_value == 10 assert paginator.has_next_page is True def test_update_state_with_invalid_total(self): @@ -202,3 +203,47 @@ def test_update_state_without_total(self): response = Mock(Response, json=lambda: {}) with pytest.raises(ValueError): paginator.update_state(response) + + +class TestPageNumberPaginator: + def test_update_state(self): + paginator = PageNumberPaginator(initial_page=1, total_pages_path="total_pages") + response = Mock(Response, json=lambda: {"total_pages": 3}) + 
paginator.update_state(response) + assert paginator.current_value == 2 + assert paginator.has_next_page is True + + # Test for reaching the end + paginator.update_state(response) + assert paginator.has_next_page is False + + def test_update_state_with_string_total_pages(self): + paginator = PageNumberPaginator(1) + response = Mock(Response, json=lambda: {"total": "3"}) + paginator.update_state(response) + assert paginator.current_value == 2 + assert paginator.has_next_page is True + + def test_update_state_with_invalid_total_pages(self): + paginator = PageNumberPaginator(1) + response = Mock(Response, json=lambda: {"total_pages": "invalid"}) + with pytest.raises(ValueError): + paginator.update_state(response) + + def test_update_state_without_total_pages(self): + paginator = PageNumberPaginator(1) + response = Mock(Response, json=lambda: {}) + with pytest.raises(ValueError): + paginator.update_state(response) + + def test_update_request(self): + paginator = PageNumberPaginator(initial_page=1, page_param="page") + request = Mock(Request) + response = Mock(Response, json=lambda: {"total": 3}) + paginator.update_state(response) + request.params = {} + paginator.update_request(request) + assert request.params["page"] == 2 + paginator.update_state(response) + paginator.update_request(request) + assert request.params["page"] == 3 From 6f510dba14f34dc1f127662d3c9a16cbbc26d2b3 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Wed, 1 May 2024 20:00:09 +0300 Subject: [PATCH 2/9] Add docstrings to BaseNumericPaginator --- dlt/sources/helpers/rest_client/paginators.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index a1786e851a..a9bb545018 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -65,6 +65,12 @@ def update_request(self, request: Request) -> None: class 
BaseNumericPaginator(BasePaginator): + """A base paginator class for paginators that use a numeric parameter + for pagination, such as page number or offset. + + See `PageNumberPaginator` and `OffsetPaginator` for examples. + """ + def __init__( self, param_name: str, @@ -73,6 +79,17 @@ def __init__( value_step: int, error_message_items: str = "items", ): + """ + Args: + param_name (str): The query parameter name for the numeric value. + For example, 'page'. + initial_value (int): The initial value of the numeric parameter. + total_path (jsonpath.TJsonPath): The JSONPath expression for the total + number of items. + value_step (int): The step size to increment the numeric parameter. + error_message_items (str): The name of the items in the error message. + Defaults to 'items'. + """ super().__init__() self.param_name = param_name self.current_value = initial_value From f0ed91a981e8d68fc2dfcdc010cdab257809fd9a Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 6 May 2024 14:02:24 +0200 Subject: [PATCH 3/9] Rename BaseNumericPaginator --- dlt/sources/helpers/rest_client/paginators.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 3d3e48fcaa..1816d04993 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -76,7 +76,7 @@ def update_request(self, request: Request) -> None: return -class BaseNumericPaginator(BasePaginator): +class RangePaginator(BasePaginator): """A base paginator class for paginators that use a numeric parameter for pagination, such as page number or offset. @@ -151,7 +151,7 @@ def update_request(self, request: Request) -> None: request.params[self.param_name] = self.current_value -class PageNumberPaginator(BaseNumericPaginator): +class PageNumberPaginator(RangePaginator): """A paginator that uses page number-based pagination strategy. 
For example, consider an API located at `https://api.example.com/items` @@ -208,7 +208,7 @@ def __init__( ) -class OffsetPaginator(BaseNumericPaginator): +class OffsetPaginator(RangePaginator): """A paginator that uses offset-based pagination strategy. This paginator is useful for APIs where pagination is controlled From f7f7860683140f3b5591102118b0eabfc4b4e36d Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 6 May 2024 14:28:56 +0200 Subject: [PATCH 4/9] Add maximum_value parameter to RangePaginator and its subclasses --- dlt/sources/helpers/rest_client/paginators.py | 25 +++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 1816d04993..b57ab9c4a5 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -89,6 +89,7 @@ def __init__( initial_value: int, total_path: jsonpath.TJsonPath, value_step: int, + maximum_value: Optional[int] = None, error_message_items: str = "items", ): """ @@ -97,7 +98,12 @@ def __init__( For example, 'page'. initial_value (int): The initial value of the numeric parameter. total_path (jsonpath.TJsonPath): The JSONPath expression for the total - number of items. + number of items. For example, if the JSON response is + `{"items": [...], "total": 100}`, the `total_path` would be 'total'. + maximum_value (int): The maximum value for the numeric parameter. + If provided, pagination will stop once this value is reached + or exceeded, even if more data is available. This allows you + to limit the maximum range for pagination. Defaults to None. value_step (int): The step size to increment the numeric parameter. error_message_items (str): The name of the items in the error message. Defaults to 'items'. 
@@ -107,6 +113,7 @@ def __init__( self.current_value = initial_value self.total_path = jsonpath.compile_path(total_path) self.value_step = value_step + self.maximum_value = maximum_value self.error_message_items = error_message_items def init_request(self, request: Request) -> None: @@ -129,7 +136,9 @@ def update_state(self, response: Response) -> None: self.current_value += self.value_step - if self.current_value >= total: + if self.current_value >= total or ( + self.maximum_value is not None and self.current_value >= self.maximum_value + ): self._has_next_page = False def _handle_missing_total(self, response_json: Dict[str, Any]) -> None: @@ -190,6 +199,7 @@ def __init__( initial_page: int = 1, page_param: str = "page", total_pages_path: jsonpath.TJsonPath = "total", + maximum_page: Optional[int] = None, ): """ Args: @@ -198,12 +208,17 @@ def __init__( Defaults to 'page'. total_pages_path (jsonpath.TJsonPath): The JSONPath expression for the total number of pages. Defaults to 'total'. + maximum_page (int): The maximum page number. If provided, pagination + will stop once this page is reached or exceeded, even if more + data is available. This allows you to limit the maximum number + of pages for pagination. Defaults to None. """ super().__init__( param_name=page_param, initial_value=initial_page, total_path=total_pages_path, value_step=1, + maximum_value=maximum_page, error_message_items="pages", ) @@ -253,6 +268,7 @@ def __init__( offset_param: str = "offset", limit_param: str = "limit", total_path: jsonpath.TJsonPath = "total", + maximum_offset: Optional[int] = None, ) -> None: """ Args: @@ -266,12 +282,17 @@ def __init__( Defaults to 'limit'. total_path (jsonpath.TJsonPath): The JSONPath expression for the total number of items. + maximum_offset (int): The maximum offset value. If provided, + pagination will stop once this offset is reached or exceeded, + even if more data is available. This allows you to limit the + maximum range for pagination. 
Defaults to None. """ super().__init__( param_name=offset_param, initial_value=initial_offset, total_path=total_path, value_step=initial_limit, + maximum_value=maximum_offset, ) self.limit_param = limit_param self.limit = initial_limit From e95a232cb67966c42cd62bda5a38ceece23f9f2c Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 6 May 2024 15:09:13 +0200 Subject: [PATCH 5/9] Change RangePaginator to allow for both maximum_value and total_path parameters --- dlt/sources/helpers/rest_client/paginators.py | 44 +++++++++++-------- 1 file changed, 25 insertions(+), 19 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index b57ab9c4a5..7ccbb5e97a 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -87,9 +87,9 @@ def __init__( self, param_name: str, initial_value: int, - total_path: jsonpath.TJsonPath, value_step: int, maximum_value: Optional[int] = None, + total_path: Optional[jsonpath.TJsonPath] = None, error_message_items: str = "items", ): """ @@ -97,23 +97,27 @@ def __init__( param_name (str): The query parameter name for the numeric value. For example, 'page'. initial_value (int): The initial value of the numeric parameter. - total_path (jsonpath.TJsonPath): The JSONPath expression for the total - number of items. For example, if the JSON response is - `{"items": [...], "total": 100}`, the `total_path` would be 'total'. - maximum_value (int): The maximum value for the numeric parameter. + value_step (int): The step size to increment the numeric parameter. + maximum_value (int, optional): The maximum value for the numeric parameter. If provided, pagination will stop once this value is reached or exceeded, even if more data is available. This allows you - to limit the maximum range for pagination. Defaults to None. - value_step (int): The step size to increment the numeric parameter. + to limit the maximum range for pagination. 
+ If not provided, `total_path` must be specified. Defaults to None. + total_path (jsonpath.TJsonPath, optional): The JSONPath expression + for the total number of items. For example, if the JSON response is + `{"items": [...], "total": 100}`, the `total_path` would be 'total'. + If not provided, `maximum_value` must be specified. error_message_items (str): The name of the items in the error message. Defaults to 'items'. """ super().__init__() + if total_path is None and maximum_value is None: + raise ValueError("Either `total_path` or `maximum_value` must be provided.") self.param_name = param_name self.current_value = initial_value - self.total_path = jsonpath.compile_path(total_path) self.value_step = value_step self.maximum_value = maximum_value + self.total_path = jsonpath.compile_path(total_path) if total_path else None self.error_message_items = error_message_items def init_request(self, request: Request) -> None: @@ -123,20 +127,22 @@ def init_request(self, request: Request) -> None: request.params[self.param_name] = self.current_value def update_state(self, response: Response) -> None: - response_json = response.json() - values = jsonpath.find_values(self.total_path, response_json) - total = values[0] if values else None - if total is None: - self._handle_missing_total(response_json) - - try: - total = int(total) - except ValueError: - self._handle_invalid_total(total) + total = None + if self.total_path: + response_json = response.json() + values = jsonpath.find_values(self.total_path, response_json) + total = values[0] if values else None + if total is None: + self._handle_missing_total(response_json) + + try: + total = int(total) + except ValueError: + self._handle_invalid_total(total) self.current_value += self.value_step - if self.current_value >= total or ( + if (total is not None and self.current_value >= total) or ( self.maximum_value is not None and self.current_value >= self.maximum_value ): self._has_next_page = False From 
b09d2aa2f976023872abf89eb385e7fe54d93861 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 6 May 2024 15:21:22 +0200 Subject: [PATCH 6/9] Extend examples --- dlt/sources/helpers/rest_client/paginators.py | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 7ccbb5e97a..978edb3676 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -198,6 +198,20 @@ def get_items(): Note that we pass the `size` parameter in the initial request to the API. The `PageNumberPaginator` will automatically increment the page number for each subsequent request until all items are fetched. + + If the API does not provide the total number of pages, you can use the + `maximum_page` parameter to limit the number of pages to fetch. For example: + + client = RESTClient( + base_url="https://api.example.com", + paginator=PageNumberPaginator( + maximum_page=5 + ) + ) + + ... + + In this case, pagination will stop after fetching 4 pages of data (pages 1 to 4). """ def __init__( @@ -265,6 +279,20 @@ def get_items(): Note that we pass the `limit` parameter in the initial request to the API. The `OffsetPaginator` will automatically increment the offset for each subsequent request until all items are fetched. + + If the API does not provide the total count of items, you can use the + `maximum_offset` parameter to limit the number of items to fetch. For example: + + client = RESTClient( + base_url="https://api.example.com", + paginator=OffsetPaginator( + initial_limit=100, + maximum_offset=1000 + ) + ) + ... + + In this case, pagination will stop after fetching 1000 items. 
""" def __init__( From dd528c8713899f0b5bbb5536809e1f32343a2917 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 6 May 2024 15:47:08 +0200 Subject: [PATCH 7/9] Rename initial_limit -> limit, initial_offset -> offset --- dlt/sources/helpers/rest_client/paginators.py | 43 +++++++++++++------ .../helpers/rest_client/test_paginators.py | 4 +- 2 files changed, 33 insertions(+), 14 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 978edb3676..10a36a9c7a 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from typing import Optional, Dict, Any from urllib.parse import urlparse, urljoin +import warnings from requests import Response, Request from dlt.common import jsonpath @@ -208,7 +209,6 @@ def get_items(): maximum_page=5 ) ) - ... In this case, pagination will stop after fetching 4 pages of data (pages 1 to 4). @@ -267,16 +267,15 @@ class OffsetPaginator(RangePaginator): client = RESTClient( base_url="https://api.example.com", paginator=OffsetPaginator( - initial_limit=100, + limit=100, total_path="total" ) ) @dlt.resource def get_items(): - for page in client.paginate("/items", params={"limit": 100}): + for page in client.paginate("/items"): yield page - Note that we pass the `limit` parameter in the initial request to the API. The `OffsetPaginator` will automatically increment the offset for each subsequent request until all items are fetched. 
@@ -286,7 +285,7 @@ def get_items(): client = RESTClient( base_url="https://api.example.com", paginator=OffsetPaginator( - initial_limit=100, + limit=100, maximum_offset=1000 ) ) @@ -297,18 +296,20 @@ def get_items(): def __init__( self, - initial_limit: int, - initial_offset: int = 0, + limit: int, + offset: int = 0, offset_param: str = "offset", limit_param: str = "limit", total_path: jsonpath.TJsonPath = "total", maximum_offset: Optional[int] = None, + initial_limit: Optional[int] = None, + initial_offset: Optional[int] = None, ) -> None: """ Args: - initial_limit (int): The maximum number of items to retrieve + limit (int): The maximum number of items to retrieve in each request. - initial_offset (int): The offset for the first request. + offset (int): The offset for the first request. Defaults to 0. offset_param (str): The query parameter name for the offset. Defaults to 'offset'. @@ -320,16 +321,34 @@ def __init__( pagination will stop once this offset is reached or exceeded, even if more data is available. This allows you to limit the maximum range for pagination. Defaults to None. + initial_limit (int, optional): Deprecated, use `limit` instead. + initial_offset (int, optional): Deprecated, use `offset` instead. """ + if initial_limit is not None: + warnings.warn( + "The 'initial_limit' argument is deprecated and will be removed in future versions." + " Use 'limit' instead.", + DeprecationWarning, + ) + limit = initial_limit + + if initial_offset is not None: + warnings.warn( + "The 'initial_offset' argument is deprecated and will be removed in future" + " versions. 
Use 'offset' instead.", + DeprecationWarning, + ) + offset = initial_offset + super().__init__( param_name=offset_param, - initial_value=initial_offset, + initial_value=offset, total_path=total_path, - value_step=initial_limit, + value_step=limit, maximum_value=maximum_offset, ) self.limit_param = limit_param - self.limit = initial_limit + self.limit = limit def init_request(self, request: Request) -> None: super().init_request(request) diff --git a/tests/sources/helpers/rest_client/test_paginators.py b/tests/sources/helpers/rest_client/test_paginators.py index 8b86b0c6c2..a928b1164c 100644 --- a/tests/sources/helpers/rest_client/test_paginators.py +++ b/tests/sources/helpers/rest_client/test_paginators.py @@ -175,7 +175,7 @@ def test_update_state_with_next(self): class TestOffsetPaginator: def test_update_state(self): - paginator = OffsetPaginator(initial_offset=0, initial_limit=10) + paginator = OffsetPaginator(offset=0, limit=10) response = Mock(Response, json=lambda: {"total": 20}) paginator.update_state(response) assert paginator.current_value == 10 @@ -205,7 +205,7 @@ def test_update_state_without_total(self): paginator.update_state(response) def test_init_request(self): - paginator = OffsetPaginator(initial_offset=123, initial_limit=42) + paginator = OffsetPaginator(offset=123, limit=42) request = Mock(Request) request.params = {} From ad48c0cfeddff81ee696661a41d3e1bb119044d8 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Mon, 6 May 2024 16:32:36 +0200 Subject: [PATCH 8/9] Rename total_pages_path parameter, add tests --- dlt/sources/helpers/rest_client/paginators.py | 8 +++---- .../helpers/rest_client/test_paginators.py | 24 ++++++++++++++++++- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 10a36a9c7a..7a10598917 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -187,7 +187,7 
@@ class PageNumberPaginator(RangePaginator): client = RESTClient( base_url="https://api.example.com", paginator=PageNumberPaginator( - total_pages_path="total_pages" + total_path="total_pages" ) ) @@ -218,7 +218,7 @@ def __init__( self, initial_page: int = 1, page_param: str = "page", - total_pages_path: jsonpath.TJsonPath = "total", + total_path: jsonpath.TJsonPath = "total", maximum_page: Optional[int] = None, ): """ @@ -226,7 +226,7 @@ def __init__( initial_page (int): The initial page number. page_param (str): The query parameter name for the page number. Defaults to 'page'. - total_pages_path (jsonpath.TJsonPath): The JSONPath expression for + total_path (jsonpath.TJsonPath): The JSONPath expression for the total number of pages. Defaults to 'total'. maximum_page (int): The maximum page number. If provided, pagination will stop once this page is reached or exceeded, even if more @@ -236,7 +236,7 @@ def __init__( super().__init__( param_name=page_param, initial_value=initial_page, - total_path=total_pages_path, + total_path=total_path, value_step=1, maximum_value=maximum_page, error_message_items="pages", diff --git a/tests/sources/helpers/rest_client/test_paginators.py b/tests/sources/helpers/rest_client/test_paginators.py index a928b1164c..9ca54e814c 100644 --- a/tests/sources/helpers/rest_client/test_paginators.py +++ b/tests/sources/helpers/rest_client/test_paginators.py @@ -227,10 +227,21 @@ def test_init_request(self): assert next_request.params["offset"] == 165 assert next_request.params["limit"] == 42 + def test_maximum_offset(self): + paginator = OffsetPaginator(offset=0, limit=50, maximum_offset=100, total_path=None) + response = Mock(Response, json=lambda: {"items": []}) + paginator.update_state(response) # Offset 0 to 50 + assert paginator.current_value == 50 + assert paginator.has_next_page is True + + paginator.update_state(response) # Offset 50 to 100 + assert paginator.current_value == 100 + assert paginator.has_next_page is False + class 
TestPageNumberPaginator: def test_update_state(self): - paginator = PageNumberPaginator(initial_page=1, total_pages_path="total_pages") + paginator = PageNumberPaginator(initial_page=1, total_path="total_pages") response = Mock(Response, json=lambda: {"total_pages": 3}) paginator.update_state(response) assert paginator.current_value == 2 @@ -270,3 +281,14 @@ def test_update_request(self): paginator.update_state(response) paginator.update_request(request) assert request.params["page"] == 3 + + def test_maximum_page(self): + paginator = PageNumberPaginator(initial_page=1, maximum_page=3, total_path=None) + response = Mock(Response, json=lambda: {"items": []}) + paginator.update_state(response) # Page 1 + assert paginator.current_value == 2 + assert paginator.has_next_page is True + + paginator.update_state(response) # Page 2 + assert paginator.current_value == 3 + assert paginator.has_next_page is False From 5dfb6785772d3e5df000ffa5035f2d8912eb6dc0 Mon Sep 17 00:00:00 2001 From: Anton Burnashev Date: Tue, 7 May 2024 17:36:12 +0200 Subject: [PATCH 9/9] Remove deprecated kwargs --- dlt/sources/helpers/rest_client/paginators.py | 21 ------------------- 1 file changed, 21 deletions(-) diff --git a/dlt/sources/helpers/rest_client/paginators.py b/dlt/sources/helpers/rest_client/paginators.py index 7a10598917..fc0ce459ad 100644 --- a/dlt/sources/helpers/rest_client/paginators.py +++ b/dlt/sources/helpers/rest_client/paginators.py @@ -1,7 +1,6 @@ from abc import ABC, abstractmethod from typing import Optional, Dict, Any from urllib.parse import urlparse, urljoin -import warnings from requests import Response, Request from dlt.common import jsonpath @@ -302,8 +301,6 @@ def __init__( limit_param: str = "limit", total_path: jsonpath.TJsonPath = "total", maximum_offset: Optional[int] = None, - initial_limit: Optional[int] = None, - initial_offset: Optional[int] = None, ) -> None: """ Args: @@ -321,25 +318,7 @@ def __init__( pagination will stop once this offset is reached or 
exceeded, even if more data is available. This allows you to limit the maximum range for pagination. Defaults to None. - initial_limit (int, optional): Deprecated, use `limit` instead. - initial_offset (int, optional): Deprecated, use `offset` instead. """ - if initial_limit is not None: - warnings.warn( - "The 'initial_limit' argument is deprecated and will be removed in future versions." - " Use 'limit' instead.", - DeprecationWarning, - ) - limit = initial_limit - - if initial_offset is not None: - warnings.warn( - "The 'initial_offset' argument is deprecated and will be removed in future" - " versions. Use 'offset' instead.", - DeprecationWarning, - ) - offset = initial_offset - super().__init__( param_name=offset_param, initial_value=offset,