From 3abbb792f6c192ac66d1d5d50af67f90a9d92640 Mon Sep 17 00:00:00 2001 From: Yueh-Shun Li Date: Tue, 13 Aug 2024 22:13:27 +0800 Subject: [PATCH] feat: support returning and resuming partial results Add a parameter `keep_failed` to `Dials.list_all()`, specifying whether to return the paginated partial results when an HTTP request fails. Add a parameter `resume_from` to `Dials.list_all()` to accept paginated partial results and resume the fetching. Add a field `exception` to `PaginatedBaseModel` to record the exception that causes the request to fail. If applied, users would be able to write something like: ```python data = None data = dials.h1d.list_all(filters, keep_failed=True, resume_from=data) ``` and resume by re-running the same statement from the terminal. --- cmsdials/utils/api_client.py | 50 ++++++++++++++++++++++++++++++++---- cmsdials/utils/base_model.py | 8 +++++- 2 files changed, 52 insertions(+), 6 deletions(-) diff --git a/cmsdials/utils/api_client.py b/cmsdials/utils/api_client.py index 4d4e81d..1ec0753 100644 --- a/cmsdials/utils/api_client.py +++ b/cmsdials/utils/api_client.py @@ -1,6 +1,8 @@ from importlib import util as importlib_util +from traceback import format_exception_only from typing import Optional from urllib.parse import parse_qs, urlparse +from warnings import warn import requests from requests.exceptions import HTTPError @@ -99,10 +101,29 @@ def list(self, filters=None): raise ValueError("pagination model is None and response is not a list.") - def __list_sync(self, filters, max_pages: Optional[int] = None, enable_progress: bool = False): - next_token = None + def __list_sync( + self, + filters, + max_pages: Optional[int] = None, + enable_progress: bool = False, + keep_failed: bool = False, + resume_from=None, + ): + next_string: Optional[str] = None results = [] is_last_page = False + + if resume_from is not None: + results = resume_from.results + next_string = resume_from.next + if next_string is None and len(results): + warn( + "resume_from.next is None while 
resume_from.result is not empty, doing nothing.", + RuntimeWarning, + stacklevel=2, + ) + is_last_page = True + total_pages = 0 use_tqdm = TQDM_INSTALLED and enable_progress @@ -110,12 +131,31 @@ def __list_sync(self, filters, max_pages: Optional[int] = None, enable_progress: progress = tqdm(desc="Progress", total=1) while is_last_page is False: + next_token = parse_qs(urlparse(next_string).query).get("next_token") if next_string else None curr_filters = self.filter_class(**filters.dict()) curr_filters.next_token = next_token - response = self.list(curr_filters) + try: + response = self.list(curr_filters) + except Exception as e: + if use_tqdm: + progress.close() + + if not keep_failed: + raise e + warn( + "HTTP request failed, returning partial results. Exception: " + "\n".join(format_exception_only(e)), + RuntimeWarning, + stacklevel=2, + ) + return self.pagination_model( + next=next_string, + previous=None, + results=results, + exception=e, + ) results.extend(response.results) - is_last_page = response.next is None - next_token = parse_qs(urlparse(response.next).query).get("next_token") if response.next else None + next_string = response.next + is_last_page = next_string is None total_pages += 1 max_pages_reached = max_pages and total_pages >= max_pages if use_tqdm: diff --git a/cmsdials/utils/base_model.py b/cmsdials/utils/base_model.py index 23d0f29..a399fdd 100644 --- a/cmsdials/utils/base_model.py +++ b/cmsdials/utils/base_model.py @@ -1,6 +1,7 @@ from importlib import util as importlib_util +from typing import Optional -from pydantic import BaseModel +from pydantic import BaseModel, Field if importlib_util.find_spec("pandas"): @@ -17,6 +18,11 @@ def cleandict(self): class PaginatedBaseModel(BaseModel): + class Config: + arbitrary_types_allowed = True + + exception: Optional[BaseException] = Field(default=None) + def to_pandas(self): if PANDAS_NOT_INSTALLED: raise RuntimeError(