From ffed9388e09c5bcf977bed856d49422306dfee27 Mon Sep 17 00:00:00 2001 From: Digital <34419970+DigitalDwagon@users.noreply.github.com> Date: Wed, 9 Oct 2024 00:11:12 -0400 Subject: [PATCH] add a --hard-retries option (#30) * feat: add a --hard-retries option * fix: don't cut off the help message for --retries in the middle of a sentence * move hard_retries config to OtherConfig * ci: fix --------- Co-authored-by: yzqzss --- .github/workflows/test-dumogenerator.yml | 4 ++-- wikiteam3/dumpgenerator/api/page_titles.py | 2 +- wikiteam3/dumpgenerator/cli/cli.py | 9 +++++++-- wikiteam3/dumpgenerator/config.py | 3 +++ wikiteam3/dumpgenerator/dump/image/image.py | 2 +- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-dumogenerator.yml b/.github/workflows/test-dumogenerator.yml index 47ae2f28..9934f3e3 100644 --- a/.github/workflows/test-dumogenerator.yml +++ b/.github/workflows/test-dumogenerator.yml @@ -14,7 +14,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.12", "3.13.0-rc.1"] + python-version: ["3.8", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -40,4 +40,4 @@ jobs: python -m wikiteam3.dumpgenerator -h - name: Test with pytest run: | - cd wikiteam3/dumpgenerator && pytest && cd ../../ + pytest diff --git a/wikiteam3/dumpgenerator/api/page_titles.py b/wikiteam3/dumpgenerator/api/page_titles.py index 84575243..c5d8eabb 100644 --- a/wikiteam3/dumpgenerator/api/page_titles.py +++ b/wikiteam3/dumpgenerator/api/page_titles.py @@ -27,7 +27,7 @@ def getPageTitlesAPI(config: Config, session: requests.Session): delay_session = SessionMonkeyPatch( session=session, config=config, add_delay=True, delay_msg="Session delay: "+__name__, - hard_retries=3 + hard_retries=3 # TODO: --hard-retries ) delay_session.hijack() for namespace in namespaces: diff --git a/wikiteam3/dumpgenerator/cli/cli.py b/wikiteam3/dumpgenerator/cli/cli.py index a472d468..37e34647 100644 --- a/wikiteam3/dumpgenerator/cli/cli.py +++ 
b/wikiteam3/dumpgenerator/cli/cli.py @@ -49,7 +49,10 @@ def getArgumentParser(): "if you wanna reuse the connection]" ) parser.add_argument( - "--retries", metavar="5", default=5, help="Maximum number of retries for " + "--retries", metavar="5", default=5, help="Maximum number of retries for each request before failing." + ) + parser.add_argument( + "--hard-retries", metavar="3", default=3, help="Maximum number of hard retries for each request before failing. (for now, this only controls the hard retries during images downloading)" ) parser.add_argument("--path", help="path to store wiki dump at") parser.add_argument( @@ -290,7 +293,7 @@ def get_parameters(params=None) -> Tuple[Config, OtherConfig]: # Create session mod_requests_text(requests) # monkey patch # type: ignore session = requests.Session() - patch_sess = SessionMonkeyPatch(session=session, hard_retries=1) + patch_sess = SessionMonkeyPatch(session=session, hard_retries=1) # hard retry once to avoid spending too much time on initial detection patch_sess.hijack() def print_request(r: requests.Response, *args, **kwargs): # TODO: use logging @@ -548,6 +551,8 @@ def sleep(self, response=None): assert_max_images = args.assert_max_images, assert_max_images_bytes = args.assert_max_images_bytes, + hard_retries = int(args.hard_retries), + upload = args.upload, uploader_args = args.uploader_args, ) diff --git a/wikiteam3/dumpgenerator/config.py b/wikiteam3/dumpgenerator/config.py index 9238fad8..517502e3 100644 --- a/wikiteam3/dumpgenerator/config.py +++ b/wikiteam3/dumpgenerator/config.py @@ -113,5 +113,8 @@ class OtherConfig: assert_max_images: Optional[int] assert_max_images_bytes: Optional[int] + hard_retries: int + """ Number of hard retries """ + upload: bool uploader_args: List[str] \ No newline at end of file diff --git a/wikiteam3/dumpgenerator/dump/image/image.py b/wikiteam3/dumpgenerator/dump/image/image.py index 5e1db029..9ecde193 100644 --- a/wikiteam3/dumpgenerator/dump/image/image.py +++ 
b/wikiteam3/dumpgenerator/dump/image/image.py @@ -107,7 +107,7 @@ def modify_headers(headers: Optional[Dict] = None) -> Dict: return headers - patch_sess = SessionMonkeyPatch(session=session, config=config, hard_retries=3) + patch_sess = SessionMonkeyPatch(session=session, config=config, hard_retries=other.hard_retries) patch_sess.hijack() ia_session = requests.Session()