diff --git a/wikiteam3/dumpgenerator/cli/cli.py b/wikiteam3/dumpgenerator/cli/cli.py index 8dfa03e8..6e29b4bd 100644 --- a/wikiteam3/dumpgenerator/cli/cli.py +++ b/wikiteam3/dumpgenerator/cli/cli.py @@ -30,7 +30,7 @@ url2prefix_from_config, ) from wikiteam3.utils.login import uniLogin -from wikiteam3.utils.monkey_patch import WakeTLSAdapter +from wikiteam3.utils.monkey_patch import SessionMonkeyPatch, WakeTLSAdapter from wikiteam3.utils.user_agent import setup_random_UserAgent @@ -293,6 +293,8 @@ def get_parameters(params=None) -> Tuple[Config, OtherConfig]: # Create session mod_requests_text(requests) # monkey patch # type: ignore session = requests.Session() + patch_sess = SessionMonkeyPatch(session=session, hard_retries=1) + patch_sess.hijack() def print_request(r: requests.Response, *args, **kwargs): # TODO: use logging # print("H:", r.request.headers) @@ -575,4 +577,5 @@ def sleep(self, response=None): "If you know that this is unnecessary, you can manually specify '--delay 0.0'." ) + patch_sess.release() return config, other diff --git a/wikiteam3/utils/monkey_patch.py b/wikiteam3/utils/monkey_patch.py index 9e13a34b..8c85c7c8 100644 --- a/wikiteam3/utils/monkey_patch.py +++ b/wikiteam3/utils/monkey_patch.py @@ -91,7 +91,8 @@ class SessionMonkeyPatch: def __init__(self,*, session: requests.Session, config: Optional[Config]=None, add_delay: bool=False, delay_msg: Optional[str]=None, hard_retries: int=0, - free_timeout_connections: bool=True, vaild_lft_sec: int=60 * 3 + free_timeout_connections: bool=True, vaild_lft_sec: int=60 * 3, + accept_encoding: str="", ): """ hard_retries: hard retries, default 0 (no retry) @@ -110,6 +111,8 @@ def __init__(self,*, session: requests.Session, config: Optional[Config]=None, self.vaild_lft_sec = vaild_lft_sec self.last_clear_time = time.time() + self.accept_encoding = accept_encoding + def clear_timeouted_pools(self): for adapter in self.session.adapters.values(): adapter: requests.adapters.HTTPAdapter @@ -131,6 +134,8 @@ def new_send(request: requests.PreparedRequest, **kwargs): if hard_retries_left <= 0: raise ValueError('hard_retries must be positive') + accept_encoding = '' + while hard_retries_left > 0: try: if self.add_delay: @@ -139,6 +144,9 @@ def new_send(request: requests.PreparedRequest, **kwargs): if self.free_timeout_connections: self.clear_timeouted_pools() + if _accept_encoding := accept_encoding or self.accept_encoding or request.headers.get("Accept-Encoding", ""): + request.headers["Accept-Encoding"] = _accept_encoding + return self.old_send_method(request, **kwargs) except (KeyboardInterrupt, requests.exceptions.ContentDecodingError): # don't retry raise @@ -149,6 +157,11 @@ def new_send(request: requests.PreparedRequest, **kwargs): print('Hard retry... (%d), due to: %s' % (hard_retries_left, e)) + # workaround for https://wiki.erischan.org/index.php/Main_Page and other ChunkedEncodingError sites + if isinstance(e, requests.exceptions.ChunkedEncodingError): + accept_encoding = 'identity' + print('retry with Accept-Encoding:', accept_encoding) + # if --bypass-cdn-image-compression is enabled, retry with different url assert isinstance(request.url, str) if '_wikiteam3_nocdn=' in request.url: