Skip to content

Commit

Permalink
retry with Accept-Encoding: identity header when `ChunkedEncodingError` occurs
Browse files Browse the repository at this point in the history
  • Loading branch information
yzqzss committed Apr 8, 2024
1 parent ecc1b90 commit 01dd1aa
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
5 changes: 4 additions & 1 deletion wikiteam3/dumpgenerator/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
url2prefix_from_config,
)
from wikiteam3.utils.login import uniLogin
from wikiteam3.utils.monkey_patch import WakeTLSAdapter
from wikiteam3.utils.monkey_patch import SessionMonkeyPatch, WakeTLSAdapter
from wikiteam3.utils.user_agent import setup_random_UserAgent


Expand Down Expand Up @@ -293,6 +293,8 @@ def get_parameters(params=None) -> Tuple[Config, OtherConfig]:
# Create session
mod_requests_text(requests) # monkey patch # type: ignore
session = requests.Session()
patch_sess = SessionMonkeyPatch(session=session, hard_retries=1)
patch_sess.hijack()
def print_request(r: requests.Response, *args, **kwargs):
# TODO: use logging
# print("H:", r.request.headers)
Expand Down Expand Up @@ -575,4 +577,5 @@ def sleep(self, response=None):
"If you know that this is unnecessary, you can manually specify '--delay 0.0'."
)

patch_sess.release()
return config, other
15 changes: 14 additions & 1 deletion wikiteam3/utils/monkey_patch.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ class SessionMonkeyPatch:
def __init__(self,*, session: requests.Session, config: Optional[Config]=None,
add_delay: bool=False, delay_msg: Optional[str]=None,
hard_retries: int=0,
free_timeout_connections: bool=True, vaild_lft_sec: int=60 * 3
free_timeout_connections: bool=True, vaild_lft_sec: int=60 * 3,
accept_encoding: str="",
):
"""
hard_retries: hard retries, default 0 (no retry)
Expand All @@ -110,6 +111,8 @@ def __init__(self,*, session: requests.Session, config: Optional[Config]=None,
self.vaild_lft_sec = vaild_lft_sec
self.last_clear_time = time.time()

self.accept_encoding = accept_encoding

def clear_timeouted_pools(self):
for adapter in self.session.adapters.values():
adapter: requests.adapters.HTTPAdapter
Expand All @@ -131,6 +134,8 @@ def new_send(request: requests.PreparedRequest, **kwargs):
if hard_retries_left <= 0:
raise ValueError('hard_retries must be positive')

accept_encoding = ''

while hard_retries_left > 0:
try:
if self.add_delay:
Expand All @@ -139,6 +144,9 @@ def new_send(request: requests.PreparedRequest, **kwargs):
if self.free_timeout_connections:
self.clear_timeouted_pools()

if _accept_encoding := accept_encoding or self.accept_encoding or request.headers.get("Accept-Encoding", ""):
request.headers["Accept-Encoding"] = _accept_encoding

return self.old_send_method(request, **kwargs)
except (KeyboardInterrupt, requests.exceptions.ContentDecodingError): # don't retry
raise
Expand All @@ -149,6 +157,11 @@ def new_send(request: requests.PreparedRequest, **kwargs):

print('Hard retry... (%d), due to: %s' % (hard_retries_left, e))

# workaround for https://wiki.erischan.org/index.php/Main_Page and other sites whose responses raise ChunkedEncodingError
if isinstance(e, requests.exceptions.ChunkedEncodingError):
accept_encoding = 'identity'
print('retry with Accept-Encoding:', accept_encoding)

# if --bypass-cdn-image-compression is enabled, retry with different url
assert isinstance(request.url, str)
if '_wikiteam3_nocdn=' in request.url:
Expand Down

0 comments on commit 01dd1aa

Please sign in to comment.