From 882da2cef284f8c22da8e0326c4e7ed92a99ffb6 Mon Sep 17 00:00:00 2001 From: pauliyobo Date: Thu, 30 Oct 2025 12:51:42 +0100 Subject: [PATCH] Add User-Agent to http_resource requests --- bookworm/app.py | 7 ++++++- bookworm/http_tools/http_resource.py | 8 +++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/bookworm/app.py b/bookworm/app.py index 0c7cd62f..08cfc0da 100644 --- a/bookworm/app.py +++ b/bookworm/app.py @@ -50,7 +50,7 @@ """ -def get_version_info(version_string=version): +def get_version_info(version_string: str = version) -> dict: pattern = re.compile( r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE ) @@ -58,3 +58,8 @@ def get_version_info(version_string=version): if not mat: raise ValueError return mat.groupdict() + +def user_agent() -> str: + # Wikipedia will reject requests that do not respect their User-Agent policy + # see: https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy + return f"{name}/{version} ({url}; {author_email})" diff --git a/bookworm/http_tools/http_resource.py b/bookworm/http_tools/http_resource.py index 6b4ba557..cb9c758c 100644 --- a/bookworm/http_tools/http_resource.py +++ b/bookworm/http_tools/http_resource.py @@ -11,6 +11,7 @@ import requests from bookworm import typehints as t +from bookworm import app from bookworm.logger import logger log = logger.getChild(__name__) @@ -144,11 +145,16 @@ def can_report_progress(self): @dataclass class HttpResource: url: str + headers: dict[str, str] | None = None def download(self) -> ResourceDownloadRequest: try: + headers = self.headers or {} + headers.update({ + 'User-Agent': app.user_agent(), + }) log.info(f"Requesting resource: {self.url}") - requested_resource = requests.get(self.url, stream=True) + requested_resource = requests.get(self.url, headers=headers, stream=True) requested_resource.raise_for_status() except requests.RequestException as e: log.exception(f"Faild to get resource from {self.url}", exc_info=True)