Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion bookworm/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,16 @@
"""


def get_version_info(version_string: str = version) -> dict:
    """Parse a version string into its named components.

    The whole string (surrounding whitespace allowed) is matched against
    ``VERSION_PATTERN``, compiled in verbose, case-insensitive mode.

    Args:
        version_string: The version to parse; defaults to the module-level
            ``version``.

    Returns:
        The named groups of the match, as produced by ``Match.groupdict()``.

    Raises:
        ValueError: If ``version_string`` does not match ``VERSION_PATTERN``.
    """
    pattern = re.compile(
        r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE
    )
    mat = pattern.match(version_string)
    if mat is None:
        # Name the offending input so the failure is diagnosable from the
        # traceback alone.
        raise ValueError(f"Invalid version string: {version_string!r}")
    return mat.groupdict()

def user_agent() -> str:
    """Return the User-Agent string identifying this application.

    Wikipedia will reject requests that do not respect their User-Agent
    policy; see:
    https://foundation.wikimedia.org/wiki/Policy:Wikimedia_Foundation_User-Agent_Policy
    """
    agent = f"{name}/{version} ({url}; {author_email})"
    return agent
8 changes: 7 additions & 1 deletion bookworm/http_tools/http_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import requests

from bookworm import typehints as t
from bookworm import app
from bookworm.logger import logger

log = logger.getChild(__name__)
Expand Down Expand Up @@ -144,11 +145,16 @@ def can_report_progress(self):
@dataclass
class HttpResource:
url: str
headers: dict[str, str] | None = None

def download(self) -> ResourceDownloadRequest:
try:
headers = self.headers or {}
headers.update({
'User-Agent': app.user_agent(),
})
log.info(f"Requesting resource: {self.url}")
requested_resource = requests.get(self.url, stream=True)
requested_resource = requests.get(self.url, headers=headers, stream=True)
requested_resource.raise_for_status()
except requests.RequestException as e:
log.exception(f"Faild to get resource from {self.url}", exc_info=True)
Expand Down