From a94bd175685289d18b749c16493cb420f7d9e171 Mon Sep 17 00:00:00 2001 From: Joscha Date: Thu, 5 May 2022 14:23:19 +0200 Subject: [PATCH] Mention href for users of link_regex option --- CHANGELOG.md | 1 + PFERD/crawl/kit_ipd_crawler.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dfd5f53..0e2b293 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,7 @@ ambiguous situations. ### Fixed - IPD crawler crashes on some sites +- Mention hrefs in IPD crawler for users of `link_regex` option ## 3.4.0 - 2022-05-01 diff --git a/PFERD/crawl/kit_ipd_crawler.py b/PFERD/crawl/kit_ipd_crawler.py index 58e71f8..78fe0b1 100644 --- a/PFERD/crawl/kit_ipd_crawler.py +++ b/PFERD/crawl/kit_ipd_crawler.py @@ -45,7 +45,7 @@ class KitIpdFolder: def explain(self) -> None: log.explain_topic(f"Folder {self.name!r}") for file in self.files: - log.explain(f"File {file.name!r}") + log.explain(f"File {file.name!r} (href={file.url!r})") def __hash__(self) -> int: return self.name.__hash__() @@ -113,7 +113,7 @@ async def _fetch_items(self) -> Set[Union[KitIpdFile, KitIpdFolder]]: else: file = self._extract_file(element) items.add(file) - log.explain_topic(f"Orphan file {file.name!r}") + log.explain_topic(f"Orphan file {file.name!r} (href={file.url!r})") log.explain("Attributing it to root folder") return items