Skip to content

Commit

Permalink
normalize URLs
Browse files: browse the repository at this point in the history
  • Loading branch information
snshn committed May 20, 2024
1 parent 7c12def commit 5c4f1d4
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,7 @@ class AsyncScraper(Protocol):
Note that scrapers in harambe should be functions, not classes.
"""

async def scrape(self, sdk: "SDK", url: URL, context: Context) -> None:
...
async def scrape(self, sdk: "SDK", url: URL, context: Context) -> None: ...


class SDK:
Expand Down Expand Up @@ -105,7 +104,8 @@ async def enqueue(self, *urls: URL, context: Optional[Context] = None) -> None:
context["__url"] = self.page.url

for url in urls:
await self._notify_observers("on_queue_url", url, context)
normalized_url = normalize_url(url, self.page.url) if hasattr(self.page, "url") else url
await self._notify_observers("on_queue_url", normalized_url, context)

async def paginate(
self,
Expand Down Expand Up @@ -337,7 +337,8 @@ async def run_from_file(
if setup:
await setup(sdk)
await page.goto(listing["url"])
await scraper(sdk,
await scraper(
sdk,
listing["url"],
listing["context"],
)
Expand Down

0 comments on commit 5c4f1d4

Please sign in to comment.