Skip to content

Commit

Permalink
allow element handle paging
Browse files — browse the repository at this point in the history
  • Loading branch information
awtkns committed Feb 24, 2024
1 parent e45e590 commit a22530b
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 12 deletions.
32 changes: 21 additions & 11 deletions harambe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from functools import wraps
from typing import Callable, List, Optional, Protocol, Union, Awaitable

from playwright.async_api import Page, ProxySettings, async_playwright
from playwright.async_api import Page, ProxySettings, async_playwright, ElementHandle
from playwright_stealth import stealth_async

from harambe.observer import LocalStorageObserver, LoggingObserver, OutputObserver
Expand Down Expand Up @@ -90,21 +90,31 @@ async def enqueue(self, *urls: URL, context: Optional[Context] = None) -> None:
*[o.on_queue_url(url, context) for o in self._observers]
)

async def paginate(self, next_url: Callable[..., Awaitable[URL]]) -> None:
async def paginate(self, next_page: Callable[..., Awaitable[URL | ElementHandle | None]]) -> None:
"""
Navigate to the next page of a listing.
:param next_url: the url of the next page
:param next_page: the url or ElementHandle of the next page
"""
try:
next_url = await next_url()

if next_url.startswith("?"):
# TODO: merge query params
next_url = self.page.url.split("?")[0] + next_url

await self.page.goto(next_url)
await self._scraper(self, next_url, self._context)
next_page = await next_page()
if not next_page:
return

next_url = ""
if isinstance(next_page, ElementHandle):
await next_page.click(timeout=1000)
next_url = self.page.url

elif isinstance(next_page, str):
next_url = next_page
if next_url.startswith("?"):
# TODO: merge query params
next_url = self.page.url.split("?")[0] + next_url
await self.page.goto(next_url)

if next_url:
await self._scraper(self, next_url, self._context)
except: # noqa: E722
return

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "harambe-sdk"
version = "0.6.1"
version = "0.6.2"
description = "Data extraction SDK for Playwright 🐒🍌"
authors = ["awtkns <[email protected]>"]
readme = "README.md"
Expand Down

0 comments on commit a22530b

Please sign in to comment.