Skip to content

Commit eb3b841

Browse files
committed
feat(scraper): add grace_period_seconds parameter for JS rendering delay
1 parent 61a9264 commit eb3b841

File tree

1 file changed

+4
-1
lines changed

1 file changed

+4
-1
lines changed

src/scraper/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010
from .helpers.content_selectors import _wait_for_content_stabilization
1111
from .helpers.html_utils import _extract_and_clean_html, _extract_markdown_and_text, _is_content_too_short
1212
from .helpers.errors import _navigate_and_handle_errors, _handle_cloudflare_block
13+
import asyncio
1314

1415
logger = Logger(__name__)
1516

@@ -30,7 +31,8 @@ def extract_and_format_content(html_content, elements_to_remove, url):
3031

3132
async def extract_text_from_url(url: str,
3233
custom_elements_to_remove: list = None,
33-
custom_timeout: int = None) -> dict:
34+
custom_timeout: int = None,
35+
grace_period_seconds: float = 2.0) -> dict:
3436
timeout_seconds = custom_timeout if custom_timeout is not None else DEFAULT_TIMEOUT_SECONDS
3537

3638
try:
@@ -70,6 +72,7 @@ async def extract_text_from_url(url: str,
7072
}
7173

7274
logger.debug(f"Extracting content from: {page.url}")
75+
await asyncio.sleep(grace_period_seconds)
7376
html_content = await page.content()
7477

7578
is_blocked, cf_error = _handle_cloudflare_block(

0 commit comments

Comments
 (0)