@@ -149,10 +149,6 @@ def __init__(
                 non-default configuration.
             kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
         """
-        # Some sub crawler kwargs are internally modified. Prepare copies.
-        basic_crawler_kwargs_for_static_crawler = deepcopy(kwargs)
-        basic_crawler_kwargs_for_pw_crawler = deepcopy(kwargs)
-
         # Adaptive crawling related.
         self.rendering_type_predictor = rendering_type_predictor or DefaultRenderingTypePredictor()
         self.result_checker = result_checker or (lambda _: True)
@@ -170,11 +166,11 @@ def __init__(
         # Each sub crawler will use custom logger .
         static_logger = getLogger('Subcrawler_static')
         static_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_static_crawler['_logger'] = static_logger
+        basic_crawler_kwargs_for_static_crawler: _BasicCrawlerOptions = {'_logger': static_logger, **kwargs}
 
         pw_logger = getLogger('Subcrawler_playwright')
         pw_logger.setLevel(logging.ERROR)
-        basic_crawler_kwargs_for_pw_crawler['_logger'] = pw_logger
+        basic_crawler_kwargs_for_pw_crawler: _BasicCrawlerOptions = {'_logger': pw_logger, **kwargs}
 
         # Initialize sub crawlers to create their pipelines.
         static_crawler_class = AbstractHttpCrawler.create_parsed_http_crawler_class(static_parser=static_parser)
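The change above drops the `deepcopy` of the shared `kwargs` and instead builds each sub crawler's options as a fresh `_BasicCrawlerOptions` dict via unpacking. Deep-copying is fragile when the caller passes live objects (for instance a storage client holding an open connection), which is presumably what the new SQL-storage test below exercises. A minimal sketch of the pattern, using illustrative names (`make_subcrawler_kwargs`, `base_kwargs`) that are not part of the PR:

import logging
from logging import getLogger
from typing import Any


def make_subcrawler_kwargs(name: str, base_kwargs: dict[str, Any]) -> dict[str, Any]:
    """Build per-sub-crawler kwargs without deep-copying the shared dict."""
    logger = getLogger(name)
    logger.setLevel(logging.ERROR)
    # Dict unpacking creates a new top-level mapping on every call, so the two
    # sub crawlers never mutate a shared dict, while the values themselves stay
    # shared (shallow copy): live objects like storage clients are never copied.
    return {'_logger': logger, **base_kwargs}


base = {'max_request_retries': 2}
static_kwargs = make_subcrawler_kwargs('Subcrawler_static', base)
pw_kwargs = make_subcrawler_kwargs('Subcrawler_playwright', base)
assert static_kwargs is not pw_kwargs

One observable difference in the new merge order: because `**kwargs` is unpacked after the `'_logger'` key, a `_logger` supplied by the caller now takes precedence over the per-sub-crawler default, whereas the old assignment overwrote it.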
@@ -30,10 +30,12 @@
     AdaptiveContextError,
 )
 from crawlee.statistics import Statistics
+from crawlee.storage_clients import SqlStorageClient
 from crawlee.storages import KeyValueStore
 
 if TYPE_CHECKING:
     from collections.abc import AsyncGenerator, Iterator
+    from pathlib import Path
 
     from yarl import URL
 
@@ -726,3 +728,23 @@ async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
     await crawler.run(test_urls[:1])
 
     mocked_h3_handler.assert_called_once_with(None)
+
+
+async def test_adaptive_playwright_crawler_with_sql_storage(test_urls: list[str], tmp_path: Path) -> None:
+    """Tests that AdaptivePlaywrightCrawler can be initialized with SqlStorageClient."""
+    storage_dir = tmp_path / 'test_table.db'
+
+    async with SqlStorageClient(connection_string=f'sqlite+aiosqlite:///{storage_dir}') as storage_client:
+        crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
+            storage_client=storage_client,
+        )
+
+        mocked_handler = Mock()
+
+        @crawler.router.default_handler
+        async def request_handler(_context: AdaptivePlaywrightCrawlingContext) -> None:
+            mocked_handler()
+
+        await crawler.run(test_urls[:1])
+
+        mocked_handler.assert_called()
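The new test doubles as a usage reference for the SQL storage backend. Below is a hedged sketch of the same setup outside pytest, assuming the imports shown in the diff (`SqlStorageClient` from `crawlee.storage_clients`); the database file name and target URL are placeholders:

import asyncio

from crawlee.crawlers import AdaptivePlaywrightCrawler, AdaptivePlaywrightCrawlingContext
from crawlee.storage_clients import SqlStorageClient


async def main() -> None:
    # SQLite via aiosqlite, mirroring the connection string format in the test.
    async with SqlStorageClient(connection_string='sqlite+aiosqlite:///crawlee.db') as storage_client:
        crawler = AdaptivePlaywrightCrawler.with_beautifulsoup_static_parser(
            storage_client=storage_client,
        )

        @crawler.router.default_handler
        async def request_handler(context: AdaptivePlaywrightCrawlingContext) -> None:
            context.log.info(f'Processing {context.request.url}')

        await crawler.run(['https://example.com'])


if __name__ == '__main__':
    asyncio.run(main())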