Skip to content

Commit

Permalink
fix: fix session management with retire (apify#947)
Browse files Browse the repository at this point in the history
### Description

- Fix cases where a session was not excluded from the pool after calling
`session.retire()` inside a successful `request_handler`.
  • Loading branch information
Mantisus authored Jan 31, 2025
1 parent 3dffdca commit caee03f
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/crawlee/crawlers/_basic/_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,7 @@ async def __run_task_function(self) -> None:

request.state = RequestState.DONE

if context.session:
if context.session and context.session.is_usable:
context.session.mark_good()

self._statistics.record_request_processing_finish(statistics_id)
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/crawlers/_basic/test_basic_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from crawlee.errors import SessionError, UserDefinedErrorHandlerError
from crawlee.events._local_event_manager import LocalEventManager
from crawlee.request_loaders import RequestList, RequestManagerTandem
from crawlee.sessions import SessionPool
from crawlee.statistics import FinalStatistics
from crawlee.storage_clients import MemoryStorageClient
from crawlee.storage_clients._memory import DatasetClient
Expand Down Expand Up @@ -1169,3 +1170,32 @@ async def handler(context: BasicCrawlingContext) -> None:
await asyncio.gather(crawler_run_task, add_request_task)

mocked_handler.assert_has_calls(expected_handler_calls)


@pytest.mark.parametrize(
    'retire',
    [
        pytest.param(False, id='without retire'),
        pytest.param(True, id='with retire'),
    ],
)
async def test_session_retire_in_user_handler(*, retire: bool) -> None:
    """Check that retiring a session inside a successful handler takes it out of rotation.

    The crawler is given a session pool limited to a single session and processes two
    requests: the initial one and one enqueued by the handler. The handler records the
    session id it was given and, when `retire` is set, retires that session.

    Args:
        retire: Whether the handler calls `session.retire()` on the session it receives.
    """
    crawler = BasicCrawler(session_pool=SessionPool(max_pool_size=1))
    sessions: list[str] = []

    @crawler.router.default_handler
    async def handler(context: BasicCrawlingContext) -> None:
        if context.session:
            sessions.append(context.session.id)

            # Plain statement rather than a conditional expression evaluated only for its side effect.
            if retire:
                context.session.retire()

        await context.add_requests(['http://b.com/'])

    await crawler.run(['http://a.com/'])

    # Fail with a readable message instead of an IndexError if a handler run had no session.
    assert len(sessions) >= 2, f'expected a session id for each handled request, got {sessions!r}'

    first_session_id, second_session_id = sessions[0], sessions[1]

    # The session should differ if `retire` was called and match otherwise since pool size == 1
    if retire:
        assert second_session_id != first_session_id
    else:
        assert second_session_id == first_session_id

0 comments on commit caee03f

Please sign in to comment.