@@ -315,7 +315,7 @@ async def get_input_state(
315315 ),
316316 logger = self ._logger ,
317317 )
318- return SubCrawlerRun (result = result )
318+ return SubCrawlerRun (result = result , run_context = context_linked_to_result )
319319 except Exception as e :
320320 return SubCrawlerRun (exception = e )
321321
@@ -371,7 +371,8 @@ async def _run_request_handler(self, context: BasicCrawlingContext) -> None:
371371 self .track_http_only_request_handler_runs ()
372372
373373 static_run = await self ._crawl_one (rendering_type = 'static' , context = context )
374- if static_run .result and self .result_checker (static_run .result ):
374+ if static_run .result and static_run .run_context and self .result_checker (static_run .result ):
375+ self ._update_context_from_copy (context , static_run .run_context )
375376 self ._context_result_map [context ] = static_run .result
376377 return
377378 if static_run .exception :
@@ -402,13 +403,10 @@ async def _run_request_handler(self, context: BasicCrawlingContext) -> None:
402403 if pw_run .exception is not None :
403404 raise pw_run .exception
404405
405- if pw_run .result :
406- self ._context_result_map [context ] = pw_run .result
407-
406+ if pw_run .result and pw_run .run_context :
408407 if should_detect_rendering_type :
409408 detection_result : RenderingType
410409 static_run = await self ._crawl_one ('static' , context = context , state = old_state_copy )
411-
412410 if static_run .result and self .result_comparator (static_run .result , pw_run .result ):
413411 detection_result = 'static'
414412 else :
@@ -417,6 +415,9 @@ async def _run_request_handler(self, context: BasicCrawlingContext) -> None:
417415 context .log .debug (f'Detected rendering type { detection_result } for { context .request .url } ' )
418416 self .rendering_type_predictor .store_result (context .request , detection_result )
419417
418+ self ._update_context_from_copy (context , pw_run .run_context )
419+ self ._context_result_map [context ] = pw_run .result
420+
420421 def pre_navigation_hook (
421422 self ,
422423 hook : Callable [[AdaptivePlaywrightPreNavCrawlingContext ], Awaitable [None ]] | None = None ,
@@ -451,8 +452,32 @@ def track_browser_request_handler_runs(self) -> None:
def track_rendering_type_mispredictions(self) -> None:
    """Increment the `rendering_type_mispredictions` counter on the statistics state by one."""
    stats_state = self.statistics.state
    stats_state.rendering_type_mispredictions += 1
453454
def _update_context_from_copy(self, context: BasicCrawlingContext, context_copy: BasicCrawlingContext) -> None:
    """Copy selected request and session fields from `context_copy` onto `context`.

    The listed attributes of `context.request` and `context.session` are overwritten
    with the values found on the matching sub-object of `context_copy`. A sub-object
    pair is skipped entirely when either side is `None`.

    Values are written through `object.__setattr__`, so a `__setattr__` override on
    the target class (such as the one a frozen dataclass installs) is not invoked.
    """
    synced_fields = (
        ('request', ('headers', 'user_data')),
        ('session', ('_user_data', '_usage_count', '_error_score', '_cookies')),
    )

    for owner_name, field_names in synced_fields:
        target = getattr(context, owner_name)
        source = getattr(context_copy, owner_name)

        # Only synchronize when both the original and the copy carry this sub-object.
        if target is not None and source is not None:
            for field_name in field_names:
                object.__setattr__(target, field_name, getattr(source, field_name))
477+
454478
@dataclass(frozen=True)
class SubCrawlerRun:
    """Immutable record describing the outcome of one sub-crawler run.

    Every field is optional and defaults to `None`.
    """

    # Handler result produced by the run, if any.
    result: RequestHandlerRunResult | None = None

    # Exception captured from the run, if any.
    exception: Exception | None = None

    # Crawling context associated with `result`, if any.
    run_context: BasicCrawlingContext | None = None
0 commit comments