@@ -368,17 +368,30 @@ def __init__(
368368 self .eval_results_queue = eval_results_queue
369369 self .actor_manager = actor_manager
370370
371+ # For caching should_stop status.
372+ self ._last_should_stop_update = float ("-inf" )
373+ self ._should_stop_value = False
374+ self ._should_stop_timeout_s = 5
375+
376+ def _should_stop (self ) -> bool :
377+ if (time .perf_counter () - self ._last_should_stop_update ) > self ._should_stop_timeout_s :
378+ should_stop_ref = self .actor_manager .should_stop .remote ()
379+ ready_refs , _ = ray .wait ([should_stop_ref ], timeout = 0.1 )
380+ if ready_refs :
381+ self ._should_stop_value = ray .get (ready_refs [0 ])
382+ self ._last_should_stop_update = time .perf_counter ()
383+ else :
384+ ray .cancel (should_stop_ref )
385+ return self ._should_stop_value
386+
371387 def process_from_queue (self , timeout : float = 60.0 ):
372388 """Run generation loop using LLMEngine directly, with optional tool support.
373389
374390 Returns:
375391 int: Number of requests processed (0 or 1)
376392 """
377393 while True :
378- # Non-blocking check for should_stop using ray.wait
379- should_stop_ref = self .actor_manager .should_stop .remote ()
380- ready_refs , _ = ray .wait ([should_stop_ref ], timeout = 0.1 )
381- if ready_refs and ray .get (ready_refs [0 ]):
394+ if self ._should_stop ():
382395 return 0
383396
384397 try :
0 commit comments