Skip to content

Commit cf92137

Browse files
Caches should_stop so we're not hammering Ray. (#973)
* Now, caches should_stop value. * Updated code to maintain last value on timeout. * Cleaned up conditional.
1 parent f455ef0 commit cf92137

File tree

1 file changed

+17
-4
lines changed

1 file changed

+17
-4
lines changed

open_instruct/vllm_utils3.py

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,17 +368,30 @@ def __init__(
368368
self.eval_results_queue = eval_results_queue
369369
self.actor_manager = actor_manager
370370

371+
# For caching should_stop status.
372+
self._last_should_stop_update = float("-inf")
373+
self._should_stop_value = False
374+
self._should_stop_timeout_s = 5
375+
376+
def _should_stop(self) -> bool:
377+
if (time.perf_counter() - self._last_should_stop_update) > self._should_stop_timeout_s:
378+
should_stop_ref = self.actor_manager.should_stop.remote()
379+
ready_refs, _ = ray.wait([should_stop_ref], timeout=0.1)
380+
if ready_refs:
381+
self._should_stop_value = ray.get(ready_refs[0])
382+
self._last_should_stop_update = time.perf_counter()
383+
else:
384+
ray.cancel(should_stop_ref)
385+
return self._should_stop_value
386+
371387
def process_from_queue(self, timeout: float = 60.0):
372388
"""Run generation loop using LLMEngine directly, with optional tool support.
373389
374390
Returns:
375391
int: Number of requests processed (0 or 1)
376392
"""
377393
while True:
378-
# Non-blocking check for should_stop using ray.wait
379-
should_stop_ref = self.actor_manager.should_stop.remote()
380-
ready_refs, _ = ray.wait([should_stop_ref], timeout=0.1)
381-
if ready_refs and ray.get(ready_refs[0]):
394+
if self._should_stop():
382395
return 0
383396

384397
try:

0 commit comments

Comments
 (0)