diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py index 7d526b25ed193..2ff1a1ead99c1 100644 --- a/vllm/distributed/device_communicators/shm_broadcast.py +++ b/vllm/distributed/device_communicators/shm_broadcast.py @@ -1,3 +1,4 @@ +import os import pickle import time from contextlib import contextmanager @@ -18,12 +19,6 @@ VLLM_RINGBUFFER_WARNING_INTERVAL = envs.VLLM_RINGBUFFER_WARNING_INTERVAL -# time to wait if the queue is full or empty -# if we sleep for too short, it will consume too much CPU -# if we sleep for too long, it will slow down the writer/reader -# 0.1 us is a good balance -RINGBUFFER_SLEEP_INTERVAL = 1e-7 - logger = init_logger(__name__) @@ -333,8 +328,8 @@ def acquire_write(self): # if this block is not ready to write, # we need to wait until it is read by all readers - # wait for a while - time.sleep(RINGBUFFER_SLEEP_INTERVAL) + # Release the processor to other threads + os.sched_yield() # if we wait for a long time, we should warn the user if (time.monotonic() - start_time > @@ -387,8 +382,8 @@ def acquire_read(self): # if this block is not ready, # we need to wait until it is written - # wait for a while - time.sleep(RINGBUFFER_SLEEP_INTERVAL) + # Release the processor to other threads + os.sched_yield() # if we wait for a long time, we should warn the user if (time.monotonic() - start_time >