From e2b00d7fcb1578064b5d54cd287ad23f35399d82 Mon Sep 17 00:00:00 2001
From: Tyler Michael Smith
Date: Mon, 4 Nov 2024 20:08:21 -0500
Subject: [PATCH] [Core] Use os.sched_yield in ShmRingBuffer instead of time.sleep (#9994)

Signed-off-by: Tyler Michael Smith
Signed-off-by: Sumit Dubey
---
 .../device_communicators/shm_broadcast.py | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py
index 7d526b25ed193..2ff1a1ead99c1 100644
--- a/vllm/distributed/device_communicators/shm_broadcast.py
+++ b/vllm/distributed/device_communicators/shm_broadcast.py
@@ -1,3 +1,4 @@
+import os
 import pickle
 import time
 from contextlib import contextmanager
@@ -18,12 +19,6 @@
 
 VLLM_RINGBUFFER_WARNING_INTERVAL = envs.VLLM_RINGBUFFER_WARNING_INTERVAL
 
-# time to wait if the queue is full or empty
-# if we sleep for too short, it will consume too much CPU
-# if we sleep for too long, it will slow down the writer/reader
-# 0.1 us is a good balance
-RINGBUFFER_SLEEP_INTERVAL = 1e-7
-
 logger = init_logger(__name__)
 
 
@@ -333,8 +328,8 @@ def acquire_write(self):
                     # if this block is not ready to write,
                     # we need to wait until it is read by all readers
 
-                    # wait for a while
-                    time.sleep(RINGBUFFER_SLEEP_INTERVAL)
+                    # Release the processor to other threads
+                    os.sched_yield()
 
                     # if we wait for a long time, we should warn the user
                     if (time.monotonic() - start_time >
@@ -387,8 +382,8 @@ def acquire_read(self):
                     # if this block is not ready,
                     # we need to wait until it is written
 
-                    # wait for a while
-                    time.sleep(RINGBUFFER_SLEEP_INTERVAL)
+                    # Release the processor to other threads
+                    os.sched_yield()
 
                     # if we wait for a long time, we should warn the user
                     if (time.monotonic() - start_time >