From 9512f594d2875d6501fe3dcd9162f313c78acb73 Mon Sep 17 00:00:00 2001 From: ZedalHuang <1520787127@qq.com> Date: Thu, 25 Jul 2024 15:06:19 +0800 Subject: [PATCH] use await in _background_process_outputs to improve api_server throughput --- llumnix/entrypoints/vllm/api_server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llumnix/entrypoints/vllm/api_server.py b/llumnix/entrypoints/vllm/api_server.py index 21663cd9..344b879b 100644 --- a/llumnix/entrypoints/vllm/api_server.py +++ b/llumnix/entrypoints/vllm/api_server.py @@ -49,7 +49,8 @@ async def _background_process_outputs(): while True: - request_outputs = request_output_queue.get_nowait_batch(num_items=request_output_queue.qsize()) + qsize = await request_output_queue.actor.qsize.remote() + request_outputs = await request_output_queue.actor.get_nowait_batch.remote(qsize) for request_output in request_outputs: request_id = request_output.request_id # Request could be dispatched twice when manager is dead, the first request will free the request_streams when finished. @@ -59,7 +60,6 @@ async def _background_process_outputs(): if request_output.finished: request_streams[request_id].finish() del request_streams[request_id] - await asyncio.sleep(0.01) # pylint: disable=unused-argument @asynccontextmanager