Skip to content

Commit

Permalink
use await in _background_process_outputs to improve api_server throughput
Browse files Browse the repository at this point in the history
  • Loading branch information
ZeldaHuang committed Jul 25, 2024
1 parent cd57e21 commit 9512f59
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions llumnix/entrypoints/vllm/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@

async def _background_process_outputs():
while True:
request_outputs = request_output_queue.get_nowait_batch(num_items=request_output_queue.qsize())
qsize = await request_output_queue.actor.qsize.remote()
request_outputs = await request_output_queue.actor.get_nowait_batch.remote(qsize)
for request_output in request_outputs:
request_id = request_output.request_id
# Request could be dispatched twice when manager is dead, the first request will free the request_streams when finished.
Expand All @@ -59,7 +60,6 @@ async def _background_process_outputs():
if request_output.finished:
request_streams[request_id].finish()
del request_streams[request_id]
await asyncio.sleep(0.01)

# pylint: disable=unused-argument
@asynccontextmanager
Expand Down

0 comments on commit 9512f59

Please sign in to comment.