Skip to content

Commit

Permalink
Fix
Browse files (browse the repository at this point in the history)
  • Loading branch information
s5u13b committed Nov 28, 2024
1 parent d28afc0 commit 0e56a16
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 3 deletions.
10 changes: 9 additions & 1 deletion llumnix/backends/vllm/llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,20 +190,25 @@ def _process_model_outputs(
seq_group_metadata_list = new_seq_group_metadata_list
for ignored_seq_group in ignored_seq_groups:
server_infos.append(ignored_seq_group.server_info)

for server_info in server_infos:
if hasattr(server_info, 'request_timestamps'):
server_info.request_timestamps.engine_process_model_outputs_timestamp_begin = time.time()

request_outputs = super()._process_model_outputs(output, scheduled_seq_groups, ignored_seq_groups, seq_group_metadata_list)

for request_output, server_info in zip(request_outputs, server_infos):
if hasattr(server_info, 'request_timestamps'):
request_output.request_timestamps = server_info.request_timestamps
request_output.request_timestamps.engine_process_model_outputs_timestamp_end = time.time()

# TODO(ZeldaHuang): Use LlumnixRequestOutput to store llumnix output args.
return request_outputs, server_infos

async def step_async(self) -> Tuple[List[RequestOutput], List[ServerInfo]]:
step_begin_time = time.time()
request_outputs, server_infos = await super().step_async()

for request_output in request_outputs:
if hasattr(request_output, 'request_timestamps'):
request_output.request_timestamps.engine_step_timestamp_begin = step_begin_time
Expand All @@ -225,9 +230,12 @@ async def step_async(self) -> Tuple[List[RequestOutput], List[ServerInfo]]:
tot_blocks.extend(blocks)
tot_blocks = set(tot_blocks)
instance_info.num_blocks_last_running_request = len(tot_blocks)

self.instance_info = instance_info

if request_outputs:
self.put_queue_args_queue.put_nowait((request_outputs, server_infos))
self.instance_info = instance_info

for request_output in request_outputs:
if hasattr(request_output, 'request_timestamps'):
request_output.request_timestamps.engine_step_postprocess_timestamp_end = time.time()
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e_test/test_bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def run_bench_command(command):
# pylint: disable=broad-except
except subprocess.TimeoutExpired:
process.kill()
print("bench_test timed out after 30 minutes.")
assert False, "bench_test timed out after {} minutes.".format(BENCH_TEST_TIMEOUT_MINS)

with open("performance.txt", "w", encoding="utf-8") as f:
f.write(parse_log_file())
Expand Down
2 changes: 1 addition & 1 deletion tests/e2e_test/test_migration.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def run_bench_command(command):
# pylint: disable=broad-except
except subprocess.TimeoutExpired:
process.kill()
print("bench_test timed out after {} minutes.".format(MIGRATION_BENCH_TIMEOUT_MINS))
assert False, "migration_test timed out after {} minutes.".format(MIGRATION_BENCH_TIMEOUT_MINS)

await asyncio.sleep(3)

Expand Down

0 comments on commit 0e56a16

Please sign in to comment.