Skip to content

Commit

Permalink
[perf] fix perf regression from vllm-project#12253 (vllm-project#12380)
Browse files Browse the repository at this point in the history
Signed-off-by: youkaichao <[email protected]>
  • Loading branch information
youkaichao authored Jan 24, 2025
1 parent 0e74d79 commit 6dd94db
Showing 1 changed file with 4 additions and 1 deletion.
5 changes: 4 additions & 1 deletion vllm/worker/model_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,6 @@ def __init__(self,
self.enable_prompt_adapter = (self.runner.prompt_adapter_config
is not None)
self.multi_modal_input_mapper = self.runner.multi_modal_input_mapper
self.decode_only = True

# Attention metadata inputs.
if self.attn_backend is not None:
Expand All @@ -477,6 +476,10 @@ def prepare(self,
finished_requests_ids: Optional[List[str]] = None) -> None:
self.finished_requests_ids = finished_requests_ids

# Whether the current batch is decode-only.
# Set to False if the batch contains any non-decode request.
self.decode_only = True

# Intermediate data (data in CPU before going to GPU) for
# the current sequence group.
self.inter_data_list: List[
Expand Down

0 comments on commit 6dd94db

Please sign in to comment.