Skip to content

Commit

Permalink
Pipeline Parallel: Guard for KeyErrors at request abort (vllm-project#6587)
Browse files Browse the repository at this point in the history

Signed-off-by: Travis Johnson <[email protected]>
  • Loading branch information
tjohnson31415 authored and jimpang committed Jul 24, 2024
1 parent 3c537e4 commit 6aed998
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
5 changes: 4 additions & 1 deletion vllm/engine/async_llm_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,10 @@ def process_request_output(self,
         """Process a request output from the engine."""
         request_id = request_output.request_id

-        self._request_streams[request_id].put(request_output)
+        # Guard against a KeyError which can occur if the request was aborted
+        # while the output was generated
+        if (stream := self._request_streams.get(request_id)) is not None:
+            stream.put(request_output)
         if request_output.finished:
             if verbose:
                 logger.info("Finished request %s.", request_id)
Expand Down
6 changes: 5 additions & 1 deletion vllm/engine/output_processor/single_step.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,11 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup,
             for parent_seq in parent_seqs
         }
         for sample in samples:
-            parent_child_dict[sample.parent_seq_id].append(sample)
+            # Guard against a KeyError which can occur if the request was
+            # aborted while the output was generated
+            if (child_list :=
+                    parent_child_dict.get(sample.parent_seq_id)) is not None:
+                child_list.append(sample)
         # List of (child, parent)
         child_seqs: List[Tuple[Sequence, Sequence]] = []

Expand Down

0 comments on commit 6aed998

Please sign in to comment.