Pipeline Parallel: Guard for KeyErrors at request abort (vllm-project#6587)

Signed-off-by: Travis Johnson <[email protected]>
xjpang · Jul 24, 2024 · 6aed998 · 6aed998
1 parent 3c537e4
commit 6aed998
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 2 deletions.
diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py
@@ -131,7 +131,10 @@ def process_request_output(self,
         """Process a request output from the engine."""
         request_id = request_output.request_id
 
-        self._request_streams[request_id].put(request_output)
+        # Guard against a KeyError which can occur if the request was aborted
+        # while the output was generated
+        if (stream := self._request_streams.get(request_id)) is not None:
+            stream.put(request_output)
         if request_output.finished:
             if verbose:
                 logger.info("Finished request %s.", request_id)

diff --git a/vllm/engine/output_processor/single_step.py b/vllm/engine/output_processor/single_step.py
@@ -90,7 +90,11 @@ def _process_sequence_group_outputs(self, seq_group: SequenceGroup,
             for parent_seq in parent_seqs
         }
         for sample in samples:
-            parent_child_dict[sample.parent_seq_id].append(sample)
+            # Guard against a KeyError which can occur if the request was
+            # aborted while the output was generated
+            if (child_list :=
+                    parent_child_dict.get(sample.parent_seq_id)) is not None:
+                child_list.append(sample)
         # List of (child, parent)
         child_seqs: List[Tuple[Sequence, Sequence]] = []