# Reconstructed from a whitespace-collapsed diff of
# vllm/engine/multiprocessing/engine.py; this is a method (it takes `self`)
# shown here without its enclosing class.
def _send_outputs(self, outputs: REQUEST_OUTPUTS_T):
    """Send List of RequestOutput to RPCClient.

    Pickles ``outputs`` and sends the resulting bytes as a single-frame
    multipart message on ``self.output_socket``. Nothing is sent when
    ``outputs`` is falsy.

    If ``outputs`` is an ``RPCError`` whose ``exception`` is a Ray
    ``RayTaskError``, that exception is replaced in place by its ``cause``
    before pickling.
    """
    if not outputs:
        return

    if isinstance(outputs, RPCError):
        # Import ray lazily, and only on the error path: a module-level
        # import would make ray a hard dependency of this module even
        # though it is needed solely to recognise RayTaskError here.
        try:
            from ray.exceptions import RayTaskError
        except ImportError:
            # Without ray installed the exception cannot be a RayTaskError,
            # so there is nothing to unwrap.
            pass
        else:
            # RayTaskError might not be picklable here. We need to unpack
            # the underlying exception as the real exception in the output.
            if isinstance(outputs.exception, RayTaskError):
                outputs.exception = outputs.exception.cause

    output_bytes = pickle.dumps(outputs)
    self.output_socket.send_multipart((output_bytes, ), copy=False)