From 2adb4409e0359039135b5aa6501994da12aa5a26 Mon Sep 17 00:00:00 2001 From: Yan Ma Date: Mon, 28 Oct 2024 15:13:03 +0800 Subject: [PATCH] [Bugfix] Fix ray instance detect issue (#9439) --- vllm/executor/ray_utils.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index aa546ebada473..993d279890820 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -232,9 +232,16 @@ def initialize_ray_cluster( # Connect to a ray cluster. if current_platform.is_rocm() or current_platform.is_xpu(): - ray.init(address=ray_address, - ignore_reinit_error=True, - num_gpus=parallel_config.world_size) + # Try to connect existing ray instance and create a new one if not found + try: + ray.init("auto") + except ConnectionError: + logger.warning( + "No existing RAY instance detected. " + "A new instance will be launched with current node resources.") + ray.init(address=ray_address, + ignore_reinit_error=True, + num_gpus=parallel_config.world_size) else: ray.init(address=ray_address, ignore_reinit_error=True)