diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 0e0ec311023eb..46c92e10b360c 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -544,7 +544,9 @@ async def run_server(args, **uvicorn_kwargs) -> None:
     # This avoids race conditions with ray.
     # see https://github.com/vllm-project/vllm/issues/8204
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-    sock.bind((args.host, args.port))
+    # --host now defaults to None: fall back to "" (all IPv4 interfaces) only
+    # in that case, so an explicitly requested host is still honoured here.
+    sock.bind((args.host or "", args.port))
 
     def signal_handler(*_) -> None:
         # Interrupt server on sigterm while initializing
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index f4dd9df9587ce..a089985ac9758 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -77,7 +77,7 @@ def __call__(
 def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
     parser.add_argument("--host",
                         type=nullable_str,
-                        default="0.0.0.0",
+                        default=None,
                         help="host name")
     parser.add_argument("--port", type=int, default=8000, help="port number")
     parser.add_argument(