From 62fd8d7c9423bbc0fcff75f65ca8e33940070344 Mon Sep 17 00:00:00 2001 From: kevin314 Date: Sat, 14 Sep 2024 21:53:25 +0000 Subject: [PATCH 1/7] Bind remote server to port 0 --- tests/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index f6c2be17ebdcf..d3a51f095e57a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -76,9 +76,7 @@ def __init__(self, "when `auto_port=True`.") # Don't mutate the input args - vllm_serve_args = vllm_serve_args + [ - "--port", str(get_open_port()) - ] + vllm_serve_args = vllm_serve_args + ["--port", str(0)] parser = FlexibleArgumentParser( description="vLLM's remote OpenAI server.") From 42b9a1e8066c685d4f4da0c4fe5f5ec48e769c0e Mon Sep 17 00:00:00 2001 From: kevin314 Date: Sat, 14 Sep 2024 23:53:41 -0500 Subject: [PATCH 2/7] Revert "Bind remote server to port 0" This reverts commit 3bda16ee59daf3d9a637248673dac7cd90a8a97e. --- tests/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index d3a51f095e57a..f6c2be17ebdcf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -76,7 +76,9 @@ def __init__(self, "when `auto_port=True`.") # Don't mutate the input args - vllm_serve_args = vllm_serve_args + ["--port", str(0)] + vllm_serve_args = vllm_serve_args + [ + "--port", str(get_open_port()) + ] parser = FlexibleArgumentParser( description="vLLM's remote OpenAI server.") From 2bac8b1f7c96bc88934a06c2f0da5d7e1af7fa72 Mon Sep 17 00:00:00 2001 From: kevin314 Date: Sun, 15 Sep 2024 00:09:36 -0500 Subject: [PATCH 3/7] Set VLLM_PORT env variable --- tests/utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index f6c2be17ebdcf..41ee0ca97aecc 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -179,6 +179,7 @@ def compare_two_settings(model: str, env1: The first set of environment variables to pass to the API server. env2: The second set of environment variables to pass to the API server. """ + os.environ["VLLM_PORT"] = "8001" trust_remote_code = "--trust-remote-code" if trust_remote_code in arg1 or trust_remote_code in arg2: @@ -298,6 +299,8 @@ def compare_two_settings(model: str, "texts": texts, }) + os.environ.pop("VLLM_PORT") + n = len(results) // 2 arg1_results = results[:n] arg2_results = results[n:] @@ -491,6 +494,7 @@ async def completions_with_server_args( Returns: OpenAI Completion instance ''' + os.environ["VLLM_PORT"] = "8001" outputs = None with RemoteOpenAIServer(model_name, @@ -503,6 +507,8 @@ async def completions_with_server_args( stream=False, max_tokens=5, logprobs=num_logprobs) + os.environ.pop("VLLM_PORT") + assert outputs is not None return outputs From 9d4a07c547d794d3bfaa29c0ab87cd010ce6e261 Mon Sep 17 00:00:00 2001 From: kevin314 Date: Sun, 15 Sep 2024 00:14:39 -0500 Subject: [PATCH 4/7] Run linter --- tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/utils.py b/tests/utils.py index 41ee0ca97aecc..a21009bd0f04f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -508,7 +508,7 @@ async def completions_with_server_args( max_tokens=5, logprobs=num_logprobs) os.environ.pop("VLLM_PORT") - + assert outputs is not None return outputs From 103957f04f9af6861d14d7e7739f3238376f9b46 Mon Sep 17 00:00:00 2001 From: kevin314 Date: Sun, 15 Sep 2024 01:01:30 -0500 Subject: [PATCH 5/7] Trigger Build From 4558420f052535e06940ae6fe476a5cab138e1c9 Mon Sep 17 00:00:00 2001 From: kevin314 Date: Mon, 16 Sep 2024 04:22:03 +0000 Subject: [PATCH 6/7] Revert "Set VLLM_PORT env variable" --- tests/utils.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/utils.py b/tests/utils.py index a21009bd0f04f..f6c2be17ebdcf 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -179,7 +179,6 @@ def compare_two_settings(model: str, env1: The first set of environment variables to pass to the API server. env2: The second set of environment variables to pass to the API server. """ - os.environ["VLLM_PORT"] = "8001" trust_remote_code = "--trust-remote-code" if trust_remote_code in arg1 or trust_remote_code in arg2: @@ -299,8 +298,6 @@ def compare_two_settings(model: str, "texts": texts, }) - os.environ.pop("VLLM_PORT") - n = len(results) // 2 arg1_results = results[:n] arg2_results = results[n:] @@ -494,7 +491,6 @@ async def completions_with_server_args( Returns: OpenAI Completion instance ''' - os.environ["VLLM_PORT"] = "8001" outputs = None with RemoteOpenAIServer(model_name, @@ -507,8 +503,6 @@ async def completions_with_server_args( stream=False, max_tokens=5, logprobs=num_logprobs) - os.environ.pop("VLLM_PORT") - assert outputs is not None return outputs From 44aea07906cf258722267f70d0075e3093b0d8d7 Mon Sep 17 00:00:00 2001 From: kevin314 Date: Mon, 16 Sep 2024 04:44:10 +0000 Subject: [PATCH 7/7] Bind api server port before starting engine --- vllm/entrypoints/openai/api_server.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index b50fc6a265f8d..3d1d832986c1e 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -5,6 +5,7 @@ import os import re import signal +import socket import tempfile from argparse import Namespace from contextlib import asynccontextmanager @@ -525,6 +526,9 @@ async def run_server(args, **uvicorn_kwargs) -> None: logger.info("vLLM API server version %s", VLLM_VERSION) logger.info("args: %s", args) + temp_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + temp_socket.bind(("", args.port)) + def signal_handler(*_) -> None: # Interrupt server on sigterm while initializing raise KeyboardInterrupt("terminated") @@ -541,6 +545,8 @@ def signal_handler(*_) -> None: model_config = await async_engine_client.get_model_config() init_app_state(async_engine_client, model_config, app.state, args) + temp_socket.close() + shutdown_task = await serve_http( app, limit_concurrency=async_engine_client.limit_concurrency,