From fc386c4c94af0843d288992bb01cdbd1c5f3ddd6 Mon Sep 17 00:00:00 2001 From: Joe Runde Date: Fri, 26 Jul 2024 09:41:11 -0600 Subject: [PATCH] :art: format Signed-off-by: Joe Runde --- vllm/entrypoints/openai/api_server.py | 2 ++ vllm/entrypoints/openai/cli_args.py | 11 +++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 06b710bc4a6f1..f0b3f90e8ae79 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -201,6 +201,7 @@ async def runtime_error_handler(_, __): # In this case we cannot await the server shutdown here because # this handler must first return to close the connection for # this request. + global server server.should_exit = True return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR) @@ -212,6 +213,7 @@ async def engine_dead_handler(_, __): if not args.keep_alive_on_engine_death: logger.fatal("AsyncLLMEngine is already dead, terminating server " "process") + global server server.should_exit = True return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR) diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py index b9cd9e8bb4bdc..4080281b950c9 100644 --- a/vllm/entrypoints/openai/cli_args.py +++ b/vllm/entrypoints/openai/cli_args.py @@ -134,12 +134,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: help="When --max-logprobs is specified, represents single tokens as" "strings of the form 'token_id:{token_id}' so that tokens that" "are not JSON-encodable can be identified.") - parser.add_argument( - "--keep-alive-on-engine-death", - action="store_true", - help="The default behavior is to stop the server " - "process when the LLM engine dies. Set this flag to " - "keep the server up instead.") + parser.add_argument("--keep-alive-on-engine-death", + action="store_true", + help="The default behavior is to stop the server " + "process when the LLM engine dies. Set this flag to " + "keep the server up instead.") parser = AsyncEngineArgs.add_cli_args(parser)