From fc386c4c94af0843d288992bb01cdbd1c5f3ddd6 Mon Sep 17 00:00:00 2001
From: Joe Runde <Joseph.Runde@ibm.com>
Date: Fri, 26 Jul 2024 09:41:11 -0600
Subject: [PATCH] :art: format: declare global server, reflow keep-alive CLI arg

Signed-off-by: Joe Runde <Joseph.Runde@ibm.com>
---
 vllm/entrypoints/openai/api_server.py |  2 ++
 vllm/entrypoints/openai/cli_args.py   | 11 +++++------
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index 06b710bc4a6f1..f0b3f90e8ae79 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -201,6 +201,7 @@ async def runtime_error_handler(_, __):
             # In this case we cannot await the server shutdown here because
             # this handler must first return to close the connection for
             # this request.
+            global server
             server.should_exit = True
 
         return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
@@ -212,6 +213,7 @@ async def engine_dead_handler(_, __):
         if not args.keep_alive_on_engine_death:
             logger.fatal("AsyncLLMEngine is already dead, terminating server "
                          "process")
+            global server
             server.should_exit = True
 
         return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)
diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py
index b9cd9e8bb4bdc..4080281b950c9 100644
--- a/vllm/entrypoints/openai/cli_args.py
+++ b/vllm/entrypoints/openai/cli_args.py
@@ -134,12 +134,11 @@ def make_arg_parser(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
         help="When --max-logprobs is specified, represents single tokens as"
         "strings of the form 'token_id:{token_id}' so that tokens that"
         "are not JSON-encodable can be identified.")
-    parser.add_argument(
-        "--keep-alive-on-engine-death",
-        action="store_true",
-        help="The default behavior is to stop the server "
-        "process when the LLM engine dies. Set this flag to "
-        "keep the server up instead.")
+    parser.add_argument("--keep-alive-on-engine-death",
+                        action="store_true",
+                        help="The default behavior is to stop the server "
+                        "process when the LLM engine dies. Set this flag to "
+                        "keep the server up instead.")
 
     parser = AsyncEngineArgs.add_cli_args(parser)