Skip to content

Commit

Permalink
♻️ shutdown server programatically
Browse files Browse the repository at this point in the history
Signed-off-by: Joe Runde <[email protected]>
  • Loading branch information
joerunde committed Jul 22, 2024
1 parent befd693 commit 99e54d3
Showing 1 changed file with 27 additions and 13 deletions.
40 changes: 27 additions & 13 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import asyncio
import contextlib
import importlib
import inspect
import os
import re
import signal
from contextlib import asynccontextmanager
from http import HTTPStatus
from typing import Optional, Set
Expand Down Expand Up @@ -45,6 +44,7 @@

TIMEOUT_KEEP_ALIVE = 5 # seconds

server: uvicorn.Server
engine: AsyncLLMEngine
engine_args: AsyncEngineArgs
openai_serving_chat: OpenAIServingChat
Expand Down Expand Up @@ -196,7 +196,12 @@ async def runtime_error_handler(_, __):
and not engine.is_running):
logger.fatal("AsyncLLMEngine has failed, terminating server "
"process")
os.kill(os.getpid(), signal.SIGTERM)
# See discussions here on shutting down a uvicorn server
# https://github.com/encode/uvicorn/discussions/1103
# In this case we cannot await the server shutdown here because
# this handler must first return to close the connection for
# this request.
server.should_exit = True

return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)

Expand All @@ -207,7 +212,7 @@ async def engine_dead_handler(_, __):
if not args.keep_alive_on_engine_death:
logger.fatal("AsyncLLMEngine is already dead, terminating server "
"process")
os.kill(os.getpid(), signal.SIGTERM)
server.should_exit = True

return Response(status_code=HTTPStatus.INTERNAL_SERVER_ERROR)

Expand Down Expand Up @@ -322,15 +327,24 @@ def run_server(args, llm_engine=None):
methods = ', '.join(route.methods)
logger.info("Route: %s, Methods: %s", route.path, methods)

uvicorn.run(app,
host=args.host,
port=args.port,
log_level=args.uvicorn_log_level,
timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
ssl_keyfile=args.ssl_keyfile,
ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs,
ssl_cert_reqs=args.ssl_cert_reqs)
# Configure and build the uvicorn server
# See `uvicorn.run()` for reference
uvicorn_config = uvicorn.Config(app,
host=args.host,
port=args.port,
log_level=args.uvicorn_log_level,
timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
ssl_keyfile=args.ssl_keyfile,
ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs,
ssl_cert_reqs=args.ssl_cert_reqs)

global server
server = uvicorn.Server(config=uvicorn_config)

# Run the server and block until it exits
with contextlib.suppress(KeyboardInterrupt):
server.run()


if __name__ == "__main__":
Expand Down

0 comments on commit 99e54d3

Please sign in to comment.