Skip to content

Commit

Permalink
🐛 add backwards compatibility
Browse files Browse the repository at this point in the history
Signed-off-by: Joe Runde <[email protected]>
  • Loading branch information
joerunde committed Aug 9, 2024
1 parent 49f738c commit a7d89dc
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 31 deletions.
24 changes: 6 additions & 18 deletions src/vllm_tgis_adapter/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import asyncio
import contextlib
import signal
from concurrent.futures import FIRST_COMPLETED
from typing import TYPE_CHECKING

Expand Down Expand Up @@ -35,6 +34,7 @@ async def start_servers(args: argparse.Namespace) -> None:
run_http_server(args, engine),
name="http_server",
)
# The HTTP server task will catch interrupt signals for us
tasks.append(http_server_task)

grpc_server_task = loop.create_task(
Expand All @@ -43,29 +43,17 @@ async def start_servers(args: argparse.Namespace) -> None:
)
tasks.append(grpc_server_task)

def signal_handler() -> None:
# prevents the uvicorn signal handler from exiting early
for task in tasks:
task.cancel()

async def override_signal_handler() -> None:
loop = asyncio.get_running_loop()

for sig in (signal.SIGINT, signal.SIGTERM):
loop.add_signal_handler(sig, signal_handler)

await override_signal_handler()

with contextlib.suppress(asyncio.CancelledError):
# Both server tasks will exit normally on shutdown, so we await
# FIRST_COMPLETED to catch either one shutting down.
await asyncio.wait(
tasks,
return_when=FIRST_COMPLETED,
)
await asyncio.wait(tasks, return_when=FIRST_COMPLETED)
# Once either server shuts down, cancel the other
for task in tasks:
task.cancel()

# Final wait for both servers to finish
await asyncio.wait(tasks)

check_for_failed_tasks(tasks)


Expand Down
35 changes: 22 additions & 13 deletions src/vllm_tgis_adapter/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,16 +29,25 @@ async def run_http_server(

app = await init_app(engine, args) # type: ignore[arg-type]

await serve_http(
app,
engine,
host=args.host,
port=args.port,
log_level=args.uvicorn_log_level,
timeout_keep_alive=TIMEOUT_KEEP_ALIVE,
ssl_keyfile=args.ssl_keyfile,
ssl_certfile=args.ssl_certfile,
ssl_ca_certs=args.ssl_ca_certs,
ssl_cert_reqs=args.ssl_cert_reqs,
**uvicorn_kwargs,
)
serve_kwargs = {
"host": args.host,
"port": args.port,
"log_level": args.uvicorn_log_level,
"timeout_keep_alive": TIMEOUT_KEEP_ALIVE,
"ssl_keyfile": args.ssl_keyfile,
"ssl_certfile": args.ssl_certfile,
"ssl_ca_certs": args.ssl_ca_certs,
"ssl_cert_reqs": args.ssl_cert_reqs,
}
serve_kwargs.update(uvicorn_kwargs)

try:
shutdown_coro = await serve_http(app, engine, **serve_kwargs)
except TypeError:
# vllm 0.5.4 backwards compatibility
# HTTP server will not shut itself down when the engine dies
shutdown_coro = await serve_http(app, **serve_kwargs)

# launcher.serve_http returns a shutdown coroutine to await
# (The double await is intentional)
await shutdown_coro

0 comments on commit a7d89dc

Please sign in to comment.