Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

api: better startup failure UX #881

Merged
merged 1 commit into from
Dec 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions aphrodite/endpoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from contextlib import asynccontextmanager
from distutils.util import strtobool
from http import HTTPStatus
from typing import AsyncGenerator, AsyncIterator, List, Set, Tuple
from typing import AsyncGenerator, AsyncIterator, List, Optional, Set, Tuple

from fastapi import APIRouter, FastAPI, Request
from fastapi.exceptions import RequestValidationError
Expand Down Expand Up @@ -108,7 +108,14 @@ async def _force_log():


@asynccontextmanager
async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
async def build_async_engine_client(
args: Namespace) -> AsyncIterator[Optional[AsyncEngineClient]]:
"""
Create AsyncEngineClient, either:
- in-process using the AsyncAphrodite Directly
- multiprocess using AsyncAphrodite RPC
Returns the Client or None if the creation failed.
"""
# Context manager to handle async_engine_client lifecycle
# Ensures everything is shutdown and cleaned up on error/exit
global engine_args
Expand Down Expand Up @@ -168,11 +175,13 @@ async def build_async_engine_client(args) -> AsyncIterator[AsyncEngineClient]:
try:
await async_engine_client.setup()
break
except TimeoutError as e:
except TimeoutError:
if not rpc_server_process.is_alive():
raise RuntimeError(
"The server process died before "
"responding to the readiness probe") from e
logger.error(
"RPCServer process died before responding "
"to readiness probe")
yield None
return
yield async_engine_client
finally:
# Ensure rpc server process was terminated
Expand Down Expand Up @@ -759,6 +768,9 @@ async def init_app(
async def run_server(args, **uvicorn_kwargs) -> None:

async with build_async_engine_client(args) as async_engine_client:
# If None, creation of the client failed and we exit.
if async_engine_client is None:
return
app = await init_app(async_engine_client, args)

protocol = "https" if args.ssl_certfile else "http"
Expand Down
2 changes: 1 addition & 1 deletion aphrodite/server/launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ async def dummy_shutdown() -> None:
port = uvicorn_kwargs["port"]
process = find_process_using_port(port)
if process is not None:
logger.debug(
logger.info(
f"port {port} is used by process {process} launched with "
f"command:\n{' '.join(process.cmdline())}")
logger.info("Gracefully stopping http server")
Expand Down
28 changes: 15 additions & 13 deletions tests/endpoints/openai/test_mp_api_server.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import time

import pytest

from aphrodite.common.utils import FlexibleArgumentParser
Expand All @@ -8,19 +10,19 @@
@pytest.mark.asyncio
async def test_mp_crash_detection():

with pytest.raises(RuntimeError) as excinfo:
parser = FlexibleArgumentParser(
description="Aphrodite's remote OpenAI server.")
parser = make_arg_parser(parser)
args = parser.parse_args([])
# use an invalid tensor_parallel_size to trigger the
# error in the server
args.tensor_parallel_size = 65536

async with build_async_engine_client(args):
pass
assert "The server process died before responding to the readiness probe"\
in str(excinfo.value)
parser = FlexibleArgumentParser(
description="Aphrodite's remote OpenAI server.")
parser = make_arg_parser(parser)
args = parser.parse_args([])
# use an invalid tensor_parallel_size to trigger the
# error in the server
args.tensor_parallel_size = 65536
start = time.perf_counter()
async with build_async_engine_client(args):
pass
end = time.perf_counter()
assert end - start < 60, ("Expected Aphrodite to gracefully shutdown in "
"<60s if there is an error in the startup.")


@pytest.mark.asyncio
Expand Down
Loading