From b3a181f8eadf95f9e023c86e4eaa6ae8eeab2fbf Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Fri, 19 Jul 2024 11:55:13 +0800
Subject: [PATCH] [Bugfix][Frontend] Fix missing `/metrics` endpoint (#6463)

---
 tests/entrypoints/openai/test_basic.py | 61 ++++++++++++++++++++++++++
 vllm/entrypoints/openai/api_server.py  | 14 +++---
 2 files changed, 70 insertions(+), 5 deletions(-)
 create mode 100644 tests/entrypoints/openai/test_basic.py

diff --git a/tests/entrypoints/openai/test_basic.py b/tests/entrypoints/openai/test_basic.py
new file mode 100644
index 0000000000000..2c721d9ba7609
--- /dev/null
+++ b/tests/entrypoints/openai/test_basic.py
@@ -0,0 +1,61 @@
+from http import HTTPStatus
+
+import openai
+import pytest
+import requests
+
+from vllm.version import __version__ as VLLM_VERSION
+
+from ...utils import RemoteOpenAIServer
+
+MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"
+
+
+@pytest.fixture(scope="module")
+def server():
+    args = [
+        # use half precision for speed and memory savings in CI environment
+        "--dtype",
+        "bfloat16",
+        "--max-model-len",
+        "8192",
+        "--enforce-eager",
+        "--max-num-seqs",
+        "128",
+    ]
+
+    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
+        yield remote_server
+
+
+@pytest.fixture(scope="module")
+def client(server):
+    return server.get_async_client()
+
+
+@pytest.mark.asyncio
+async def test_show_version(client: openai.AsyncOpenAI):
+    base_url = str(client.base_url)[:-3].strip("/")
+
+    response = requests.get(base_url + "/version")
+    response.raise_for_status()
+
+    assert response.json() == {"version": VLLM_VERSION}
+
+
+@pytest.mark.asyncio
+async def test_check_health(client: openai.AsyncOpenAI):
+    base_url = str(client.base_url)[:-3].strip("/")
+
+    response = requests.get(base_url + "/health")
+
+    assert response.status_code == HTTPStatus.OK
+
+
+@pytest.mark.asyncio
+async def test_log_metrics(client: openai.AsyncOpenAI):
+    base_url = str(client.base_url)[:-3].strip("/")
+
+    response = requests.get(base_url + "/metrics")
+
+    assert response.status_code == HTTPStatus.OK
diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py
index b6bf08e5fae60..421d398d32f25 100644
--- a/vllm/entrypoints/openai/api_server.py
+++ b/vllm/entrypoints/openai/api_server.py
@@ -73,11 +73,13 @@ async def _force_log():
 
 router = APIRouter()
 
-# Add prometheus asgi middleware to route /metrics requests
-route = Mount("/metrics", make_asgi_app())
-# Workaround for 307 Redirect for /metrics
-route.path_regex = re.compile('^/metrics(?P<path>.*)$')
-router.routes.append(route)
+
+def mount_metrics(app: fastapi.FastAPI):
+    # Add prometheus asgi middleware to route /metrics requests
+    metrics_route = Mount("/metrics", make_asgi_app())
+    # Workaround for 307 Redirect for /metrics
+    metrics_route.path_regex = re.compile('^/metrics(?P<path>.*)$')
+    app.routes.append(metrics_route)
 
 
 @router.get("/health")
@@ -167,6 +169,8 @@ def build_app(args):
     app.include_router(router)
     app.root_path = args.root_path
 
+    mount_metrics(app)
+
     app.add_middleware(
         CORSMiddleware,
         allow_origins=args.allowed_origins,