Skip to content

Commit fc4e6d3

Browse files
[TRTLLM-7183][test] Feature fix model issue for disagg serving (NVIDIA#7785)
Signed-off-by: FredricZ-2007 <[email protected]>
1 parent c98b946 commit fc4e6d3

File tree

3 files changed

+21
-3
lines changed

3 files changed

+21
-3
lines changed

tests/integration/defs/perf/test_perf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2007,7 +2007,7 @@ def _get_disagg_client_command(self):
20072007
f'{self._working_dir}/server_config.yaml', '-p',
20082008
f'{client_dir}/prompts.json', '--ignore-eos',
20092009
'--server-start-timeout',
2010-
str(1800)
2010+
str(3600)
20112011
]
20122012
return client_cmd
20132013

tests/integration/defs/perf/utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919
import os
2020
import re
2121
import subprocess
22+
import time
2223
from datetime import datetime
2324
from enum import Enum
2425
from pathlib import Path
2526
from typing import Dict, List, NamedTuple, Optional
2627

28+
import requests
2729
from _pytest.nodes import Item
2830
from _pytest.python import Function
2931
from defs.trt_test_alternative import (check_output, popen, print_error,
@@ -316,6 +318,19 @@ class PerfDisaggScriptTestCmds(NamedTuple):
316318
client_cmd: List[str]
317319
benchmark_cmd: List[str]
318320

321+
def wait_for_endpoint_ready(self, url: str, timeout: int = 600):
    """Poll ``url`` until it answers with HTTP 200 or ``timeout`` expires.

    Args:
        url: Endpoint to probe with an HTTP GET.
        timeout: Overall budget, in seconds, for the endpoint to become
            ready.

    Returns:
        None. On success a readiness message is printed; if the budget is
        exhausted the failure is reported through ``print_error`` and the
        function returns without raising from this block.
    """
    deadline = time.monotonic() + timeout
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        try:
            # Bound every probe: without a per-request timeout a stalled
            # connection blocks indefinitely and the overall deadline is
            # never re-checked.
            response = requests.get(url, timeout=min(5.0, remaining))
            if response.status_code == 200:
                print(f"endpoint {url} is ready")
                return
        except requests.RequestException as err:
            # Connection refused / timed out while the server is still
            # starting up is expected; keep polling.
            print(f"endpoint {url} is not ready, with exception: {err}")
        # Back off only after a failed probe so an already-ready endpoint
        # is detected immediately, and never sleep past the deadline.
        time.sleep(min(1.0, max(deadline - time.monotonic(), 0.0)))
    print_error(
        f"Endpoint {url} did not become ready within {timeout} seconds")
333+
319334
def run_cmd(self, cmd_idx: int, venv) -> str:
320335
output = ""
321336
try:
@@ -340,6 +355,9 @@ def run_cmd(self, cmd_idx: int, venv) -> str:
340355
stderr=subprocess.STDOUT,
341356
env=venv._new_env,
342357
shell=True) as server_proc):
358+
self.wait_for_endpoint_ready(
359+
f"http://localhost:8000/health",
360+
timeout=1800) # 30 minutes for large models
343361
check_output(self.client_cmd, env=venv._new_env)
344362
output += check_output(self.benchmark_cmd, env=venv._new_env)
345363
finally:

tests/integration/test_lists/qa/llm_perf_sanity.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -222,5 +222,5 @@ llm_perf_sanity:
222222
- perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
223223
- perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.85-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8] TIMEOUT(100)
224224
- perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
225-
- perf/test_perf.py::test_perf[deepseek_v3_lite-disagg_server-ctx_dp:4-gen_tp:4]
226-
- perf/test_perf.py::test_perf[llama_v3.1_70b-disagg_server-ctx_dp:4-gen_tp:4]
225+
- perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-disagg_server-ctx_dp:4-gen_tp:4]
226+
- perf/test_perf.py::test_perf[llama_v3.1_8b-disagg_server-ctx_dp:4-gen_tp:4]

0 commit comments

Comments
 (0)