[TRTLLM-7183][test] Feature fix model issue for disagg serving (NVIDIA#7785)

fredricz-20070104 · web-flow · commit fc4e6d37022d · 2025-09-19T10:12:55.000+08:00
Signed-off-by: FredricZ-2007 <226039983+fredricz-20070104@users.noreply.github.com>
diff --git a/tests/integration/defs/perf/test_perf.py b/tests/integration/defs/perf/test_perf.py
@@ -2007,7 +2007,7 @@ def _get_disagg_client_command(self):
             f'{self._working_dir}/server_config.yaml', '-p',
             f'{client_dir}/prompts.json', '--ignore-eos',
             '--server-start-timeout',
-            str(1800)
+            str(3600)
         ]
         return client_cmd
 
diff --git a/tests/integration/defs/perf/utils.py b/tests/integration/defs/perf/utils.py
@@ -19,11 +19,13 @@
 import os
 import re
 import subprocess
+import time
 from datetime import datetime
 from enum import Enum
 from pathlib import Path
 from typing import Dict, List, NamedTuple, Optional
 
+import requests
 from _pytest.nodes import Item
 from _pytest.python import Function
 from defs.trt_test_alternative import (check_output, popen, print_error,
@@ -316,6 +318,19 @@ class PerfDisaggScriptTestCmds(NamedTuple):
     client_cmd: List[str]
     benchmark_cmd: List[str]
 
+    def wait_for_endpoint_ready(self, url: str, timeout: int = 600):
+        start = time.monotonic()
+        while time.monotonic() - start < timeout:
+            try:
+                time.sleep(1)
+                if requests.get(url).status_code == 200:
+                    print(f"endpoint {url} is ready")
+                    return
+            except Exception as err:
+                print(f"endpoint {url} is not ready, with exception: {err}")
+        print_error(
+            f"Endpoint {url} did not become ready within {timeout} seconds")
+
     def run_cmd(self, cmd_idx: int, venv) -> str:
         output = ""
         try:
@@ -340,6 +355,9 @@ def run_cmd(self, cmd_idx: int, venv) -> str:
                           stderr=subprocess.STDOUT,
                           env=venv._new_env,
                           shell=True) as server_proc):
+                self.wait_for_endpoint_ready(
+                    f"http://localhost:8000/health",
+                    timeout=1800)  # 30 minutes for large models
                 check_output(self.client_cmd, env=venv._new_env)
                 output += check_output(self.benchmark_cmd, env=venv._new_env)
         finally:
diff --git a/tests/integration/test_lists/qa/llm_perf_sanity.yml b/tests/integration/test_lists/qa/llm_perf_sanity.yml
@@ -222,5 +222,5 @@ llm_perf_sanity:
   - perf/test_perf.py::test_perf[llama_v4_maverick_17b_128e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.6-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8]
   - perf/test_perf.py::test_perf[llama_v4_scout_17b_16e_instruct_fp8-bench-pytorch-float8-maxbs:1024-maxnt:20000-kv_frac:0.85-input_output_len:20000,2000-reqs:1000-ep:8-tp:8-gpus:8] TIMEOUT(100)
   - perf/test_perf.py::test_perf[qwen3_235b_a22b_fp8-bench-pytorch-float8-input_output_len:1000,2000-con:256-ep:8-gpus:8] TIMEOUT(60)
-  - perf/test_perf.py::test_perf[deepseek_v3_lite-disagg_server-ctx_dp:4-gen_tp:4]
-  - perf/test_perf.py::test_perf[llama_v3.1_70b-disagg_server-ctx_dp:4-gen_tp:4]
+  - perf/test_perf.py::test_perf[deepseek_v3_lite_fp8-disagg_server-ctx_dp:4-gen_tp:4]
+  - perf/test_perf.py::test_perf[llama_v3.1_8b-disagg_server-ctx_dp:4-gen_tp:4]

Original file line number	Diff line number	Diff line change
`@@ -2007,7 +2007,7 @@ def _get_disagg_client_command(self):`
`2007`	`2007`	`f'{self._working_dir}/server_config.yaml', '-p',`
`2008`	`2008`	`f'{client_dir}/prompts.json', '--ignore-eos',`
`2009`	`2009`	`'--server-start-timeout',`
`2010`		`- str(1800)`
	`2010`	`+ str(3600)`
`2011`	`2011`	`]`
`2012`	`2012`	`return client_cmd`
`2013`	`2013`