Skip to content

Commit b1dc84b

Browse files
[TRTLLM-7399][test] Add DS-R1/Qwen3 test cases for RTX 6000 (#7662)
Signed-off-by: Pamela <[email protected]>
Signed-off-by: Pamela Peng <[email protected]>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
1 parent 48fda86 commit b1dc84b

File tree

3 files changed

+30
-0
lines changed

3 files changed

+30
-0
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3022,9 +3022,13 @@ def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
30223022
[
30233023
(4, 1, 4, False, False, False, "TRTLLM",
30243024
True), # TP8 has bug when we use TRTLLM moe backend and eagle3
3025+
(4, 1, 4, False, False, False, "CUTLASS", False),
3026+
(4, 1, 4, False, False, False, "CUTLASS", True),
30253027
],
30263028
ids=[
30273029
"latency_moe_trtllm_eagle3",
3030+
"latency_moe_cutlass",
3031+
"latency_moe_cutlass_eagle3",
30283032
],
30293033
)
30303034
def test_nvfp4_4gpus(self, tp_size, pp_size, ep_size, attention_dp,

tests/integration/defs/test_e2e.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1844,6 +1844,14 @@ def test_ptp_quickstart(llm_root, llm_venv):
18441844
pytest.param('Qwen3-30B-A3B',
18451845
'Qwen3/Qwen3-30B-A3B',
18461846
marks=pytest.mark.skip_less_device_memory(80000)),
1847+
pytest.param(
1848+
'Qwen3-30B-A3B_fp8_hf',
1849+
'Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf',
1850+
marks=(skip_pre_hopper, pytest.mark.skip_less_device_memory(40000))),
1851+
pytest.param(
1852+
'Qwen3-30B-A3B_nvfp4_hf',
1853+
'Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf',
1854+
marks=(skip_pre_blackwell, pytest.mark.skip_less_device_memory(20000))),
18471855
pytest.param('Llama3.3-70B-FP8',
18481856
'modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp8',
18491857
marks=skip_pre_blackwell),

tests/integration/test_lists/test-db/l0_rtx_pro_6000.yml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,12 @@ l0_rtx_pro_6000:
3030
- test_e2e.py::test_ptp_quickstart_advanced[Nemotron-Super-49B-v1-FP8-nemotron-nas/Llama-3_3-Nemotron-Super-49B-v1-FP8]
3131
- test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-NVFP4-nvfp4-quantized/Mixtral-8x7B-Instruct-v0.1]
3232
- test_e2e.py::test_ptp_quickstart_advanced[Mixtral-8x7B-FP8-Mixtral-8x7B-Instruct-v0.1-fp8]
33+
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B-Qwen3/Qwen3-30B-A3B] # 3mins
34+
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_fp8_hf-Qwen3/saved_models_Qwen3-30B-A3B_fp8_hf] # 3mins
35+
- test_e2e.py::test_ptp_quickstart_advanced[Qwen3-30B-A3B_nvfp4_hf-Qwen3/saved_models_Qwen3-30B-A3B_nvfp4_hf] # 2mins
36+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=False] # 8mins
37+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[latency_moe_cutlass-torch_compile=True] # 8 mins
38+
3339
- condition:
3440
ranges:
3541
system_gpu_count:
@@ -81,3 +87,15 @@ l0_rtx_pro_6000:
8187
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=CUTLASS-mtp_nextn=2-pp4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
8288
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
8389
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2]
90+
# - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[False] # hopper only
91+
# - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_auto_dtype[True]
92+
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=0]
93+
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2]
94+
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=0]
95+
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[llguidance-mtp_nextn=2]
96+
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_cutlass] # 20 mins
97+
# - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_cutlass_eagle3] # failed
98+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=True]
99+
# - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False] # failed
100+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_cutlass-torch_compile=False]
101+
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[tep4_latency_moe_cutlass-torch_compile=True]

0 commit comments

Comments (0)