
Commit cb560fb

add qwen3 235b eagle3 ci

Signed-off-by: bhsueh <[email protected]>
Parent: 2fe9cc0

4 files changed: +32 −7 lines

tests/integration/defs/accuracy/references/gsm8k.yaml (4 additions, 0 deletions)

@@ -86,6 +86,10 @@ Qwen3/Qwen3-235B-A22B:
 - quant_algo: NVFP4
   kv_cache_quant_algo: FP8
   accuracy: 85.78
+- spec_dec_algo: Eagle
+  quant_algo: NVFP4
+  kv_cache_quant_algo: FP8
+  accuracy: 85.78
 nvidia/Llama-3_3-Nemotron-Super-49B-v1:
 - accuracy: 92.57
 - quant_algo: FP8

tests/integration/defs/accuracy/references/mmlu.yaml (4 additions, 0 deletions)

@@ -170,6 +170,10 @@ Qwen3/Qwen3-235B-A22B:
 - quant_algo: NVFP4
   kv_cache_quant_algo: FP8
   accuracy: 86
+- spec_dec_algo: Eagle
+  quant_algo: NVFP4
+  kv_cache_quant_algo: FP8
+  accuracy: 86
 nvidia/Llama-3_3-Nemotron-Super-49B-v1:
 - accuracy: 79.43
 - quant_algo: FP8
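Both reference files share one schema: each model name maps to a list of entries whose keys (quant_algo, kv_cache_quant_algo, and now spec_dec_algo) describe one runtime configuration, with accuracy holding the expected score. A minimal sketch of how such an entry can be resolved is below; lookup_reference is a hypothetical helper, not the repository's actual harness code, and only the YAML schema is taken from this diff:

# Hypothetical sketch: resolve a reference entry like the ones above.
# lookup_reference is NOT the repo's harness code; only the YAML schema
# shown in this commit is assumed.
import yaml

def lookup_reference(path: str, model: str, config: dict) -> float:
    """Return the expected accuracy for `model` under `config`."""
    with open(path) as f:
        references = yaml.safe_load(f)
    for entry in references[model]:
        keys = set(entry) - {"accuracy"}
        # An entry applies when it describes exactly this configuration.
        if keys == set(config) and all(entry[k] == config[k] for k in keys):
            return entry["accuracy"]
    raise KeyError(f"no reference for {model} with {config}")

# The Eagle3 entry added to mmlu.yaml in this commit:
expected = lookup_reference(
    "tests/integration/defs/accuracy/references/mmlu.yaml",
    "Qwen3/Qwen3-235B-A22B",
    {"spec_dec_algo": "Eagle",
     "quant_algo": "NVFP4",
     "kv_cache_quant_algo": "FP8"})
assert expected == 86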

tests/integration/defs/accuracy/test_llm_api_pytorch.py (23 additions, 7 deletions)

@@ -1888,28 +1888,44 @@ def test_fp8(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
     @skip_pre_blackwell
     @pytest.mark.skip_less_mpi_world_size(8)
     @pytest.mark.parametrize(
-        "tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler,moe_backend",
-        [(8, 1, 8, True, True, True, "CUTLASS"),
-         (8, 1, 8, True, True, True, "TRTLLM")],
-        ids=["latency_moe_cutlass", "latency_moe_trtllm"],
+        "tp_size,pp_size,ep_size,attention_dp,cuda_graph,overlap_scheduler,moe_backend,eagle3",
+        [
+            (8, 1, 8, True, True, True, "CUTLASS", False),
+            (8, 1, 8, True, True, True, "TRTLLM", False),
+            (8, 1, 8, False, False, False, "TRTLLM", True),
+        ],
+        ids=[
+            "latency_moe_cutlass", "latency_moe_trtllm",
+            "latency_moe_trtllm_eagle3"
+        ],
     )
     def test_nvfp4(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
-                   overlap_scheduler, moe_backend):
+                   overlap_scheduler, moe_backend, eagle3):

         pytorch_config = dict(
             disable_overlap_scheduler=not overlap_scheduler,
             cuda_graph_config=CudaGraphConfig() if cuda_graph else None,
             moe_config=MoeConfig(backend=moe_backend))

-        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.4,
+                                        enable_block_reuse=not eagle3)
+        spec_config = None
+        if eagle3:
+            spec_config = EagleDecodingConfig(
+                max_draft_len=2,
+                speculative_model_dir=
+                f"{llm_models_root()}/Qwen3/qwen3-235B-eagle3/",
+                eagle3_one_model=not eagle3)
         with LLM(
                 f"{llm_models_root()}/Qwen3/saved_models_Qwen3-235B-A22B_nvfp4_hf",
                 tensor_parallel_size=tp_size,
                 pipeline_parallel_size=pp_size,
                 moe_expert_parallel_size=ep_size,
                 **pytorch_config,
                 enable_attention_dp=attention_dp,
-                kv_cache_config=kv_cache_config) as llm:
+                kv_cache_config=kv_cache_config,
+                speculative_config=spec_config) as llm:
+
             task = MMLU(self.MODEL_NAME)
             task.evaluate(llm)
             task = GSM8K(self.MODEL_NAME)
tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml (1 addition, 0 deletions)

@@ -18,3 +18,4 @@ l0_gb200_multi_nodes:
 - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen] TIMEOUT (180)
 - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] TIMEOUT (180)
 - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] TIMEOUT (180)
+- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3] TIMEOUT (180)
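With this entry on the l0_gb200_multi_nodes list, CI schedules the new case alongside the existing latency MoE variants under the same 180-minute timeout. Assuming the repository's usual pytest setup, it can also be invoked directly by its node ID, accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3], on a machine with at least an 8-rank MPI world size; the skip_less_mpi_world_size(8) marker skips it otherwise.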
