
Commit ab18622

add qwen3 eagle test
Signed-off-by: Ivy Zhang <[email protected]>
1 parent e1e1f22 commit ab18622

File tree

5 files changed: +29 −9 lines changed


tests/integration/defs/accuracy/references/gpqa_diamond.yaml

Lines changed: 0 additions & 2 deletions
@@ -1,7 +1,5 @@
 meta-llama/Llama-3.3-70B-Instruct:
   - accuracy: 45.96
-  - spec_dec_algo: Eagle
-    accuracy: 45.96
   - quant_algo: NVFP4
     kv_cache_quant_algo: FP8
     accuracy: 45.55

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 0 additions & 2 deletions
@@ -9,8 +9,6 @@ meta-llama/Llama-3.1-8B-Instruct:
     accuracy: 72.85
 meta-llama/Llama-3.3-70B-Instruct:
   - accuracy: 83.78
-  - spec_dec_algo: Eagle
-    accuracy: 83.78
   - quant_algo: NVFP4
     kv_cache_quant_algo: FP8
     accuracy: 75.61
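
Both reference files (gpqa_diamond.yaml above and gsm8k.yaml here) drop the standalone spec_dec_algo: Eagle entry for Llama-3.3-70B-Instruct, whose accuracy duplicated the base entry anyway. For orientation, here is a minimal sketch of how such a reference file could be queried, assuming a simple match-on-extra-fields convention; the function and its behavior are illustrative assumptions, not the repo's actual accuracy harness:

import yaml

def lookup_reference(path: str, model: str, **specs):
    """Illustrative lookup: return the accuracy of the first entry whose
    extra fields (e.g. quant_algo, spec_dec_algo) all match `specs`."""
    with open(path) as f:
        refs = yaml.safe_load(f)
    for entry in refs.get(model, []):
        extra = {k: v for k, v in entry.items() if k != "accuracy"}
        if extra == specs:
            return entry["accuracy"]
    return None

# e.g. lookup_reference("gsm8k.yaml", "meta-llama/Llama-3.3-70B-Instruct",
#                       quant_algo="NVFP4", kv_cache_quant_algo="FP8")
# would return 75.61 against the file as patched above.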

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 24 additions & 1 deletion
@@ -229,6 +229,29 @@ def test_fp8_beam_search(self):
                       sampling_params=sampling_params,
                       extra_acc_spec="beam_width=4")

+    @skip_pre_hopper
+    def test_eagle3(self):
+        pytorch_config = dict(
+            disable_overlap_scheduler=True,
+            cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
+        )
+        kv_cache_config = KvCacheConfig(enable_block_reuse=False)
+
+        eagle_model_dir = f"{llm_models_root()}/EAGLE3-LLaMA3.1-Instruct-8B"
+        target_model_dir = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"
+
+        draft_len = 4
+        spec_config = EagleDecodingConfig(max_draft_len=draft_len,
+                                          speculative_model_dir=eagle_model_dir)
+
+        with LLM(model=target_model_dir,
+                 **pytorch_config,
+                 kv_cache_config=kv_cache_config,
+                 speculative_config=spec_config,
+                 build_config=None) as llm:
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
     @skip_pre_hopper
     def test_ngram(self):
         pytorch_config = dict(
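
The new test exercises EAGLE-3 speculative decoding through the PyTorch LLM API and scores it with MMLU. Outside the test harness, a standalone run could look roughly like the sketch below; the model paths and prompt are placeholders, the import locations are assumed to be the public llmapi namespace, and the keyword arguments simply mirror the test above:

from tensorrt_llm.llmapi import (LLM, CudaGraphConfig, EagleDecodingConfig,
                                 KvCacheConfig)

# Placeholder paths: a target model plus a matching EAGLE-3 draft head.
spec_config = EagleDecodingConfig(
    max_draft_len=4,  # draft tokens proposed per decoding step
    speculative_model_dir="/models/EAGLE3-LLaMA3.1-Instruct-8B")

llm = LLM(model="/models/Llama-3.1-8B-Instruct",
          disable_overlap_scheduler=True,
          cuda_graph_config=CudaGraphConfig(batch_sizes=[1]),
          kv_cache_config=KvCacheConfig(enable_block_reuse=False),
          speculative_config=spec_config)

for output in llm.generate(["The capital of France is"]):
    print(output.outputs[0].text)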
@@ -341,7 +364,7 @@ def test_auto_dtype_tp8(self):
             extra_evaluator_kwargs=dict(apply_chat_template=True))

     @pytest.mark.skip_less_mpi_world_size(8)
-    @pytest.mark.parametrize("eagle3_one_model", [True, False])
+    @parametrize_with_ids("eagle3_one_model", [True, False])
     def test_eagle3_tp8(self, eagle3_one_model):
         model_path = f"{llm_models_root()}/llama-3.3-models/Llama-3.3-70B-Instruct"
         eagle_model_dir = f"{llm_models_root()}/EAGLE3-LLaMA3.3-Instruct-70B"
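
Swapping @pytest.mark.parametrize for the repo's parametrize_with_ids helper changes the generated test IDs from bare values to name=value form, which is what the renamed entries in examples_test_list.txt below rely on. A hypothetical sketch of such a wrapper, assuming it simply derives pytest ids from the argument names (the real helper lives in the repo's test utilities and may differ):

import pytest

def parametrize_with_ids(argnames, argvalues):
    """Hypothetical sketch: parametrize with 'name=value' test IDs."""
    names = [n.strip() for n in argnames.split(",")]
    def id_for(values):
        if len(names) == 1:
            values = (values,)
        return "-".join(f"{n}={v}" for n, v in zip(names, values))
    return pytest.mark.parametrize(argnames, argvalues,
                                   ids=[id_for(v) for v in argvalues])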

tests/integration/defs/examples/test_llama.py

Lines changed: 1 addition & 2 deletions
@@ -4069,8 +4069,7 @@ def test_llm_api_lookahead_decoding_1gpu(model_name, model_path):
     """
     from defs.conftest import llm_models_root

-    from tensorrt_llm._tensorrt_engine import LLM
-    from tensorrt_llm.llmapi import (BuildConfig, KvCacheConfig,
+    from tensorrt_llm.llmapi import (LLM, BuildConfig, KvCacheConfig,
                                      LookaheadDecodingConfig, SamplingParams)
     build_config = BuildConfig(max_batch_size=128,
                                max_input_len=2048,
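
The lookahead-decoding example now imports LLM from the public tensorrt_llm.llmapi namespace rather than the private tensorrt_llm._tensorrt_engine module, folding all four names into one import. Code that has to run against both layouts could fall back defensively; a small sketch (this commit itself does not add such a shim):

try:
    # Public location used by this commit.
    from tensorrt_llm.llmapi import LLM
except ImportError:
    # Older private location, kept only as a fallback.
    from tensorrt_llm._tensorrt_engine import LLM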

tests/integration/test_lists/qa/examples_test_list.txt

Lines changed: 4 additions & 2 deletions
@@ -441,15 +441,16 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_chunked_prefill[attn_backend=TRTLLM]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_llm_sampler
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_fp8_beam_search
+accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_eagle3
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_ngram
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[xgrammar]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[xgrammar]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[llguidance]
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3_tp8[one_model]
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3_tp8[two_model]
+accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3_tp8[eagle3_one_model=True]
+accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3_tp8[eagle3_one_model=False]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]

@@ -497,6 +498,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[throughput_laten
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_fp8[latency]
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass]
 accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm]
+accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3]
 accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestBielik11BInstruct::test_fp8
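
The list additions make the new cases visible to the QA runner; any single entry can also be executed directly by its pytest node ID. A minimal invocation sketch using pytest's Python entry point, assuming it is run from the tests/integration/defs directory so the relative path in the list resolves:

import pytest

# Run the newly listed EAGLE-3 accuracy case by its pytest node ID.
raise SystemExit(pytest.main([
    "accuracy/test_llm_api_pytorch.py"
    "::TestLlama3_1_8BInstruct::test_eagle3",
]))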
