
Commit 42f7458

Ivy Zhang committed

add llama4 maverick eagle3 accuracy case

Signed-off-by: Ivy Zhang <[email protected]>
1 parent 91376fe commit 42f7458

3 files changed, 31 insertions(+), 2 deletions(-)

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 4 additions & 0 deletions
@@ -68,6 +68,10 @@ meta-llama/Llama-3.3-70B-Instruct:
     accuracy: 81.02
 meta-llama/Llama-4-Maverick-17B-128E-Instruct:
   - accuracy: 86.40
+  - quant_algo: FP8
+    kv_cache_quant_algo: FP8
+    spec_dec_algo: Eagle3
+    accuracy: 86.40
 meta-llama/Llama-4-Scout-17B-16E-Instruct:
   - accuracy: 80.00
   - quant_algo: NVFP4
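
For context, this is how the accuracy harness could look up the new row; a minimal sketch, assuming the references file maps each model name to a list of entries that are matched on their optional quantization/speculation fields. The helper name pick_reference is hypothetical, not the repository's actual API.

# Hypothetical illustration only; pick_reference is a placeholder,
# not the real harness helper.
import yaml

def pick_reference(entries, quant_algo=None, spec_dec_algo=None):
    # Return the first entry whose optional fields match the run config;
    # entries lacking a field default to None (the unquantized baseline).
    for entry in entries:
        if (entry.get("quant_algo") == quant_algo
                and entry.get("spec_dec_algo") == spec_dec_algo):
            return entry
    return None

# Path assumes the repository root as the working directory.
with open("tests/integration/defs/accuracy/references/mmlu.yaml") as f:
    refs = yaml.safe_load(f)

entries = refs["meta-llama/Llama-4-Maverick-17B-128E-Instruct"]
ref = pick_reference(entries, quant_algo="FP8", spec_dec_algo="Eagle3")
assert ref is not None and ref["accuracy"] == 86.40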

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 24 additions & 0 deletions
@@ -464,6 +464,30 @@ def test_chunked_prefill(self, attn_backend):
         task = MMLU(self.MODEL_NAME)
         task.evaluate(llm)
 
+    @skip_pre_hopper
+    @pytest.mark.skip_less_mpi_world_size(8)
+    @pytest.mark.parametrize("tp_size,pp_size,ep_size", [(8, 1, 1)],
+                             ids=["tp8"])
+    def test_fp8_eagle3(self, cuda_graph, tp_size, pp_size, ep_size):
+        model_path = f"{llm_models_root()}/llama4-models/nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8"
+        eagle_model_dir = f"{llm_models_root()}/Llama-4-Maverick-17B-128E-Eagle3"
+        spec_config = EagleDecodingConfig(max_draft_len=3,
+                                          speculative_model_dir=eagle_model_dir)
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        pytorch_config = dict(
+            disable_overlap_scheduler=not cuda_graph,
+            cuda_graph_config=CudaGraphConfig(max_batch_size=8),
+            enable_attention_dp=False)
+        with LLM(model_path,
+                 kv_cache_config=kv_cache_config,
+                 tensor_parallel_size=tp_size,
+                 pipeline_parallel_size=pp_size,
+                 moe_expert_parallel_size=ep_size,
+                 **pytorch_config,
+                 speculative_config=spec_config) as llm:
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+
 
 class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
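
Outside the pytest harness, the same target/draft pairing boils down to a handful of LLM-API arguments. A minimal standalone sketch, assuming the classes import from tensorrt_llm / tensorrt_llm.llmapi as the test module uses them; the /models/... paths are placeholders for the checkpoints the test resolves via llm_models_root().

# Sketch only, not part of the commit; paths are placeholders.
from tensorrt_llm import LLM
from tensorrt_llm.llmapi import (CudaGraphConfig, EagleDecodingConfig,
                                 KvCacheConfig)

# Eagle3 draft model: propose up to 3 tokens per target forward pass.
spec_config = EagleDecodingConfig(
    max_draft_len=3,
    speculative_model_dir="/models/Llama-4-Maverick-17B-128E-Eagle3")

llm = LLM("/models/Llama-4-Maverick-17B-128E-Instruct-FP8",
          tensor_parallel_size=8,
          kv_cache_config=KvCacheConfig(free_gpu_memory_fraction=0.75),
          cuda_graph_config=CudaGraphConfig(max_batch_size=8),
          speculative_config=spec_config)

# Accepted draft tokens come back through the normal generate() path.
print(llm.generate(["The capital of France is"])[0].outputs[0].text)

Note how the test ties disable_overlap_scheduler to the cuda_graph fixture: the overlap scheduler is only exercised when CUDA graphs are enabled.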

tests/integration/test_lists/qa/examples_test_list.txt

Lines changed: 3 additions & 2 deletions
@@ -450,15 +450,16 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding_4gpus[llguidance]
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3[one_model]
-accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3[two_model]
+accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3_tp8[one_model]
+accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_eagle3_tp8[two_model]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_chunked_prefill[attn_backend=FLASHINFER]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_chunked_prefill[attn_backend=TRTLLM]
+accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8-cuda_graph=False]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
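
The new list entry can also be exercised directly; a hypothetical local invocation through pytest's Python API (sketch only — CI consumes this list through its own driver, and the tp8 case needs an 8-GPU node):

# Sketch; the exact flags CI passes are not part of this commit.
import pytest

pytest.main([
    "tests/integration/defs/accuracy/test_llm_api_pytorch.py"
    "::TestLlama4MaverickInstruct::test_fp8_eagle3[tp8]",
    "-v",
])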
