@@ -408,7 +408,7 @@ def test_fp8_eagle3_tp8(self, eagle3_one_model):
408408 @pytest .mark .skip_less_device (4 )
409409 @skip_pre_hopper
410410 def test_fp8_tp4 (self ):
411- model_path = f"{ llm_models_root ()} /modelopt-hf-model-hub /Llama-3.3-70B-Instruct-fp8 "
411+ model_path = f"{ llm_models_root ()} /llama-3.3-models /Llama-3.3-70B-Instruct-FP8 "
412412 kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.5 )
413413 with LLM (model_path ,
414414 tensor_parallel_size = 4 ,
@@ -417,6 +417,7 @@ def test_fp8_tp4(self):
417417 kv_cache_config = kv_cache_config ) as llm :
418418 assert llm .args .quant_config .quant_algo == QuantAlgo .FP8
419419 sampling_params = SamplingParams (
420+ max_tokens = 256 ,
420421 temperature = 0.0 ,
421422 add_special_tokens = False ,
422423 )
@@ -426,16 +427,20 @@ def test_fp8_tp4(self):
426427 task .evaluate (llm , sampling_params = sampling_params )
427428 task = GPQADiamond (self .MODEL_NAME )
428429 task .evaluate (llm ,
429- sampling_params = sampling_params ,
430430 extra_evaluator_kwargs = dict (apply_chat_template = True ))
431431
432432 @pytest .mark .skip_less_device (4 )
433433 @skip_pre_blackwell
434434 def test_nvfp4_tp4 (self ):
435- model_path = f"{ llm_models_root ()} /modelopt-hf-model-hub/Llama-3.3-70B-Instruct-fp4"
436- with LLM (model_path , tensor_parallel_size = 4 ) as llm :
435+ model_path = f"{ llm_models_root ()} /llama-3.3-models/Llama-3.3-70B-Instruct-FP4"
436+ kv_cache_config = KvCacheConfig (free_gpu_memory_fraction = 0.5 )
437+ with LLM (model_path ,
438+ tensor_parallel_size = 4 ,
439+ max_batch_size = 32 ,
440+ kv_cache_config = kv_cache_config ) as llm :
437441 assert llm .args .quant_config .quant_algo == QuantAlgo .NVFP4
438442 sampling_params = SamplingParams (
443+ max_tokens = 256 ,
439444 temperature = 0.0 ,
440445 add_special_tokens = False ,
441446 )
@@ -445,7 +450,6 @@ def test_nvfp4_tp4(self):
445450 task .evaluate (llm , sampling_params = sampling_params )
446451 task = GPQADiamond (self .MODEL_NAME )
447452 task .evaluate (llm ,
448- sampling_params = sampling_params ,
449453 extra_evaluator_kwargs = dict (apply_chat_template = True ))
450454
451455
0 commit comments