diff --git a/tests/integration/defs/accuracy/references/cnn_dailymail.yaml b/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
index dbf2be50f39..a9ad9a5da81 100644
--- a/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
+++ b/tests/integration/defs/accuracy/references/cnn_dailymail.yaml
@@ -322,6 +322,8 @@ Qwen/Qwen2.5-7B-Instruct:
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
     accuracy: 33.248
+Qwen/QwQ-32B:
+  - accuracy: 30.358
 nvidia/Nemotron-Mini-4B-Instruct:
   - quant_algo: FP8
     accuracy: 25.247
diff --git a/tests/integration/defs/accuracy/references/mmlu.yaml b/tests/integration/defs/accuracy/references/mmlu.yaml
index 05816c0613d..8d84ebda779 100644
--- a/tests/integration/defs/accuracy/references/mmlu.yaml
+++ b/tests/integration/defs/accuracy/references/mmlu.yaml
@@ -141,6 +141,8 @@ Qwen/Qwen2.5-7B-Instruct:
   - quant_algo: FP8
     kv_cache_quant_algo: FP8
     accuracy: 75.32
+Qwen/QwQ-32B:
+  - accuracy: 82.60
 deepseek-ai/DeepSeek-V3-Lite:
   - accuracy: 71.40
   - quant_algo: NVFP4
diff --git a/tests/integration/defs/accuracy/test_llm_api_pytorch.py b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
index cce443bf7ba..8573cf1f726 100644
--- a/tests/integration/defs/accuracy/test_llm_api_pytorch.py
+++ b/tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -2845,3 +2845,23 @@ def test_auto_dtype(self):
                  kv_cache_config=self.kv_cache_config) as llm:
             task = MMMU(self.MODEL_NAME)
             task.evaluate(llm, sampling_params=self.sampling_params)
+
+
+class TestQwQ_32B(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "Qwen/QwQ-32B"
+    MODEL_PATH = f"{llm_models_root()}/QwQ-32B"
+
+    @pytest.mark.skip_less_device_memory(80000)
+    @pytest.mark.skip_less_device(4)
+    def test_auto_dtype_tp4(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
+
+        with LLM(self.MODEL_PATH,
+                 max_num_tokens=16384,
+                 kv_cache_config=kv_cache_config,
+                 tensor_parallel_size=4,
+                 max_batch_size=8) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
diff --git a/tests/integration/test_lists/qa/llm_function_nim.txt b/tests/integration/test_lists/qa/llm_function_nim.txt
index 90b6406806b..71bd82043b9 100644
--- a/tests/integration/test_lists/qa/llm_function_nim.txt
+++ b/tests/integration/test_lists/qa/llm_function_nim.txt
@@ -21,6 +21,7 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cu
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4
 accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
 accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
 accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype