Merged
2 changes: 2 additions & 0 deletions tests/integration/defs/accuracy/references/cnn_dailymail.yaml
@@ -322,6 +322,8 @@ Qwen/Qwen2.5-7B-Instruct:
 - quant_algo: FP8
   kv_cache_quant_algo: FP8
   accuracy: 33.248
+Qwen/QwQ-32B:
+- accuracy: 30.358
 nvidia/Nemotron-Mini-4B-Instruct:
 - quant_algo: FP8
   accuracy: 25.247
2 changes: 2 additions & 0 deletions tests/integration/defs/accuracy/references/mmlu.yaml
@@ -141,6 +141,8 @@ Qwen/Qwen2.5-7B-Instruct:
 - quant_algo: FP8
   kv_cache_quant_algo: FP8
   accuracy: 75.32
+Qwen/QwQ-32B:
+- accuracy: 82.60
 deepseek-ai/DeepSeek-V3-Lite:
 - accuracy: 71.40
 - quant_algo: NVFP4
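For context on these reference files: each top-level key is a model ID mapped to a list of entries, where optional quant_algo/kv_cache_quant_algo fields identify the quantization configuration and accuracy is the expected score for that configuration. Below is a minimal sketch of how such an entry could be looked up; this is illustrative only, not the harness's actual resolution code.

```python
# Hypothetical lookup mirroring the reference-file layout above.
# The real resolution logic lives in the accuracy test harness.
import yaml

with open("tests/integration/defs/accuracy/references/mmlu.yaml") as f:
    references = yaml.safe_load(f)

# Entries without a quant_algo key are the unquantized (auto-dtype) baseline.
entries = references["Qwen/QwQ-32B"]
baseline = next(e for e in entries if "quant_algo" not in e)
assert baseline["accuracy"] == 82.60
```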
20 changes: 20 additions & 0 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -2845,3 +2845,23 @@ def test_auto_dtype(self):
                  kv_cache_config=self.kv_cache_config) as llm:
             task = MMMU(self.MODEL_NAME)
             task.evaluate(llm, sampling_params=self.sampling_params)
+
+
+class TestQwQ_32B(LlmapiAccuracyTestHarness):
+    MODEL_NAME = "Qwen/QwQ-32B"
+    MODEL_PATH = f"{llm_models_root()}/QwQ-32B"
+
+    @pytest.mark.skip_less_device_memory(80000)
+    @pytest.mark.skip_less_device(4)
+    def test_auto_dtype_tp4(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)
+
+        with LLM(self.MODEL_PATH,
+                 max_num_tokens=16384,
+                 kv_cache_config=kv_cache_config,
+                 tensor_parallel_size=4,
+                 max_batch_size=8) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
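The new test builds one TP4 engine and drives both accuracy tasks through it. As a rough standalone sketch of the same LLM API configuration outside the harness (assuming a tensorrt_llm install, 4 GPUs with at least 80 GB each, and a local QwQ-32B checkpoint; the prompt and output handling are placeholders, not part of this PR):

```python
# Sketch only: mirrors the test's engine configuration outside the harness.
from tensorrt_llm import LLM, SamplingParams
from tensorrt_llm.llmapi import KvCacheConfig

# Same settings as the test: half of free GPU memory for the KV cache,
# 4-way tensor parallelism, modest batch size.
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.5)

with LLM("Qwen/QwQ-32B",  # or a local checkpoint path
         max_num_tokens=16384,
         kv_cache_config=kv_cache_config,
         tensor_parallel_size=4,
         max_batch_size=8) as llm:
    outputs = llm.generate(["Briefly explain tensor parallelism."],
                           SamplingParams(max_tokens=64))
    print(outputs[0].outputs[0].text)
```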
1 change: 1 addition & 0 deletions tests/integration/test_lists/qa/llm_function_nim.txt
@@ -21,6 +21,7 @@ accuracy/test_llm_api_pytorch.py::TestNemotronH_56B_Base::test_auto_dtype[tp8-cu
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestNemotronUltra::test_fp8_prequantized[tp8-cuda_graph=True]
+accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4
 accuracy/test_llm_api.py::TestStarCoder2_7B::test_auto_dtype
 accuracy/test_llm_api.py::TestStarCoder2_7B::test_fp8
 accuracy/test_llm_api.py::TestCodestral_22B_V01::test_auto_dtype
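Each line in this QA list is a pytest node ID, so the new entry can also be run directly. A possible invocation, assuming the tests/integration/defs working directory:

```python
# Illustrative: run the new QA list entry programmatically; equivalent to
# passing the node ID to pytest on the command line.
import pytest

exit_code = pytest.main(
    ["accuracy/test_llm_api_pytorch.py::TestQwQ_32B::test_auto_dtype_tp4"])
print(exit_code)
```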