Skip to content

Commit d101a6c

Browse files
authored
[https://nvbugs/5410279][test] resubmit timeout refactor (#6337)
Signed-off-by: Ivy Zhang <[email protected]>
1 parent 7cbe30e commit d101a6c

File tree

11 files changed: +611 additions, −231 deletions

tests/integration/defs/accuracy/accuracy_core.py

Lines changed: 53 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -701,26 +701,59 @@ def run(self,
701701
extra_build_args: Optional[list] = None,
702702
extra_summarize_args: Optional[list] = None,
703703
extra_eval_long_context_args: Optional[list] = None,
704-
env: Optional[Dict[str, str]] = None):
705-
self.install_requirements()
706-
self.initialize_case(
707-
tasks=tasks,
708-
dtype=dtype,
709-
quant_algo=quant_algo,
710-
kv_cache_quant_algo=kv_cache_quant_algo,
711-
spec_dec_algo=spec_dec_algo,
712-
extra_acc_spec=extra_acc_spec,
713-
tp_size=tp_size,
714-
pp_size=pp_size,
715-
cp_size=cp_size,
716-
extra_convert_args=extra_convert_args,
717-
extra_build_args=extra_build_args,
718-
extra_summarize_args=extra_summarize_args,
719-
extra_eval_long_context_args=extra_eval_long_context_args,
720-
env=env)
721-
self.convert()
722-
self.build()
723-
self.evaluate()
704+
env: Optional[Dict[str, str]] = None,
705+
timeout_manager=None):
706+
"""
707+
Run all accuracy test phases with timeout management.
708+
If timeout_manager is provided, each phase will be wrapped to track and deduct remaining timeout.
709+
"""
710+
# Use timeout_manager to manage timeout for each phase
711+
if timeout_manager is not None:
712+
with timeout_manager.timed_operation("install_requirements"):
713+
self.install_requirements()
714+
with timeout_manager.timed_operation("initialize_case"):
715+
self.initialize_case(
716+
tasks=tasks,
717+
dtype=dtype,
718+
quant_algo=quant_algo,
719+
kv_cache_quant_algo=kv_cache_quant_algo,
720+
spec_dec_algo=spec_dec_algo,
721+
extra_acc_spec=extra_acc_spec,
722+
tp_size=tp_size,
723+
pp_size=pp_size,
724+
cp_size=cp_size,
725+
extra_convert_args=extra_convert_args,
726+
extra_build_args=extra_build_args,
727+
extra_summarize_args=extra_summarize_args,
728+
extra_eval_long_context_args=extra_eval_long_context_args,
729+
env=env)
730+
with timeout_manager.timed_operation("convert"):
731+
self.convert()
732+
with timeout_manager.timed_operation("build"):
733+
self.build()
734+
with timeout_manager.timed_operation("evaluate"):
735+
self.evaluate()
736+
else:
737+
# fallback: no timeout management
738+
self.install_requirements()
739+
self.initialize_case(
740+
tasks=tasks,
741+
dtype=dtype,
742+
quant_algo=quant_algo,
743+
kv_cache_quant_algo=kv_cache_quant_algo,
744+
spec_dec_algo=spec_dec_algo,
745+
extra_acc_spec=extra_acc_spec,
746+
tp_size=tp_size,
747+
pp_size=pp_size,
748+
cp_size=cp_size,
749+
extra_convert_args=extra_convert_args,
750+
extra_build_args=extra_build_args,
751+
extra_summarize_args=extra_summarize_args,
752+
extra_eval_long_context_args=extra_eval_long_context_args,
753+
env=env)
754+
self.convert()
755+
self.build()
756+
self.evaluate()
724757

725758

726759
class LlmapiAccuracyTestHarness:

tests/integration/defs/accuracy/test_cli_flow.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1167,14 +1167,15 @@ class TestMixtral8x22B(CliFlowAccuracyTestHarness):
11671167
@skip_pre_ada
11681168
@pytest.mark.skip_less_device(4)
11691169
@pytest.mark.skip_less_device_memory(80000)
1170-
def test_fp8_tp2pp2(self):
1170+
def test_fp8_tp2pp2(self, timeout_manager):
11711171
self.run(tasks=[CnnDailymail(self.MODEL_NAME),
11721172
MMLU(self.MODEL_NAME)],
11731173
quant_algo=QuantAlgo.FP8,
11741174
tp_size=2,
11751175
pp_size=2,
11761176
extra_convert_args=["--calib_size=32"],
1177-
extra_build_args=["--gemm_plugin=auto"])
1177+
extra_build_args=["--gemm_plugin=auto"],
1178+
timeout_manager=timeout_manager)
11781179

11791180
@skip_post_blackwell
11801181
@pytest.mark.skip_less_device(8)
@@ -1184,7 +1185,8 @@ def test_fp8_tp2pp2(self):
11841185
ids=['expert_parallel', 'mixed_parallel', 'tensor_parallel'])
11851186
@pytest.mark.parametrize("moe_renorm_mode", [0, 1],
11861187
ids=['no_renormalize', 'renormalize'])
1187-
def test_int8_plugin_tp8(self, moe_tp_size, moe_renorm_mode):
1188+
def test_int8_plugin_tp8(self, moe_tp_size, moe_renorm_mode,
1189+
timeout_manager):
11881190
self.run(quant_algo=QuantAlgo.W8A16,
11891191
tp_size=8,
11901192
extra_convert_args=[
@@ -1195,7 +1197,8 @@ def test_int8_plugin_tp8(self, moe_tp_size, moe_renorm_mode):
11951197
extra_build_args=[
11961198
"--max_beam_width=4", "--gemm_plugin=auto",
11971199
"--moe_plugin=auto", f"--max_seq_len={8192}"
1198-
])
1200+
],
1201+
timeout_manager=timeout_manager)
11991202

12001203

12011204
class TestGemma2B(CliFlowAccuracyTestHarness):

tests/integration/defs/common.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def _war_check_output(*args, **kwargs):
4444
return venv.run_cmd(cmd, caller=_war_check_output, env=env, **kwargs)
4545

4646

47-
def venv_mpi_check_call(venv, mpi_cmd, python_cmd):
47+
def venv_mpi_check_call(venv, mpi_cmd, python_cmd, **kwargs):
4848
"""
4949
This function WAR check_call() to run python_cmd with mpi.
5050
If mpi_cmd = ["mpirun", "-n", "2"] and python_cmd = ["run.py"], the command will be:
@@ -61,10 +61,10 @@ def _war_check_call(*args, **kwargs):
6161
kwargs["cwd"] = venv.get_working_directory()
6262
return check_call(merged_cmd, **kwargs)
6363

64-
venv.run_cmd(python_cmd, caller=_war_check_call)
64+
venv.run_cmd(python_cmd, caller=_war_check_call, **kwargs)
6565

6666

67-
def venv_mpi_check_output(venv, mpi_cmd, python_cmd, env=None):
67+
def venv_mpi_check_output(venv, mpi_cmd, python_cmd, env=None, **kwargs):
6868
"""
6969
This function WAR check_output() to run python_cmd with mpi.
7070
If mpi_cmd = ["mpirun", "-n", "2"] and python_cmd = ["run.py"], the command will be:
@@ -81,7 +81,7 @@ def _war_check_output(*args, **kwargs):
8181
kwargs["cwd"] = venv.get_working_directory()
8282
return check_output(merged_cmd, **kwargs)
8383

84-
return venv.run_cmd(python_cmd, caller=_war_check_output, env=env)
84+
return venv.run_cmd(python_cmd, caller=_war_check_output, env=env, **kwargs)
8585

8686

8787
def parse_mpi_cmd(cmd):
@@ -506,6 +506,7 @@ def convert_weights(llm_venv,
506506
convert_cmd.append(f"--quant_ckpt_path={quant_ckpt_path}")
507507
if per_group:
508508
convert_cmd.append("--per_group")
509+
timeout = kwargs.pop('timeout', None)
509510

510511
for key, value in kwargs.items():
511512
if isinstance(value, bool):
@@ -515,7 +516,7 @@ def convert_weights(llm_venv,
515516
convert_cmd.extend([f"--{key}={value}"])
516517

517518
if llm_venv:
518-
venv_check_call(llm_venv, convert_cmd)
519+
venv_check_call(llm_venv, convert_cmd, timeout=timeout)
519520
return model_dir
520521
else:
521522
return convert_cmd, model_dir
@@ -607,6 +608,7 @@ def quantize_data(llm_venv,
607608

608609
if kv_cache_dtype:
609610
quantize_cmd.append(f"--kv_cache_dtype={kv_cache_dtype}")
611+
timeout = kwargs.pop('timeout', None)
610612

611613
for key, value in kwargs.items():
612614
if isinstance(value, bool):
@@ -617,7 +619,7 @@ def quantize_data(llm_venv,
617619

618620
if llm_venv:
619621
if not exists(output_dir):
620-
venv_check_call(llm_venv, quantize_cmd)
622+
venv_check_call(llm_venv, quantize_cmd, timeout=timeout)
621623
return output_dir
622624
else:
623625
return quantize_cmd, output_dir

tests/integration/defs/conftest.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2351,3 +2351,38 @@ def tritonserver_test_root(llm_root):
23512351
"tests/integration/defs/triton_server")
23522352

23532353
return tritonserver_root
2354+
2355+
2356+
@pytest.fixture
2357+
def timeout_from_marker(request):
2358+
"""Get timeout value from pytest timeout marker."""
2359+
timeout_marker = request.node.get_closest_marker('timeout')
2360+
if timeout_marker:
2361+
return timeout_marker.args[0] if timeout_marker.args else None
2362+
return None
2363+
2364+
2365+
@pytest.fixture
2366+
def timeout_from_command_line(request):
2367+
"""Get timeout value from command line --timeout parameter."""
2368+
# Get timeout from command line argument
2369+
timeout_arg = request.config.getoption("--timeout", default=None)
2370+
if timeout_arg is not None:
2371+
return float(timeout_arg)
2372+
return None
2373+
2374+
2375+
@pytest.fixture
2376+
def timeout_manager(timeout_from_command_line, timeout_from_marker):
2377+
"""Create a TimeoutManager instance with priority: marker > cmdline > config."""
2378+
from defs.utils.timeout_manager import TimeoutManager
2379+
2380+
# Priority: marker > command line
2381+
timeout_value = None
2382+
2383+
if timeout_from_marker is not None:
2384+
timeout_value = timeout_from_marker
2385+
elif timeout_from_command_line is not None:
2386+
timeout_value = timeout_from_command_line
2387+
2388+
return TimeoutManager(timeout_value)

tests/integration/defs/examples/test_commandr.py

Lines changed: 39 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -94,22 +94,27 @@ def test_llm_commandr_plus_4gpus_summary(commandr_example_root,
9494
llm_commandr_plus_model_root,
9595
llm_datasets_root, llm_rouge_root,
9696
llm_venv, cmodel_dir, engine_dir,
97-
use_weight_only):
97+
use_weight_only, timeout_manager):
9898
"Build & run Command-R+ with smoothquant on 4 gpus."
9999
dtype = 'float16'
100100
tp_size = 4
101101
model_name = os.path.basename(llm_commandr_plus_model_root)
102-
print("Converting checkpoint...")
103-
ckpt_dir = convert_weights(llm_venv=llm_venv,
104-
example_root=commandr_example_root,
105-
cmodel_dir=cmodel_dir,
106-
model=model_name,
107-
model_path=llm_commandr_plus_model_root,
108-
data_type=dtype,
109-
tp_size=tp_size,
110-
gpus=tp_size,
111-
use_weight_only=use_weight_only)
112102

103+
# Convert checkpoint with timeout management
104+
print("Converting checkpoint...")
105+
with timeout_manager.timed_operation("convert"):
106+
ckpt_dir = convert_weights(llm_venv=llm_venv,
107+
example_root=commandr_example_root,
108+
cmodel_dir=cmodel_dir,
109+
model=model_name,
110+
model_path=llm_commandr_plus_model_root,
111+
data_type=dtype,
112+
tp_size=tp_size,
113+
gpus=tp_size,
114+
use_weight_only=use_weight_only,
115+
timeout=timeout_manager.remaining_timeout)
116+
117+
# Build engines with timeout management
113118
print("Building engines...")
114119
build_cmd = [
115120
"trtllm-build",
@@ -130,12 +135,23 @@ def test_llm_commandr_plus_4gpus_summary(commandr_example_root,
130135
f"--engine_dir={engine_dir}",
131136
]
132137

133-
check_call(" ".join(build_cmd), shell=True, env=llm_venv._new_env)
134-
135-
venv_mpi_check_call(
136-
llm_venv,
137-
["mpirun", "-n", str(tp_size), "--allow-run-as-root"], run_cmd)
138-
138+
with timeout_manager.timed_operation("build"):
139+
check_call(" ".join(build_cmd),
140+
shell=True,
141+
env=llm_venv._new_env,
142+
timeout=timeout_manager.remaining_timeout)
143+
144+
# Run engines with timeout management
145+
print("Running engines...")
146+
with timeout_manager.timed_operation("run"):
147+
venv_mpi_check_call(
148+
llm_venv, ["mpirun", "-n",
149+
str(tp_size), "--allow-run-as-root"],
150+
run_cmd,
151+
timeout=timeout_manager.remaining_timeout)
152+
153+
# Run summary with timeout management
154+
print("Running summary...")
139155
summary_cmd = generate_summary_cmd(
140156
commandr_example_root,
141157
hf_model_dir=llm_commandr_plus_model_root,
@@ -144,6 +160,9 @@ def test_llm_commandr_plus_4gpus_summary(commandr_example_root,
144160
dataset_dir=llm_datasets_root,
145161
rouge_dir=llm_rouge_root)
146162

147-
venv_mpi_check_call(
148-
llm_venv,
149-
["mpirun", "-n", str(tp_size), "--allow-run-as-root"], summary_cmd)
163+
with timeout_manager.timed_operation("summary"):
164+
venv_mpi_check_call(
165+
llm_venv, ["mpirun", "-n",
166+
str(tp_size), "--allow-run-as-root"],
167+
summary_cmd,
168+
timeout=timeout_manager.remaining_timeout)

0 commit comments

Comments (0)