Revert unnecessary changes

IzzyPutterman · IzzyPutterman · commit ee12005a5bd0 · 2025-08-25T23:08:08.000-07:00
Signed-off-by: Izzy Putterman <iputterman@nvidia.com>
diff --git a/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py b/tensorrt_llm/_torch/pyexecutor/py_executor_creator.py
@@ -252,7 +252,6 @@ def create_py_executor(
         with mem_monitor.observe_creation_stage(
                 _ExecutorCreationStage.MODEL_ENGINE_DRAFT):
             draft_spec_config = copy.copy(spec_config)
-            draft_spec_config.update_for_draft_init()
             draft_pytorch_backend_config = copy.copy(pytorch_backend_config)
             if spec_config.load_format == "dummy":
                 draft_pytorch_backend_config.load_format = LoadFormat.DUMMY
diff --git a/tensorrt_llm/_torch/speculative/eagle3.py b/tensorrt_llm/_torch/speculative/eagle3.py
@@ -91,7 +91,7 @@ class Eagle3SpecMetadata(SpecMetadata):
 
     def __post_init__(self):
         if self.layers_to_capture is None:
-            if self.num_layers == 1:
+            if self.is_draft_model or self.num_layers == 1:
                 self.layers_to_capture = (self.num_layers - 1, )
             else:
                 if self.num_layers <= 5:
diff --git a/tensorrt_llm/llmapi/llm_args.py b/tensorrt_llm/llmapi/llm_args.py
@@ -391,11 +391,6 @@ def validate(self) -> None:
         Do any additional error checking here.
         """
 
-    def update_for_draft_init(self):
-        """
-        Update the config for draft model initialization.
-        """
-
     @functools.cached_property
     def spec_dec_mode(self):
         # spec_dec_mode has more functionality than the raw decoding_mode string.
@@ -450,7 +445,7 @@ def spec_dec_mode(self):
             return TorchSpeculativeDecodingMode.EAGLE3_ONE_MODEL
         return TorchSpeculativeDecodingMode.EAGLE3
 
-    @property
+    @functools.cached_property
     def num_capture_layers(self):
         """
         Returns the number of layers to capture of the target model.
@@ -461,16 +456,6 @@ def num_capture_layers(self):
             return len(self.eagle3_layers_to_capture)
         return 3
 
-    def update_for_draft_init(self):
-        """
-        Update the config for draft model initialization.
-        """
-        if not self.eagle3_one_model:
-            num_layers = self.num_eagle_layers
-            if num_layers is None:
-                num_layers = 1
-            self.eagle3_layers_to_capture = set(num_layers - 1)
-
 
 class UserProvidedDecodingConfig(DecodingBaseConfig):
     # Cannot use real type annotations due to circular imports