Commit c64b707

update after review
Signed-off-by: Vincent Huang <[email protected]>
1 parent 35ba966

1 file changed (+2, -10 lines)
tensorrt_llm/_torch/models/modeling_deepseekv3.py

Lines changed: 2 additions & 10 deletions
@@ -62,8 +62,7 @@
 from ..modules.rms_norm import RMSNorm
 from ..peft.lora.layer import LoraLayer
 from ..speculative import MTPEagleWorker, MTPSpecMetadata, MTPWorker
-from ..utils import (AuxStreamType, EventType, Fp4QuantizedTensor,
-                     disable_fp4_allgather)
+from ..utils import AuxStreamType, EventType, Fp4QuantizedTensor
 from .modeling_utils import (DecoderModel, DecoderModelForCausalLM,
                              EagerFusionConfig, filter_weights,
                              register_auto_model)
@@ -512,16 +511,9 @@ def compute_routed_output(self, hidden_states, hidden_states_fp4,
         # max-throughput
         use_dp_padding = False
         if self.use_dp and self.mapping.tp_size > 1:
-            # MoE use static heuristic to check alltoall enabled or not, however, for wide_ep, the alltoall could also be dynamically disabled when chunking is used or TRTLLM_DEEP_EP_TOKEN_LIMIT is hit.
-            is_wide_ep_alltoall_disabled = isinstance(
-                self.experts, WideEPMoE) and not self.experts.can_use_alltoall(
-                    hidden_states, all_rank_num_tokens)
-            alltoall_enabled = self.experts.enable_alltoall and not is_wide_ep_alltoall_disabled
-
             # FP4 all_gather moves this bf16 allgather in to after topk and fp4 quantization
             # to reduce allreduce BW
-            if (disable_fp4_allgather() and not alltoall_enabled) or isinstance(
-                    self.experts, TRTLLMGenFusedMoE):
+            if isinstance(self.experts, TRTLLMGenFusedMoE):
                 hidden_states = allgather(hidden_states,
                                           self.mapping,
                                           dim=0,

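For context, a minimal sketch of the control flow this commit leaves behind, assuming stand-in classes and an allgather stub (none of the definitions below are the real TensorRT-LLM code): after the change, only the TRTLLMGenFusedMoE backend still performs the early bf16 allgather, while other MoE backends defer gathering until after top-k routing and FP4 quantization.

# Hypothetical stand-ins for illustration only; the real classes live in
# tensorrt_llm/_torch/modules and the real allgather is a distributed op.
class FusedMoE:
    pass


class TRTLLMGenFusedMoE(FusedMoE):
    pass


def allgather(hidden_states, dim=0, sizes=None):
    # Placeholder: a real implementation would gather shards from all ranks.
    return hidden_states


def routed_output_gather_sketch(experts, hidden_states, use_dp, tp_size,
                                all_rank_num_tokens):
    """Sketch of the simplified gating in compute_routed_output after this commit."""
    if use_dp and tp_size > 1:
        # Only TRTLLMGenFusedMoE still needs the bf16 allgather here; the
        # FP4 path gathers after top-k and FP4 quantization to save bandwidth.
        if isinstance(experts, TRTLLMGenFusedMoE):
            hidden_states = allgather(hidden_states, dim=0,
                                      sizes=all_rank_num_tokens)
    return hidden_states


if __name__ == "__main__":
    # With a generic backend the early allgather is skipped entirely.
    print(routed_output_gather_sketch(FusedMoE(), [1.0, 2.0], True, 2, [2, 2]))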