Commit c64b707

update after review
Signed-off-by: Vincent Huang <[email protected]>
1 parent 35ba966

1 file changed (+2, -10 lines)
tensorrt_llm/_torch/models/modeling_deepseekv3.py

Lines changed: 2 additions & 10 deletions
@@ -62,8 +62,7 @@
 from ..modules.rms_norm import RMSNorm
 from ..peft.lora.layer import LoraLayer
 from ..speculative import MTPEagleWorker, MTPSpecMetadata, MTPWorker
-from ..utils import (AuxStreamType, EventType, Fp4QuantizedTensor,
-                     disable_fp4_allgather)
+from ..utils import AuxStreamType, EventType, Fp4QuantizedTensor
 from .modeling_utils import (DecoderModel, DecoderModelForCausalLM,
                              EagerFusionConfig, filter_weights,
                              register_auto_model)
@@ -512,16 +511,9 @@ def compute_routed_output(self, hidden_states, hidden_states_fp4,
         # max-throughput
         use_dp_padding = False
         if self.use_dp and self.mapping.tp_size > 1:
-            # MoE use static heuristic to check alltoall enabled or not, however, for wide_ep, the alltoall could also be dynamically disabled when chunking is used or TRTLLM_DEEP_EP_TOKEN_LIMIT is hit.
-            is_wide_ep_alltoall_disabled = isinstance(
-                self.experts, WideEPMoE) and not self.experts.can_use_alltoall(
-                    hidden_states, all_rank_num_tokens)
-            alltoall_enabled = self.experts.enable_alltoall and not is_wide_ep_alltoall_disabled
-
             # FP4 all_gather moves this bf16 allgather in to after topk and fp4 quantization
             # to reduce allreduce BW
-            if (disable_fp4_allgather() and not alltoall_enabled) or isinstance(
-                    self.experts, TRTLLMGenFusedMoE):
+            if isinstance(self.experts, TRTLLMGenFusedMoE):
                 hidden_states = allgather(hidden_states,
                                           self.mapping,
                                           dim=0,

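For context, a minimal sketch of the control flow this commit leaves behind, assuming stand-in classes and an allgather stub (none of the definitions below are the real TensorRT-LLM code): after the change, only the TRTLLMGenFusedMoE backend still performs the early bf16 allgather, while other MoE backends defer gathering until after top-k routing and FP4 quantization.

# Hypothetical stand-ins for illustration only; the real classes live in
# tensorrt_llm/_torch/modules and the real allgather is a distributed op.
class FusedMoE:
    pass


class TRTLLMGenFusedMoE(FusedMoE):
    pass


def allgather(hidden_states, dim=0, sizes=None):
    # Placeholder: a real implementation would gather shards from all ranks.
    return hidden_states


def routed_output_gather_sketch(experts, hidden_states, use_dp, tp_size,
                                all_rank_num_tokens):
    """Sketch of the simplified gating in compute_routed_output after this commit."""
    if use_dp and tp_size > 1:
        # Only TRTLLMGenFusedMoE still needs the bf16 allgather here; the
        # FP4 path gathers after top-k and FP4 quantization to save bandwidth.
        if isinstance(experts, TRTLLMGenFusedMoE):
            hidden_states = allgather(hidden_states, dim=0,
                                      sizes=all_rank_num_tokens)
    return hidden_states


if __name__ == "__main__":
    # With a generic backend the early allgather is skipped entirely.
    print(routed_output_gather_sketch(FusedMoE(), [1.0, 2.0], True, 2, [2, 2]))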