diff --git a/deeplink_ext/internlm_ops/mha/mha.py b/deeplink_ext/internlm_ops/mha/mha.py index 70e38492..0c2a4e31 100644 --- a/deeplink_ext/internlm_ops/mha/mha.py +++ b/deeplink_ext/internlm_ops/mha/mha.py @@ -110,17 +110,17 @@ def forward( self.softmax_scale, causal if causal is not None else self.causal, ) - else: - # unpadded - return DeepLinkMultiHeadAttentionVarLenKVPackedFunc.apply( - q, - kv, - cu_seqlens, - cu_seqlens_k, - max_seqlen, - max_seqlen_k, - self.dropout_p if self.training else 0.0, - self.softmax_scale, - causal if causal is not None else self.causal, - False, - ) + # else: + # # unpadded + # return DeepLinkMultiHeadAttentionVarLenKVPackedFunc.apply( + # q, + # kv, + # cu_seqlens, + # cu_seqlens_k, + # max_seqlen, + # max_seqlen_k, + # self.dropout_p if self.training else 0.0, + # self.softmax_scale, + # causal if causal is not None else self.causal, + # False, + # )