From 4c1be55ca2b086e3a650182b3851bdbfff8c6f76 Mon Sep 17 00:00:00 2001
From: zhoushenglong
Date: Wed, 18 Dec 2024 02:08:31 +0000
Subject: [PATCH] fix moe op for dlinfer.

---
 lmdeploy/pytorch/backends/dlinfer/moe.py      | 2 +-
 lmdeploy/pytorch/kernels/dlinfer/fused_moe.py | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/lmdeploy/pytorch/backends/dlinfer/moe.py b/lmdeploy/pytorch/backends/dlinfer/moe.py
index 6ada730fbe..4980166b61 100644
--- a/lmdeploy/pytorch/backends/dlinfer/moe.py
+++ b/lmdeploy/pytorch/backends/dlinfer/moe.py
@@ -48,7 +48,7 @@ def forward(self,
                 expert_list: List[int] = None):
         """forward."""
         return fused_moe(hidden_states, self.top_k, topk_ids, topk_weights,
-                         gate_up_weights, down_weights)
+                         gate_up_weights, down_weights, self.renormalize)
 
 
 class DlinferFusedMoEBuilder(FusedMoEBuilder):
diff --git a/lmdeploy/pytorch/kernels/dlinfer/fused_moe.py b/lmdeploy/pytorch/kernels/dlinfer/fused_moe.py
index 72bab2d720..9ebfd3d8cb 100644
--- a/lmdeploy/pytorch/kernels/dlinfer/fused_moe.py
+++ b/lmdeploy/pytorch/kernels/dlinfer/fused_moe.py
@@ -10,6 +10,7 @@ def fused_moe(
     topk_weights: Tensor,
     gate_up_weights: Tensor,
     down_weights: Tensor,
+    renormalize: bool = False,
 ):
     """ascend fused moe."""
     return ext_ops.fused_moe(hidden_states, top_k, topk_ids, topk_weights,