[fix] Exclude MoE padding from the w2 shape so the correct tuned config is read (#172)

ROCm · Sep 6, 2024 · b3fc9f4 · b3fc9f4
1 parent 8032519
commit b3fc9f4
Showing 1 changed file with 1 addition and 1 deletion.
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -596,7 +596,7 @@ def fused_experts(hidden_states: torch.Tensor,
     get_config_func = functools.partial(
         try_get_optimal_moe_config,
         w1.shape,
-        w2.shape,
+        (w2.shape[0], w2.shape[1], w2.shape[2] - padding_size),
         topk_ids.shape[1],
         config_dtype,
         override_config=override_config,