From f05975cf66c7798b5be0b911cf0c197d2e1dee78 Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Wed, 18 Dec 2024 15:42:34 +0800
Subject: [PATCH] moe new

---
 lmdeploy/lite/apis/calibrate.py   | 2 +-
 lmdeploy/lite/quantization/awq.py | 9 ++++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/lmdeploy/lite/apis/calibrate.py b/lmdeploy/lite/apis/calibrate.py
index c91bfd85d..6093111c1 100644
--- a/lmdeploy/lite/apis/calibrate.py
+++ b/lmdeploy/lite/apis/calibrate.py
@@ -69,7 +69,7 @@
     'MixtralForCausalLM': 'lm_head',
     'Qwen2VLForConditionalGeneration': 'lm_head',
     'MistralForCausalLM': 'lm_head',
-    'InternLM3MoEForCausalLM': 'output',
+    'InternLM3MoEForCausalLM': 'lm_head',
 }
 
 
diff --git a/lmdeploy/lite/quantization/awq.py b/lmdeploy/lite/quantization/awq.py
index cc887960f..df7cae56a 100644
--- a/lmdeploy/lite/quantization/awq.py
+++ b/lmdeploy/lite/quantization/awq.py
@@ -51,8 +51,10 @@
         ['block_sparse_moe.experts.{i}.w1', 'block_sparse_moe.experts.{i}.w3']
     },
     'InternLM3MoEDecoderLayer': {
-        'attention_norm': ['attention.wqkv'],
-        'ffn_norm': ['feed_forward.experts.fused_w1w3']
+        'input_layernorm':
+        ['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
+        'post_attention_layernorm':
+        ['mlp.experts.{i}.w1', 'mlp.experts.{i}.w3']
     },
     'Qwen2VLDecoderLayer': {
         'input_layernorm':
@@ -106,7 +108,8 @@
         'block_sparse_moe.experts.{i}.w3': ['block_sparse_moe.experts.{i}.w2']
     },
     'InternLM3MoEDecoderLayer': {
-        'feed_forward.experts.fused_w1w3': ['feed_forward.experts.w2']
+        'self_attn.v_proj': ['self_attn.o_proj'],
+        'mlp.experts.{i}.w3': ['mlp.experts.{i}.w2']
     },
     'Qwen2VLDecoderLayer': {
         'self_attn.v_proj': ['self_attn.o_proj'],
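
For context: the '{i}' placeholder in the new InternLM3MoEDecoderLayer map
entries marks per-expert linear layers, matching the convention already used
for MixtralDecoderLayer. Below is a minimal, hypothetical sketch of how such
an entry could be expanded into concrete module names before AWQ smoothing;
'expand_expert_fcs' and 'num_experts' are illustrative names, not lmdeploy
APIs.

# Hypothetical sketch only: expands '{i}' placeholders in a norm->linear
# map entry into one module name per expert index.
NORM_FCS_MAP_ENTRY = {
    'input_layernorm':
    ['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
    'post_attention_layernorm':
    ['mlp.experts.{i}.w1', 'mlp.experts.{i}.w3'],
}


def expand_expert_fcs(fc_names, num_experts):
    """Expand '{i}' placeholders into one name per expert.

    Names without a placeholder (e.g. attention projections) are
    returned unchanged.
    """
    expanded = []
    for name in fc_names:
        if '{i}' in name:
            # One concrete module name per expert index.
            expanded.extend(name.format(i=i) for i in range(num_experts))
        else:
            expanded.append(name)
    return expanded


if __name__ == '__main__':
    fcs = expand_expert_fcs(NORM_FCS_MAP_ENTRY['post_attention_layernorm'],
                            num_experts=4)
    print(fcs)
    # ['mlp.experts.0.w1', 'mlp.experts.1.w1', 'mlp.experts.2.w1',
    #  'mlp.experts.3.w1', 'mlp.experts.0.w3', 'mlp.experts.1.w3',
    #  'mlp.experts.2.w3', 'mlp.experts.3.w3']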