Commit f05975c

moe new

AllentDan committed Dec 18, 2024
1 parent: b1c2c0d

Showing 2 changed files with 7 additions and 4 deletions.
lmdeploy/lite/apis/calibrate.py (1 addition, 1 deletion)

@@ -69,7 +69,7 @@
     'MixtralForCausalLM': 'lm_head',
     'Qwen2VLForConditionalGeneration': 'lm_head',
     'MistralForCausalLM': 'lm_head',
-    'InternLM3MoEForCausalLM': 'output',
+    'InternLM3MoEForCausalLM': 'lm_head',
 }
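The calibrate.py hunk corrects the output-head name registered for InternLM3MoE: like the other HF-style architectures in this map, the model exposes its head as lm_head, not output. For context, a map like this is typically consumed by resolving the dotted name to the submodule so the head can be treated specially during calibration. The sketch below is an illustration only; the map name HEAD_NAME_MAP and the helper resolve_head are assumptions, not lmdeploy's exact code.

    from torch import nn

    def resolve_head(model: nn.Module, head_name: str) -> nn.Module:
        # Walk a dotted attribute path such as 'lm_head' or 'output'
        # down to the output-head module. `HEAD_NAME_MAP` is assumed.
        module = model
        for attr in head_name.split('.'):
            module = getattr(module, attr)
        return module

    # Usage: head = resolve_head(model, HEAD_NAME_MAP[type(model).__name__])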
lmdeploy/lite/quantization/awq.py (6 additions, 3 deletions)

@@ -51,8 +51,10 @@
         ['block_sparse_moe.experts.{i}.w1', 'block_sparse_moe.experts.{i}.w3']
     },
     'InternLM3MoEDecoderLayer': {
-        'attention_norm': ['attention.wqkv'],
-        'ffn_norm': ['feed_forward.experts.fused_w1w3']
+        'input_layernorm':
+        ['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
+        'post_attention_layernorm':
+        ['mlp.experts.{i}.w1', 'mlp.experts.{i}.w3']
     },
     'Qwen2VLDecoderLayer': {
         'input_layernorm':

@@ -106,7 +108,8 @@
         'block_sparse_moe.experts.{i}.w3': ['block_sparse_moe.experts.{i}.w2']
     },
     'InternLM3MoEDecoderLayer': {
-        'feed_forward.experts.fused_w1w3': ['feed_forward.experts.w2']
+        'self_attn.v_proj': ['self_attn.o_proj'],
+        'mlp.experts.{i}.w3': ['mlp.experts.{i}.w2']
     },
     'Qwen2VLDecoderLayer': {
         'self_attn.v_proj': ['self_attn.o_proj'],
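In awq.py, these per-layer maps drive AWQ's scale migration: the first hunk edits the norm-to-linears map, which pairs each normalization layer with the linear layers that consume its output, and the second edits the linear-to-linears map, which does the same for producer/consumer linear pairs. The commit switches the InternLM3MoE entries from fused internal module names (attention.wqkv, feed_forward.experts.fused_w1w3) to the per-expert HF-style names, with {i} standing for the expert index. As a rough, self-contained illustration of what such a pairing enables (not lmdeploy's actual implementation), the following sketch migrates per-channel activation scales from a norm into its consumer linears:

    import torch

    @torch.no_grad()
    def smooth_norm_to_fcs(norm, fcs, act_scales, alpha=0.5):
        # Per-input-channel weight magnitudes across all consumer linears.
        w_max = torch.stack([fc.weight.abs().amax(dim=0) for fc in fcs]).amax(dim=0)
        # Balancing factor trading activation range against weight range.
        scales = (act_scales.pow(alpha) / w_max.pow(1 - alpha)).clamp(min=1e-5)
        norm.weight.div_(scales)      # shrink the norm's output channels...
        for fc in fcs:
            fc.weight.mul_(scales)    # ...and compensate in the consumers

The rescaling is exact only when every listed linear reads the norm's output directly, which is why the map must use the real module names of the checkpoint being quantized.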

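One detail worth noting: the {i} placeholders in the expert entries are presumably formatted with each expert index before module lookup, so a single map entry covers every expert. A minimal sketch of that expansion (the helper name expand_expert_pattern is hypothetical):

    def expand_expert_pattern(pattern: str, num_experts: int) -> list[str]:
        # 'mlp.experts.{i}.w1' -> ['mlp.experts.0.w1', 'mlp.experts.1.w1', ...]
        if '{i}' not in pattern:
            return [pattern]
        return [pattern.format(i=i) for i in range(num_experts)]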