InternLM3MoE quant WIP
AllentDan committed Dec 11, 2024
1 parent 14bb81d · commit a827598
Showing 3 changed files with 60 additions and 5 deletions.
lmdeploy/lite/apis/calibrate.py: 16 additions & 4 deletions
@@ -25,7 +25,11 @@
     'MGMLlamaForCausalLM': 'LlamaDecoderLayer',  # mini gemini
     'InternLMXComposer2ForCausalLM': 'InternLM2DecoderLayer',
     'Phi3ForCausalLM': 'Phi3DecoderLayer',
-    'ChatGLMForConditionalGeneration': 'GLMBlock'
+    'ChatGLMForConditionalGeneration': 'GLMBlock',
+    'MixtralForCausalLM': 'MixtralDecoderLayer',
+    'Qwen2VLForConditionalGeneration': 'Qwen2VLDecoderLayer',
+    'MistralForCausalLM': 'MistralDecoderLayer',
+    'InternLM3MoEForCausalLM': 'InternLM3MoEDecoderLayer',
 }
 
 NORM_TYPE_MAP = {
@@ -41,7 +45,11 @@
     'MGMLlamaForCausalLM': 'LlamaRMSNorm',  # mini gemini
     'InternLMXComposer2ForCausalLM': 'InternLM2RMSNorm',
     'Phi3ForCausalLM': 'Phi3RMSNorm',
-    'ChatGLMForConditionalGeneration': 'RMSNorm'
+    'ChatGLMForConditionalGeneration': 'RMSNorm',
+    'MixtralForCausalLM': 'MixtralRMSNorm',
+    'Qwen2VLForConditionalGeneration': 'Qwen2RMSNorm',
+    'MistralForCausalLM': 'MistralRMSNorm',
+    'InternLM3MoEForCausalLM': 'InternLM3MoERMSNorm',
 }
 
 HEAD_NAME_MAP = {
@@ -57,7 +65,11 @@
     'MGMLlamaForCausalLM': 'lm_head',  # mini gemini
     'InternLMXComposer2ForCausalLM': 'output',
     'Phi3ForCausalLM': 'lm_head',
-    'ChatGLMForConditionalGeneration': 'output_layer'
+    'ChatGLMForConditionalGeneration': 'output_layer',
+    'MixtralForCausalLM': 'lm_head',
+    'Qwen2VLForConditionalGeneration': 'lm_head',
+    'MistralForCausalLM': 'lm_head',
+    'InternLM3MoEForCausalLM': 'output',
 }


@@ -185,7 +197,7 @@ def calibrate(model: str,
                                                   trust_remote_code=True)
 
         model = load_hf_from_pretrained(model,
-                                        torch_dtype=torch.float16,
+                                        torch_dtype=torch.bfloat16,
                                         trust_remote_code=True)
         vl_model = None
     elif model_type == 'vlm':
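The three maps above are keyed by the Hugging Face architecture name, i.e. the class name of the loaded model. A minimal lookup sketch for the new InternLM3-MoE entries follows; the first map's name (LAYER_TYPE_MAP) and the plain transformers loader are assumptions for illustration rather than code from this commit, and the checkpoint path is a placeholder:

```python
import torch
from transformers import AutoModelForCausalLM

# NORM_TYPE_MAP and HEAD_NAME_MAP appear in the diff above; LAYER_TYPE_MAP is
# assumed to be the name of the first (decoder-layer) map.
from lmdeploy.lite.apis.calibrate import (HEAD_NAME_MAP, LAYER_TYPE_MAP,
                                          NORM_TYPE_MAP)

model = AutoModelForCausalLM.from_pretrained('path/to/internlm3-moe',  # placeholder
                                             torch_dtype=torch.bfloat16,
                                             trust_remote_code=True)

arch = type(model).__name__           # 'InternLM3MoEForCausalLM'
layer_type = LAYER_TYPE_MAP[arch]     # 'InternLM3MoEDecoderLayer'
norm_type = NORM_TYPE_MAP[arch]       # 'InternLM3MoERMSNorm'
head_name = HEAD_NAME_MAP[arch]       # 'output'
```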
lmdeploy/lite/apis/gptq.py: 5 additions & 0 deletions
@@ -54,6 +54,10 @@ def auto_gptq(model: str,
     SUPPORTED_MODELS.append('internlm2')
     GPTQ_CAUSAL_LM_MODEL_MAP.update(dict(internlm2=InternLM2GPTQForCausalLM))
 
+    from ..modeling.internlm3_moe_gptq import InternLM3MoEGPTQForCausalLM
+    SUPPORTED_MODELS.append('InternLM3_MoE')
+    GPTQ_CAUSAL_LM_MODEL_MAP.update(dict(InternLM3_MoE=InternLM3MoEGPTQForCausalLM))
+
     pretrained_model_dir = model
     quantized_model_dir = work_dir

@@ -85,6 +89,7 @@ def auto_gptq(model: str,
     # the model will always be loaded into CPU memory
     model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir,
                                                 quantize_config,
+                                                torch_dtype=torch.bfloat16,
                                                 revision=revision,
                                                 trust_remote_code=True)

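With 'InternLM3_MoE' registered in GPTQ_CAUSAL_LM_MODEL_MAP, quantization can be driven through the same entry point as the other supported models. A rough usage sketch, assuming only the model and work_dir arguments visible in this diff (both paths are placeholders):

```python
from lmdeploy.lite.apis.gptq import auto_gptq

# auto_gptq registers InternLM3MoEGPTQForCausalLM with AutoGPTQ before
# loading, then writes the quantized checkpoint into work_dir.
auto_gptq(model='path/to/internlm3-moe',    # placeholder checkpoint
          work_dir='./internlm3-moe-gptq')  # placeholder output dir
```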
lmdeploy/lite/quantization/awq.py: 39 additions & 1 deletion
@@ -43,7 +43,29 @@
     'GLMBlock': {
         'input_layernorm': ['self_attention.query_key_value'],
         'post_attention_layernorm': ['mlp.dense_h_to_4h']
-    }
+    },
+    'MixtralDecoderLayer': {
+        'input_layernorm':
+        ['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
+        'post_attention_layernorm':
+        ['block_sparse_moe.experts.{i}.w1', 'block_sparse_moe.experts.{i}.w3']
+    },
+    'InternLM3MoEDecoderLayer': {
+        'attention_norm':
+        ['attention.wqkv'],
+        'ffn_norm':
+        ['feed_forward.experts.fused_w1w3']
+    },
+    'Qwen2VLDecoderLayer': {
+        'input_layernorm':
+        ['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
+        'post_attention_layernorm': ['mlp.gate_proj', 'mlp.up_proj']
+    },
+    'MistralDecoderLayer': {
+        'input_layernorm':
+        ['self_attn.k_proj', 'self_attn.q_proj', 'self_attn.v_proj'],
+        'post_attention_layernorm': ['mlp.gate_proj', 'mlp.up_proj']
+    },
 }
 
 FC_FCS_MAP = {
@@ -80,6 +102,22 @@
     'GLMBlock': {
         # 'self_attention.query_key_value': ['self_attention.dense']
         # 'mlp.dense_h_to_4h': ['mlp.dense_4h_to_h']
     },
+    'MixtralDecoderLayer': {
+        'self_attn.v_proj': ['self_attn.o_proj'],
+        'block_sparse_moe.experts.{i}.w3': ['block_sparse_moe.experts.{i}.w2']
+    },
+    'InternLM3MoEDecoderLayer': {
+        'feed_forward.experts.fused_w1w3':
+        ['feed_forward.experts.w2']
+    },
+    'Qwen2VLDecoderLayer': {
+        'self_attn.v_proj': ['self_attn.o_proj'],
+        'mlp.up_proj': ['mlp.down_proj']
+    },
+    'MistralDecoderLayer': {
+        'self_attn.v_proj': ['self_attn.o_proj'],
+        'mlp.up_proj': ['mlp.down_proj']
+    }
 }

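The Mixtral entries use an `{i}` placeholder so a single mapping can cover every expert in the block-sparse MoE, whereas the InternLM3-MoE entries point at the fused per-layer expert modules directly. The expansion of `{i}` is not shown in this diff; the helper below is only an illustrative sketch of how such templates could be turned into concrete module names:

```python
def expand_expert_templates(fcs, num_experts):
    """Expand '{i}' placeholders into one module name per expert index."""
    expanded = []
    for fc in fcs:
        if '{i}' in fc:
            expanded.extend(fc.format(i=i) for i in range(num_experts))
        else:
            expanded.append(fc)
    return expanded


# Example with the Mixtral post-attention entry (Mixtral-8x7B has
# 8 experts per layer):
fcs = ['block_sparse_moe.experts.{i}.w1', 'block_sparse_moe.experts.{i}.w3']
print(expand_expert_templates(fcs, num_experts=8))
# ['block_sparse_moe.experts.0.w1', ..., 'block_sparse_moe.experts.7.w3']
```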

