Skip to content

Commit

Permalink
[Model] Support bitsandbytes quantization for the MiniCPM3 model
Browse files Browse the repository at this point in the history
Signed-off-by: Ubuntu <[email protected]>
  • Loading branch information
zixuanzhang226 committed Nov 27, 2024
1 parent 395b1c7 commit fab405b
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions vllm/model_executor/models/minicpm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,12 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
# `embedding_modules` and `embedding_padding_modules`
# are inherited from MiniCPMForCausalLM

# Tells the bitsandbytes weight loader how per-shard checkpoint weights map
# onto this model's fused parameters: each key is the shard's name in the
# checkpoint, and the value is (fused weight name, shard index within it).
# NOTE(review): only the MLP gate/up projections are listed; presumably the
# MLA attention projections need no stacking here — confirm against the
# MiniCPM3 attention weight layout.
bitsandbytes_stacked_params_mapping = {
    # shard_name, weight_name, index
    "gate_proj": ("gate_up_proj", 0),
    "up_proj": ("gate_up_proj", 1),
}

def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""):
    """Construct the MiniCPM3 backbone and attach it as ``self.model``.

    Overrides the parent hook so the MiniCPM3-specific model class is
    instantiated instead of the base MiniCPM one.
    """
    model_prefix = maybe_prefix(prefix, "model")
    self.model = MiniCPM3Model(vllm_config=vllm_config, prefix=model_prefix)

0 comments on commit fab405b

Please sign in to comment.