From fab405b10541ffc8a8f48403877b356778fce8dd Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Wed, 27 Nov 2024 22:01:44 +0000
Subject: [PATCH] [Model] support bitsandbytes quantization with minicpm3 model

Signed-off-by: Ubuntu
---
 vllm/model_executor/models/minicpm3.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vllm/model_executor/models/minicpm3.py b/vllm/model_executor/models/minicpm3.py
index c38c31a0d4953..c66be2d9c2d07 100644
--- a/vllm/model_executor/models/minicpm3.py
+++ b/vllm/model_executor/models/minicpm3.py
@@ -241,6 +241,12 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
     # `embedding_modules` and `embedding_padding_modules`
     # are inherited from MiniCPMForCausalLM
 
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name, weight_name, index
+        "gate_proj": ("gate_up_proj", 0),
+        "up_proj": ("gate_up_proj", 1),
+    }
+
     def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""):
         self.model = MiniCPM3Model(vllm_config=vllm_config,
                                    prefix=maybe_prefix(prefix, "model"))