Skip to content

Commit

Permalink
[Model] Support bitsandbytes quantization for the MiniCPM3 model
Browse files Browse the repository at this point in the history
Signed-off-by: Ubuntu <[email protected]>
  • Loading branch information
zixuanzhang226 committed Nov 27, 2024
1 parent 395b1c7 commit fab405b
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions vllm/model_executor/models/minicpm3.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,12 @@ class MiniCPM3ForCausalLM(MiniCPMForCausalLM):
# `embedding_modules` and `embedding_padding_modules`
# are inherited from MiniCPMForCausalLM

# Tells the bitsandbytes weight loader how per-shard checkpoint weights map
# onto this model's fused parameters: each key is the shard's name in the
# checkpoint, and the value is (fused weight name, shard index within it).
# NOTE(review): only the MLP gate/up projections are listed; presumably the
# MLA attention projections need no stacking here — confirm against the
# MiniCPM3 attention weight layout.
bitsandbytes_stacked_params_mapping = {
    # shard_name, weight_name, index
    "gate_proj": ("gate_up_proj", 0),
    "up_proj": ("gate_up_proj", 1),
}

def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""):
    """Construct the MiniCPM3 backbone and attach it as ``self.model``.

    Overrides the parent hook so the MiniCPM3-specific model class is
    instantiated instead of the base MiniCPM one.
    """
    model_prefix = maybe_prefix(prefix, "model")
    self.model = MiniCPM3Model(vllm_config=vllm_config, prefix=model_prefix)

0 comments on commit fab405b

Please sign in to comment.