support TP in qwen2 bnb (vllm-project#9574)
Signed-off-by: Alvant <[email protected]>
chenqianfzh authored and Alvant committed Oct 26, 2024
1 parent 0bdbb81 commit 96441ea
Showing 1 changed file with 14 additions and 0 deletions.

vllm/model_executor/models/qwen2.py
@@ -364,6 +364,20 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     ]
     embedding_modules = {}
     embedding_padding_modules = []
+
+    # BitsAndBytes specific attributes
+    default_bitsandbytes_target_modules = [
+        ".gate_proj.",
+        ".down_proj.",
+        ".up_proj.",
+        ".q_proj.",
+        ".k_proj.",
+        ".v_proj.",
+        ".o_proj.",
+    ]
+
+    # in TP, these weights are partitioned along the column dimension (dim=-1)
+    column_parallel_weights_modules = [".down_proj.", ".o_proj."]
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
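The new column_parallel_weights_modules attribute is what makes BitsAndBytes quantization work under tensor parallelism here: down_proj and o_proj are row-parallel layers, so their weights must be sliced along the last (column) dimension rather than the first. Below is a minimal, self-contained sketch of that sharding rule; it is not vLLM's actual loader, and the tp_shard helper and its signature are hypothetical names for illustration.

import torch

# Mirrors column_parallel_weights_modules from the diff above.
COLUMN_PARALLEL_WEIGHTS = [".down_proj.", ".o_proj."]

def tp_shard(name: str, weight: torch.Tensor,
             tp_rank: int, tp_size: int) -> torch.Tensor:
    """Return this rank's slice of `weight` under tensor parallelism.

    Hypothetical helper: vLLM's real BitsAndBytes path operates on
    quantized state, but the sharding dimension is chosen the same way.
    """
    if any(tag in name for tag in COLUMN_PARALLEL_WEIGHTS):
        # Row-parallel layers (down_proj, o_proj): split the input
        # ("column") dimension, dim=-1.
        dim = -1
    else:
        # The remaining target modules (gate/up/q/k/v projections) are
        # column-parallel: split the output dimension, dim=0.
        dim = 0
    shard_size = weight.shape[dim] // tp_size
    return weight.narrow(dim, tp_rank * shard_size, shard_size)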

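For context, bitsandbytes_stacked_params_mapping (truncated in the diff above) routes separate checkpoint weights into the fused parameters vLLM allocates. A hedged sketch of that renaming step follows; the remap_checkpoint_name helper is illustrative, and the k/v/gate/up entries are assumed to follow the q_proj pattern shown in the diff, as they do elsewhere in vLLM.

# Illustrative only: the q_proj entry appears in the diff; the other
# entries are assumed to follow the same (fused_name, shard_index)
# pattern used across vLLM models.
STACKED_PARAMS_MAPPING = {
    "q_proj": ("qkv_proj", 0),
    "k_proj": ("qkv_proj", 1),
    "v_proj": ("qkv_proj", 2),
    "gate_proj": ("gate_up_proj", 0),
    "up_proj": ("gate_up_proj", 1),
}

def remap_checkpoint_name(name: str) -> tuple[str, int | None]:
    """Map an HF checkpoint weight name to (vllm_param_name, shard_index).

    Hypothetical helper, e.g.
    'model.layers.0.self_attn.q_proj.weight'
      -> ('model.layers.0.self_attn.qkv_proj.weight', 0)
    """
    for shard_name, (fused_name, index) in STACKED_PARAMS_MAPPING.items():
        if shard_name in name:
            return name.replace(shard_name, fused_name), index
    return name, None  # not a stacked parameter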