From 96441ea1bb627063abaa399825ed12171ac21610 Mon Sep 17 00:00:00 2001
From: chenqianfzh <51831990+chenqianfzh@users.noreply.github.com>
Date: Tue, 22 Oct 2024 00:13:23 -0700
Subject: [PATCH] support TP in qwen2 bnb (#9574)

Signed-off-by: Alvant
---
 vllm/model_executor/models/qwen2.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index cb04cc4850951..23eb1482ffef1 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -364,6 +364,20 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
     ]
     embedding_modules = {}
     embedding_padding_modules = []
+
+    # BitandBytes specific attributes
+    default_bitsandbytes_target_modules = [
+        ".gate_proj.",
+        ".down_proj.",
+        ".up_proj.",
+        ".q_proj.",
+        ".k_proj.",
+        ".v_proj.",
+        ".o_proj.",
+    ]
+
+    # in TP, these weights are partitioned along the column dimension (dim=-1)
+    column_parallel_weights_modules = [".down_proj.", ".o_proj."]
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
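
For context, a minimal usage sketch of what this patch enables: loading a Qwen2 checkpoint with in-flight BitsAndBytes quantization while running tensor parallelism. This is not part of the commit; the model name, TP size, and prompt are illustrative assumptions.

    from vllm import LLM, SamplingParams

    # Sketch: combine BitsAndBytes in-flight quantization with tensor
    # parallelism for a Qwen2 model (model name and TP size are assumptions).
    llm = LLM(
        model="Qwen/Qwen2-7B-Instruct",
        quantization="bitsandbytes",
        load_format="bitsandbytes",
        # With this patch, weights listed in column_parallel_weights_modules
        # (down_proj, o_proj) are sharded along dim=-1 across TP ranks.
        tensor_parallel_size=2,
    )

    outputs = llm.generate(["Hello, my name is"], SamplingParams(max_tokens=32))
    for out in outputs:
        print(out.outputs[0].text)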