diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py
index fddaa29a93091..029178af61da0 100644
--- a/vllm/model_executor/models/gemma.py
+++ b/vllm/model_executor/models/gemma.py
@@ -350,7 +350,6 @@ class GemmaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         "gate_up_proj",
         "down_proj",
     ]
-
     # BitandBytes specific attributes
     default_bitsandbytes_target_modules = [
         ".gate_proj.",
@@ -361,7 +360,6 @@ class GemmaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         ".v_proj.",
         ".o_proj.",
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py
index 6661ae91195c9..9238ed839c9de 100644
--- a/vllm/model_executor/models/gemma2.py
+++ b/vllm/model_executor/models/gemma2.py
@@ -390,7 +390,6 @@ class Gemma2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         ".v_proj.",
         ".o_proj.",
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py
index 586de6d44950c..38a31f420cec9 100644
--- a/vllm/model_executor/models/llama.py
+++ b/vllm/model_executor/models/llama.py
@@ -464,7 +464,6 @@ class LlamaForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         ".v_proj.",
         ".o_proj.",
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py
index 4ac11104a058b..f90df6b7df036 100644
--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -854,7 +854,6 @@ class MiniCPMV2_5(MiniCPMVBaseModel, SupportsLoRA):
         # resampler
         ".kv_proj.",
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
@@ -1005,7 +1004,6 @@ class MiniCPMV2_6(MiniCPMVBaseModel, SupportsLoRA):
         # resampler
         ".kv_proj.",
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
diff --git a/vllm/model_executor/models/mllama.py b/vllm/model_executor/models/mllama.py
index 0eae08732ae2d..d30b9addd09f1 100644
--- a/vllm/model_executor/models/mllama.py
+++ b/vllm/model_executor/models/mllama.py
@@ -1062,7 +1062,6 @@ class MllamaForConditionalGeneration(nn.Module, SupportsMultiModal):
         # so we can't add a dot in front of it.
         "multi_modal_projector."
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py
index 23e4af040cbfb..72b286fe6f6d6 100644
--- a/vllm/model_executor/models/qwen2.py
+++ b/vllm/model_executor/models/qwen2.py
@@ -395,7 +395,6 @@ class Qwen2ForCausalLM(nn.Module, SupportsLoRA, SupportsPP):
         ".v_proj.",
         ".o_proj.",
     ]
-
     bitsandbytes_stacked_params_mapping = {
         # shard_name, weight_name, index
         "q_proj": ("qkv_proj", 0),
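For context on the attribute these hunks touch: `bitsandbytes_stacked_params_mapping` maps a checkpoint shard name to the fused parameter it is packed into, plus its shard index, following the `# shard_name, weight_name, index` convention visible in the context lines. Below is a minimal, hypothetical sketch of how such a mapping can be applied when routing checkpoint weights; the `resolve_stacked_param` helper is illustrative only and is not vLLM's actual weight-loading code (the `k_proj`/`v_proj` entries are assumed from the pattern, since the hunks are truncated after `q_proj`).

# Illustration only: a hypothetical helper showing how the
# "shard_name -> (weight_name, index)" convention of
# bitsandbytes_stacked_params_mapping can be applied.
# This is NOT vLLM's actual loader code.

bitsandbytes_stacked_params_mapping = {
    # shard_name, weight_name, index
    "q_proj": ("qkv_proj", 0),
    "k_proj": ("qkv_proj", 1),  # assumed: follows the same pattern as q_proj
    "v_proj": ("qkv_proj", 2),  # assumed: follows the same pattern as q_proj
}


def resolve_stacked_param(checkpoint_name: str):
    """Return the fused parameter name and shard index for a checkpoint
    weight, or the name unchanged (index None) if it is not stacked."""
    for shard_name, (stacked_name, index) in (
            bitsandbytes_stacked_params_mapping.items()):
        if shard_name in checkpoint_name:
            return checkpoint_name.replace(shard_name, stacked_name), index
    return checkpoint_name, None


# Example: the checkpoint's k_proj weight lands in shard 1 of the
# fused qkv_proj parameter.
print(resolve_stacked_param("model.layers.0.self_attn.k_proj.weight"))
# ('model.layers.0.self_attn.qkv_proj.weight', 1)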