Skip to content

Commit

Permalink
[Model][LoRA]LoRA support added for Qwen2VLForConditionalGeneration (vllm-project#10022)
Browse files Browse the repository at this point in the history

Signed-off-by: ericperfect <[email protected]>
Signed-off-by: Loc Huynh <[email protected]>
  • Loading branch information
ericperfect authored and JC1DA committed Nov 11, 2024
1 parent 9a8555a commit 03d73bb
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 5 deletions.
2 changes: 1 addition & 1 deletion docs/source/models/supported_models.rst
Original file line number Diff line number Diff line change
Expand Up @@ -540,7 +540,7 @@ Text Generation
- Qwen2-VL
- T + I\ :sup:`E+` + V\ :sup:`+`
- :code:`Qwen/Qwen2-VL-2B-Instruct`, :code:`Qwen/Qwen2-VL-7B-Instruct`, :code:`Qwen/Qwen2-VL-72B-Instruct`, etc.
-
- ✅︎
- ✅︎
* - :code:`UltravoxModel`
- Ultravox
Expand Down
32 changes: 28 additions & 4 deletions vllm/model_executor/models/qwen2_vl.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

from vllm.attention import AttentionMetadata
from vllm.attention.selector import _Backend
from vllm.config import CacheConfig, MultiModalConfig
from vllm.config import CacheConfig, LoRAConfig, MultiModalConfig
from vllm.distributed import get_pp_group, parallel_state
from vllm.distributed import utils as dist_utils
from vllm.inputs import (INPUT_REGISTRY, DecoderOnlyInputs, DummyData,
Expand All @@ -65,7 +65,7 @@
from vllm.transformers_utils.config import uses_mrope
from vllm.transformers_utils.processor import cached_get_processor

from .interfaces import SupportsMultiModal, SupportsPP
from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsPP
from .utils import (PPMissingLayer, get_vit_attn_backend,
is_pp_missing_parameter,
make_empty_intermediate_tensors_factory)
Expand Down Expand Up @@ -927,13 +927,37 @@ def input_processor_for_qwen2_vl(
@INPUT_REGISTRY.register_dummy_data(dummy_data_for_qwen2_vl)
@INPUT_REGISTRY.register_input_processor(input_processor_for_qwen2_vl)
class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal,
SupportsPP):
SupportsLoRA, SupportsPP):
packed_modules_mapping = {
"qkv_proj": [
"q_proj",
"k_proj",
"v_proj",
],
"gate_up_proj": [
"gate_proj",
"up_proj",
],
}

# LoRA specific attributes
# TODO Support LoRA for the visual encoder in the future.
supported_lora_modules = [
"qkv_proj",
"o_proj",
"gate_up_proj",
"down_proj",
]
embedding_modules = {}
embedding_padding_modules = []

def __init__(self,
config: Qwen2VLConfig,
multimodal_config: MultiModalConfig,
cache_config: Optional[CacheConfig] = None,
quant_config: Optional[QuantizationConfig] = None) -> None:
quant_config: Optional[QuantizationConfig] = None,
lora_config: Optional[LoRAConfig] = None) -> None:

super().__init__()

assert not cache_config.enable_prefix_caching, \
Expand Down

0 comments on commit 03d73bb

Please sign in to comment.