From 5608e611c2116cc17c6808b2ae1ecb4a3e263493 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Thu, 31 Oct 2024 16:54:18 +0800 Subject: [PATCH] [Doc] Update Qwen documentation (#9869) --- docs/source/models/supported_models.rst | 7 +++++-- vllm/model_executor/models/qwen.py | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/docs/source/models/supported_models.rst b/docs/source/models/supported_models.rst index ff893b613f150..3279e7a108232 100644 --- a/docs/source/models/supported_models.rst +++ b/docs/source/models/supported_models.rst @@ -277,7 +277,7 @@ Text Generation * - :code:`QWenLMHeadModel` - Qwen - :code:`Qwen/Qwen-7B`, :code:`Qwen/Qwen-7B-Chat`, etc. - - + - ✅︎ - ✅︎ * - :code:`Qwen2ForCausalLM` - Qwen2 @@ -516,7 +516,7 @@ Text Generation - Qwen-VL - T + I\ :sup:`E+` - :code:`Qwen/Qwen-VL`, :code:`Qwen/Qwen-VL-Chat`, etc. - - + - ✅︎ - ✅︎ * - :code:`Qwen2AudioForConditionalGeneration` - Qwen2-Audio @@ -540,6 +540,9 @@ Text Generation | :sup:`E` Pre-computed embeddings can be inputted for this modality. | :sup:`+` Multiple items can be inputted per text prompt for this modality. +.. note:: + vLLM currently only supports adding LoRA to the language backbone of multimodal models. + .. note:: For :code:`openbmb/MiniCPM-V-2`, the official repo doesn't work yet, so we need to use a fork (:code:`HwwwH/MiniCPM-V-2`) for now. For more details, please see: https://github.com/vllm-project/vllm/pull/4087#issuecomment-2250397630 diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index 0a1b40927e9f9..998016ea28c26 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -1048,7 +1048,7 @@ def get_mm_mapping(self) -> MultiModelKeys: @MULTIMODAL_REGISTRY.register_max_image_tokens(MAX_QWEN_IMG_TOKENS) @INPUT_REGISTRY.register_dummy_data(dummy_data_for_qwen) @INPUT_REGISTRY.register_input_processor(input_processor_for_qwen) -class QWenLMHeadModel(QWenBaseModel): +class QWenLMHeadModel(QWenBaseModel, SupportsLoRA): """ QWenLMHeadModel is not only applicable to LLM but also to VL, which is not conducive to the current integration logic of LoRA in vLLM. Therefore, it