diff --git a/autotest/config-v100.yaml b/autotest/config-v100.yaml
index 41216cb73..507f81ceb 100644
--- a/autotest/config-v100.yaml
+++ b/autotest/config-v100.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -100,12 +101,22 @@ turbomind_quatization:
     - meta-llama/Meta-Llama-3-8B-Instruct
     - internlm/internlm-xcomposer2d5-7b
     - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
     - THUDM/glm-4-9b-chat
+    - deepseek-ai/deepseek-coder-1.3b-instruct
+    - codellama/CodeLlama-7b-Instruct-hf
   gptq:
     - internlm/internlm2_5-7b-chat
   no_kvint4:
     - openbmb/MiniCPM-V-2_6
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-AWQ
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2.5-0.5B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
@@ -120,6 +131,10 @@ pytorch_quatization:
   no_kvint4:
     - OpenGVLab/InternVL2-1B
     - OpenGVLab/InternVL2-4B
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -128,7 +143,6 @@ pytorch_quatization:
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
-
 longtext_model:
   - meta-llama/Meta-Llama-3-1-8B-Instruct
   - meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/autotest/config.yaml b/autotest/config.yaml
index 88ca7c312..b4fd4e171 100644
--- a/autotest/config.yaml
+++ b/autotest/config.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -18,6 +19,7 @@ tp_config:
   Qwen2-7B-Instruct-GPTQ-Int4: 2
   InternVL2-40B: 2
   MiniCPM-V-2_6: 2
+  Qwen2.5-72B-Instruct: 4
 
 turbomind_chat_model:
   - meta-llama/Llama-3.2-1B-Instruct
@@ -164,7 +166,11 @@ pytorch_base_model:
 
 turbomind_quatization:
   no_awq:
+    - Qwen/Qwen1.5-MoE-A2.7B-Chat
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
+    - mistralai/Mistral-Nemo-Instruct-2407
     - deepseek-ai/deepseek-coder-1.3b-instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - codellama/CodeLlama-7b-Instruct-hf
@@ -172,6 +178,12 @@ turbomind_quatization:
     - internlm/internlm2_5-7b-chat
   no_kvint4:
     - openbmb/MiniCPM-V-2_6
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-AWQ
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2.5-0.5B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
@@ -203,6 +215,10 @@ pytorch_quatization:
   no_kvint4:
     - OpenGVLab/InternVL2-1B
     - OpenGVLab/InternVL2-4B
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -211,7 +227,6 @@ pytorch_quatization:
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
-
 longtext_model:
   - meta-llama/Meta-Llama-3-1-8B-Instruct
   - meta-llama/Meta-Llama-3-8B-Instruct
@@ -227,7 +242,8 @@ benchmark_model:
   - internlm/internlm2_5-7b-chat
   - internlm/internlm2_5-20b-chat
  - THUDM/glm-4-9b-chat
-  - Qwen/Qwen2-7B-Instruct
+  - Qwen/Qwen2.5-7B-Instruct
+  - Qwen/Qwen2.5-72B-Instruct
   - mistralai/Mistral-7B-Instruct-v0.3
   - mistralai/Mixtral-8x7B-Instruct-v0.1
   - deepseek-ai/DeepSeek-V2-Lite-Chat
diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
index a828e17a0..58674fa17 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -67,8 +67,6 @@ def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
                                              exclude_dup=True))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
index 276ced5bc..8403ced94 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)
diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
index 17560e754..d1865175c 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
@@ -56,8 +56,6 @@ def test_pipeline_chat_tp2(config, common_case_config, model, worker_id):
 @pytest.mark.parametrize('model', get_all_model_list(tp_num=1, quant_policy=4))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
index 8f1bc7d8b..8c845fa77 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)
diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
index ab1f5595a..fc95e288c 100644
--- a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
@@ -67,8 +67,7 @@ def getKvintModelList(tp_num, quant_policy):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, exclude_dup=True)
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, exclude_dup=True)]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
index b210733db..bf20c45e6 100644
--- a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
index 91e65ee51..1c9131b32 100644
--- a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
@@ -66,8 +66,7 @@ def getKvintModelList(tp_num, quant_policy):
         'cuda_prefix': None,
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
-    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)
-      if 'qwen2' not in item.lower() or quant_policy == 8]
+    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
index 091e18e6e..641f2f760 100644
--- a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_all_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py
index 562a707ef..023e4ac14 100644
--- a/autotest/utils/pipeline_chat.py
+++ b/autotest/utils/pipeline_chat.py
@@ -3,7 +3,10 @@
 from subprocess import PIPE
 
 import allure
+import numpy as np
 import torch
+from decord import VideoReader, cpu
+from PIL import Image
 from pytest_assume.plugin import assume
 from utils.get_run_config import get_model_name, get_tp_num
 from utils.rule_condition_assert import assert_result
@@ -13,6 +16,7 @@
 from lmdeploy.utils import is_bf16_supported
 from lmdeploy.vl import load_image
 from lmdeploy.vl.constants import IMAGE_TOKEN
+from lmdeploy.vl.utils import encode_image_base64
 
 
 def run_pipeline_chat_test(config,
@@ -275,6 +279,12 @@ def assert_pipeline_single_element(output,
 
 PIC1 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'  # noqa E501
 PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg'  # noqa E501
+PIC_BEIJING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Beijing_Small.jpeg'  # noqa E501
+PIC_CHONGQING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Chongqing_Small.jpeg'  # noqa E501
+PIC_REDPANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image1.jpg'  # noqa E501
+PIC_PANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image2.jpg'  # noqa E501
+DESC = 'What are the similarities and differences between these two images.'  # noqa E501
+DESC_ZH = '两张图有什么相同和不同的地方.'  # noqa E501
 
 
 def run_pipeline_vl_chat_test(config,
@@ -386,12 +396,350 @@ def run_pipeline_vl_chat_test(config,
                         ', reason: Multi-turn example: ski not in ' +
                         sess.response.text + '\n')
 
+    if 'internvl' in model_case.lower():
+        internvl_vl_testcase(config, pipe, file)
+        internvl_vl_testcase(config, pipe, file, 'cn')
+    if 'minicpm' in model_case.lower():
+        MiniCPM_vl_testcase(config, pipe, file)
+    if 'qwen' in model_case.lower():
+        Qwen_vl_testcase(config, pipe, file)
+
     file.close()
 
     del pipe
     torch.cuda.empty_cache()
 
 
+def internvl_vl_testcase(config, pipe, file, lang='en'):
+    if lang == 'cn':
+        description = DESC_ZH
+    else:
+        description = DESC
+    # multi-image multi-round conversation, combined images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text',
+                      text=f'{IMAGE_TOKEN}{IMAGE_TOKEN}\n{description}'),
+                 dict(type='image_url',
+                      image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)),
+                 dict(type='image_url',
+                      image_url=dict(max_dynamic_patch=12, url=PIC_PANDA))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images: panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=description))
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images second: panda not in ' +
+                    response.text + '\n')
+
+    # multi-image multi-round conversation, separate images
+    messages = [
+        dict(
+            role='user',
+            content=[
+                dict(
+                    type='text',
+                    text=f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\n' +  # noqa E251,E501
+                    description),
+                dict(type='image_url',
+                     image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)),
+                dict(type='image_url',
+                     image_url=dict(max_dynamic_patch=12, url=PIC_PANDA))
+            ])
+    ]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: separate images: panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=description))
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: separate images second: panda not in ' +
+                    response.text + '\n')
+
+    # video multi-round conversation
+    def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
+        if bound:
+            start, end = bound[0], bound[1]
+        else:
+            start, end = -100000, 100000
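+        # map the requested time window to num_segments evenly spaced frames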
+        start_idx = max(first_idx, round(start * fps))
+        end_idx = min(round(end * fps), max_frame)
+        seg_size = float(end_idx - start_idx) / num_segments
+        frame_indices = np.array([
+            int(start_idx + (seg_size / 2) + np.round(seg_size * idx))
+            for idx in range(num_segments)
+        ])
+        return frame_indices
+
+    def load_video(video_path, bound=None, num_segments=32):
+        vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
+        max_frame = len(vr) - 1
+        fps = float(vr.get_avg_fps())
+        frame_indices = get_index(bound,
+                                  fps,
+                                  max_frame,
+                                  first_idx=0,
+                                  num_segments=num_segments)
+        imgs = []
+        for frame_index in frame_indices:
+            img = Image.fromarray(vr[frame_index].asnumpy()).convert('RGB')
+            imgs.append(img)
+        return imgs
+
+    resource_path = config.get('resource_path')
+    video_path = resource_path + '/red-panda.mp4'
+    imgs = load_video(video_path, num_segments=8)
+
+    question = ''
+    for i in range(len(imgs)):
+        question = question + f'Frame{i+1}: {IMAGE_TOKEN}\n'
+
+    if lang == 'cn':
+        question += '小熊猫在做什么?'
+    else:
+        question += 'What is the red panda doing?'
+
+    content = [{'type': 'text', 'text': question}]
+    for img in imgs:
+        content.append({
+            'type': 'image_url',
+            'image_url': {
+                'max_dynamic_patch': 1,
+                'url': f'data:image/jpeg;base64,{encode_image_base64(img)}'
+            }
+        })
+
+    messages = [dict(role='user', content=content)]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: video images: red panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    if lang == 'cn':
+        messages.append(dict(role='user', content='描述视频详情,不要重复'))
+    else:
+        messages.append(
+            dict(role='user',
+                 content='Describe this video in detail. Don\'t repeat.'))
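+    # follow-up turn: ask for a detailed description of the same video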
+    response = pipe(messages)
+    result = 'red panda' in response.text.lower(
+    ) or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: video images: red panda not in ' +
+                    response.text + '\n')
+
+
+def llava_vl_testcase(config, pipe, file):
+    # multi-image multi-round conversation, combined images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url', image_url=dict(url=PIC_BEIJING)),
+                 dict(type='image_url', image_url=dict(url=PIC_CHONGQING))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images: buildings not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images second: buildings not in ' +
+                    response.text + '\n')
+
+
+def MiniCPM_vl_testcase(config, pipe, file):
+    # Chat with multiple images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url',
+                      image_url=dict(max_slice_nums=9, url=PIC_REDPANDA)),
+                 dict(type='image_url',
+                      image_url=dict(max_slice_nums=9, url=PIC_PANDA))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: multiple images: panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: multiple images second: panda not in ' +
+                    response.text + '\n')
+
+    # In-context few-shot learning
+    EXAMPLE1 = 'https://github.com/user-attachments/assets/405d9147-95f6-4f78-8879-606a0aed6707'  # noqa E251,E501
+    EXAMPLE2 = 'https://github.com/user-attachments/assets/9f2c6ed9-2aa5-4189-9c4f-0b9753024ba1'  # noqa E251,E501
+    EXAMPLE3 = 'https://github.com/user-attachments/assets/f335b507-1957-4c22-84ae-ed69ff79df38'  # noqa E251,E501
+    question = 'production date'
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text=question),
+                 dict(type='image_url', image_url=dict(url=EXAMPLE1)),
+             ]),
+        dict(role='assistant', content='2021.08.29'),
+        dict(role='user',
+             content=[
+                 dict(type='text', text=question),
+                 dict(type='image_url', image_url=dict(url=EXAMPLE2)),
+             ]),
+        dict(role='assistant', content='1999.05.15'),
+        dict(role='user',
+             content=[
+                 dict(type='text', text=question),
+                 dict(type='image_url', image_url=dict(url=EXAMPLE3)),
+             ])
+    ]
+    response = pipe(messages)
+    result = '2021' in response.text.lower() or '14' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: in context learning: 2021 or 14 not in ' +
+                    response.text + '\n')
+
+    # Chat with video
+    MAX_NUM_FRAMES = 64  # if cuda OOM set a smaller number
+
+    def encode_video(video_path):
+
+        def uniform_sample(length, n):
+            gap = len(length) / n
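+            # take the middle element of each of the n equal-sized chunks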
+            idxs = [int(i * gap + gap / 2) for i in range(n)]
+            return [length[i] for i in idxs]
+
+        vr = VideoReader(video_path, ctx=cpu(0))
+        sample_fps = round(vr.get_avg_fps() / 1)  # FPS
+        frame_idx = [i for i in range(0, len(vr), sample_fps)]
+        if len(frame_idx) > MAX_NUM_FRAMES:
+            frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+        frames = vr.get_batch(frame_idx).asnumpy()
+        frames = [Image.fromarray(v.astype('uint8')) for v in frames]
+        print('num frames:', len(frames))
+        return frames
+
+    resource_path = config.get('resource_path')
+    video_path = resource_path + '/red-panda.mp4'
+    frames = encode_video(video_path)
+    question = 'Describe the video'
+
+    content = [dict(type='text', text=question)]
+    for frame in frames:
+        content.append(
+            dict(type='image_url',
+                 image_url=dict(
+                     use_image_id=False,
+                     max_slice_nums=2,
+                     url=f'data:image/jpeg;base64,{encode_image_base64(frame)}'
+                 )))
+
+    messages = [dict(role='user', content=content)]
+    response = pipe(messages)
+    result = 'red panda' in response.text.lower(
+    ) or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: video example: panda not in ' + response.text +
+                    '\n')
+
+
+def Qwen_vl_testcase(config, pipe, file):
+    # multi-image multi-round conversation, combined images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url', image_url=dict(url=PIC_BEIJING)),
+                 dict(type='image_url', image_url=dict(url=PIC_CHONGQING))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images: buildings not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images second: buildings not in ' +
+                    response.text + '\n')
+
+    # image resolution for performance boost
+    min_pixels = 64 * 28 * 28
+    max_pixels = 64 * 28 * 28
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url',
+                      image_url=dict(min_pixels=min_pixels,
+                                     max_pixels=max_pixels,
+                                     url=PIC_BEIJING)),
+                 dict(type='image_url',
+                      image_url=dict(min_pixels=min_pixels,
+                                     max_pixels=max_pixels,
+                                     url=PIC_CHONGQING))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: performance boost: buildings not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: performance boost second: buildings not in ' +
+                    response.text + '\n')
+
+
 def assert_pipeline_vl_chat_log(config, model_case, worker_id):
     log_path = config.get('log_path')
diff --git a/autotest/utils/run_restful_chat.py b/autotest/utils/run_restful_chat.py
index 77af1975b..082a61bcd 100644
--- a/autotest/utils/run_restful_chat.py
+++ b/autotest/utils/run_restful_chat.py
@@ -282,6 +282,7 @@ def get_model(url):
 
 
 PIC = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'  # noqa E501
+PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg'  # noqa E501
 
 
 def run_vl_testcase(config, port: int = DEFAULT_PORT):
@@ -307,6 +308,11 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT):
             'image_url': {
                 'url': PIC,
             },
+        }, {
+            'type': 'image_url',
+            'image_url': {
+                'url': PIC2,
+            },
         }],
     }]
@@ -315,8 +321,6 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT):
                                                      temperature=0.8,
                                                      top_p=0.8)
     file.writelines(str(response).lower() + '\n')
-    assert 'tiger' in str(response).lower() or '虎' in str(
-        response).lower(), response
 
     api_client = APIClient(http_url)
     model_name = api_client.available_models[0]
@@ -324,7 +328,12 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT):
                                                messages=prompt_messages):
         continue
     file.writelines(str(item) + '\n')
-    assert 'tiger' in str(item).lower() or '虎' in str(item).lower(), item
 
     allure.attach.file(restful_log,
                        attachment_type=allure.attachment_type.TEXT)
+
+    assert 'tiger' in str(response).lower() or '虎' in str(
+        response).lower() or 'ski' in str(response).lower() or '滑雪' in str(
+            response).lower(), response
+    assert 'tiger' in str(item).lower() or '虎' in str(item).lower(
+    ) or 'ski' in str(item).lower() or '滑雪' in str(item).lower(), item
diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md
index da5224125..cd43e79c9 100644
--- a/docs/en/supported_models/supported_models.md
+++ b/docs/en/supported_models/supported_models.md
@@ -19,7 +19,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
 | Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
 | Qwen2 | 0.5B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Mistral | 7B | LLM | Yes | Yes | Yes | Yes |
+| Mistral | 7B | LLM | Yes | Yes | Yes | No |
 | Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes |
 | Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
 | DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
@@ -36,7 +36,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
 | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
-| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | NO |
+| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No |
 
 "-" means not verified yet.
diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md
index 502e91b6d..7ec36d235 100644
--- a/docs/zh_cn/supported_models/supported_models.md
+++ b/docs/zh_cn/supported_models/supported_models.md
@@ -19,7 +19,7 @@
 | Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
 | Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
 | Qwen2 | 0.5B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Mistral | 7B | LLM | Yes | Yes | Yes | Yes |
+| Mistral | 7B | LLM | Yes | Yes | Yes | No |
 | Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes |
 | Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
 | DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
@@ -36,7 +36,7 @@
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
 | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
-| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | NO |
+| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No |
 
 “-” 表示还没有验证。