diff --git a/autotest/config-v100.yaml b/autotest/config-v100.yaml
index 41216cb73..507f81ceb 100644
--- a/autotest/config-v100.yaml
+++ b/autotest/config-v100.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -100,12 +101,22 @@ turbomind_quatization:
     - meta-llama/Meta-Llama-3-8B-Instruct
     - internlm/internlm-xcomposer2d5-7b
     - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
     - THUDM/glm-4-9b-chat
+    - deepseek-ai/deepseek-coder-1.3b-instruct
+    - codellama/CodeLlama-7b-Instruct-hf
   gptq:
     - internlm/internlm2_5-7b-chat
   no_kvint4:
     - openbmb/MiniCPM-V-2_6
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-AWQ
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2.5-0.5B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
@@ -120,6 +131,10 @@ pytorch_quatization:
   no_kvint4:
     - OpenGVLab/InternVL2-1B
     - OpenGVLab/InternVL2-4B
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -128,7 +143,6 @@ pytorch_quatization:
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
-
 longtext_model:
   - meta-llama/Meta-Llama-3-1-8B-Instruct
   - meta-llama/Meta-Llama-3-8B-Instruct
diff --git a/autotest/config.yaml b/autotest/config.yaml
index 88ca7c312..b4fd4e171 100644
--- a/autotest/config.yaml
+++ b/autotest/config.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -18,6 +19,7 @@ tp_config:
   Qwen2-7B-Instruct-GPTQ-Int4: 2
   InternVL2-40B: 2
   MiniCPM-V-2_6: 2
+  Qwen2.5-72B-Instruct: 4
 
 turbomind_chat_model:
   - meta-llama/Llama-3.2-1B-Instruct
@@ -164,7 +166,11 @@ pytorch_base_model:
 
 turbomind_quatization:
   no_awq:
+    - Qwen/Qwen1.5-MoE-A2.7B-Chat
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
+    - mistralai/Mistral-Nemo-Instruct-2407
     - deepseek-ai/deepseek-coder-1.3b-instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - codellama/CodeLlama-7b-Instruct-hf
@@ -172,6 +178,12 @@ turbomind_quatization:
     - internlm/internlm2_5-7b-chat
   no_kvint4:
     - openbmb/MiniCPM-V-2_6
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-AWQ
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2.5-0.5B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
@@ -203,6 +215,10 @@ pytorch_quatization:
   no_kvint4:
     - OpenGVLab/InternVL2-1B
     - OpenGVLab/InternVL2-4B
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -211,7 +227,6 @@ pytorch_quatization:
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
-
 longtext_model:
   - meta-llama/Meta-Llama-3-1-8B-Instruct
   - meta-llama/Meta-Llama-3-8B-Instruct
@@ -227,7 +242,8 @@ benchmark_model:
   - internlm/internlm2_5-7b-chat
   - internlm/internlm2_5-20b-chat
  - THUDM/glm-4-9b-chat
-  - Qwen/Qwen2-7B-Instruct
+  - Qwen/Qwen2.5-7B-Instruct
+  - Qwen/Qwen2.5-72B-Instruct
   - mistralai/Mistral-7B-Instruct-v0.3
   - mistralai/Mixtral-8x7B-Instruct-v0.1
   - deepseek-ai/DeepSeek-V2-Lite-Chat
diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
index a828e17a0..58674fa17 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -67,8 +67,6 @@ def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
                                              exclude_dup=True))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
diff --git a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
index 276ced5bc..8403ced94 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)
diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
index 17560e754..d1865175c 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
@@ -56,8 +56,6 @@ def test_pipeline_chat_tp2(config, common_case_config, model, worker_id):
 @pytest.mark.parametrize('model', get_all_model_list(tp_num=1, quant_policy=4))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
diff --git a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
index 8f1bc7d8b..8c845fa77 100644
--- a/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
+++ b/autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                                                quant_policy=4,
                                                model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)
diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
index ab1f5595a..fc95e288c 100644
--- a/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
@@ -67,8 +67,7 @@ def getKvintModelList(tp_num, quant_policy):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, exclude_dup=True)
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, exclude_dup=True)]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
index b210733db..bf20c45e6 100644
--- a/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
index 91e65ee51..1c9131b32 100644
--- a/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
@@ -66,8 +66,7 @@ def getKvintModelList(tp_num, quant_policy):
         'cuda_prefix': None,
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
-    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)
-      if 'qwen2' not in item.lower() or quant_policy == 8]
+    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
index 091e18e6e..641f2f760 100644
--- a/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
+++ b/autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_all_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)
diff --git a/autotest/utils/pipeline_chat.py b/autotest/utils/pipeline_chat.py
index 562a707ef..023e4ac14 100644
--- a/autotest/utils/pipeline_chat.py
+++ b/autotest/utils/pipeline_chat.py
@@ -3,7 +3,10 @@
 from subprocess import PIPE
 
 import allure
+import numpy as np
 import torch
+from decord import VideoReader, cpu
+from PIL import Image
 from pytest_assume.plugin import assume
 from utils.get_run_config import get_model_name, get_tp_num
 from utils.rule_condition_assert import assert_result
@@ -13,6 +16,7 @@
 from lmdeploy.utils import is_bf16_supported
 from lmdeploy.vl import load_image
 from lmdeploy.vl.constants import IMAGE_TOKEN
+from lmdeploy.vl.utils import encode_image_base64
 
 
 def run_pipeline_chat_test(config,
@@ -275,6 +279,12 @@ def assert_pipeline_single_element(output,
 
 PIC1 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'  # noqa E501
 PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg'  # noqa E501
+PIC_BEIJING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Beijing_Small.jpeg'  # noqa E501
+PIC_CHONGQING = 'https://raw.githubusercontent.com/QwenLM/Qwen-VL/master/assets/mm_tutorial/Chongqing_Small.jpeg'  # noqa E501
+PIC_REDPANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image1.jpg'  # noqa E501
+PIC_PANDA = 'https://raw.githubusercontent.com/OpenGVLab/InternVL/main/internvl_chat/examples/image2.jpg'  # noqa E501
+DESC = 'What are the similarities and differences between these two images.'  # noqa E501
+DESC_ZH = '两张图有什么相同和不同的地方.'  # noqa E501
 
 
 def run_pipeline_vl_chat_test(config,
@@ -386,12 +396,350 @@ def run_pipeline_vl_chat_test(config,
                         ', reason: Multi-turn example: ski not in ' +
                         sess.response.text + '\n')
 
+    if 'internvl' in model_case.lower():
+        internvl_vl_testcase(config, pipe, file)
+        internvl_vl_testcase(config, pipe, file, 'cn')
+    if 'minicpm' in model_case.lower():
+        MiniCPM_vl_testcase(config, pipe, file)
+    if 'qwen' in model_case.lower():
+        Qwen_vl_testcase(config, pipe, file)
+
     file.close()
 
     del pipe
     torch.cuda.empty_cache()
 
 
+def internvl_vl_testcase(config, pipe, file, lang='en'):
+    if lang == 'cn':
+        description = DESC_ZH
+    else:
+        description = DESC
+    # multi-image multi-round conversation, combined images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text',
+                      text=f'{IMAGE_TOKEN}{IMAGE_TOKEN}\n{description}'),
+                 dict(type='image_url',
+                      image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)),
+                 dict(type='image_url',
+                      image_url=dict(max_dynamic_patch=12, url=PIC_PANDA))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images: panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=description))
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images second: panda not in ' +
+                    response.text + '\n')
+
+    # multi-image multi-round conversation, separate images
+    messages = [
+        dict(
+            role='user',
+            content=[
+                dict(
+                    type='text',
+                    text=f'Image-1: {IMAGE_TOKEN}\nImage-2: {IMAGE_TOKEN}\n' +  # noqa E251,E501
+                    description),
+                dict(type='image_url',
+                     image_url=dict(max_dynamic_patch=12, url=PIC_REDPANDA)),
+                dict(type='image_url',
+                     image_url=dict(max_dynamic_patch=12, url=PIC_PANDA))
+            ])
+    ]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: separate images: panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=description))
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: separate images second: panda not in ' +
+                    response.text + '\n')
+
+    # video multi-round conversation
+    def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
+        if bound:
+            start, end = bound[0], bound[1]
+        else:
+            start, end = -100000, 100000
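+        # map the requested time window to num_segments evenly spaced frames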
+        start_idx = max(first_idx, round(start * fps))
+        end_idx = min(round(end * fps), max_frame)
+        seg_size = float(end_idx - start_idx) / num_segments
+        frame_indices = np.array([
+            int(start_idx + (seg_size / 2) + np.round(seg_size * idx))
+            for idx in range(num_segments)
+        ])
+        return frame_indices
+
+    def load_video(video_path, bound=None, num_segments=32):
+        vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
+        max_frame = len(vr) - 1
+        fps = float(vr.get_avg_fps())
+        frame_indices = get_index(bound,
+                                  fps,
+                                  max_frame,
+                                  first_idx=0,
+                                  num_segments=num_segments)
+        imgs = []
+        for frame_index in frame_indices:
+            img = Image.fromarray(vr[frame_index].asnumpy()).convert('RGB')
+            imgs.append(img)
+        return imgs
+
+    resource_path = config.get('resource_path')
+    video_path = resource_path + '/red-panda.mp4'
+    imgs = load_video(video_path, num_segments=8)
+
+    question = ''
+    for i in range(len(imgs)):
+        question = question + f'Frame{i+1}: {IMAGE_TOKEN}\n'
+
+    if lang == 'cn':
+        question += '小熊猫在做什么?'
+    else:
+        question += 'What is the red panda doing?'
+
+    content = [{'type': 'text', 'text': question}]
+    for img in imgs:
+        content.append({
+            'type': 'image_url',
+            'image_url': {
+                'max_dynamic_patch': 1,
+                'url': f'data:image/jpeg;base64,{encode_image_base64(img)}'
+            }
+        })
+
+    messages = [dict(role='user', content=content)]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: video images: red panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    if lang == 'cn':
+        messages.append(dict(role='user', content='描述视频详情,不要重复'))
+    else:
+        messages.append(
+            dict(role='user',
+                 content='Describe this video in detail. Don\'t repeat.'))
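+    # follow-up turn: ask for a detailed description of the same video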
+    response = pipe(messages)
+    result = 'red panda' in response.text.lower(
+    ) or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: video images: red panda not in ' +
+                    response.text + '\n')
+
+
+def llava_vl_testcase(config, pipe, file):
+    # multi-image multi-round conversation, combined images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url', image_url=dict(url=PIC_BEIJING)),
+                 dict(type='image_url', image_url=dict(url=PIC_CHONGQING))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images: buildings not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images second: buildings not in ' +
+                    response.text + '\n')
+
+
+def MiniCPM_vl_testcase(config, pipe, file):
+    # Chat with multiple images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url',
+                      image_url=dict(max_slice_nums=9, url=PIC_REDPANDA)),
+                 dict(type='image_url',
+                      image_url=dict(max_slice_nums=9, url=PIC_PANDA))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: multiple images: panda not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'panda' in response.text.lower() or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: multiple images second: panda not in ' +
+                    response.text + '\n')
+
+    # In-context few-shot learning
+    EXAMPLE1 = 'https://github.com/user-attachments/assets/405d9147-95f6-4f78-8879-606a0aed6707'  # noqa E251,E501
+    EXAMPLE2 = 'https://github.com/user-attachments/assets/9f2c6ed9-2aa5-4189-9c4f-0b9753024ba1'  # noqa E251,E501
+    EXAMPLE3 = 'https://github.com/user-attachments/assets/f335b507-1957-4c22-84ae-ed69ff79df38'  # noqa E251,E501
+    question = 'production date'
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text=question),
+                 dict(type='image_url', image_url=dict(url=EXAMPLE1)),
+             ]),
+        dict(role='assistant', content='2021.08.29'),
+        dict(role='user',
+             content=[
+                 dict(type='text', text=question),
+                 dict(type='image_url', image_url=dict(url=EXAMPLE2)),
+             ]),
+        dict(role='assistant', content='1999.05.15'),
+        dict(role='user',
+             content=[
+                 dict(type='text', text=question),
+                 dict(type='image_url', image_url=dict(url=EXAMPLE3)),
+             ])
+    ]
+    response = pipe(messages)
+    result = '2021' in response.text.lower() or '14' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: in context learning: 2021 or 14 not in ' +
+                    response.text + '\n')
+
+    # Chat with video
+    MAX_NUM_FRAMES = 64  # if cuda OOM set a smaller number
+
+    def encode_video(video_path):
+
+        def uniform_sample(length, n):
+            gap = len(length) / n
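+            # take the middle element of each of the n equal-sized chunks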
+            idxs = [int(i * gap + gap / 2) for i in range(n)]
+            return [length[i] for i in idxs]
+
+        vr = VideoReader(video_path, ctx=cpu(0))
+        sample_fps = round(vr.get_avg_fps() / 1)  # FPS
+        frame_idx = [i for i in range(0, len(vr), sample_fps)]
+        if len(frame_idx) > MAX_NUM_FRAMES:
+            frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES)
+        frames = vr.get_batch(frame_idx).asnumpy()
+        frames = [Image.fromarray(v.astype('uint8')) for v in frames]
+        print('num frames:', len(frames))
+        return frames
+
+    resource_path = config.get('resource_path')
+    video_path = resource_path + '/red-panda.mp4'
+    frames = encode_video(video_path)
+    question = 'Describe the video'
+
+    content = [dict(type='text', text=question)]
+    for frame in frames:
+        content.append(
+            dict(type='image_url',
+                 image_url=dict(
+                     use_image_id=False,
+                     max_slice_nums=2,
+                     url=f'data:image/jpeg;base64,{encode_image_base64(frame)}'
+                 )))
+
+    messages = [dict(role='user', content=content)]
+    response = pipe(messages)
+    result = 'red panda' in response.text.lower(
+    ) or '熊猫' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: video example: panda not in ' + response.text +
+                    '\n')
+
+
+def Qwen_vl_testcase(config, pipe, file):
+    # multi-image multi-round conversation, combined images
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url', image_url=dict(url=PIC_BEIJING)),
+                 dict(type='image_url', image_url=dict(url=PIC_CHONGQING))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images: buildings not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: combined images second: buildings not in ' +
+                    response.text + '\n')
+
+    # image resolution for performance boost
+    min_pixels = 64 * 28 * 28
+    max_pixels = 64 * 28 * 28
+    messages = [
+        dict(role='user',
+             content=[
+                 dict(type='text', text='Describe the two images in detail.'),
+                 dict(type='image_url',
+                      image_url=dict(min_pixels=min_pixels,
+                                     max_pixels=max_pixels,
+                                     url=PIC_BEIJING)),
+                 dict(type='image_url',
+                      image_url=dict(min_pixels=min_pixels,
+                                     max_pixels=max_pixels,
+                                     url=PIC_CHONGQING))
+             ])
+    ]
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: performance boost: buildings not in ' +
+                    response.text + '\n')
+
+    messages.append(dict(role='assistant', content=response.text))
+    messages.append(dict(role='user', content=DESC))
+    response = pipe(messages)
+    result = 'buildings' in response.text.lower(
+    ) or '楼' in response.text.lower() or 'skyline' in response.text.lower(
+    ) or 'cityscape' in response.text.lower()
+    file.writelines('result:' + str(result) +
+                    ', reason: performance boost second: buildings not in ' +
+                    response.text + '\n')
+
+
 def assert_pipeline_vl_chat_log(config, model_case, worker_id):
     log_path = config.get('log_path')
diff --git a/autotest/utils/run_restful_chat.py b/autotest/utils/run_restful_chat.py
index 77af1975b..082a61bcd 100644
--- a/autotest/utils/run_restful_chat.py
+++ b/autotest/utils/run_restful_chat.py
@@ -282,6 +282,7 @@ def get_model(url):
 
 
 PIC = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/tests/data/tiger.jpeg'  # noqa E501
+PIC2 = 'https://raw.githubusercontent.com/open-mmlab/mmdeploy/main/demo/resources/human-pose.jpg'  # noqa E501
 
 
 def run_vl_testcase(config, port: int = DEFAULT_PORT):
@@ -307,6 +308,11 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT):
             'image_url': {
                 'url': PIC,
             },
+        }, {
+            'type': 'image_url',
+            'image_url': {
+                'url': PIC2,
+            },
         }],
     }]
@@ -315,8 +321,6 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT):
                                                      temperature=0.8,
                                                      top_p=0.8)
     file.writelines(str(response).lower() + '\n')
-    assert 'tiger' in str(response).lower() or '虎' in str(
-        response).lower(), response
 
     api_client = APIClient(http_url)
     model_name = api_client.available_models[0]
@@ -324,7 +328,12 @@ def run_vl_testcase(config, port: int = DEFAULT_PORT):
                                                messages=prompt_messages):
         continue
     file.writelines(str(item) + '\n')
-    assert 'tiger' in str(item).lower() or '虎' in str(item).lower(), item
 
     allure.attach.file(restful_log,
                        attachment_type=allure.attachment_type.TEXT)
+
+    assert 'tiger' in str(response).lower() or '虎' in str(
+        response).lower() or 'ski' in str(response).lower() or '滑雪' in str(
+            response).lower(), response
+    assert 'tiger' in str(item).lower() or '虎' in str(item).lower(
+    ) or 'ski' in str(item).lower() or '滑雪' in str(item).lower(), item
diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md
index da5224125..cd43e79c9 100644
--- a/docs/en/supported_models/supported_models.md
+++ b/docs/en/supported_models/supported_models.md
@@ -19,7 +19,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
 | Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
 | Qwen2 | 0.5B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Mistral | 7B | LLM | Yes | Yes | Yes | Yes |
+| Mistral | 7B | LLM | Yes | Yes | Yes | No |
 | Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes |
 | Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
 | DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
@@ -36,7 +36,7 @@ The following tables detail the models supported by LMDeploy's TurboMind engine
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
 | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
-| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | NO |
+| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No |
 
 "-" means not verified yet.
diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md
index 502e91b6d..7ec36d235 100644
--- a/docs/zh_cn/supported_models/supported_models.md
+++ b/docs/zh_cn/supported_models/supported_models.md
@@ -19,7 +19,7 @@
 | Qwen | 1.8B - 72B | LLM | Yes | Yes | Yes | Yes |
 | Qwen1.5 | 1.8B - 110B | LLM | Yes | Yes | Yes | Yes |
 | Qwen2 | 0.5B - 72B | LLM | Yes | Yes | Yes | Yes |
-| Mistral | 7B | LLM | Yes | Yes | Yes | Yes |
+| Mistral | 7B | LLM | Yes | Yes | Yes | No |
 | Mixtral | 8x7B, 8x22B | LLM | Yes | Yes | Yes | Yes |
 | Qwen-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
 | DeepSeek-VL | 7B | MLLM | Yes | Yes | Yes | Yes |
@@ -36,7 +36,7 @@
 | MiniGeminiLlama | 7B | MLLM | Yes | - | - | Yes |
 | GLM4 | 9B | LLM | Yes | Yes | Yes | Yes |
 | CodeGeeX4 | 9B | LLM | Yes | Yes | Yes | - |
-| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | NO |
+| Molmo | 7B-D,72B | MLLM | Yes | Yes | Yes | No |
 
 “-” 表示还没有验证。