[CI] add more testcase for mllm models (#2791)
* update

* update

* update

* update

* update

* update

* update

* update

* update
zhulinJulia24 authored Nov 29, 2024
1 parent 01f82e0 commit 0b6dd1f
Showing 14 changed files with 401 additions and 30 deletions.
16 changes: 15 additions & 1 deletion autotest/config-v100.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -100,12 +101,22 @@ turbomind_quatization:
     - meta-llama/Meta-Llama-3-8B-Instruct
     - internlm/internlm-xcomposer2d5-7b
     - OpenGVLab/Mini-InternVL-Chat-2B-V1-5
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
     - THUDM/glm-4-9b-chat
     - deepseek-ai/deepseek-coder-1.3b-instruct
     - codellama/CodeLlama-7b-Instruct-hf
   gptq:
     - internlm/internlm2_5-7b-chat
+  no_kvint4:
+    - openbmb/MiniCPM-V-2_6
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-AWQ
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2.5-0.5B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
 
@@ -120,6 +131,10 @@ pytorch_quatization:
   no_kvint4:
     - OpenGVLab/InternVL2-1B
     - OpenGVLab/InternVL2-4B
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -128,7 +143,6 @@ pytorch_quatization:
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
 
-
 longtext_model:
   - meta-llama/Meta-Llama-3-1-8B-Instruct
   - meta-llama/Meta-Llama-3-8B-Instruct
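The two new `no_kvint4` blocks are the config-side replacement for the hard-coded Qwen2 skips removed from the test files below. A minimal sketch of how a harness could consume these lists, assuming the YAML schema above; the helper name and loading logic are illustrative, not lmdeploy's actual API:

```python
# Hypothetical helper: decide whether a model supports a kv-cache
# quantization policy based on the no_kvint4/no_kvint8 lists above.
# The '*_quatization' key keeps the repo's own spelling as-is.
import yaml


def supports_kv_quant(config_path: str, engine: str, model: str,
                      quant_policy: int) -> bool:
    with open(config_path) as f:
        cfg = yaml.safe_load(f)
    section = cfg.get(f'{engine}_quatization', {})
    key = {4: 'no_kvint4', 8: 'no_kvint8'}.get(quant_policy)
    # Policies other than 4/8 are not gated by these lists.
    return key is None or model not in section.get(key, [])


# After this commit, on V100:
# supports_kv_quant('autotest/config-v100.yaml', 'turbomind',
#                   'Qwen/Qwen2-7B-Instruct', 4)  -> False
# supports_kv_quant('autotest/config-v100.yaml', 'turbomind',
#                   'Qwen/Qwen2-7B-Instruct', 8)  -> True
```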
20 changes: 18 additions & 2 deletions autotest/config.yaml
@@ -1,4 +1,5 @@
 model_path: /nvme/qa_test_models
+resource_path: /nvme/qa_test_models/resource
 dst_path: /nvme/qa_test_models/autotest_model
 log_path: /nvme/qa_test_models/autotest_model/log
 benchmark_path: /nvme/qa_test_models/benchmark-reports
@@ -18,6 +19,7 @@ tp_config:
   Qwen2-7B-Instruct-GPTQ-Int4: 2
   InternVL2-40B: 2
   MiniCPM-V-2_6: 2
+  Qwen2.5-72B-Instruct: 4
 
 turbomind_chat_model:
   - meta-llama/Llama-3.2-1B-Instruct
@@ -164,14 +166,24 @@ pytorch_base_model:
 
 turbomind_quatization:
   no_awq:
     - Qwen/Qwen1.5-MoE-A2.7B-Chat
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - mistralai/Mistral-7B-Instruct-v0.3
     - mistralai/Mistral-Nemo-Instruct-2407
     - deepseek-ai/deepseek-coder-1.3b-instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - codellama/CodeLlama-7b-Instruct-hf
   gptq:
     - internlm/internlm2_5-7b-chat
+  no_kvint4:
+    - openbmb/MiniCPM-V-2_6
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-AWQ
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2.5-0.5B-Instruct
+    - Qwen/Qwen2.5-7B-Instruct
+    - Qwen/Qwen2-7B-Instruct-GPTQ-Int4
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
 
@@ -203,6 +215,10 @@ pytorch_quatization:
   no_kvint4:
     - OpenGVLab/InternVL2-1B
     - OpenGVLab/InternVL2-4B
+    - Qwen/Qwen2-7B-Instruct
+    - Qwen/Qwen2-1.5B-Instruct
+    - Qwen/Qwen2-VL-2B-Instruct
+    - Qwen/Qwen2-VL-7B-Instruct
     - deepseek-ai/DeepSeek-V2-Lite-Chat
     - microsoft/Phi-3-mini-4k-instruct
     - microsoft/Phi-3-vision-128k-instruct
@@ -211,7 +227,6 @@ pytorch_quatization:
   no_kvint8:
     - deepseek-ai/DeepSeek-V2-Lite-Chat
 
-
 longtext_model:
   - meta-llama/Meta-Llama-3-1-8B-Instruct
   - meta-llama/Meta-Llama-3-8B-Instruct
@@ -227,7 +242,8 @@ benchmark_model:
   - internlm/internlm2_5-7b-chat
   - internlm/internlm2_5-20b-chat
   - THUDM/glm-4-9b-chat
-  - Qwen/Qwen2-7B-Instruct
+  - Qwen/Qwen2.5-7B-Instruct
+  - Qwen/Qwen2.5-72B-Instruct
   - mistralai/Mistral-7B-Instruct-v0.3
   - mistralai/Mixtral-8x7B-Instruct-v0.1
   - deepseek-ai/DeepSeek-V2-Lite-Chat
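Besides the exclusion lists, `config.yaml` also gains a `tp_config` entry pinning the newly benchmarked Qwen2.5-72B-Instruct to 4 GPUs. A hypothetical lookup, assuming `tp_config` keys are bare model names and unlisted models default to a single GPU:

```python
# Illustrative tensor-parallel lookup; the function name and the
# default of 1 are assumptions, not the repo's actual helper.
import yaml


def get_tp_num(config_path: str, model: str) -> int:
    with open(config_path) as f:
        tp_config = yaml.safe_load(f).get('tp_config', {})
    # 'Qwen/Qwen2.5-72B-Instruct' -> 'Qwen2.5-72B-Instruct'
    return tp_config.get(model.split('/')[-1], 1)


# get_tp_num('autotest/config.yaml', 'Qwen/Qwen2.5-72B-Instruct') -> 4
```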
2 changes: 0 additions & 2 deletions autotest/tools/pipeline/test_pipeline_chat_pytorch_llm.py
@@ -67,8 +67,6 @@ def test_pipeline_chat_pytorch_tp2(config, common_case_config, model,
                              exclude_dup=True))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
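With the guard gone, kv-int4 support is decided once in `get_torch_model_list(..., quant_policy=4)` against the `no_kvint4` lists, instead of per-test string checks. A rough, abbreviated sketch of the resulting test shape; the import path and the elided body are assumptions based on the names visible in the diff:

```python
import os
from multiprocessing import get_context

import pytest

# Assumed module path for the helpers named in the diff.
from utils.config_utils import get_cuda_id_by_workerid, get_torch_model_list


@pytest.mark.parametrize(
    'model', get_torch_model_list(tp_num=1, quant_policy=4, exclude_dup=True))
def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                  worker_id):
    # config/common_case_config come from the suite's conftest fixtures.
    # No per-model Qwen2 guard: no_kvint4 in the config decides instead.
    if 'gw' in worker_id:  # pytest-xdist worker id, e.g. 'gw0'
        os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
    spawn_context = get_context('spawn')
    # ... spawn the pipeline-chat subprocess and assert on its results ...
```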
4 changes: 0 additions & 4 deletions autotest/tools/pipeline/test_pipeline_chat_pytorch_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                              quant_policy=4,
                              model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                              quant_policy=4,
                              model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)
2 changes: 0 additions & 2 deletions autotest/tools/pipeline/test_pipeline_chat_turbomind_llm.py
@@ -56,8 +56,6 @@ def test_pipeline_chat_tp2(config, common_case_config, model, worker_id):
 @pytest.mark.parametrize('model', get_all_model_list(tp_num=1, quant_policy=4))
 def test_pipeline_chat_kvint4_tp1(config, common_case_config, model,
                                   worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
4 changes: 0 additions & 4 deletions autotest/tools/pipeline/test_pipeline_chat_turbomind_mllm.py
@@ -50,8 +50,6 @@ def test_pipeline_chat_tp2(config, model, worker_id):
                              quant_policy=4,
                              model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id)
     spawn_context = get_context('spawn')
@@ -70,8 +68,6 @@ def test_pipeline_chat_kvint4_tp1(config, model, worker_id):
                              quant_policy=4,
                              model_type='vl_model'))
 def test_pipeline_chat_kvint4_tp2(config, model, worker_id):
-    if 'Qwen2' in model:
-        return  # kvint4 for qwen2 is not support
     if 'gw' in worker_id:
         os.environ['CUDA_VISIBLE_DEVICES'] = get_cuda_id_by_workerid(worker_id,
                                                                      tp_num=2)
3 changes: 1 addition & 2 deletions autotest/tools/restful/test_restful_chat_hf_pytorch_llm.py
@@ -67,8 +67,7 @@ def getKvintModelList(tp_num, quant_policy):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, exclude_dup=True)
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, exclude_dup=True)]
 
 
 @pytest.mark.order(7)
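All four restful variants make the same move: the trailing `if 'qwen2' not in item.lower() or quant_policy == 8` filter is dropped from the list comprehension, so the kv-int model set again comes solely from the config lists. A reconstruction of the function after the change, based on the visible hunks; fields outside the shown context are assumed:

```python
from utils.config_utils import get_torch_model_list  # assumed import path


def getKvintModelList(tp_num, quant_policy):
    # 'model' and 'cuda_prefix' are assumed from the neighboring hunks;
    # only 'tp_num', 'extra', and the comprehension are visible here.
    return [{
        'model': item,
        'cuda_prefix': None,
        'tp_num': tp_num,
        'extra': f'--quant-policy {quant_policy}'
    } for item in get_torch_model_list(
        tp_num, quant_policy=quant_policy, exclude_dup=True)]
```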
3 changes: 1 addition & 2 deletions autotest/tools/restful/test_restful_chat_hf_pytorch_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_torch_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)
3 changes: 1 addition & 2 deletions autotest/tools/restful/test_restful_chat_hf_turbomind_llm.py
@@ -66,8 +66,7 @@ def getKvintModelList(tp_num, quant_policy):
         'cuda_prefix': None,
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
-    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+    } for item in get_all_model_list(tp_num, quant_policy=quant_policy)]
 
 
 @pytest.mark.order(7)
3 changes: 1 addition & 2 deletions autotest/tools/restful/test_restful_chat_hf_turbomind_mllm.py
@@ -60,8 +60,7 @@ def getKvintModelList(tp_num, quant_policy: int = None):
         'tp_num': tp_num,
         'extra': f'--quant-policy {quant_policy}'
     } for item in get_all_model_list(
-        tp_num, quant_policy=quant_policy, model_type='vl_model')
-        if 'qwen2' not in item.lower() or quant_policy == 8]
+        tp_num, quant_policy=quant_policy, model_type='vl_model')]
 
 
 @pytest.mark.order(7)