Commit
merge main
zhulin1 committed Mar 7, 2024
2 parents 50ca504 + e710c4c commit 60d9bfd
Showing 25 changed files with 780 additions and 298 deletions.
106 changes: 78 additions & 28 deletions .github/workflows/daily_ete_test.yml
@@ -2,8 +2,29 @@ name: daily_ete_test

on:
workflow_dispatch:
inputs:
repo_org:
required: false
description: 'Tested repository organization name. Default is InternLM'
type: string
default: 'InternLM/lmdeploy'
repo_ref:
required: false
description: 'Set branch or tag or commit id. Default is "main"'
type: string
default: 'main'
backend:
required: true
description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "["turbomind", "pytorch"]"'
type: string
default: "['turbomind', 'pytorch']"
model:
required: true
description: 'Set testcase module filter: chat, restful, pipeline, quantization. Default contains all models'
type: string
default: "['quantization','convert','pipeline','restful','chat','interface-pipeline']"
schedule:
- cron: '00 18 * * *'
- cron: '00 21 * * *'

env:
HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
@@ -13,7 +34,7 @@ env:
jobs:
test_functions:
runs-on: [self-hosted, linux-a100]
timeout-minutes: 420
timeout-minutes: 240
env:
REPORT_DIR: /nvme/qa_test_models/test-reports
container:
@@ -23,6 +44,7 @@ jobs:
- /nvme/github-actions/pip-cache:/root/.cache/pip
- /nvme/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/bigdisk/qa_test_models:/mnt/bigdisk/qa_test_models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Setup systems
@@ -33,7 +55,10 @@ jobs:
dpkg -i /root/packages/allure_2.24.1-1_all.deb
rm -rf /var/lib/apt/lists/*
- name: Clone repository
uses: actions/checkout@v2
uses: actions/checkout@v3
with:
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
ref: ${{github.event.inputs.repo_ref || 'main'}}
- name: Install pytorch
run: |
python3 -m pip cache dir
@@ -68,64 +93,89 @@ jobs:
run: |
python3 -m pip list
lmdeploy check_env
rm -rf allure-results
- name: Test lmdeploy - quantization w4a16
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'quantization'))
run: |
pytest autotest/tools/quantization/test_quantization_w4a16.py -m 'not pr_test' -n 8 --alluredir=allure-results --clean-alluredir
- name: Test lmdeploy - quantization kv int8
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'quantization'))
run: |
pytest autotest/tools/quantization/test_quantization_kvint8.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - quantization w8a8
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'quantization'))
run: |
pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - quantization kv int8 and w4a16
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'quantization'))
run: |
pytest autotest/tools/quantization/test_quantization_kvint8_w4a16.py -n 8 --alluredir=allure-results
- name: Test lmdeploy - convert
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'convert'))
run: |
pytest autotest/tools/convert -m 'not pr_test' -n 6 --alluredir=allure-results --dist loadgroup
- name: Test lmdeploy - interface turbomind case
pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=allure-results
- name: Test lmdeploy - chat workspace
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat'))
timeout-minutes: 20
run: |
pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - pipeline turbomind
continue-on-error: true
timeout-minutes: 45
run: pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - pipeline torch
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - chat hf turbomind
continue-on-error: true
timeout-minutes: 75
run: pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - restful turbomind
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat'))
timeout-minutes: 20
run: |
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - chat hf torch
continue-on-error: true
timeout-minutes: 60
run: pytest autotest/tools/restful/test_restful_chat_turbomind.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - restful torch
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'chat'))
timeout-minutes: 20
run: |
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - pipeline turbomind
continue-on-error: true
timeout-minutes: 80
run: pytest autotest/tools/restful/test_restful_chat_pytorch.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - chat workspace
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
timeout-minutes: 25
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful turbomind
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful'))
timeout-minutes: 30
run: |
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - chat hf turbomind
pytest autotest/tools/restful/test_restful_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/restful/test_restful_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - interface pipeline turbomind case
continue-on-error: true
timeout-minutes: 45
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'interface-pipeline'))
timeout-minutes: 20
run: |
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - chat hf torch
pytest autotest/interface/pipeline/test_pipeline_turbomind_func.py -m 'not pr_test' --alluredir=allure-results
- name: Test lmdeploy - pipeline torch
continue-on-error: true
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline'))
timeout-minutes: 25
run: |
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - restful torch
continue-on-error: true
timeout-minutes: 60
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'restful'))
timeout-minutes: 40
run: |
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'not pr_test' -n 4 --alluredir=allure-results
pytest autotest/tools/restful/test_restful_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=allure-results
pytest autotest/tools/restful/test_restful_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=allure-results
- name: Test lmdeploy - rerun all fail cases
timeout-minutes: 60
timeout-minutes: 30
run: |
pytest autotest --lf --alluredir=allure-results
- name: Generate reports
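For reference, the workflow_dispatch inputs added above (repo_org, repo_ref, backend, model) can be driven programmatically as well as from the Actions UI. The following is a minimal sketch against GitHub's standard workflow-dispatch REST endpoint; the token handling and the chosen input values are illustrative and not part of this commit:

# Sketch: trigger daily_ete_test.yml with the new dispatch inputs.
# Assumes GITHUB_TOKEN is a PAT with workflow/actions write access (placeholder).
import os
import requests

token = os.environ["GITHUB_TOKEN"]
repo = "InternLM/lmdeploy"  # matches the repo_org default in the workflow
url = f"https://api.github.com/repos/{repo}/actions/workflows/daily_ete_test.yml/dispatches"

payload = {
    "ref": "main",  # branch that carries the workflow file
    "inputs": {
        "repo_org": "InternLM/lmdeploy",
        "repo_ref": "main",
        "backend": "['turbomind']",       # same list-literal format as the workflow defaults
        "model": "['chat','restful']",    # run only the chat and restful modules
    },
}

resp = requests.post(
    url,
    json=payload,
    headers={
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
    },
    timeout=30,
)
resp.raise_for_status()  # a successful dispatch returns 204 No Content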
3 changes: 2 additions & 1 deletion .github/workflows/pr_ete_test.yml
@@ -34,6 +34,7 @@ jobs:
- /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
- /nvme/share_data/github-actions/packages:/root/packages
- /nvme/qa_test_models:/nvme/qa_test_models
- /mnt/bigdisk/qa_test_models:/mnt/bigdisk/qa_test_models
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
steps:
- name: Setup systems
@@ -81,7 +82,7 @@ jobs:
lmdeploy check_env
- name: Test lmdeploy
timeout-minutes: 120
run: CUDA_VISIBLE_DEVICES=5,6 pytest autotest -m pr_test --alluredir=allure-results --clean-alluredir
run: CUDA_VISIBLE_DEVICES=5,6 pytest autotest -m pr_test -v -s --alluredir=allure-results --clean-alluredir
- name: Generate reports
if: always()
run: |
112 changes: 57 additions & 55 deletions autotest/config.yaml
@@ -1,4 +1,4 @@
model_path: /nvme/qa_test_models
model_path: /mnt/bigdisk/qa_test_models
dst_path: /nvme/qa_test_models/autotest_model
log_path: /nvme/qa_test_models/autotest_model/log
dataset_path: /nvme/qa_test_models/...dataset
@@ -13,67 +13,69 @@ tp_config:


turbomind_model:
- llama-2-7b-chat
- internlm2-chat-1_8b
- internlm-chat-7b
- internlm-chat-20b
- internlm2-chat-7b
- internlm2-chat-20b
- Qwen-7B-Chat
- Qwen-14B-Chat
- llama2-chat-7b-w4
- Baichuan2-7B-Chat
- Yi-6B-Chat
- internlm2-1_8b
- internlm2-20b
- CodeLlama-7b-Instruct-hf
- meta-llama/Llama-2-7b-chat
- internlm/internlm2-chat-1_8b
- internlm/internlm-chat-7b
- internlm/internlm-chat-20b
- internlm/internlm2-chat-7b
- internlm/internlm2-chat-20b
- internlm/internlm2-chat-7b-4bits
- internlm/internlm2-chat-20b-4bits
- Qwen/Qwen-7B-Chat
- Qwen/Qwen-14B-Chat
- lmdeploy/llama2-chat-7b-w4
- baichuan-inc/Baichuan2-7B-Chat
- 01-ai/Yi-6B-Chat
- internlm/internlm2-1_8b
- internlm/internlm2-20b
- codellama/CodeLlama-7b-Instruct-hf


pytorch_model:
- llama-2-7b-chat
- internlm-chat-7b
- internlm-chat-20b
- internlm2-chat-7b
- internlm2-chat-20b
- Baichuan2-7B-Chat
- Baichuan2-13B-Chat
- chatglm2-6b
- falcon-7b
- Yi-6B-Chat
- internlm2-1_8b
- internlm2-20b
- Qwen1.5-7B-Chat
- Mistral-7B-Instruct-v0.1
- Mixtral-8x7B-Instruct-v0.1
- gemma-7b-it
- deepseek-moe-16b-chat
- meta-llama/Llama-2-7b-chat
- internlm/internlm-chat-7b
- internlm/internlm-chat-20b
- internlm/internlm2-chat-7b
- internlm/internlm2-chat-20b
- baichuan-inc/Baichuan2-7B-Chat
- baichuan-inc/Baichuan2-13B-Chat
- THUDM/chatglm2-6b
- tiiuae/falcon-7b
- 01-ai/Yi-6B-Chat
- internlm/internlm2-1_8b
- internlm/internlm2-20b
- Qwen/Qwen1.5-7B-Chat
- mistralai/Mistral-7B-Instruct-v0.1
- mistralai/Mixtral-8x7B-Instruct-v0.1
- google/gemma-7b-it
- deepseek-ai/deepseek-moe-16b-chat


quatization_case_config:
w4a16:
- llama-2-7b-chat
- internlm-chat-20b
- Qwen-7B-Chat
- Qwen-14B-Chat
- internlm2-chat-20b
- Baichuan2-7B-Chat
- internlm2-20b
- meta-llama/Llama-2-7b-chat
- internlm/internlm-chat-20b
- Qwen/Qwen-7B-Chat
- Qwen/Qwen-14B-Chat
- internlm/internlm2-chat-20b
- baichuan-inc/Baichuan2-7B-Chat
- internlm/internlm2-20b
kvint8: # more models are supported kvint8 quantization, but the chat response are not good, already removed
- llama-2-7b-chat
- internlm-chat-20b
- internlm2-chat-20b
- meta-llama/Llama-2-7b-chat
- internlm/internlm-chat-20b
- internlm/internlm2-chat-20b
kvint8_w4a16:
- llama-2-7b-chat
- internlm-chat-20b
- internlm2-chat-20b
- internlm2-20b
- Qwen-7B-Chat
- Qwen-14B-Chat
- Baichuan2-7B-Chat
- meta-llama/Llama-2-7b-chat
- internlm/internlm-chat-20b
- internlm/internlm2-chat-20b
- internlm/internlm2-20b
- Qwen/Qwen-7B-Chat
- Qwen/Qwen-14B-Chat
- baichuan-inc/Baichuan2-7B-Chat
w8a8:
- llama-2-7b-chat
- internlm-chat-20b
- internlm2-chat-20b
- internlm2-chat-7b
- Yi-6B-Chat
- internlm2-20b
- meta-llama/Llama-2-7b-chat
- internlm/internlm-chat-20b
- internlm/internlm2-chat-20b
- internlm/internlm2-chat-7b
- 01-ai/Yi-6B-Chat
- internlm/internlm2-20b
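
The model lists above switch from bare directory names to Hugging Face style org/name identifiers, and model_path moves to /mnt/bigdisk/qa_test_models, so models are now expected on disk under <model_path>/<org>/<name>. Below is a small sketch of how the config is consumed, mirroring the path join used in the test file that follows; the file name and keys come from autotest/config.yaml:

# Sketch: resolve an org-prefixed model id against the updated model_path.
import yaml

with open("autotest/config.yaml") as f:
    config = yaml.safe_load(f)

model = "internlm/internlm2-chat-20b"  # one entry from turbomind_model
model_path = "/".join([config.get("model_path"), model])
print(model_path)
# -> /mnt/bigdisk/qa_test_models/internlm/internlm2-chat-20b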
16 changes: 8 additions & 8 deletions autotest/interface/pipeline/test_pipeline_turbomind_func.py
@@ -10,15 +10,15 @@
@pytest.mark.flaky(reruns=0)
class TestPipelineTurbomindFuncRegression:

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_backend_config_tp(self, config, model):
with pytest.raises(AssertionError, match='tp should be 2\\^n'):
model_path = '/'.join([config.get('model_path'), model])
backend_config = TurbomindEngineConfig(tp=100)
pipe = pipeline(model_path, backend_config=backend_config)
del pipe

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_backend_config_session_len(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
backend_config = TurbomindEngineConfig(session_len=10)
@@ -29,7 +29,7 @@ def test_backend_config_session_len(self, config, model):
assert response[i].finish_reason == 'length', str(response[i])
assert response[i].generate_token_len == 0, str(response[i])

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_gen_config_test(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
pipe = pipeline(model_path)
@@ -111,7 +111,7 @@ def test_gen_config_test(self, config, model):

del pipe

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def future_test_backend_config_cache_max_entry_count(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
backend_config = TurbomindEngineConfig(cache_max_entry_count=-1)
@@ -122,7 +122,7 @@ def future_test_backend_config_cache_max_entry_count(self, config, model):
with assume:
assert response[i].finish_reason == 'length', str(response[i])

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_backend_config_max_batch_size2(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
backend_config = TurbomindEngineConfig(max_batch_size=-1)
@@ -140,7 +140,7 @@ def test_backend_config_max_batch_size2(self, config, model):
with assume:
assert response[i].text == '', str(response[i])

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_pipeline_batch_infer(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
pipe = pipeline(model_path)
@@ -160,7 +160,7 @@ def test_pipeline_batch_infer(self, config, model):
with assume:
assert response[i].session_id == i

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_pipeline_stream_infer(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
pipe = pipeline(model_path)
@@ -207,7 +207,7 @@ def test_pipeline_stream_infer(self, config, model):
with assume:
assert outputs_list[-1].finish_reason is not None, str(output)

@pytest.mark.parametrize('model', ['internlm2-chat-20b'])
@pytest.mark.parametrize('model', ['internlm/internlm2-chat-20b'])
def test_pipeline_stream_infer2(self, config, model):
model_path = '/'.join([config.get('model_path'), model])
pipe = pipeline(model_path)
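For context, here is a minimal sketch of the pipeline API these regression tests exercise, using one of the renamed model ids; the local path follows the config layout above and the generation parameters are illustrative:

# Sketch of lmdeploy pipeline usage as exercised by the tests above.
from lmdeploy import GenerationConfig, TurbomindEngineConfig, pipeline

model_path = "/mnt/bigdisk/qa_test_models/internlm/internlm2-chat-20b"
backend_config = TurbomindEngineConfig(tp=2)  # tp must be a power of 2
pipe = pipeline(model_path, backend_config=backend_config)

prompts = ["Hi, please introduce yourself", "Shanghai is"]
responses = pipe(prompts, gen_config=GenerationConfig(max_new_tokens=128))

for i, resp in enumerate(responses):
    # Fields asserted by the tests: text, session_id, finish_reason, generate_token_len
    print(i, resp.session_id, resp.finish_reason, resp.text[:60])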
(The diffs for the remaining changed files in this commit are not shown here.)
