# daily_ete_test (workflow run reference: #345)
# NOTE(review): the original paste included GitHub web-UI banner text
# ("This file contains bidirectional Unicode text ...") which is not part of
# the workflow; it has been replaced by this comment header.
name: daily_ete_test

on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend testcase filter: turbomind or pytorch or turbomind, pytorch. Default is "["turbomind", "pytorch"]"'
        type: string
        default: "['turbomind', 'pytorch', 'turbomind-vl']"
      model:
        required: true
        description: 'Set testcase module filter: chat, restful, pipeline, quantization. Default contains all models'
        type: string
        default: "['quantization','convert','pipeline','restful','chat','local_case']"
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      dependency_pkgs:
        required: true
        description: 'Dependency packages, you can also set a specific version'
        type: string
        default: 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath decord auto_gptq qwen_vl_utils mmengine-lite==0.10.5'
      regression_func:
        required: true
        description: 'regression functions'
        type: string
        default: "['tools','restful','pipeline','benchmark','evaluation']"
  schedule:
    # 16:00 UTC, Sunday through Thursday (cron day-of-week 0 = Sunday).
    - cron: '00 16 * * 0-4'

env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  # Scheduled runs have no inputs, so fall back to the same package list as the
  # workflow_dispatch default above.
  dependency_pkgs: ${{inputs.dependency_pkgs || 'packaging transformers_stream_generator transformers datasets matplotlib openai attrdict timm modelscope jmespath decord auto_gptq qwen_vl_utils mmengine-lite==0.10.5'}}
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  REPORT_DIR: /nvme/qa_test_models/test-reports/${{ github.run_id }}
  COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
  # pytest re-run options: on re-run attempts of a scheduled run, replay only the
  # previously failed cases (--lf) and run nothing if none failed (--lfnf none);
  # otherwise just prioritize last failures.
  FAIL_CONFIG: ${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}
jobs:
  # Build the lmdeploy manylinux wheel that the downstream test jobs download
  # and install. Skipped in offline mode (a pre-built wheel is used instead).
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda11.8
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it: no TTY is available on CI runners
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
test_tools: | |
needs: linux-build | |
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}} | |
runs-on: [self-hosted, linux-a100] | |
timeout-minutes: 450 | |
env: | |
PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA | |
MODELSCOPE_CACHE: /root/modelscope_hub | |
MODELSCOPE_MODULES_CACHE: /root/modelscope_modules | |
container: | |
image: openmmlab/lmdeploy:latest-cu11 | |
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | |
volumes: | |
- /nvme/github-actions/pip-cache:/root/.cache/pip | |
- /nvme/github-actions/packages:/root/packages | |
- /nvme/github-actions/modelscope_hub:/root/modelscope_hub | |
- /nvme/github-actions/modelscope_modules:/root/modelscope_modules | |
- /nvme/github-actions/resources/lora:/root/lora | |
- /nvme/qa_test_models:/nvme/qa_test_models | |
- /mnt/shared:/mnt/shared | |
- /nvme/qa_test_models/lmdeploy/autotest:/local_case | |
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
steps: | |
- name: Clone repository | |
uses: actions/checkout@v2 | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Copy repository - offline | |
if: ${{inputs.offline_mode}} | |
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. . | |
- name: Download Artifacts | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
uses: actions/download-artifact@v4 | |
with: | |
name: my-artifact-${{ github.run_id }}-py310 | |
- name: Install lmdeploy - dependency | |
run: | | |
# manually install flash attn | |
# the install packeage from. https://github.com/Dao-AILab/flash-attention/releases | |
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl | |
python3 -m pip install -e /root/packages/AutoAWQ_kernels | |
python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps | |
python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps | |
python3 -m pip install ${{env.dependency_pkgs}} | |
- name: Install lmdeploy | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
run: | | |
python3 -m pip install lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
pip install /nvme/qa_test_models/offline_pkg/DeepSeek-VL --no-deps | |
- name: Install lmdeploy - offline | |
if: ${{inputs.offline_mode}} | |
run: | | |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
pip install /nvme/qa_test_models/offline_pkg/DeepSeek-VL --no-deps | |
- name: Check env | |
run: | | |
pip uninstall -y nvidia-nccl-cu11 | |
python3 -m pip list | |
lmdeploy check_env | |
cp -r /root/lora . | |
rm -rf allure-results | |
# remove tmp log in testcase | |
rm -rf /nvme/qa_test_models/autotest_model/log/* | |
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p | |
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest | |
- name: Test lmdeploy - quantization w4a16 | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'quantization')) | |
run: | | |
pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - quantization w8a8 | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'quantization')) | |
run: | | |
pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - convert | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'convert')) | |
run: | | |
pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - chat workspace | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat')) | |
run: | | |
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - chat hf turbomind | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'chat')) | |
run: | | |
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/chat/test_command_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - chat hf torch | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'chat')) | |
run: | | |
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/chat/test_command_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - pipeline turbomind | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'pipeline')) | |
run: | | |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - pipeline turbomind vl | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind-vl') && contains(fromJSON(github.event.inputs.model), 'pipeline')) | |
run: | | |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind_vl.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/pipeline/test_pipeline_chat_turbomind_vl.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - restful turbomind | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful')) | |
run: | | |
pytest autotest/tools/restful/test_restful_chat_hf_turbomind.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/restful/test_restful_chat_hf_turbomind.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - restful workspace | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind') && contains(fromJSON(github.event.inputs.model), 'restful')) | |
run: | | |
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - restful turbomind vl | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'turbomind-vl') && contains(fromJSON(github.event.inputs.model), 'restful')) | |
run: | | |
pytest autotest/tools/restful/test_restful_chat_hf_turbomind_vl.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/restful/test_restful_chat_hf_turbomind_vl.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - pipeline torch | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'pipeline')) | |
run: | | |
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/pipeline/test_pipeline_chat_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - restful torch | |
continue-on-error: true | |
if: github.event_name == 'schedule' || (contains(fromJSON(github.event.inputs.backend), 'pytorch') && contains(fromJSON(github.event.inputs.model), 'restful')) | |
run: | | |
pytest autotest/tools/restful/test_restful_chat_hf_pytorch.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/tools/restful/test_restful_chat_hf_pytorch.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Test lmdeploy - local testcase | |
if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.model), 'local_case') | |
run: | | |
pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}}|| true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Clear workfile | |
if: always() | |
run: | | |
chmod -R 777 $REPORT_DIR | |
export workdir=$(pwd) | |
cd .. | |
rm -rf $workdir | |
mkdir $workdir | |
chmod -R 777 $workdir | |
test_restful: | |
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}} | |
runs-on: [self-hosted, linux-a100] | |
needs: test_tools | |
strategy: | |
fail-fast: false | |
matrix: | |
backend: ['turbomind', 'pytorch'] | |
timeout-minutes: 300 | |
container: | |
image: openmmlab/lmdeploy:latest-cu11 | |
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | |
volumes: | |
- /nvme/github-actions/pip-cache:/root/.cache/pip | |
- /nvme/github-actions/packages:/root/packages | |
- /nvme/qa_test_models:/nvme/qa_test_models | |
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
steps: | |
- name: Clone repository | |
uses: actions/checkout@v2 | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Copy repository - offline | |
if: ${{inputs.offline_mode}} | |
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. . | |
- name: Download Artifacts | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
uses: actions/download-artifact@v4 | |
with: | |
name: my-artifact-${{ github.run_id }}-py310 | |
- name: Install lmdeploy - dependency | |
run: | | |
# manually install flash attn | |
# the install packeage from. https://github.com/Dao-AILab/flash-attention/releases | |
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl | |
python3 -m pip install ${{env.dependency_pkgs}} | |
- name: Install lmdeploy | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
run: | | |
python3 -m pip install lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Install lmdeploy - offline | |
if: ${{inputs.offline_mode}} | |
run: | | |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Check env | |
run: | | |
pip uninstall -y nvidia-nccl-cu11 | |
python3 -m pip list | |
lmdeploy check_env | |
rm -rf allure-results | |
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p | |
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest | |
- name: Start restful api turbomind | |
if: matrix.backend == 'turbomind' | |
run: | | |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b-chat --tp 2 > restful.log 2>&1 & | |
echo "restful_pid=$!" >> "$GITHUB_ENV" | |
sleep 120s | |
- name: Start restful api pytorch | |
if: matrix.backend == 'pytorch' | |
run: | | |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b-chat --tp 2 --backend pytorch > restful.log 2>&1 & | |
echo "restful_pid=$!" >> "$GITHUB_ENV" | |
sleep 180s | |
- name: Test lmdeploy - restful api | |
timeout-minutes: 75 | |
run: | | |
pytest autotest/interface/restful/test_restful_chat_func.py -n 20 -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Kill api server | |
if: always() | |
run: | | |
kill -15 "$restful_pid" | |
- name: Start restful api turbomind - base | |
if: matrix.backend == 'turbomind' | |
run: | | |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b --tp 2 > restful.log 2>&1 & | |
echo "restful_pid=$!" >> "$GITHUB_ENV" | |
sleep 120s | |
- name: Start restful api pytorch - base | |
if: matrix.backend == 'pytorch' | |
run: | | |
CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b --tp 2 --backend pytorch > restful.log 2>&1 & | |
echo "restful_pid=$!" >> "$GITHUB_ENV" | |
sleep 180s | |
- name: Test lmdeploy - restful api - base | |
timeout-minutes: 40 | |
run: | | |
pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Kill api server | |
if: always() | |
run: | | |
kill -15 "$restful_pid" | |
- name: Clear workfile | |
if: always() | |
run: | | |
chmod -R 777 $REPORT_DIR | |
export workdir=$(pwd) | |
cd .. | |
rm -rf $workdir | |
mkdir $workdir | |
chmod -R 777 $workdir | |
test_pipeline: | |
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline'))}} | |
runs-on: [self-hosted, linux-a100] | |
needs: test_tools | |
timeout-minutes: 300 | |
container: | |
image: openmmlab/lmdeploy:latest-cu11 | |
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | |
volumes: | |
- /nvme/github-actions/pip-cache:/root/.cache/pip | |
- /nvme/github-actions/packages:/root/packages | |
- /nvme/qa_test_models:/nvme/qa_test_models | |
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
steps: | |
- name: Clone repository | |
uses: actions/checkout@v2 | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Copy repository - offline | |
if: ${{inputs.offline_mode}} | |
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. . | |
- name: Download Artifacts | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
uses: actions/download-artifact@v4 | |
with: | |
name: my-artifact-${{ github.run_id }}-py310 | |
- name: Install lmdeploy - dependency | |
run: | | |
# manually install flash attn | |
# the install packeage from. https://github.com/Dao-AILab/flash-attention/releases | |
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl | |
python3 -m pip install ${{env.dependency_pkgs}} | |
- name: Install lmdeploy | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
run: | | |
python3 -m pip install lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Install lmdeploy - offline | |
if: ${{inputs.offline_mode}} | |
run: | | |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Check env | |
run: | | |
pip uninstall -y nvidia-nccl-cu11 | |
python3 -m pip list | |
lmdeploy check_env | |
rm -rf allure-results | |
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p | |
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest | |
- name: Test lmdeploy - interface pipeline case | |
run: | | |
pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true | |
pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Clear workfile | |
if: always() | |
run: | | |
chmod -R 777 $REPORT_DIR | |
export workdir=$(pwd) | |
cd .. | |
rm -rf $workdir | |
mkdir $workdir | |
chmod -R 777 $workdir | |
test_benchmark: | |
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark'))}} | |
runs-on: [self-hosted, linux-a100] | |
needs: test_tools | |
timeout-minutes: 300 | |
container: | |
image: openmmlab/lmdeploy:latest-cu11 | |
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | |
volumes: | |
- /nvme/github-actions/pip-cache:/root/.cache/pip | |
- /nvme/github-actions/packages:/root/packages | |
- /nvme/qa_test_models:/nvme/qa_test_models | |
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
steps: | |
- name: Clone repository | |
uses: actions/checkout@v2 | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Copy repository - offline | |
if: ${{inputs.offline_mode}} | |
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. . | |
- name: Download Artifacts | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
uses: actions/download-artifact@v4 | |
with: | |
name: my-artifact-${{ github.run_id }}-py310 | |
- name: Install lmdeploy - dependency | |
run: | | |
# manually install flash attn | |
# the install packeage from. https://github.com/Dao-AILab/flash-attention/releases | |
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl | |
python3 -m pip install ${{env.dependency_pkgs}} | |
- name: Install lmdeploy | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
run: | | |
python3 -m pip install lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Install lmdeploy - offline | |
if: ${{inputs.offline_mode}} | |
run: | | |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Check env | |
run: | | |
pip uninstall -y nvidia-nccl-cu11 | |
python3 -m pip list | |
lmdeploy check_env | |
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p | |
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest | |
- name: Test benchmark script | |
run: | | |
pytest autotest/benchmark -n 4 --run_id ${{ github.run_id }} -m function ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true | |
mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') | |
- name: Clear workfile | |
if: always() | |
run: | | |
chmod -R 777 $REPORT_DIR | |
chmod -R 777 /nvme/qa_test_models/benchmark-reports/${{ github.run_id }} | |
export workdir=$(pwd) | |
cd .. | |
rm -rf $workdir | |
mkdir $workdir | |
chmod -R 777 $workdir | |
test_evaluation: | |
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'evaluation'))}} | |
runs-on: [self-hosted, linux-a100] | |
needs: test_tools | |
timeout-minutes: 300 # 5hours | |
container: | |
image: openmmlab/lmdeploy:latest-cu11 | |
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | |
volumes: | |
- /nvme/github-actions/pip-cache:/root/.cache/pip | |
- /nvme/github-actions/packages:/root/packages | |
- /nvme/github-actions/resources:/root/resources | |
- /nvme/github-actions/opencompass-data:/root/opencompass-data | |
- /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports | |
- /nvme/qa_test_models:/root/models | |
- /nvme/qa_test_models/offline_pkg:/nvme/qa_test_models/offline_pkg | |
- /mnt/shared:/mnt/shared | |
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
steps: | |
- name: Setup systems | |
run: | | |
export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')" | |
echo "TIME_STAMP=$TIME_STAMP" >> $GITHUB_ENV | |
- name: Clone repository | |
uses: actions/checkout@v2 | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Copy repository - offline | |
if: ${{inputs.offline_mode}} | |
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. . | |
- name: Download Artifacts | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
uses: actions/download-artifact@v4 | |
with: | |
name: my-artifact-${{ github.run_id }}-py310 | |
- name: Install lmdeploy - dependency | |
run: | | |
# manually install flash attn | |
# the install packeage from. https://github.com/Dao-AILab/flash-attention/releases | |
python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl | |
python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps | |
python3 -m pip install ${{env.dependency_pkgs}} | |
- name: Install lmdeploy | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
run: | | |
python3 -m pip install lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Install lmdeploy - offline | |
if: ${{inputs.offline_mode}} | |
run: | | |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Install opencompass | |
run: | | |
git clone --depth=1 https://github.com/open-compass/opencompass.git | |
cd opencompass | |
python3 -m pip install -e . | |
echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV | |
- name: Check env | |
run: | | |
pip uninstall -y nvidia-nccl-cu11 | |
python3 -m pip list | |
lmdeploy check_env | |
mkdir ${{env.REPORT_DIR}}/.pytest_cache -p | |
ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest | |
- name: Setup paths for evaluation | |
run: | | |
ln -s /root/opencompass-data ./data | |
python3 .github/scripts/action_tools.py create_model_links /root/models . | |
- name: Evaluate models | |
run: | | |
export LMDEPLOY_DIR=$(pwd) | |
python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, pt_internlm2_5_7b_chat, turbomind_internlm2_5_20b_chat, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, pt_internlm2_5_20b_chat, turbomind_llama_3d1_8b_instruct, pt_llama_3d1_8b_instruct, turbomind_llama_3d1_8b_instruct_4bits, turbomind_llama_3d1_8b_instruct_kvint4, turbomind_qwen2_7b_instruct, turbomind_qwen2_7b_instruct_4bits, pt_qwen1_5_moe_2_7b_chat, pt_gemma_2_9b_it]" "[*race_datasets, *gsm8k_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true | |
- name: Clear workspace | |
if: always() | |
run: | | |
export workdir=$(pwd) | |
cd .. | |
rm -rf $workdir | |
mkdir $workdir | |
chmod -R 777 $workdir | |
get_benchmark_result: | |
if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark'))}} | |
needs: [test_benchmark] | |
timeout-minutes: 5 | |
runs-on: [self-hosted, linux-a100] | |
env: | |
BENCHMARK_REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }} | |
steps: | |
- name: Clone repository | |
uses: actions/checkout@v3 | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Get overview | |
run: | | |
pip install pandas fire mmengine | |
python3 .github/scripts/action_tools.py generate_benchmark_report $BENCHMARK_REPORT_DIR | |
get_coverage_report: | |
if: ${{!cancelled()}} | |
runs-on: [self-hosted, linux-a100] | |
needs: [test_tools, test_restful, test_pipeline, test_benchmark] | |
timeout-minutes: 5 | |
container: | |
image: openmmlab/lmdeploy:latest-cu11 | |
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never" | |
volumes: | |
- /nvme/github-actions/pip-cache:/root/.cache/pip | |
- /nvme/github-actions/packages:/root/packages | |
- /nvme/qa_test_models:/nvme/qa_test_models | |
- /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro | |
steps: | |
- name: Clone repository | |
uses: actions/checkout@v2 | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
with: | |
repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }} | |
ref: ${{github.event.inputs.repo_ref || 'main'}} | |
- name: Copy repository - offline | |
if: ${{inputs.offline_mode}} | |
run: cp -r /nvme/qa_test_models/offline_pkg/lmdeploy/. . | |
- name: Download Artifacts | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
uses: actions/download-artifact@v4 | |
with: | |
name: my-artifact-${{ github.run_id }}-py310 | |
- name: Install lmdeploy | |
if: ${{github.event_name == 'schedule' || !inputs.offline_mode}} | |
run: | | |
python3 -m pip install lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Install lmdeploy - offline | |
if: ${{inputs.offline_mode}} | |
run: | | |
python3 -m pip install /nvme/qa_test_models/offline_pkg/py310/lmdeploy-*.whl --no-deps | |
python3 -m pip install -r requirements/test.txt | |
- name: Get coverage report | |
run: | | |
pip install coverage | |
coverage combine ${{env.REPORT_DIR}} | |
coverage xml -o ${{env.REPORT_DIR}}/coverage.xml | |
coverage report -m | |
mv .coverage ${{env.REPORT_DIR}}/.coverage | |
- name: Clear workfile | |
if: always() | |
run: | | |
chmod -R 777 $REPORT_DIR | |
export workdir=$(pwd) | |
cd .. | |
rm -rf $workdir | |
mkdir $workdir | |
chmod -R 777 $workdir | |
notify_to_feishu: | |
if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main') | |
needs: [get_benchmark_result, get_coverage_report, test_evaluation] | |
timeout-minutes: 5 | |
runs-on: [self-hosted, linux-a100] | |
steps: | |
- name: notify | |
if: contains(needs.*.result, 'failure') | |
run: | | |
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test finished!!!","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.FEISHU_USER_ID }}'"}]]}}}}' ${{ secrets.FEISHU_WEBHOOK_URL }} |