# .github/workflows/daily_ete_test.yml

# Daily end-to-end regression workflow: builds the wheel, stages code and wheel
# on the self-hosted runner, runs the test jobs, then aggregates reports and
# sends a failure notification.
name: daily_ete_test

# Triggers: manual dispatch with filters, plus a cron schedule.
# Scheduled runs carry no inputs; the jobs detect that through
# `github.event_name == 'schedule'` and fall back to their built-in defaults.
# All list-valued inputs are parsed with fromJSON(), so they must be JSON arrays.
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository in owner/name form. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        default: 'main'
      backend:
        required: true
        description: 'Set backend testcase filter as a JSON list of: turbomind, pytorch, turbomind_vl. Default is ["turbomind", "pytorch", "turbomind_vl"]'
        type: string
        default: '["turbomind", "pytorch", "turbomind_vl"]'
      model:
        required: true
        description: 'Set testcase module filter as a JSON list of: chat, restful, pipeline. Default contains all modules'
        type: string
        default: '["pipeline", "restful", "chat"]'
      offline_mode:
        required: true
        description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
        type: boolean
        default: false
      regression_func:
        required: true
        description: 'Regression job filter as a JSON list of: quant, tools, restful, pipeline, benchmark, evaluation. Default contains all of them'
        type: string
        default: '["quant", "tools", "restful", "pipeline", "benchmark", "evaluation"]'
  schedule:
    # Cron is evaluated in UTC: 16:00 UTC, Sunday through Thursday.
    - cron: '00 16 * * 0-4'

# Workflow-level environment, visible to every job below.
env:
  # Runner / host settings.
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  HOST_LOCALTIME: '/usr/share/zoneinfo/Asia/Shanghai'
  HOST_PIP_CACHE_DIR: '/nvme/github-actions/pip-cache'

  # Build output folder and per-run report directory.
  OUTPUT_FOLDER: 'cuda11.8_dist_${{ github.run_id }}'
  REPORT_DIR: '/nvme/qa_test_models/test-reports/${{ github.run_id }}'

  # Code and package locations on the runner's shared disk.
  TEST_CODE_PATH: '/nvme/qa_test_models/test_pkg/lmdeploy'
  OFFLINE_CODE_PATH: '/nvme/qa_test_models/offline_pkg/lmdeploy'
  OFFLINE_REQUIREMENTS: '/nvme/qa_test_models/offline_pkg/requirements.txt'
  DEEPSEEK_VL: '/nvme/qa_test_models/offline_pkg/DeepSeek-VL'

  # Extra pytest arguments.
  COV_PARAM: '--cov /opt/py3/lib/python3.10/site-packages/lmdeploy'
  # Re-run attempts of a scheduled run get '--lf --lfnf none'; every other
  # run gets plain '--lf'.
  FAIL_CONFIG: "${{ github.event_name == 'schedule' && github.run_attempt != 1 && '--lf --lfnf none' || '--lf'}}"

jobs:
  # Builds the lmdeploy wheel on a GitHub-hosted runner and uploads it as a
  # run-scoped artifact. Runs for scheduled events and for manual runs that did
  # not enable offline_mode.
  linux-build:
    if: ${{!cancelled() && (github.event_name == 'schedule' || !inputs.offline_mode)}}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda11.8
    steps:
      # Falls back to InternLM/lmdeploy@main when no dispatch inputs exist.
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # Prints the build parameters, patches the build script so its
      # `docker run` no longer passes `-it`, then runs it.
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # remove -it
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      # The artifact name must match the one requested by download_pkgs
      # (my-artifact-<run_id>-py310). An empty output folder is an error.
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}


  # Stages the test code and the wheel under TEST_CODE_PATH on the self-hosted
  # runner; every later job copies its workspace from there.
  # `!cancelled()` lets this job run even when linux-build was skipped
  # (offline mode), in which case code and wheel come from OFFLINE_CODE_PATH.
  download_pkgs:
    needs: linux-build
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 50
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Online mode: check out the requested repository/ref.
      # Uses the same checkout major version as the other jobs in this file.
      - name: Clone repository
        uses: actions/checkout@v3
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{github.event.inputs.repo_ref || 'main'}}
      # `mkdir -p` so a missing parent directory does not abort the staging.
      - name: Copy repository
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && cp -r . ${{env.TEST_CODE_PATH}}
      - name: Copy repository - offline
        if: ${{inputs.offline_mode}}
        run: rm -rf ${{env.TEST_CODE_PATH}} && mkdir -p ${{env.TEST_CODE_PATH}} && cp -r ${{env.OFFLINE_CODE_PATH}}/. ${{env.TEST_CODE_PATH}}
      # Online mode: fetch the wheel uploaded by linux-build for this run.
      - name: Download Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
      # Replace any wheel already present in the staged tree with the new one.
      - name: Copy Artifacts
        if: ${{github.event_name == 'schedule' || !inputs.offline_mode}}
        run: rm -f ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl && cp lmdeploy-*.whl ${{env.TEST_CODE_PATH}}
      - name: Copy Artifacts - offline
        if: ${{inputs.offline_mode}}
        run: rm -f ${{env.TEST_CODE_PATH}}/lmdeploy-*.whl && cp ${{env.OFFLINE_CODE_PATH}}/lmdeploy-*.whl ${{env.TEST_CODE_PATH}}

  # Quantization (w4a16 / w8a8) and model-conversion tests.
  # Runs on schedule, or on manual runs whose regression_func list contains
  # 'quant'. The other test jobs all list this job in `needs`.
  test_quantization:
    needs: download_pkgs
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'quant') )}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 120
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /root/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest-cu11
      # `--pull never`: the image is not pulled by this job.
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/modelscope_hub:/root/modelscope_hub
        - /nvme/github-actions/modelscope_modules:/root/modelscope_modules
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/shared:/mnt/shared
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      # Local wheels/sources come from /root/packages (mounted above); the
      # remaining dependencies come from the offline requirements file.
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -e /root/packages/AutoAWQ_kernels
          python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      # Installs the staged wheel without resolving its dependencies, then the
      # test requirements and the local DeepSeek-VL package.
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      # Prints the environment, clears stale results/logs and links the
      # per-run pytest cache under REPORT_DIR into the workspace.
      # NOTE(review): `ln -s <target> autotest` presumably lands inside an
      # existing autotest/ directory — confirm.
      - name: Check env
        run: |
          pip install transformers
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # In each test step `|| true` keeps the script going after pytest
      # failures so the coverage file is still moved to REPORT_DIR under a
      # timestamped name; `continue-on-error` covers a failing `mv`.
      # This first step also passes --clean-alluredir.
      - name: Test lmdeploy - quantization w4a16
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/quantization/test_quantization_awq.py -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} --clean-alluredir ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - quantization w8a8
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'pytorch')
        run: |
          pytest autotest/tools/quantization/test_quantization_w8a8.py -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - convert
        continue-on-error: true
        if: github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.backend), 'turbomind')
        run: |
          pytest autotest/tools/convert -m 'not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Always runs: opens up permissions on the report directory and replaces
      # the workspace with an empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Tool-level tests (chat CLI, pipeline, restful) as a backend x model matrix.
  # Runs on schedule, or on manual runs whose regression_func list contains
  # 'tools'.
  test_tools:
    needs: test_quantization
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'tools'))}}
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 150
    strategy:
      fail-fast: false
      matrix:
        # The fallback is applied BEFORE parsing: scheduled runs have no
        # inputs, and fromJSON() must never be handed an empty value.
        backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch", "turbomind_vl"]') }}
        model: ${{ fromJSON(inputs.model || '["pipeline","restful","chat"]') }}
        # No chat tests exist for the turbomind_vl backend.
        exclude:
          - backend: turbomind_vl
            model: chat
        # Extra combination that only triggers the local testcase step.
        include:
          - backend: turbomind
            model: local_case
    env:
      PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA
      MODELSCOPE_CACHE: /root/modelscope_hub
      MODELSCOPE_MODULES_CACHE: /root/modelscope_modules
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/modelscope_hub:/root/modelscope_hub
        - /nvme/github-actions/modelscope_modules:/root/modelscope_modules
        - /nvme/github-actions/resources/lora:/root/lora
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/shared:/mnt/shared
        - /nvme/qa_test_models/lmdeploy/autotest:/local_case
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the wheel comes from https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -e /root/packages/AutoAWQ_kernels
          python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          cp -r /root/lora .
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Each test step runs the gpu_num_1 cases with 8 workers and the
      # gpu_num_2 cases with 4 workers. `|| true` keeps the script going after
      # test failures so every coverage file is still moved to REPORT_DIR.
      - name: Test lmdeploy - chat workspace
        continue-on-error: true
        if: matrix.backend == 'turbomind' && matrix.model == 'chat'
        run: |
          pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - chat
        continue-on-error: true
        if: (matrix.backend == 'pytorch' || matrix.backend == 'turbomind') && matrix.model == 'chat'
        run: |
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/chat/test_command_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - pipeline
        continue-on-error: true
        if: matrix.model == 'pipeline'
        run: |
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/pipeline/test_pipeline_chat_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - restful
        continue-on-error: true
        if: matrix.model == 'restful'
        run: |
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_hf_${{matrix.backend}}.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Test lmdeploy - restful workspace
        continue-on-error: true
        if: matrix.backend == 'turbomind' && matrix.model == 'restful'
        run: |
          pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/tools/restful/test_restful_chat_workspace.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Runs the cases mounted at /local_case; only the `include` combination
      # (turbomind + local_case) reaches this step.
      - name: Test lmdeploy - local testcase
        if: matrix.backend == 'turbomind' && matrix.model == 'local_case'
        run: |
          pytest /local_case/issue_regression --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Always runs: opens up permissions on the report directory and replaces
      # the workspace with an empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # RESTful API interface tests, once per backend. For each backend an
  # api_server is started for the chat model, tested and killed; the same
  # cycle then repeats for the base model.
  # Runs on schedule, or on manual runs whose regression_func list contains
  # 'restful'.
  test_restful:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'restful'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    strategy:
      fail-fast: false
      matrix:
        backend: ['turbomind', 'pytorch']
    # NOTE(review): this job-level limit (60) is lower than the 75-minute
    # timeout of the "Test lmdeploy - restful api" step below, so that step
    # limit can never be reached — confirm which value is intended.
    timeout-minutes: 60
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -e /root/packages/AutoAWQ_kernels
          python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Starts the chat-model server in the background on GPUs 6 and 7 with
      # --tp 2, logging to restful.log. Its PID is exported through GITHUB_ENV
      # so the kill step can read it.
      # NOTE(review): the fixed sleep presumably waits for the server to come
      # up; there is no readiness check — confirm the duration is sufficient.
      - name: Start restful api turbomind
        if: matrix.backend == 'turbomind'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b-chat --tp 2 > restful.log  2>&1  &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      - name: Start restful api pytorch
        if: matrix.backend == 'pytorch'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b-chat --tp 2 --backend pytorch > restful.log  2>&1  &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
      # Deselects tests marked not_<backend>. `|| true` keeps the script going
      # after test failures so the coverage file is still moved.
      - name: Test lmdeploy - restful api
        timeout-minutes: 75
        run: |
          pytest autotest/interface/restful/test_restful_chat_func.py -n 20 -m 'not not_${{matrix.backend}}' --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Sends SIGTERM to the recorded PID even when earlier steps failed.
      - name: Kill api server
        if: always()
        run: |
          kill -15 "$restful_pid"
      # Same start/test/kill cycle for the base (non-chat) model; restful_pid
      # is overwritten with the new server's PID.
      - name: Start restful api turbomind - base
        if: matrix.backend == 'turbomind'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b --tp 2 > restful.log  2>&1  &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 120s
      - name: Start restful api pytorch - base
        if: matrix.backend == 'pytorch'
        run: |
          CUDA_VISIBLE_DEVICES=6,7 lmdeploy serve api_server /nvme/qa_test_models/internlm/internlm2_5-20b --tp 2 --backend pytorch > restful.log  2>&1  &
          echo "restful_pid=$!" >> "$GITHUB_ENV"
          sleep 180s
      - name: Test lmdeploy - restful api - base
        timeout-minutes: 40
        run: |
          pytest autotest/interface/restful/test_restful_completions_v1.py -n 20 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # NOTE(review): if the base server was never started, restful_pid still
      # holds the PID of the first server and this kill targets it again.
      - name: Kill api server
        if: always()
        run: |
          kill -15 "$restful_pid"
      # Always runs: opens up permissions on the report directory and replaces
      # the workspace with an empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Pipeline interface tests: the general function suite, then the long-text
  # suite split by GPU-count marker.
  # Runs on schedule, or on manual runs whose regression_func list contains
  # 'pipeline'.
  test_pipeline:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'pipeline'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 120
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -e /root/packages/AutoAWQ_kernels
          python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # The worker count drops as the marker's GPU count rises (8, 4, then 2
      # workers for gpu_num_1/2/4). `|| true` keeps the script going after
      # failures; only the final `mv` can fail the step.
      - name: Test lmdeploy - interface pipeline case
        run: |
          pytest autotest/interface/pipeline/test_pipeline_func.py -m 'not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_1 and not pr_test' -n 8 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_2 and not pr_test' -n 4 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S') || true
          pytest autotest/interface/pipeline/test_pipeline_longtext_func.py -m 'gpu_num_4 and not pr_test' -n 2 --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      # Always runs: opens up permissions on the report directory and replaces
      # the workspace with an empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir


  # Runs the benchmark test suite (pytest marker `function`).
  # Runs on schedule, or on manual runs whose regression_func list contains
  # 'benchmark'.
  test_benchmark:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 120
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the wheel comes from https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -e /root/packages/AutoAWQ_kernels
          python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # FAIL_CONFIG supplies the pytest last-failed flags; the run id is passed
      # to the suite through --run_id.
      - name: Test benchmark script
        run: |
          pytest autotest/benchmark -n 4 --run_id ${{ github.run_id }} -m function ${{env.FAIL_CONFIG}} --alluredir=${{env.REPORT_DIR}} ${{env.COV_PARAM}} || true
          mv .coverage ${{env.REPORT_DIR}}/.coverage.$(date +'%Y%m%d%H%M%S')
      - name: Clear workfile
        if: always()
        run: |
          # Best effort: either report directory may be missing when an earlier
          # step failed (the benchmark-reports directory is not created by this
          # workflow), and a failing chmod must not abort the workspace reset
          # below on the self-hosted runner.
          chmod -R 777 $REPORT_DIR || true
          chmod -R 777 /nvme/qa_test_models/benchmark-reports/${{ github.run_id }} || true
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Model evaluation with OpenCompass, as a two-entry matrix: 'chat' models and
  # 'base' models. Runs on schedule, or on manual runs whose regression_func
  # list contains 'evaluation'.
  test_evaluation:
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'evaluation'))}}
    runs-on: [self-hosted, linux-a100]
    needs: test_quantization
    timeout-minutes: 120 # 2hours
    strategy:
      fail-fast: false
      matrix:
        evaluate_type: ['chat', 'base']
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/github-actions/opencompass-data:/root/opencompass-data
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/shared:/mnt/shared
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -e /root/packages/AutoAWQ_kernels
          python3 -m pip install /root/packages/autoawq-0.2.6-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install -r ${{env.OFFLINE_REQUIREMENTS}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
          pip install ${{env.DEEPSEEK_VL}} --no-deps
      # Shallow-clones the default branch of opencompass (no pinned ref),
      # installs it in editable mode and exports its path as OPENCOMPASS_DIR
      # for the following steps.
      - name: Install opencompass
        run: |
          git clone --depth=1 https://github.com/open-compass/opencompass.git
          cd opencompass
          python3 -m pip install -e .
          echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV
      - name: Check env
        run: |
          pip uninstall -y nvidia-nccl-cu11
          python3 -m pip list
          lmdeploy check_env
          rm -rf allure-results
          # remove tmp log in testcase
          rm -rf /nvme/qa_test_models/autotest_model/log/*
          mkdir ${{env.REPORT_DIR}}/.pytest_cache -p
          ln -s ${{env.REPORT_DIR}}/.pytest_cache autotest
      # Links the mounted dataset directory to ./data and calls the repository
      # helper script with /nvme/qa_test_models and the workspace as arguments.
      - name: Setup paths for evaluation
        run: |
          ln -s /root/opencompass-data ./data
          python3 .github/scripts/action_tools.py create_model_links /nvme/qa_test_models .
      # Arguments to `evaluate`: model list, dataset list, per-run output
      # directory, evaluation type, and a trailing `true` flag.
      # NOTE(review): the meaning of the trailing flag is defined in
      # action_tools.py, which is not visible here — confirm.
      - name: Evaluate models
        if: matrix.evaluate_type == 'chat'
        run: |
          export LMDEPLOY_DIR=$(pwd)

          python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b_chat, turbomind_internlm2_5_7b_chat_4bits, turbomind_internlm2_5_7b_chat_kvint4, pt_internlm2_5_7b_chat, turbomind_internlm2_5_20b_chat, turbomind_internlm2_5_20b_chat_4bits, turbomind_internlm2_5_20b_chat_kvint4, pt_internlm2_5_20b_chat, turbomind_llama_3d1_8b_instruct, pt_llama_3d1_8b_instruct, turbomind_llama_3d1_8b_instruct_4bits, turbomind_llama_3d1_8b_instruct_kvint4, turbomind_qwen2_7b_instruct, turbomind_qwen2_7b_instruct_4bits, pt_qwen1_5_moe_2_7b_chat, pt_gemma_2_9b_it]" "[*race_datasets, *gsm8k_datasets]" /root/evaluation-reports/${{ github.run_id }} chat true
      - name: Evaluate base models
        if: matrix.evaluate_type == 'base'
        run: |
          export LMDEPLOY_DIR=$(pwd)

          python3 .github/scripts/action_tools.py evaluate "[turbomind_internlm2_5_7b, turbomind_qwen2_7b, turbomind_internlm2_5_7b_batch1]" "[*mmlu_datasets, *gsm8k_datasets]" /root/evaluation-reports/${{ github.run_id }} base true
      # Always runs: replaces the workspace with an empty, world-writable
      # directory. Unlike the other jobs, REPORT_DIR is not chmod-ed here.
      - name: Clear workspace
        if: always()
        run: |
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir


  # Produces the benchmark overview for this run once test_benchmark has
  # finished. Executes directly on the self-hosted runner (no container).
  get_benchmark_result:
    needs: test_benchmark
    if: ${{!cancelled() && (github.event_name == 'schedule' || contains(fromJSON(github.event.inputs.regression_func), 'benchmark'))}}
    runs-on:
      - self-hosted
      - linux-a100
    timeout-minutes: 5
    env:
      # Per-run benchmark report directory handed to the report generator.
      BENCHMARK_REPORT_DIR: '/nvme/qa_test_models/benchmark-reports/${{ github.run_id }}'
    steps:
      # Check out the repository under test to obtain the helper script.
      - name: Clone repository
        uses: actions/checkout@v3
        with:
          ref: ${{github.event.inputs.repo_ref || 'main'}}
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
      # Install the helper's Python dependencies, then build the report.
      - name: Get overview
        run: |
          pip install pandas fire mmengine
          python3 .github/scripts/action_tools.py generate_benchmark_report $BENCHMARK_REPORT_DIR


  # Combines the timestamped .coverage.* files that the test jobs moved into
  # REPORT_DIR and writes an XML plus a console report.
  # `!cancelled()` lets it run even when some of the needed jobs failed or
  # were skipped.
  get_coverage_report:
    if: ${{!cancelled()}}
    runs-on: [self-hosted, linux-a100]
    needs: [test_tools, test_restful, test_pipeline, test_benchmark]
    timeout-minutes: 5
    container:
      image: openmmlab/lmdeploy:latest-cu11
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      # Populate the workspace from the tree staged by download_pkgs.
      - name: Copy repository and Artifacts
        run: cp -r ${{env.TEST_CODE_PATH}}/. .
      # NOTE(review): lmdeploy is presumably installed so that coverage can
      # resolve the measured source files at the COV_PARAM path — confirm.
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl --no-deps
          python3 -m pip install -r requirements/test.txt
      # `coverage combine` merges the data files found in REPORT_DIR into a
      # single .coverage in the workspace, which is moved back at the end.
      - name: Get coverage report
        run: |
          pip install coverage
          coverage combine ${{env.REPORT_DIR}}
          coverage xml -o ${{env.REPORT_DIR}}/coverage.xml
          coverage report -m
          mv .coverage ${{env.REPORT_DIR}}/.coverage
      # Always runs: opens up permissions on the report directory and replaces
      # the workspace with an empty, world-writable directory.
      - name: Clear workfile
        if: always()
        run: |
          chmod -R 777 $REPORT_DIR
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir

  # Posts a Feishu message when any of the needed jobs failed. Only runs for
  # workflow runs on the `develop` or `main` ref.
  notify_to_feishu:
    if: always() && !cancelled() && (github.ref_name == 'develop' || github.ref_name == 'main')
    needs: [get_benchmark_result, get_coverage_report, test_evaluation]
    timeout-minutes: 5
    runs-on: [self-hosted, linux-a100]
    steps:
      - name: notify
        if: contains(needs.*.result, 'failure')
        # Secrets are handed to the script through environment variables
        # instead of being expanded into the command text, and the webhook URL
        # is quoted so shell metacharacters in it cannot alter the command.
        env:
          FEISHU_WEBHOOK_URL: ${{ secrets.FEISHU_WEBHOOK_URL }}
          FEISHU_USER_ID: ${{ secrets.FEISHU_USER_ID }}
        run: |
          curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"Lmdeploy- Daily test finished！！！","content":[[{"tag":"text","text":"branch: ${{github.ref_name}}, run action: ${{github.workflow}} failed. "},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'"${FEISHU_USER_ID}"'"}]]}}}}' "${FEISHU_WEBHOOK_URL}"