.github/workflows/pr_ete_test.yml

name: pr_ete_test

on:
  pull_request:
    paths:
      - ".github/workflows/pr_ete_test.yml"
      - "cmake/**"
      - "src/**"
      - "autotest/**"
      - "3rdparty/**"
      - "lmdeploy/**"
      - "requirements/**"
      - "requirements.txt"
      - "CMakeLists.txt"
      - "setup.py"
  workflow_dispatch:

concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true


env:
  HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
  HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  PYTHONPATH: /nvme/qa_test_models/offline_pkg/LLaVA


jobs:
  pr_functions_test:
    runs-on: [self-hosted, linux-a100-pr]
    timeout-minutes: 120
    env:
      REPORT_DIR: /nvme/qa_test_models/test-reports
    container:
      image: nvidia/cuda:12.4.1-devel-ubuntu22.04
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
      volumes:
        - /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
        - /nvme/share_data/github-actions/packages:/root/packages
        - /nvme/qa_test_models:/nvme/qa_test_models
        - /mnt/187:/mnt/187
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Setup systems
        run: |
          apt-get update -y && apt-get install -y software-properties-common wget vim git curl &&\
          curl https://sh.rustup.rs -sSf | sh -s -- -y &&\
          add-apt-repository ppa:deadsnakes/ppa -y && apt-get update -y && apt-get install -y --no-install-recommends \
          ninja-build rapidjson-dev libgoogle-glog-dev gdb python3.10 python3.10-dev python3.10-venv \
          && apt-get clean -y && rm -rf /var/lib/apt/lists/* && cd /opt && python3 -m venv py3
          echo "PATH=/opt/py3/bin:$PATH" >> "$GITHUB_ENV"
      - name: Clone repository
        uses: actions/checkout@v2
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install --upgrade pip setuptools==69.5.1
          python3 -m pip install torch==2.3.0 torchvision==0.18.0
          # the install packeage from. https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu123torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
      - name: Build lmdeploy
        run: |
          cp /nvme/qa_test_models/offline_pkg/openmpi-4.1.5.tar.gz .
          tar xf openmpi-4.1.5.tar.gz && cd openmpi-4.1.5 && ./configure --prefix=/usr/local/openmpi
          make -j$(nproc) && make install && cd .. && rm -rf openmpi-4.1.5*
          export PATH=$PATH:/usr/local/openmpi/bin
          export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/openmpi/lib
          python3 -m pip install cmake packaging wheel transformers_stream_generator transformers datasets openai einops timm decord
          python3 -m pip install -r requirements.txt -r requirements/test.txt -r requirements/build.txt
          mkdir -p build && cd build &&\
          sh ../generate.sh &&\
          ninja -j$(nproc) && ninja install &&\
          cd .. &&\
          python3 -m pip install -e . &&\
          rm -rf build
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      - name: Test lmdeploy
        run: |
          CUDA_VISIBLE_DEVICES=5,6 pytest autotest -m 'pr_test and gpu_num_2' -x --alluredir=allure-results --clean-alluredir
          CUDA_VISIBLE_DEVICES=5,6 pytest autotest -m 'pr_test and gpu_num_1' -n 2 -x --alluredir=allure-results
      - name: Generate reports
        if: always()
        run: |
          export date_today="$(date +'%Y%m%d-%H%M%S')"
          export report_dir="$REPORT_DIR/$date_today"
          echo "Save report to $report_dir"
          mv allure-results $report_dir
      - name: Clear workfile
        if: always()
        run: |
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir