Skip to content

evaluate

evaluate #40

Workflow file for this run

# Manually triggered evaluation workflow. Every entry under `inputs` is a
# field of the "Run workflow" form.
name: evaluate

on:
  workflow_dispatch:
    inputs:
      # Repository handed to actions/checkout as `repository`.
      repo_org:
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        required: false
        default: 'InternLM/lmdeploy'
      # Git ref handed to actions/checkout as `ref`.
      repo_ref:
        description: 'Set branch or tag or commit id. Default is "main"'
        type: string
        required: false
        default: 'main'
      # Forwarded to `action_tools.py evaluate --models`.
      # The folded scalar below joins its lines with single spaces, so the
      # resulting value is one line: "[a, b, c, ...]".
      models:
        description: 'Tested TurboMind models list. eg. [internlm_chat_7b,internlm_chat_7b_w8a16]'
        type: string
        required: true
        default: >-
          [tb_internlm2_chat_7b, tb_internlm2_chat_7b_4bits,
          tb_internlm2_chat_7b_kvint4, tb_internlm2_chat_7b_kvint8,
          pt_internlm2_chat_7b, tb_internlm2_5_7b_chat,
          tb_internlm2_5_7b_chat_4bits, tb_internlm2_5_7b_chat_kvint4,
          tb_internlm2_5_7b_chat_kvint8, pt_internlm2_5_7b_chat,
          tb_internlm2_5_20b_chat, tb_internlm2_5_20b_chat_4bits,
          tb_internlm2_5_20b_chat_kvint4, tb_internlm2_5_20b_chat_kvint8,
          pt_internlm2_5_20b_chat, tb_qwen1_5_7b_chat,
          tb_qwen1_5_7b_chat_4bits, tb_qwen1_5_7b_chat_kvint4,
          tb_qwen1_5_7b_chat_kvint8, pt_qwen1_5_7b_chat,
          tb_llama_3d1_8b_instruct, pt_llama_3d1_8b_instruct,
          tb_llama_3d1_8b_instruct_4bits, tb_llama_3d1_8b_instruct_kvint4,
          tb_llama_3d1_8b_instruct_kvint8, tb_qwen2_7b_instruct,
          tb_qwen2_7b_instruct_4bits, tb_qwen2_7b_instruct_kvint8,
          pt_qwen1_5_moe_2_7b_chat, pt_gemma_2_9b_it]
      # Forwarded to `action_tools.py evaluate --datasets`.
      datasets:
        description: 'Tested datasets list. eg. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
        type: string
        required: true
        default: '[*mmlu_datasets, *gsm8k_datasets]'
      # When true, the "Use local config" step overwrites
      # .github/scripts/eval_opencompass_config.py before evaluating.
      local_config:
        description: 'Whether use local eval config'
        type: boolean
        required: true
        default: false
      # Exported as CUDA_VISIBLE_DEVICES in the "Evaluate models" step.
      devices:
        description: 'CUDA_VISIBLE_DEVICES.'
        type: string
        required: true
        default: '0,1,2,3,4,5,6,7'
      # Extra packages pip-installed before the lmdeploy wheel.
      dependency_pkgs:
        description: 'Dependency packages, you can also set a specific version'
        type: string
        required: true
        default: 'pynvml packaging protobuf transformers_stream_generator transformers human_eval'
# Workflow-level environment, shared by every job and step.
env:
  # NOTE(review): presumably keeps actions that target a deprecated Node
  # runtime (e.g. actions/checkout@v3) runnable - confirm against the
  # runner documentation before removing.
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  # Uses the dispatch input when it is non-empty, otherwise the same package
  # list that the input declares as its default.
  dependency_pkgs: ${{ inputs.dependency_pkgs || 'pynvml packaging protobuf transformers_stream_generator transformers human_eval' }}
jobs:
  # Builds the lmdeploy wheel with builder/manywheel/build_wheel.sh and
  # uploads it as a run-scoped artifact for the `evaluate` job.
  linux-build:
    # The previous condition also tested for a `schedule` event and an
    # `offline_mode` input. This workflow declares neither, so it always
    # reduced to `!cancelled()`.
    if: ${{ !cancelled() }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda11.8
      OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # Drop the -it flags from the build script's docker invocation
          # (presumably because the CI runner has no interactive TTY).
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}

  # Installs the freshly built wheel inside a GPU container on a self-hosted
  # runner and evaluates the requested models with opencompass.
  evaluate:
    needs: linux-build
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 4320  # 72 hours
    container:
      image: nvcr.io/nvidia/tritonserver:24.03-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
      # host path : container path. Steps below only see the container paths.
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/github-actions/opencompass-data:/root/opencompass-data
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/root/models
        - /mnt/shared:/mnt/shared
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
    steps:
      - name: Setup systems
        run: |
          export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')"
          echo "TIME_STAMP=$TIME_STAMP" >> "$GITHUB_ENV"
      - name: Clone repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          # Must match the name produced by linux-build for pyver=py310.
          name: my-artifact-${{ github.run_id }}-py310
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.2.2 torchvision==0.17.2 --index-url https://download.pytorch.org/whl/cu118
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn
          # the wheel comes from https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.5.7+cu118torch2.2cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install -U 'xformers<=0.0.26' --index-url https://download.pytorch.org/whl/cu118
          python3 -m pip install ${{env.dependency_pkgs}}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          git clone --depth=1 https://github.com/open-compass/opencompass.git
          cd opencompass
          python3 -m pip install -e .
          python3 -m pip install triton==2.1.0
          echo "OPENCOMPASS_DIR=$(pwd)" >> "$GITHUB_ENV"
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      - name: Setup paths for evaluation
        run: |
          ln -s /root/opencompass-data ./data
          python3 .github/scripts/action_tools.py create_model_links /root/models .
      - name: Use local config
        if: ${{ inputs.local_config }}
        # The host directory /nvme/qa_test_models is mounted at /root/models
        # in this container (see `volumes`), so the host path is not visible
        # here and the file must be read through the container path.
        run: cp /root/models/offline_pkg/eval_config.py .github/scripts/eval_opencompass_config.py
      - name: Evaluate models
        # Inputs are passed through the environment instead of being pasted
        # into the script text, so their content is never parsed as shell code.
        env:
          EVAL_MODELS: ${{ github.event.inputs.models }}
          EVAL_DATASETS: ${{ github.event.inputs.datasets }}
          EVAL_DEVICES: ${{ github.event.inputs.devices }}
        run: |
          echo "$EVAL_MODELS"
          echo "$EVAL_DEVICES"
          export LMDEPLOY_DIR=$(pwd)
          export CUDA_VISIBLE_DEVICES="$EVAL_DEVICES"
          python3 .github/scripts/action_tools.py evaluate \
            --models "$EVAL_MODELS" \
            --datasets "$EVAL_DATASETS" \
            --workspace /root/evaluation-reports/${{ github.run_id }}
      - name: Clear workspace
        # Runs even when earlier steps failed, so the self-hosted runner's
        # workspace is recreated empty for the next job.
        if: always()
        run: |
          export workdir="$(pwd)"
          cd ..
          rm -rf "$workdir"
          mkdir "$workdir"
          chmod -R 777 "$workdir"