# evaluate.yml
name: evaluate
on:
  workflow_dispatch:
    inputs:
      repo_org:
        required: false
        description: 'Tested repository organization name. Default is InternLM/lmdeploy'
        type: string
        default: 'InternLM/lmdeploy'
      repo_ref:
        required: false
        description: 'Set branch, tag, or commit id. Default is "main"'
        type: string
        default: 'main'
      models:
        required: true
        description: 'Tested TurboMind models list. e.g. [internlm_chat_7b, internlm_chat_7b_w8a16]'
        type: string
        default: '[tb_internlm2_chat_7b, tb_internlm2_chat_7b_4bits, tb_internlm2_chat_7b_kvint4, tb_internlm2_chat_7b_kvint8, pt_internlm2_chat_7b, tb_internlm2_5_7b_chat, tb_internlm2_5_7b_chat_4bits, tb_internlm2_5_7b_chat_kvint4, tb_internlm2_5_7b_chat_kvint8, pt_internlm2_5_7b_chat, tb_internlm2_5_20b_chat, tb_internlm2_5_20b_chat_4bits, tb_internlm2_5_20b_chat_kvint4, tb_internlm2_5_20b_chat_kvint8, pt_internlm2_5_20b_chat, tb_qwen1_5_7b_chat, tb_qwen1_5_7b_chat_4bits, tb_qwen1_5_7b_chat_kvint4, tb_qwen1_5_7b_chat_kvint8, pt_qwen1_5_7b_chat, tb_llama_3d1_8b_instruct, pt_llama_3d1_8b_instruct, tb_llama_3d1_8b_instruct_4bits, tb_llama_3d1_8b_instruct_kvint4, tb_llama_3d1_8b_instruct_kvint8, tb_qwen2_7b_instruct, tb_qwen2_7b_instruct_4bits, tb_qwen2_7b_instruct_kvint8, pt_qwen1_5_moe_2_7b_chat, pt_gemma_2_9b_it]'
      datasets:
        required: true
        description: 'Tested datasets list. e.g. [*bbh_datasets,*ceval_datasets,*cmmlu_datasets,*GaokaoBench_datasets,*gpqa_datasets,*gsm8k_datasets,*hellaswag_datasets,*humaneval_datasets,*ifeval_datasets,*math_datasets,*sanitized_mbpp_datasets,*mmlu_datasets,*nq_datasets,*race_datasets,*TheoremQA_datasets,*triviaqa_datasets,*winogrande_datasets,*crowspairs_datasets]'
        type: string
        default: '[*mmlu_datasets, *gsm8k_datasets]'
      local_config:
        required: true
        description: 'Whether to use a local eval config'
        type: boolean
        default: false
      devices:
        required: true
        description: 'CUDA_VISIBLE_DEVICES.'
        type: string
        default: '0,1,2,3,4,5,6,7'
      dependency_pkgs:
        required: true
        description: 'Dependency packages; a specific version can also be set'
        type: string
        default: 'pynvml packaging protobuf transformers_stream_generator transformers human_eval'
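# A minimal dispatch sketch using the GitHub CLI (gh). The input values below
# are illustrative choices, not defaults enforced by this workflow; repo_org
# and repo_ref are omitted because they fall back to their declared defaults:
#
#   gh workflow run evaluate.yml --repo InternLM/lmdeploy --ref main \
#     -f models='[tb_internlm2_chat_7b]' \
#     -f datasets='[*mmlu_datasets, *gsm8k_datasets]' \
#     -f local_config=false \
#     -f devices='0,1,2,3,4,5,6,7' \
#     -f dependency_pkgs='pynvml packaging protobuf transformers_stream_generator transformers human_eval'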
env:
  ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
  dependency_pkgs: ${{ inputs.dependency_pkgs || 'pynvml packaging protobuf transformers_stream_generator transformers human_eval' }}
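# In GitHub Actions expressions, an unset input evaluates to an empty string,
# which is falsy, so the `||` above falls back to the default package list
# whenever the workflow runs without explicit dispatch inputs.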
jobs:
  linux-build:
    # inputs.offline_mode is not declared among this workflow's dispatch inputs;
    # an unset input is falsy, so a manual run effectively gates on !cancelled().
    if: ${{ github.event_name == 'schedule' || (!cancelled() && !inputs.offline_mode) }}
    strategy:
      matrix:
        pyver: [py310]
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ matrix.pyver }}
      PLAT_NAME: manylinux2014_x86_64
      DOCKER_TAG: cuda11.8
      OUTPUT_FOLDER: cuda11.8_dist_${{ github.run_id }}
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Build
        run: |
          echo ${PYTHON_VERSION}
          echo ${PLAT_NAME}
          echo ${DOCKER_TAG}
          echo ${OUTPUT_FOLDER}
          echo ${GITHUB_RUN_ID}
          # drop the interactive -it flags so docker run works in CI's non-interactive shell
          sed -i 's/docker run --rm -it/docker run --rm/g' builder/manywheel/build_wheel.sh
          bash builder/manywheel/build_wheel.sh ${PYTHON_VERSION} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER}
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        with:
          if-no-files-found: error
          path: builder/manywheel/${{ env.OUTPUT_FOLDER }}
          retention-days: 1
          name: my-artifact-${{ github.run_id }}-${{ matrix.pyver }}
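  # The artifact name above must stay in sync with the Download Artifacts step
  # in the evaluate job below, which hard-codes the py310 matrix entry.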
  evaluate:
    needs: linux-build
    runs-on: [self-hosted, linux-a100]
    timeout-minutes: 4320 # 72 hours
    container:
      image: nvcr.io/nvidia/tritonserver:24.03-py3
      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip --pull never"
      volumes:
        - /nvme/github-actions/pip-cache:/root/.cache/pip
        - /nvme/github-actions/packages:/root/packages
        - /nvme/github-actions/resources:/root/resources
        - /nvme/github-actions/opencompass-data:/root/opencompass-data
        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
        - /nvme/qa_test_models:/root/models
        - /mnt/shared:/mnt/shared
        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
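    # The mounts expose runner-local caches (pip, prebuilt wheels, opencompass
    # data) and the model/report stores to the container; the zoneinfo mount
    # pins the container's local time to Asia/Shanghai, read-only.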
    steps:
      - name: Setup systems
        run: |
          export TIME_STAMP="$(date +'%Y%m%d-%H%M%S')"
          echo "TIME_STAMP=$TIME_STAMP" >> $GITHUB_ENV
      - name: Clone repository
        uses: actions/checkout@v3
        with:
          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
          ref: ${{ github.event.inputs.repo_ref || 'main' }}
      - name: Download Artifacts
        uses: actions/download-artifact@v4
        with:
          name: my-artifact-${{ github.run_id }}-py310
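      # With no explicit path, download-artifact@v4 extracts into the workspace
      # root, where the 'Install lmdeploy' step below globs lmdeploy-*.whl.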
      - name: Install pytorch
        run: |
          python3 -m pip cache dir
          python3 -m pip install torch==2.3.0 torchvision==0.18.0 --index-url https://download.pytorch.org/whl/cu118
      - name: Install lmdeploy - dependency
        run: |
          # manually install flash attn from a prebuilt wheel; releases are at
          # https://github.com/Dao-AILab/flash-attention/releases
          python3 -m pip install /root/packages/flash_attn-2.6.3+cu118torch2.3cxx11abiFALSE-cp310-cp310-linux_x86_64.whl
          python3 -m pip install /root/packages/xformers-0.0.27+cu118-cp310-cp310-manylinux2014_x86_64.whl --no-deps
          python3 -m pip install ${{ env.dependency_pkgs }}
      - name: Install lmdeploy
        run: |
          python3 -m pip install lmdeploy-*.whl
          python3 -m pip install -r requirements/test.txt
      - name: Install opencompass
        run: |
          git clone --depth=1 https://github.com/open-compass/opencompass.git
          cd opencompass
          python3 -m pip install -e .
          python3 -m pip install triton==2.1.0
          echo "OPENCOMPASS_DIR=$(pwd)" >> $GITHUB_ENV
      - name: Check env
        run: |
          python3 -m pip list
          lmdeploy check_env
      - name: Setup paths for evaluation
        run: |
          ln -s /root/opencompass-data ./data
          python3 .github/scripts/action_tools.py create_model_links /root/models .
      - name: Use local config
        if: ${{ inputs.local_config }}
        run: cp /nvme/qa_test_models/offline_pkg/eval_config.py .github/scripts/eval_opencompass_config.py
      - name: Evaluate models
        run: |
          echo ${{ github.event.inputs.models }}
          echo ${{ github.event.inputs.devices }}
          export LMDEPLOY_DIR=$(pwd)
          export CUDA_VISIBLE_DEVICES="${{ github.event.inputs.devices }}"
          python3 .github/scripts/action_tools.py evaluate \
            --models "${{ github.event.inputs.models }}" \
            --datasets "${{ github.event.inputs.datasets }}" \
            --workspace /root/evaluation-reports/${{ github.run_id }}
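      # Reports land in /root/evaluation-reports/<run_id> inside the container,
      # which the volume mount above maps to the host directory
      # /nvme/qa_test_models/evaluation-reports.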
      - name: Clear workspace
        if: always()
        run: |
          export workdir=$(pwd)
          cd ..
          rm -rf $workdir
          mkdir $workdir
          chmod -R 777 $workdir