forked from huggingface/trl
-
Notifications
You must be signed in to change notification settings - Fork 1
98 lines (88 loc) · 2.73 KB
/
slow-tests.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
name: Slow tests (on push)
on:
push:
branches: [ main ]
paths:
# Run only when python files are modified
- "trl/**.py"
- "examples/**.py"
env:
RUN_SLOW: "yes"
IS_GITHUB_CI: "1"
SLACK_API_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}
jobs:
run_all_tests_single_gpu:
strategy:
fail-fast: false
matrix:
docker-image-name: ["huggingface/trl-latest-gpu:latest", "huggingface/trl-source-gpu:latest"]
runs-on:
group: aws-g4dn-2xlarge
env:
CUDA_VISIBLE_DEVICES: "0"
TEST_TYPE: "single_gpu_${{ matrix.docker-image-name }}"
container:
image: ${{ matrix.docker-image-name }}
options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v4
- name: Pip install
run: |
source activate trl
pip install -e ".[test]" --no-deps
pip install pytest-reportlog parameterized
- name: Run slow SFT tests on single GPU
if: always()
run: |
source activate trl
make slow_tests
- name: Generate Report
if: always()
run: |
pip install slack_sdk tabulate
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
run_all_tests_multi_gpu:
strategy:
fail-fast: false
matrix:
docker-image-name: ["huggingface/trl-latest-gpu:latest", "huggingface/trl-source-gpu:latest"]
runs-on:
group: aws-g4dn-2xlarge
env:
CUDA_VISIBLE_DEVICES: "0,1"
TEST_TYPE: "multi_gpu_${{ matrix.docker-image-name }}"
container:
image: ${{ matrix.docker-image-name }}
options: --gpus all --shm-size "16gb" -e NVIDIA_DISABLE_REQUIRE=true
defaults:
run:
shell: bash
steps:
- uses: actions/checkout@v4
- name: Pip install
run: |
source activate trl
pip install -e ".[test]" --no-deps
pip install pytest-reportlog parameterized
- name: Run slow SFT tests on Multi GPU
if: always()
run: |
source activate trl
make slow_tests
- name: Run end-to-end examples tests on multi GPU
if: always()
run: |
source activate trl
pip install deepspeed
make test_examples
- name: Generate Reports
if: always()
run: |
pip install slack_sdk tabulate
python scripts/log_reports.py >> $GITHUB_STEP_SUMMARY
python scripts/log_example_reports.py --text_file_name temp_results_sft_tests.txt >> $GITHUB_STEP_SUMMARY
python scripts/log_example_reports.py --text_file_name temp_results_dpo_tests.txt >> $GITHUB_STEP_SUMMARY
rm *.txt