-
Notifications
You must be signed in to change notification settings - Fork 53
137 lines (118 loc) · 5.46 KB
/
unit-nvidia-reuseable.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# SPDX-License-Identifier: Apache-2.0
name: 'Run unit tests on specified EC2'

# Manually triggered workflow; both inputs below are required strings
# that fall back to the listed defaults.
on:
  workflow_dispatch:
    inputs:
      # Used as the instance type when the EC2 runner is launched.
      ec2_runner_variant:
        description: 'EC2 instance type of runner'
        type: string
        required: true
        default: "g6e.12xlarge"  # TODO: consider smaller default instance.
      # Forwarded to pytest's `-m` option to select which tests run.
      pytest_mark:
        description: 'pytest.mark of tests that will be run'
        type: string
        required: true
        default: "fast"
jobs:
  # Launches an EC2 instance and registers it as a self-hosted runner.
  # Exposes the runner label (used by `runs-on` below) and the instance id
  # (needed to terminate the instance afterwards).
  # TODO: this startup step could definitely be reused by our workflows elsewhere.
  start-ec2-runner:
    runs-on: ubuntu-latest
    outputs:
      label: ${{ steps.start-ec2-runner.outputs.label }}
      ec2-instance-id: ${{ steps.start-ec2-runner.outputs.ec2-instance-id }}
    steps:
      - name: "Harden runner"
        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
        with:
          egress-policy: audit

      - name: "Configure AWS credentials"
        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.AWS_REGION }}

      - name: "Start EC2 runner"
        id: start-ec2-runner
        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
        with:
          mode: start
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          ec2-image-id: ${{ vars.AWS_EC2_AMI }}
          ec2-instance-type: ${{ inputs.ec2_runner_variant }}
          subnet-id: subnet-024298cefa3bedd61
          security-group-id: sg-06300447c4a5fbef3
          iam-role-name: instructlab-ci-runner
          aws-resource-tags: >
            [
              {"Key": "Name", "Value": "instructlab-ci-github-large-runner"},
              {"Key": "GitHubRepository", "Value": "${{ github.repository }}"},
              {"Key": "GitHubRef", "Value": "${{ github.ref }}"},
              {"Key": "GitHubPR", "Value": "${{ github.event.number }}"}
            ]

  # Runs the selected unit tests on the EC2 runner started above.
  run-unit-tests:
    needs:
      - start-ec2-runner
    runs-on: ${{ needs.start-ec2-runner.outputs.label }}
    permissions:
      pull-requests: write
    steps:
      - name: "Harden runner"
        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
        with:
          egress-policy: audit

      - name: "Install packages"
        run: |
          cat /etc/os-release
          sudo dnf install -y gcc gcc-c++ make git python3.11 python3.11-devel

      - name: "Checkout PR branch"
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          fetch-depth: 0

      # NOTE(review): these exports only live for the duration of this step's
      # shell; later steps do not see them. Write them to $GITHUB_ENV if the
      # tests are expected to rely on them.
      - name: "Verify environment variables are setup correctly"
        run: |
          export CUDA_HOME="/usr/local/cuda"
          export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64"
          export PATH="$PATH:$CUDA_HOME/bin"
          nvidia-smi

      # installs in $GITHUB_WORKSPACE/venv.
      # only has to install Tox because Tox will do the other virtual environment management.
      - name: "Setup Python virtual environment"
        run: |
          python3.11 -m venv --upgrade-deps venv
          . venv/bin/activate
          pip install tox

      - name: "Show disk utilization BEFORE tests"
        run: |
          df -h

      - name: "Run unit tests with Tox and Pytest"
        # The mark is passed through the environment instead of being
        # interpolated into the script, so its content cannot be interpreted
        # as shell syntax and marks containing spaces stay a single argument.
        env:
          PYTEST_MARK: ${{ inputs.pytest_mark }}
        run: |
          # Each `run` step starts a fresh shell, so the venv that holds tox
          # has to be activated again here.
          . venv/bin/activate
          tox -e py3-unit -- -m "$PYTEST_MARK"

      - name: "Show disk utilization AFTER tests"
        run: |
          df -h

  # Terminates the EC2 instance and deregisters the runner.
  stop-ec2-runner:
    needs:
      - start-ec2-runner
      - run-unit-tests
    runs-on: ubuntu-latest
    # always() makes this job run even when a job it needs failed or was
    # cancelled; without it a failing test run would leave the EC2 instance up.
    if: ${{ always() }}
    steps:
      - name: "Harden runner"
        uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f
        with:
          egress-policy: audit

      - name: "Configure AWS credentials"
        uses: "aws-actions/configure-aws-credentials@e3dd6a429d7300a6a4c196c26e071d42e0343502" # v4.0.2
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ vars.AWS_REGION }}

      - name: "Stop EC2 runner"
        id: stop-ec2-runner
        uses: machulav/ec2-github-runner@1827d6ca7544d7044ddbd2e9360564651b463da2 # v2.3.7
        with:
          mode: stop
          github-token: ${{ secrets.GH_PERSONAL_ACCESS_TOKEN }}
          label: ${{ needs.start-ec2-runner.outputs.label }}
          # Stop mode identifies the instance to terminate by its id.
          ec2-instance-id: ${{ needs.start-ec2-runner.outputs.ec2-instance-id }}