forked from nod-ai/shark-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
219 lines (189 loc) · 7.67 KB
/
ci-sharktank.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# Copyright 2024 Advanced Micro Devices, Inc.
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
name: CI - sharktank
on:
workflow_dispatch:
pull_request:
push:
branches:
- main
concurrency:
# A PR number if a pull request and otherwise the commit hash. This cancels
# queued and in-progress runs for the same PR (presubmit) or commit
# (postsubmit). The workflow name is prepended to avoid conflicts between
# different workflows.
group: ${{ github.workflow }}-${{ github.event.number || github.sha }}
cancel-in-progress: true
jobs:
test:
name: "Unit Tests (${{ matrix.os }}, ${{ matrix.python-version }}, ${{ matrix.torch-version }})"
strategy:
matrix:
python-version: ["3.11", "3.12"]
torch-version: ["2.3.0", "2.4.1", "2.5.1"]
os: [ubuntu-24.04]
include:
- os: windows-2022
python-version: "3.11"
torch-version: "2.3.0"
- os: windows-2022
python-version: "3.12"
torch-version: "2.4.1"
exclude:
- python-version: "3.12"
# `torch.compile` requires torch>=2.4.0 for Python 3.12+
torch-version: "2.3.0"
fail-fast: false
runs-on: ${{matrix.os}}
defaults:
run:
shell: bash
env:
PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{matrix.python-version}}
- name: Cache Pip Packages
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
id: cache-pip
with:
path: ${{ env.PIP_CACHE_DIR }}
key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
- name: Install numpy
if: ${{ matrix.os == 'windows-2022' && matrix.torch-version == '2.3.0' }}
run: pip install "numpy<2.0"
- name: Install pip deps
run: |
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first. Installing the PyTorch CPU
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --no-compile --index-url https://download.pytorch.org/whl/cpu torch==${{matrix.torch-version}}+cpu
# Install nightly IREE packages.
# We could also pin to a known working or stable version.
pip install -f https://iree.dev/pip-release-links.html --pre \
iree-base-compiler \
iree-base-runtime \
iree-turbine
pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
- name: Run sharktank tests
if: ${{ !cancelled() }}
run: |
pytest -n 4 sharktank/ --durations=10
test_with_data:
name: "Data-dependent Tests"
strategy:
matrix:
python-version: [3.11]
runs-on: [llama-mi300x-3]
fail-fast: false
runs-on: ${{matrix.runs-on}}
defaults:
run:
shell: bash
env:
VENV_DIR: ${{ github.workspace }}/.venv
HF_HOME: "/data/huggingface"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: ${{matrix.python-version}}
- name: Create Python venv
run: python -m venv ${VENV_DIR}
- name: Install sharktank deps
run: |
source ${VENV_DIR}/bin/activate
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first. Installing the PyTorch CPU
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --no-compile -r pytorch-cpu-requirements.txt
# Install nightly IREE packages.
# We could also pin to a known working or stable version.
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
- name: Run tests
# TODO: unify with-*-data flags into a single flag and make it possible to run
# only tests that require data.
# We would still want the separate flags as we may endup with data being
# scattered on different CI machines.
run: |
source ${VENV_DIR}/bin/activate
pytest \
--with-clip-data \
--with-flux-data \
--with-t5-data \
--with-vae-data \
sharktank/tests/models/clip/clip_test.py \
sharktank/tests/models/t5/t5_test.py \
sharktank/tests/models/flux/flux_test.py \
sharktank/tests/models/vae/vae_test.py \
--durations=0
test_integration:
name: "Model Integration Tests"
runs-on: ubuntu-24.04
env:
PIP_CACHE_DIR: "${{ github.workspace }}/.pip-cache"
steps:
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
- name: "Setting up Python"
id: setup_python
uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
with:
python-version: 3.11
- name: Cache Pip Packages
uses: actions/cache@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2
id: cache-pip
with:
path: ${{ env.PIP_CACHE_DIR }}
key: pip-${{ steps.setup_python.outputs.python-version }}-${{ hashFiles('*requirements*.txt','sharktank/requirements*.txt') }}
- name: Install pip deps
run: |
python -m pip install --no-compile --upgrade pip
# Note: We install in three steps in order to satisfy requirements
# from non default locations first. Installing the PyTorch CPU
# wheels saves multiple minutes and a lot of bandwidth on runner setup.
pip install --no-compile -r pytorch-cpu-requirements.txt
# Install nightly IREE packages.
# We could also pin to a known working or stable version.
pip install -f https://iree.dev/pip-release-links.html --pre \
iree-base-compiler \
iree-base-runtime \
iree-turbine
pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
- name: Run punet tests
run: |
pytest -v sharktank/ -m punet_quick \
--durations=0
# Depends on other jobs to provide an aggregate job status.
# TODO(#584): move test_with_data and test_integration to a pkgci integration test workflow?
ci_sharktank_summary:
if: always()
runs-on: ubuntu-24.04
needs:
- test
steps:
- name: Getting failed jobs
run: |
echo '${{ toJson(needs) }}'
FAILED_JOBS="$(echo '${{ toJson(needs) }}' \
| jq --raw-output \
'map_values(select(.result!="success" and .result!="skipped")) | keys | join(",")' \
)"
if [[ "${FAILED_JOBS}" != "" ]]; then
echo "The following jobs failed: ${FAILED_JOBS}"
exit 1
fi