Skip to content

Commit 3dd39dc

Browse files
committed
debug
Signed-off-by: Dmitry Rogozhkin <[email protected]>
1 parent f325ae8 commit 3dd39dc

File tree

3 files changed

+76
-330
lines changed

3 files changed

+76
-330
lines changed

.github/actions/print-environment/action.yml

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,11 @@ inputs:
1616
type: string
1717
default: ''
1818
description: "Space separated list of PyPi packages to evaluate"
19+
to:
20+
required: false
21+
type: string
22+
default: ''
23+
description: "File to print environment to"
1924

2025
runs:
2126
using: composite
@@ -26,6 +31,11 @@ runs:
2631
if [ -n "${{ inputs.conda }}" ]; then
2732
source activate ${{ inputs.conda }}
2833
fi
34+
to=$GITHUB_STEP_SUMMARY
35+
if [ -n "${{ inputs.to }}" ]; then
36+
to="${{ inputs.to }}"
37+
mkdir -p $(dirname $to)
38+
fi
2939
{
3040
echo "### Environment"
3141
echo "| | |"
@@ -71,4 +81,4 @@ runs:
7181
echo "| jobs.$GITHUB_JOB.env.NEOReadDebugKeys | $NEOReadDebugKeys |"
7282
echo "| jobs.$GITHUB_JOB.env.PYTORCH_ENABLE_XPU_FALLBACK | $PYTORCH_ENABLE_XPU_FALLBACK |"
7383
echo "| jobs.$GITHUB_JOB.env.PYTORCH_DEBUG_XPU_FALLBACK | $PYTORCH_DEBUG_XPU_FALLBACK |"
74-
} >> $GITHUB_STEP_SUMMARY
84+
} >> $to

.github/workflows/_linux_transformers.yml

Lines changed: 65 additions & 89 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,6 @@ on:
1313
- '.github/workflows/_linux_transformers.yml'
1414
workflow_dispatch:
1515
inputs:
16-
pytorch:
17-
required: false
18-
type: string
19-
default: 'nightly'
20-
description: Pytorch branch/commit
2116
python:
2217
required: false
2318
type: string
@@ -51,7 +46,6 @@ env:
5146
NEOReadDebugKeys: ${{ inputs.driver == 'rolling' && '1' || '0' }}
5247
DisableScratchPages: ${{ inputs.driver == 'rolling' && '1' || '0' }}
5348
python: ${{ inputs.python != '' && inputs.python || '3.10' }}
54-
pytorch: ${{ inputs.pytorch != '' && inputs.pytorch || 'nightly' }}
5549
transformers: ${{ inputs.transformers != '' && inputs.transformers || 'v4.49.0' }}
5650
PACKAGES: |
5751
espeak-ng
@@ -65,64 +59,27 @@ env:
6559
libswresample-dev
6660
libswscale-dev
6761
PYTEST_TIMEOUT: 600
62+
TORCH_INDEX: '--pre --index-url https://download.pytorch.org/whl/nightly/xpu'
6863

6964
jobs:
7065
prepare:
7166
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
67+
outputs:
68+
torch: ${{ steps.getver.outputs.torch }}
69+
torchvision: ${{ steps.getver.outputs.torchvision }}
70+
torchaudio: ${{ steps.getver.outputs.torchaudio }}
7271
steps:
73-
- name: Checkout torch-xpu-ops
74-
uses: actions/checkout@v4
75-
with:
76-
path: torch-xpu-ops
77-
- name: Checkout Transformers
78-
uses: actions/checkout@v4
79-
with:
80-
repository: huggingface/transformers
81-
ref: ${{ env.transformers }}
82-
path: transformers
83-
- name: Prepare OS environment
84-
run: |
85-
sudo apt-get update
86-
sudo apt-get install -y $PACKAGES
87-
git lfs install
88-
- name: Create unique Conda ENV name
72+
- id: getver
8973
run: |
90-
echo "CONDA_ENV_NAME=hf_transformers_test_${ZE_AFFINITY_MASK}" >> $GITHUB_ENV
91-
- name: Prepare Conda ENV
92-
run: |
93-
echo "Using Conda ENV name: $CONDA_ENV_NAME"
94-
which conda && conda clean -ay
95-
conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME
96-
conda create -y -n $CONDA_ENV_NAME python=${{ env.python }}
97-
source activate $CONDA_ENV_NAME
98-
pip install junitparser pytest-timeout
99-
- name: Prepare Stock XPU Pytorch
100-
run: |
101-
pwd
102-
source activate $CONDA_ENV_NAME
103-
if [ -z "${{ inputs.nightly_whl }}" ]; then
104-
pip install torch torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
105-
else
106-
pip install torch==$(echo ${{ inputs.nightly_whl }}) torchvision torchaudio --pre --index-url https://download.pytorch.org/whl/nightly/xpu
107-
fi
108-
- name: Prepare Transformers
109-
run: |
110-
source activate $CONDA_ENV_NAME
111-
cd transformers
112-
pip install -e .
113-
pip install -e ".[dev-torch,testing,video]"
114-
- name: Export environment
115-
pip freeze --exclude-editable | tee ${{ github.workspace }}/transformers/tests_log/pip_freeze.txt
116-
- name: Upload pip_freeze
117-
if: ${{ ! cancelled() }}
118-
uses: actions/upload-artifact@v4
119-
with:
120-
name: pip_freeze
121-
path: |
122-
${{ github.workspace }}/transformers/tests_log/pip_freeze.txt
74+
torch=$(pip index versions torch $TORCH_INDEX | grep torch | sed -E 's/.*\((.*)\)/\1/')
75+
torchvision=$(pip index versions torchvision $TORCH_INDEX | grep torch | sed -E 's/.*\((.*)\)/\1/')
76+
torchaudio=$(pip index versions torchaudio $TORCH_INDEX | grep torch | sed -E 's/.*\((.*)\)/\1/')
77+
echo "torch=$torch" | tee "$GITHUB_OUTPUT"
78+
echo "torchvision=$torchvision" | tee "$GITHUB_OUTPUT"
79+
echo "torchaudio=$torchaudio" | tee "$GITHUB_OUTPUT"
12380
12481
tests:
125-
needs: [prepare]
82+
needs: prepare
12683
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
12784
strategy:
12885
matrix:
@@ -131,8 +88,8 @@ jobs:
13188
# * https://github.com/huggingface/transformers/issues/36267 (marian tests)
13289
##- cmd: '--ignore=tests/models/marian/test_modeling_marian.py -k backbone tests'
13390
## test_case: 'tests_backbone'
134-
- cmd: "tests/*.py"
135-
test_case: "tests_py"
91+
- test_case: "tests_py"
92+
cmd: "tests/*.py"
13693
# Excluding tests due to:
13794
# * torch.distributed.* not yet supported by XPU
13895
##- cmd: 'tests/generation'
@@ -156,16 +113,13 @@ jobs:
156113
# * Network proxy connection issue, reason unknown
157114
# *'tests/utils/test_import_utils.py' invalidates state of the test engine causing
158115
# next tests to fail. See: https://github.com/huggingface/transformers/issues/36267
159-
- cmd: '--ignore=tests/utils/test_import_utils.py tests/utils'
116+
- test_case: 'tests_utils'
117+
cmd: '--ignore=tests/utils/test_import_utils.py tests/utils'
160118
filter: 'not test_load_img_url_timeout'
161-
test_case: 'tests_utils'
162119
env:
163120
PYTORCH_DEBUG_XPU_FALLBACK: '1'
164121
TRANSFORMERS_TEST_DEVICE_SPEC: 'spec.py'
165122
steps:
166-
- uses: actions/download-artifact@v4
167-
with:
168-
name: pip_freeze
169123
- name: Checkout torch-xpu-ops
170124
uses: actions/checkout@v4
171125
with:
@@ -191,28 +145,34 @@ jobs:
191145
conda remove --all -y -n $CONDA_ENV_NAME || rm -rf $(dirname ${CONDA_EXE})/../envs/$CONDA_ENV_NAME
192146
conda create -y -n $CONDA_ENV_NAME python=${{ env.python }}
193147
source activate $CONDA_ENV_NAME
194-
find . -name "pip_freeze*"
195-
pip install -r pip_freeze.txt
148+
pip install junitparser pytest-timeout
149+
- name: Prepare Stock XPU Pytorch
150+
run: |
151+
source activate $CONDA_ENV_NAME
152+
pip install $TORCH_INDEX \
153+
torch==${{ needs.prepare.outputs.torch }} \
154+
torchvision==${{ needs.prepare.outputs.torchvision }} \
155+
torchaudio==${{ needs.prepare.outputs.torchaudio }}
196156
- name: Prepare Transformers
197157
run: |
198158
pwd
199159
source activate $CONDA_ENV_NAME
200160
cd transformers
201161
pip install -e .
202162
pip install -e ".[dev-torch,testing,video]"
203-
rm -rf tests_log && mkdir -p tests_log
163+
rm -rf logs && mkdir -p logs
204164
rm -rf reports
205165
cp ${{ github.workspace }}/torch-xpu-ops/.github/scripts/spec.py ./
206166
- name: Report installed versions
207167
run: |
208168
source activate $CONDA_ENV_NAME
209-
LOGS_DIR="${{ github.workspace }}/transformers/test_logs/${{matrix.test.test_case}}"
169+
LOGS_DIR="${{ github.workspace }}/transformers/logs/${{matrix.test.test_case}}"
210170
echo "pip installed packages:"
211-
pip list | tee $LOGS_DIR/pip_list.txt
171+
pip list | tee "$LOGS_DIR/pip_list-${{ matrix.test.test_case }}.txt"
212172
echo "lspci gpu devices:"
213-
lspci -d ::0380 | tee $LOGS_DIR/lspci_0380.txt
173+
lspci -d ::0380 | tee "$LOGS_DIR/lspci_0380-${{ matrix.test.test_case }}.txt"
214174
echo "GPU render nodes:"
215-
cat /sys/class/drm/render*/device/device | tee $LOGS_DIR/device_IDs.txt
175+
cat /sys/class/drm/render*/device/device | tee "$LOGS_DIR/device_IDs-${{ matrix.test.test_case }}.txt"
216176
echo "xpu-smi output:"
217177
xpu-smi discovery -y --json --dump -1
218178
- name: Sanity check installed packages
@@ -244,28 +204,51 @@ jobs:
244204
run: |
245205
du -sh ${{ env.HF_HOME }} || true
246206
rm -rf ${{ env.HF_HOME }}
247-
- name: Upload Test log
207+
- name: Print environment
208+
if: ${{ ! cancelled() }}
209+
uses: ./torch-xpu-ops/.github/actions/print-environment
210+
with:
211+
conda: $CONDA_ENV_NAME
212+
pip_packages: 'accelerate transformers'
213+
to: 'transformers/logs/environment-${{ matrix.test.test_case }}.md'
214+
- name: Upload reports
248215
if: ${{ ! cancelled() }}
249216
uses: actions/upload-artifact@v4
250217
with:
251-
name: ${{ matrix.test.test_case }}-${{ github.event.pull_request.number || github.sha }}
252-
path: |
253-
${{ github.workspace }}/transformers/reports
254-
${{ github.workspace }}/transformers/tests_log
218+
name: reports-${{ matrix.test.test_case }}-${{ github.event.pull_request.number || github.sha }}
219+
path: ${{ github.workspace }}/transformers/reports
220+
- name: Upload logs
221+
if: ${{ ! cancelled() }}
222+
uses: actions/upload-artifact@v4
223+
with:
224+
name: logs-${{ matrix.test.test_case }}-${{ github.event.pull_request.number || github.sha }}
225+
path: ${{ github.workspace }}/transformers/logs
255226

256227
report:
257-
needs: [tests]
228+
needs: tests
229+
if: "always()"
258230
runs-on: ${{ inputs.runner != '' && inputs.runner || 'linux.idc.xpu' }}
259231
steps:
260-
- name: Download artifacts
232+
- name: Download reports
233+
uses: actions/download-artifact@v4
234+
with:
235+
name: 'reports-*'
236+
path: 'transformers/reports/'
237+
- name: Download logs
238+
if: ${{ ! cancelled() }}
261239
uses: actions/download-artifact@v4
240+
with:
241+
name: 'logs-*'
242+
path: 'transformers/logs/'
262243
- name: Checkout torch-xpu-ops
244+
if: ${{ ! cancelled() }}
263245
uses: actions/checkout@v4
264246
with:
265247
path: torch-xpu-ops
266248
- name: Print results table
267249
if: ${{ ! cancelled() }}
268250
run: |
251+
ls transformers/reports/
269252
find . -name "*.xml"
270253
# Helper function to return number preceding given pattern, i.e.:
271254
# === 25 failed, 11 warnings, 0 errors ===
@@ -356,15 +339,8 @@ jobs:
356339
} >> $GITHUB_STEP_SUMMARY
357340
- name: Print environment
358341
if: ${{ ! cancelled() }}
359-
uses: ./torch-xpu-ops/.github/actions/print-environment
360-
with:
361-
conda: $CONDA_ENV_NAME
362-
pip_packages: 'accelerate transformers'
363-
- name: Upload Test log
364-
if: ${{ ! cancelled() }}
365-
uses: actions/upload-artifact@v4
366-
with:
367-
name: Torch-XPU-Transformers-Log-${{ github.event.pull_request.number || github.sha }}
368-
path: |
369-
${{ github.workspace }}/transformers/reports
370-
${{ github.workspace }}/transformers/tests_log
342+
run: |
343+
environment=$(find transformers/logs -name "environment-*.md" | head -1)
344+
cat $environment >> $GITHUB_STEP_SUMMARY
345+
# we expect environments to be identical
346+
diff $(find transformers/logs -name "environment-*.md")

0 commit comments

Comments
 (0)