Update matrix-multiplication-cpu tutorial, use preallocated output bu… #28
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AUTOGENERATED by pre-commit, modify the .in file instead. | |
# integration-tests.yml.in is used to generate integration-tests.yml by | |
# expanding yaml anchors, because github actions don't support them | |
# (https://github.com/actions/runner/issues/1182). pre-commit will do this for | |
# you automatically. | |
name: Integration Tests

# Triggers: manual dispatch, pull requests and merge-queue checks targeting
# `main` or any `dev-**` branch, and pushes to `main`.
on:
  workflow_dispatch:
  pull_request:
    # Naming a branch dev-<something> is enough to get CI runs for it.
    branches:
      - main
      - "dev-**"
  merge_group:
    branches:
      - main
      - "dev-**"
    types:
      - checks_requested
  push:
    branches:
      - main

# One concurrency group per ref; an in-progress run is cancelled by a newer
# one on every ref except `main`.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

permissions: read-all

# Environment shared by every job in this workflow.
env:
  TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
  TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
  TRITON_DISABLE_LINE_INFO: "1"

jobs:
# Decides whether the integration tests should run for this event and, if so,
# publishes the runner matrices consumed by the test jobs. When the
# `set-matrix` step is skipped both outputs are empty, which the downstream
# jobs test for in their `if:` conditions.
#
# NOTE(review): only `pull_request` and `push` events are handled by the steps
# below; `workflow_dispatch` and `merge_group` runs never set
# `enable_integration`, so the matrices stay empty for them -- confirm that is
# intended.
Runner-Preparation:
  runs-on: ubuntu-latest
  timeout-minutes: 30
  outputs:
    matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
    matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
  steps:
    - name: Decide pre-submit integration test enablement
      # Always enable integration tests for pre-submit pull requests.
      if: github.event_name == 'pull_request'
      run: |
        echo "enable_integration=true" >> "$GITHUB_ENV"
    - name: Checkout post-submit commits
      if: github.event_name == 'push'
      uses: actions/checkout@v4
      with:
        # Only fetch two commits to check the latest changed files.
        fetch-depth: 2
    - name: Detect if build deps (e.g. LLVM hash) changed
      id: detect-change
      if: github.event_name == 'push'
      # NOTE(review): third-party action referenced by a mutable tag; consider
      # pinning it to a full commit SHA.
      uses: tj-actions/changed-files@v44
      with:
        files: |
          cmake/*.txt
    - name: Detect if enough time has passed since last post-submit run
      id: detect-time
      if: github.event_name == 'push'
      env:
        # Hand the token and repository name to the script through the
        # environment rather than interpolating them into the script text.
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        REPO_NAME: ${{ github.repository }}
      run: |
        # ID of integration-tests workflow
        WORKFLOW_ID="11678186"
        # Fetch the last run time of this workflow. `// empty` turns a missing
        # run (jq `null`) into an empty string so it can be detected below.
        LAST_RUN=$(curl -s \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github.v3+json" \
          "https://api.github.com/repos/$REPO_NAME/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success&per_page=1" \
          | jq -r '.workflow_runs[0].updated_at // empty') || true
        if [ -z "$LAST_RUN" ]; then
          # No previous successful run could be determined (none exists, or
          # the API call failed): err on the side of running CI.
          echo "Could not determine the last successful run; will run CI."
          echo "n_hours_since_last_run=true" >> "$GITHUB_ENV"
          exit 0
        fi
        # Convert to timestamp
        LAST_RUN_TS=$(date -d "$LAST_RUN" +%s)
        NOW_TS=$(date +%s)
        DIFF=$(( (NOW_TS - LAST_RUN_TS) / 3600 )) # Difference in hours
        echo "Last run was $DIFF hours ago."
        if [ "$DIFF" -ge 4 ]; then
          echo "Will run CI; last build was long enough ago."
          echo "n_hours_since_last_run=true" >> "$GITHUB_ENV"
        else
          echo "Will not run CI; last build was too recent."
          echo "n_hours_since_last_run=false" >> "$GITHUB_ENV"
        fi
    # We want to run integration tests on the main branch (i.e. post-submit)
    # occasionally, because pre-submit CI caches will only read from caches
    # generated from the main branch (or the PR's branch), and we want these
    # caches to be recent.
    #
    # But we also don't want to run the tests on *every* commit, because this
    # would compete for resources with pre-commit CI (and the whole point of
    # caching is to speed up CI).
    #
    # As a compromise, run every N hours, or if a build dependency changes
    # (e.g. we update the LLVM hash).
    - name: Decide whether to run integration tests post-submit
      if: |
        github.event_name == 'push' &&
        (steps.detect-change.outputs.any_changed == 'true' ||
        env.n_hours_since_last_run == 'true')
      run: |
        echo "enable_integration=true" >> "$GITHUB_ENV"
    - name: Prepare runner matrix
      id: set-matrix
      if: env.enable_integration == 'true'
      # Step outputs are written to $GITHUB_OUTPUT; the `::set-output`
      # workflow command is deprecated.
      run: |
        if [ x"${{ github.repository }}" = x"triton-lang/triton" ]; then
          echo 'matrix-CUDA=[["self-hosted", "A100"], ["self-hosted", "H100"]]' >> "$GITHUB_OUTPUT"
          echo 'matrix-HIP=[["self-hosted", "gfx90a"]]' >> "$GITHUB_OUTPUT"
        else
          echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
          echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
        fi
# Runs the repository's pre-commit hooks over all files and prints the
# resulting diff when a step fails.
pre-commit:
  name: pre-commit (code formatting)
  runs-on: ubuntu-latest
  needs: Runner-Preparation
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: "3.12"
        cache: "pip"

    # The hash of the pre-commit config becomes part of the cache key below.
    - name: Compute hash of pre-commit config
      id: cache-key
      shell: bash
      run: |
        echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml)" >> $GITHUB_OUTPUT

    - name: Cache pre-commit's cache dir
      uses: actions/cache@v4
      with:
        key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}
        # The path is spelled out literally: no shell interprets this value,
        # so environment variables cannot be used in it.
        path: |
          ~/.cache/pre-commit

    - name: Check pre-commit
      run: |
        python3 -m pip install --upgrade pre-commit
        # TODO: ignore the first yapf failure until https://github.com/google/yapf/issues/1164 is fixed
        python3 -m pre_commit run --all-files --verbose yapf &> /dev/null || true
        # If first run of yapf worked and made changes reset the tree to the original state
        git reset --hard
        python3 -m pre_commit run --all-files --verbose

    - name: Print diff of changes if pre-commit failed
      if: failure()
      run: git diff
# Builds Triton and runs the lit, Python, interpreter, C++ and Proton test
# suites on each runner listed in the CUDA matrix published by
# Runner-Preparation. Skipped entirely when that matrix is empty.
Integration-Tests:
  needs: Runner-Preparation
  if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 30
  strategy:
    matrix:
      runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: "true"
    - name: Compute cache keys
      id: cache-key
      run: |
        echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
        echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
        echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
      shell: bash
    - name: Cache build dependencies
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.triton/llvm
          ~/.triton/nvidia
          ~/.triton/pybind11
        key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
    # Cache ~/.triton/cache because the vast majority of unit test time is
    # spent compiling. Triton won't (well, should not) use these cached files
    # if something internal to Triton changes, because Triton's internal
    # source code is part of the cache key.
    #
    # Similarly, cache ~/.cache/ccache to speed up compilation.
    #
    # On branch `main` we always start from an empty cache, i.e. we skip the
    # "restore" step. This is to prevent the caches from accumulating stale
    # files over time.
    - name: Restore cache of ccache and Triton compilation artifacts
      if: github.event_name != 'push'
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.triton/cache
          ~/.cache/ccache
        # Restore the most recent cache entry.
        restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
        # We expect this cache key never to hit and for us to fall back
        # unconditionally to the restore-key, so it doesn't actually matter
        # what we put here (so long as it doesn't hit an existing key).
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
    - name: Inspect cache directory
      run: |
        mkdir -p ~/.triton
        ls -alh ~/.triton
    - name: Update PATH
      run: |
        echo "$HOME/.local/bin" >> $GITHUB_PATH
    - name: Install pip dependencies
      run: |
        python3 -m pip install --upgrade pip
        python3 -m pip install wheel cmake==3.24 ninja pytest-xdist lit
    - name: Install Triton
      env:
        TRITON_BUILD_WITH_CCACHE: "true"
        CUDA_HOME: "/usr/local/cuda"
      run: |
        echo "PATH is '$PATH'"
        cd python
        python3 -m pip install --no-build-isolation -vvv '.[tests]'
    - name: Run lit tests
      run: |
        cd python
        LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
        if [ ! -d "${LIT_TEST_DIR}" ]; then
          echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
        fi
        lit -v "${LIT_TEST_DIR}"
    - name: Run python tests on CUDA
      run: |
        cd python/test/unit
        python3 -m pytest -vvv -n 8 --ignore=hopper/test_flashattention.py --ignore=runtime --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
        python3 -m pytest -vvv -n 8 language/test_subprocess.py
        # Run runtime tests serially to avoid race condition with cache handling
        python3 -m pytest -vvv runtime/
        # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
        TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv language/test_line_info.py
        # Run hopper/test_flashattention.py separately to avoid out of gpu memory
        python3 -m pytest -vs hopper/test_flashattention.py
    - name: Run interpreter tests
      if: ${{matrix.runner[0] == 'self-hosted' && matrix.runner[1] == 'H100'}}
      env:
        TRITON_INTERPRET: "1"
      run: |
        cd python/test/unit
        python3 -m pytest -vvv -n 16 -m interpreter language/test_core.py language/test_standard.py \
          language/test_random.py language/test_block_pointer.py language/test_subprocess.py \
          operators/test_flash_attention.py::test_op \
          ../../tutorials/06-fused-attention.py::test_op --device cpu
    - name: Run C++ unittests
      run: |
        cd python
        cd "build/$(ls build | grep -i cmake)"
        ctest
    - name: Run Proton tests
      run: |
        # Prepend the CUPTI directory inside the script: values under a
        # step's `env:` key are not shell-expanded, so "$LD_LIBRARY_PATH"
        # written there would be passed through literally and the runner's
        # existing search path would be lost.
        export LD_LIBRARY_PATH="/usr/local/cuda/extras/CUPTI/lib64${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
        cd third_party/proton
        python3 -m pytest -vvv test
    # If we're on branch `main`, save the ccache Triton compilation artifacts
    # to the cache so they can be used by other (non-main) CI runs.
    #
    # (It wouldn't be a problem to save the cache on every run, because github
    # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
    - name: Save ccache and Triton compilation artifacts to cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        # One path per line, and the same list as the restore step above:
        # the cache action splits `path` on newlines, so a space-separated
        # value would be treated as a single path.
        path: |
          ~/.triton/cache
          ~/.cache/ccache
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
    - name: Inspect cache directories
      run: |
        mkdir -p ~/.triton
        ls -alh ~/.triton
        du -sh ~/.triton/**
        mkdir -p ~/.cache/ccache
        ls -alh ~/.cache/ccache
        du -sh ~/.cache/ccache
# Builds Triton inside a ROCm PyTorch container and runs the lit, Python and
# C++ test suites on each runner listed in the HIP matrix published by
# Runner-Preparation. Skipped entirely when that matrix is empty.
Integration-Tests-AMD:
  needs: Runner-Preparation
  if: needs.Runner-Preparation.outputs.matrix-HIP != ''
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 30
  strategy:
    matrix:
      runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
  container:
    image: rocm/pytorch:rocm6.0.2_ubuntu22.04_py3.10_pytorch_2.1.2
    options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: 'true'
    - name: Compute cache keys
      id: cache-key
      run: |
        echo "llvm=$(cat cmake/llvm-hash.txt | cut -c 1-8)" >> $GITHUB_OUTPUT
        echo "pybind11=$(cat cmake/pybind11-version.txt)" >> $GITHUB_OUTPUT
        echo "nvidia=$(cat cmake/nvidia-toolchain-version.txt)" >> $GITHUB_OUTPUT
        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
      shell: bash
    - name: Cache build dependencies
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.triton/llvm
          ~/.triton/nvidia
          ~/.triton/pybind11
        key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-pybind11-${{ steps.cache-key.outputs.pybind11 }}
    # Cache ~/.triton/cache because the vast majority of unit test time is
    # spent compiling. Triton won't (well, should not) use these cached files
    # if something internal to Triton changes, because Triton's internal
    # source code is part of the cache key.
    #
    # Similarly, cache ~/.cache/ccache to speed up compilation.
    #
    # On branch `main` we always start from an empty cache, i.e. we skip the
    # "restore" step. This is to prevent the caches from accumulating stale
    # files over time.
    - name: Restore cache of ccache and Triton compilation artifacts
      if: github.event_name != 'push'
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.triton/cache
          ~/.cache/ccache
        # Restore the most recent cache entry.
        restore-keys: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-
        # We expect this cache key never to hit and for us to fall back
        # unconditionally to the restore-key, so it doesn't actually matter
        # what we put here (so long as it doesn't hit an existing key).
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
    - name: Inspect cache directory
      run: |
        mkdir -p ~/.triton
        ls -alh ~/.triton
    - name: Update PATH
      run: |
        echo "/opt/rocm/llvm/bin" >> $GITHUB_PATH
    - name: Install pip dependencies
      run: |
        python3 -m pip install --upgrade pip
        python3 -m pip install lit
    - name: Install Triton
      run: |
        echo "PATH is '$PATH'"
        pip uninstall -y triton
        cd python
        pip install -v -e .
    - name: Run lit tests
      run: |
        cd python
        LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
        if [ ! -d "${LIT_TEST_DIR}" ]; then
          # `exit 1` rather than `exit -1`: a negative status is not accepted
          # by every POSIX shell, and this step does not pin `shell: bash`.
          echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
        fi
        lit -v "${LIT_TEST_DIR}"
    - name: Run python tests on HIP
      run: |
        pytest --capture=tee-sys -rfs -vvv python/tutorials/06-fused-attention.py
        cd python/test/unit
        pytest --capture=tee-sys -rfs -vvv -n 32 language operators \
          hopper/test_mixed_io.py \
          hopper/test_gemm.py \
          hopper/test_tma_store_gemm.py \
          hopper/test_persistent_warp_specialized_fused-attention.py \
          --ignore=language/test_line_info.py
        # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
        TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -vvv -n 8 language/test_line_info.py
        # Run runtime tests serially to avoid race condition with cache handling
        python3 -m pytest -vvv runtime
    - name: Run C++ unittests
      run: |
        cd python
        cd "build/$(ls build | grep -i cmake)"
        ctest
    # If we're on branch `main`, save the ccache Triton compilation artifacts
    # to the cache so they can be used by other (non-main) CI runs.
    #
    # (It wouldn't be a problem to save the cache on every run, because github
    # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
    - name: Save ccache and Triton compilation artifacts to cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        # One path per line, and the same list as the restore step above:
        # the cache action splits `path` on newlines, so a space-separated
        # value would be treated as a single path.
        path: |
          ~/.triton/cache
          ~/.cache/ccache
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ runner.name }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
    - name: Inspect cache directories
      run: |
        mkdir -p ~/.triton
        ls -alh ~/.triton
        du -sh ~/.triton/**
        mkdir -p ~/.cache/ccache
        ls -alh ~/.cache/ccache
        du -sh ~/.cache/ccache