[backend] Allocating smem buffers in size descending order #14355
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# AUTOGENERATED by pre-commit, modify the .in file instead.
# integration-tests.yml.in is used to generate integration-tests.yml by
# expanding yaml anchors, because github actions don't support them
# (https://github.com/actions/runner/issues/1182). pre-commit will do this for
# you automatically.
name: Integration Tests

# Triggers: manual dispatch, pull requests (except those targeting `llvm-**`
# branches), merge-queue check requests for `main` / `dev-**`, and pushes to
# `main`.
on:
  workflow_dispatch:
  pull_request:
    branches-ignore: ['llvm-**']
  merge_group:
    branches: [main, 'dev-**']
    types: [checks_requested]
  push:
    branches: [main]

# One run per git ref at a time. A newer run cancels the in-progress one,
# except on `main`, where runs are never cancelled.
concurrency:
  group: ${{ github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

permissions: read-all

# Build/test switches shared by every job. All values are quoted so they are
# unambiguously strings.
env:
  TRITON_BUILD_WITH_CCACHE: "true"
  TRITON_BUILD_WITH_CLANG_LLD: "TRUE"
  TRITON_USE_ASSERT_ENABLED_LLVM: "TRUE"
  TRITON_DISABLE_LINE_INFO: "1"
  PROTON_SKIP_PC_SAMPLING_TEST: "1"
  CCACHE_COMPRESS: "true"

jobs:
# Decides whether the integration jobs should run for this event and, if so,
# publishes the runner matrices they consume. When `enable_integration` is not
# set, the `set-matrix` step is skipped, the outputs stay empty, and the
# downstream jobs' `if:` conditions evaluate to false.
Runner-Preparation:
  runs-on: ubuntu-latest
  timeout-minutes: 30
  outputs:
    matrix-CUDA: ${{ steps.set-matrix.outputs.matrix-CUDA }}
    matrix-HIP: ${{ steps.set-matrix.outputs.matrix-HIP }}
    matrix-MACOS: ${{ steps.set-matrix.outputs.matrix-MACOS }}
  steps:
    - name: Decide pre-submit integration test enablement
      # Always enable integration tests for pre-submit pull requests.
      if: github.event_name == 'pull_request'
      run: |
        echo "enable_integration=true" >> "$GITHUB_ENV"

    - name: Decide manual trigger integration test enablement
      # Always enable integration tests when manually triggered.
      if: github.event_name == 'workflow_dispatch'
      run: |
        echo "enable_integration=true" >> "$GITHUB_ENV"

    - name: Checkout post-submit commits
      if: github.event_name == 'push'
      uses: actions/checkout@v4
      with:
        # Only fetch two commits to check the latest changed files.
        fetch-depth: 2

    - name: Detect if build deps (e.g. LLVM hash) changed
      id: detect-change
      if: github.event_name == 'push'
      # NOTE(review): third-party action referenced by a mutable tag; consider
      # pinning it to a full commit SHA.
      uses: tj-actions/changed-files@v45
      with:
        files: |
          cmake/*.txt
          cmake/*.json

    - name: Detect if enough time has passed since last post-submit run
      id: detect-time
      if: github.event_name == 'push'
      # The token and repository name are handed over as environment variables
      # instead of being spliced into the script text.
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        REPO_NAME: ${{ github.repository }}
      run: |
        # ID of integration-tests workflow
        WORKFLOW_ID="11678186"
        # Fetch the last run time of this workflow
        LAST_RUN=$(curl -s \
          -H "Authorization: token $GITHUB_TOKEN" \
          -H "Accept: application/vnd.github.v3+json" \
          "https://api.github.com/repos/$REPO_NAME/actions/workflows/$WORKFLOW_ID/runs?branch=main&status=success&per_page=1" \
          | jq -r '.workflow_runs[0].updated_at')
        # jq prints "null" when the response holds no successful run; `date -d`
        # cannot parse that, so treat it as "long enough ago".
        if [ -z "$LAST_RUN" ] || [ "$LAST_RUN" = "null" ]; then
          echo "Will run CI; no previous successful run was found."
          echo "n_hours_since_last_run=true" >> "$GITHUB_ENV"
          exit 0
        fi
        # Convert to timestamp
        LAST_RUN_TS=$(date -d "$LAST_RUN" +%s)
        NOW_TS=$(date +%s)
        DIFF=$(( (NOW_TS - LAST_RUN_TS) / 3600 )) # Difference in hours
        echo "Last run was $DIFF hours ago."
        if [ "$DIFF" -ge 4 ]; then
          echo "Will run CI; last build was long enough ago."
          echo "n_hours_since_last_run=true" >> "$GITHUB_ENV"
        else
          echo "Will not run CI; last build was too recent."
          echo "n_hours_since_last_run=false" >> "$GITHUB_ENV"
        fi

    # We want to run integration tests on the main branch (i.e. post-submit)
    # occasionally, because pre-submit CI caches will only read from caches
    # generated from the main branch (or the PR's branch), and we want these
    # caches to be recent.
    #
    # But we also don't want to run the tests on *every* commit, because this
    # would compete for resources with pre-commit CI (and the whole point of
    # caching is to speed up CI).
    #
    # As a compromise, run every N hours, or if a build dependency changes
    # (e.g. we update the LLVM hash).
    - name: Decide whether to run integration tests post-submit
      if: |
        github.event_name == 'push' &&
        (steps.detect-change.outputs.any_changed == 'true' ||
         env.n_hours_since_last_run == 'true')
      run: |
        echo "enable_integration=true" >> "$GITHUB_ENV"

    - name: Prepare runner matrix
      id: set-matrix
      if: env.enable_integration == 'true'
      # Outputs are written to $GITHUB_OUTPUT; the `::set-output` workflow
      # command is deprecated. The upstream repository gets the GPU runner
      # sets; any other repository falls back to hosted runners.
      run: |
        if [ x"${{ github.repository }}" == x"triton-lang/triton" ]; then
          echo 'matrix-CUDA=[["a100-runner-set"], ["h100-runner-set"]]' >> "$GITHUB_OUTPUT"
          echo 'matrix-HIP=[["self-hosted", "gfx90a"], ["self-hosted", "gfx942"]]' >> "$GITHUB_OUTPUT"
          echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
        else
          echo 'matrix-CUDA=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
          echo 'matrix-HIP=["ubuntu-latest"]' >> "$GITHUB_OUTPUT"
          echo 'matrix-MACOS=[["macos-latest"]]' >> "$GITHUB_OUTPUT"
        fi
# Runs every pre-commit hook over the whole tree and prints the resulting diff
# when a hook fails.
pre-commit:
  name: pre-commit (code formatting)
  needs: Runner-Preparation
  runs-on: ubuntu-latest
  steps:
    - name: Checkout
      uses: actions/checkout@v4

    - uses: actions/setup-python@v5
      with:
        python-version: '3.12'
        cache: 'pip'

    - name: Compute hash of pre-commit config
      id: cache-key
      # The hash of .pre-commit-config.yaml is used in the cache key below, so
      # a change to the config selects a different cache entry.
      run: |
        echo "pre_commit_hash=$(sha256sum .pre-commit-config.yaml | cut -d ' ' -f 1)" >> "$GITHUB_OUTPUT"
      shell: bash

    - name: Cache pre-commit's cache dir
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.cache/pre-commit
        key: ${{ runner.os }}-${{ steps.cache-key.outputs.pre_commit_hash }}

    - name: Check pre-commit
      run: |
        python3 -m pip install --upgrade pre-commit
        python3 -m pre_commit run --all-files --verbose

    - name: Print diff of changes if pre-commit failed
      if: failure()
      run: |
        git diff
# Builds Triton and runs the lit, Python, interpreter, regression, C++ and
# Proton test suites on each runner of the CUDA matrix published by
# Runner-Preparation. Skipped when that matrix is empty.
Integration-Tests:
  needs: Runner-Preparation
  if: needs.Runner-Preparation.outputs.matrix-CUDA != ''
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 30
  strategy:
    matrix:
      runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-CUDA)}}
  env:
    # First label of the matrix entry; used in the artifact cache keys.
    RUNNER_TYPE: ${{ matrix.runner[0] }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: "true"

    - name: Compute cache keys
      id: cache-key
      run: |
        llvm_file="cmake/llvm-hash.txt"
        nvidia_file="cmake/nvidia-toolchain-version.json"
        json_file="cmake/json-version.txt"
        # Check if files exist before proceeding
        if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
          echo "Error: Required dependency files are missing."
          exit 1
        fi
        # Process the files if they exist
        echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
        echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
        echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
      shell: bash

    - name: Cache build dependencies
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.triton/llvm
          ~/.triton/nvidia
          ~/.triton/json
        key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}

    # Cache ~/.triton/cache because the vast majority of unit test time is
    # spent compiling. Triton won't (well, should not) use these cached files
    # if something internal to Triton changes, because Triton's internal
    # source code is part of the cache key.
    #
    # Similarly, cache ~/.ccache to speed up compilation.
    #
    # On branch `main` we always start from an empty cache, i.e. we skip the
    # "restore" step. This is to prevent the caches from accumulating stale
    # files over time.
    - name: Restore cache of ccache and Triton compilation artifacts
      id: restore-build-cache
      if: github.ref != 'refs/heads/main'
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.triton/cache
          ~/.ccache
        # Restore the most recent cache entry.
        restore-keys: |
          triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
          triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
        # We expect this cache key never to hit and for us to fall back
        # unconditionally to the restore-key, so it doesn't actually matter
        # what we put here (so long as it doesn't hit an existing key).
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}

    - name: Inspect cache directories
      # `mkdir -p` first so that `du` has a directory to report on even when
      # nothing was restored.
      run: |
        mkdir -p ~/.triton
        du -h -d 1 ~/.triton
        mkdir -p ~/.ccache
        du -h -d 1 ~/.ccache

    - name: Update PATH
      run: |
        echo "$HOME/.local/bin" >> $GITHUB_PATH

    - name: Install pip dependencies
      run: |
        python3 -m pip install --upgrade pip
        python3 -m pip install cython setuptools wheel cmake==3.24 ninja lit

    - name: Install Triton
      env:
        CUDA_HOME: "/usr/local/cuda"
      run: |
        echo "PATH is '$PATH'"
        cd python
        ccache --zero-stats
        python3 -m pip install -v '.[tests]'

    - name: CCache Stats
      run: ccache --print-stats

    - name: Run lit tests
      run: |
        cd python
        LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
        if [ ! -d "${LIT_TEST_DIR}" ]; then
          echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
        fi
        lit -v "${LIT_TEST_DIR}"

    - name: Run python tests on CUDA
      run: |
        INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/build/$(ls python/build | grep -i lib)/triton/instrumentation"
        if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
          echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit 1
        fi
        cd python/test/unit
        python3 -m pytest -s -n 8 --ignore=hopper/test_flashattention.py --ignore=language/test_line_info.py --ignore=language/test_subprocess.py --ignore=test_debug.py
        python3 -m pytest -s -n 8 language/test_subprocess.py
        python3 -m pytest -s -n 8 test_debug.py --forked
        # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
        TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s language/test_line_info.py
        # Run hopper/test_flashattention.py separately to avoid out of gpu memory
        python3 -m pytest -s hopper/test_flashattention.py
        TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
          python3 -m pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py

    - name: Run interpreter tests
      # Only executed on the h100 runner set.
      if: ${{ matrix.runner[0] == 'h100-runner-set' }}
      env:
        TRITON_INTERPRET: "1"
      run: |
        cd python/test/unit
        python3 -m pytest -s -n 16 -m interpreter language/test_core.py language/test_standard.py \
          language/test_random.py language/test_block_pointer.py language/test_subprocess.py language/test_line_info.py \
          runtime/test_autotuner.py::test_kwargs[False] \
          ../../tutorials/06-fused-attention.py::test_op --device cpu

    - name: Run regression tests
      run: |
        cd python/test/regression
        python3 -m pytest -s -n 8 .

    - name: Run C++ unittests
      run: |
        cd python
        cd "build/$(ls build | grep -i cmake)"
        ctest -j32

    - name: Run Proton tests
      run: |
        cd third_party/proton/test
        python3 -m pytest -s .
        cd ..

    - name: Inspect cache directories
      run: |
        mkdir -p ~/.triton
        du -h -d 1 ~/.triton
        mkdir -p ~/.ccache
        du -h -d 1 ~/.ccache

    # If we're on branch `main`, save the ccache Triton compilation artifacts
    # to the cache so they can be used by other (non-main) CI runs.
    #
    # (It wouldn't be a problem to save the cache on every run, because github
    # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
    - name: Save ccache and Triton compilation artifacts to cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        path: |
          ~/.triton/cache
          ~/.ccache
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}
# Builds Triton inside a ROCm container and runs the lit, Python, ASAN,
# regression, Proton and C++ test suites on each runner of the HIP matrix
# published by Runner-Preparation. Skipped when that matrix is empty.
Integration-Tests-AMD:
  needs: Runner-Preparation
  if: needs.Runner-Preparation.outputs.matrix-HIP != ''
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 30
  env:
    # Second label of the matrix entry (the gfx target); used in the artifact
    # cache keys.
    RUNNER_TYPE: ${{ matrix.runner[1] }}
  strategy:
    matrix:
      runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-HIP)}}
  # Display name: 'mi210' for gfx90a, 'mi300x' for anything else.
  name: Integration-Tests (${{matrix.runner[1] == 'gfx90a' && 'mi210' || 'mi300x'}})
  container:
    image: rocmshared/pytorch:rocm6.2.2_ubuntu22.04_py3.10_pytorch_2.5.1_asan
    options: --device=/dev/kfd --device=/dev/dri --security-opt seccomp=unconfined --group-add video --user root
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: 'true'

    - name: Compute cache keys
      id: cache-key
      run: |
        llvm_file="cmake/llvm-hash.txt"
        nvidia_file="cmake/nvidia-toolchain-version.json"
        json_file="cmake/json-version.txt"
        # Check if files exist before proceeding
        if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
          echo "Error: Required dependency files are missing."
          exit 1
        fi
        # Process the files if they exist
        echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
        echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
        echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
      shell: bash

    - name: Cache build dependencies
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.triton/llvm
          ~/.triton/nvidia
          ~/.triton/json
        key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}

    # Cache ~/.triton/cache because the vast majority of unit test time is
    # spent compiling. Triton won't (well, should not) use these cached files
    # if something internal to Triton changes, because Triton's internal
    # source code is part of the cache key.
    #
    # Similarly, cache ~/.ccache to speed up compilation.
    #
    # On branch `main` we always start from an empty cache, i.e. we skip the
    # "restore" step. This is to prevent the caches from accumulating stale
    # files over time.
    - name: Restore cache of ccache and Triton compilation artifacts
      id: restore-build-cache
      if: github.ref != 'refs/heads/main'
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.triton/cache
          ~/.ccache
        # Restore the most recent cache entry.
        restore-keys: |
          triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
          triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
        # We expect this cache key never to hit and for us to fall back
        # unconditionally to the restore-key, so it doesn't actually matter
        # what we put here (so long as it doesn't hit an existing key).
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}

    - name: Inspect cache directories
      # `mkdir -p` first so that `du` has a directory to report on even when
      # nothing was restored.
      run: |
        mkdir -p ~/.triton
        du -h -d 1 ~/.triton
        mkdir -p ~/.ccache
        du -h -d 1 ~/.ccache

    - name: Update compiler to clang
      # Written to $GITHUB_ENV because variables exported inside a `run`
      # script are gone once that step's shell exits and would never reach
      # the "Install Triton" step.
      # NOTE(review): confirm /usr/bin/clang and /usr/bin/clang++ exist in the
      # container image.
      run: |
        echo "CC=/usr/bin/clang" >> "$GITHUB_ENV"
        echo "CXX=/usr/bin/clang++" >> "$GITHUB_ENV"

    - name: Install Triton
      id: amd-install-triton
      # Any preinstalled triton packages are removed first, then the checkout
      # is installed in editable mode.
      run: |
        echo "PATH is '$PATH'"
        pip uninstall -y triton pytorch-triton-rocm
        cd python
        ccache --zero-stats
        pip install -v -e '.[tests]'

    - name: CCache Stats
      run: ccache --print-stats

    - name: Run lit tests
      run: |
        cd python
        LIT_TEST_DIR="build/$(ls build | grep -i cmake)/test"
        if [ ! -d "${LIT_TEST_DIR}" ]; then
          echo "Could not find '${LIT_TEST_DIR}'" ; exit 1
        fi
        lit -v "${LIT_TEST_DIR}"

    - name: Run python tests on HIP
      run: |
        INSTRUMENTATION_LIB_DIR="${GITHUB_WORKSPACE}/python/triton/instrumentation"
        if [ ! -d "${INSTRUMENTATION_LIB_DIR}" ]; then
          echo "Could not find '${INSTRUMENTATION_LIB_DIR}'" ; exit 1
        fi
        pytest --capture=tee-sys -rfs python/tutorials/06-fused-attention.py
        pytest --capture=tee-sys -rfs third_party/amd/python/test/test_extract_slice.py
        cd python/test/unit
        pytest --capture=tee-sys -rfs -n 12 language runtime \
          --ignore=language/test_line_info.py \
          --ignore=test_debug.py
        # TODO: uncomment
        # pytest --capture=tee-sys -rfs test_debug.py
        TRITON_ALWAYS_COMPILE=1 TRITON_DISABLE_LINE_INFO=0 LLVM_PASS_PLUGIN_PATH=${INSTRUMENTATION_LIB_DIR}/libGPUInstrumentationTestLib.so \
          pytest --capture=tee-sys -rfs -vvv instrumentation/test_gpuhello.py
        # Run test_line_info.py separately with TRITON_DISABLE_LINE_INFO=0
        TRITON_DISABLE_LINE_INFO=0 python3 -m pytest -s -n 8 language/test_line_info.py

    - name: Run asan tests on HIP
      # The `-wholename` patterns are quoted so the shell passes them to
      # `find` verbatim instead of attempting pathname expansion first.
      run: |
        cd third_party/amd/python/test/
        ulimit -s 1024
        export PATH=$(find ~/.triton/llvm -name llvm-symbolizer -printf '%h\n'):$PATH
        export LD_LIBRARY_PATH=$(find /opt -name libclang_rt.asan-x86_64.so -printf '%h\n'):$LD_LIBRARY_PATH
        export LD_LIBRARY_PATH=$(find /opt -type d -wholename '*lib/llvm/lib/asan'):$LD_LIBRARY_PATH
        export LD_LIBRARY_PATH=$(find /usr -name libcaffe2_nvrtc.so -printf '%h\n'):$LD_LIBRARY_PATH
        export CLANG_ASAN_LIB=$(find /opt -name libclang_rt.asan-x86_64.so)
        export HIP_ASAN_LIB=$(find /opt -wholename '*lib/asan/libamdhip64.so')
        ASAN_OPTIONS=detect_leaks=0,alloc_dealloc_mismatch=0 \
          LD_PRELOAD=$CLANG_ASAN_LIB:$HIP_ASAN_LIB python3 -m pytest -s test_address_sanitizer.py

    - name: Run regression tests
      run: |
        # Reenable test_functional_regression.py once it's fixed
        cd python/test/regression
        python3 -m pytest -s -n 8 ./test_cast_matmul.py

    - name: Run Proton tests
      run: |
        cd third_party/proton/test
        python3 -m pytest -s .
        cd ..

    - name: Run C++ unittests
      run: |
        cd python
        cd "build/$(ls build | grep -i cmake)"
        ctest -j32

    - name: Inspect cache directories
      run: |
        mkdir -p ~/.triton
        du -h -d 1 ~/.triton
        mkdir -p ~/.ccache
        du -h -d 1 ~/.ccache

    # If we're on branch `main`, save the ccache Triton compilation artifacts
    # to the cache so they can be used by other (non-main) CI runs.
    #
    # (It wouldn't be a problem to save the cache on every run, because github
    # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
    - name: Save ccache and Triton compilation artifacts to cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        path: |
          ~/.triton/cache
          ~/.ccache
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}

    - name: Clean up caches
      # Always cleanup the worker, even if builds or tests failed
      if: always()
      run: |
        rm -rf ~/.triton
        rm -rf ~/.ccache
# Build-only check on each runner of the macOS matrix published by
# Runner-Preparation: installs Triton and reports ccache statistics; no test
# suite is run. Skipped when that matrix is empty.
Build-Tests:
  needs: Runner-Preparation
  if: needs.Runner-Preparation.outputs.matrix-MACOS != ''
  runs-on: ${{ matrix.runner }}
  timeout-minutes: 40
  strategy:
    matrix:
      runner: ${{fromJson(needs.Runner-Preparation.outputs.matrix-MACOS)}}
  env:
    # First label of the matrix entry; used in the artifact cache keys.
    RUNNER_TYPE: ${{ matrix.runner[0] }}
  steps:
    - name: Checkout
      uses: actions/checkout@v4
      with:
        submodules: "true"

    - name: Install brew dependencies
      run: |
        brew update
        brew install ccache llvm@19 lld coreutils

    - name: Compute cache keys
      id: cache-key
      run: |
        llvm_file="cmake/llvm-hash.txt"
        nvidia_file="cmake/nvidia-toolchain-version.json"
        json_file="cmake/json-version.txt"
        # Check if files exist before proceeding
        if [[ ! -f "$llvm_file" || ! -f "$nvidia_file" || ! -f "$json_file" ]]; then
          echo "Error: Required dependency files are missing."
          exit 1
        fi
        # Process the files if they exist
        echo "llvm=$(cat $llvm_file | cut -c 1-8)" >> $GITHUB_OUTPUT
        echo "nvidia=$(sha256sum $nvidia_file | cut -d ' ' -f 1)" >> $GITHUB_OUTPUT
        echo "json=$(cat $json_file)" >> $GITHUB_OUTPUT
        echo "datetime=$(date -u -Iseconds)" >> $GITHUB_OUTPUT
      shell: bash

    - name: Cache build dependencies
      uses: actions/cache@v4
      with:
        # Note that we cannot use environment variables here given there is
        # no shell to interpret them in the paths.
        path: |
          ~/.triton/llvm
          ~/.triton/nvidia
          ~/.triton/json
        key: ${{ runner.os }}-${{ runner.arch }}-llvm-${{ steps.cache-key.outputs.llvm }}-nvidia-${{ steps.cache-key.outputs.nvidia }}-json-${{ steps.cache-key.outputs.json }}

    # Cache ~/.triton/cache because the vast majority of unit test time is
    # spent compiling. Triton won't (well, should not) use these cached files
    # if something internal to Triton changes, because Triton's internal
    # source code is part of the cache key.
    #
    # Similarly, cache ~/.ccache to speed up compilation.
    #
    # On branch `main` we always start from an empty cache, i.e. we skip the
    # "restore" step. This is to prevent the caches from accumulating stale
    # files over time.
    - name: Restore cache of ccache and Triton compilation artifacts
      id: restore-build-cache
      if: github.ref != 'refs/heads/main'
      uses: actions/cache/restore@v4
      with:
        path: |
          ~/.triton/cache
          ~/.ccache
        # Restore the most recent cache entry.
        restore-keys: |
          triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-
          triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-
        # We expect this cache key never to hit and for us to fall back
        # unconditionally to the restore-key, so it doesn't actually matter
        # what we put here (so long as it doesn't hit an existing key).
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}

    - name: Inspect cache directories
      # `mkdir -p` first so that `du` has a directory to report on even when
      # nothing was restored.
      run: |
        mkdir -p ~/.triton
        du -h -d 1 ~/.triton
        mkdir -p ~/.ccache
        du -h -d 1 ~/.ccache

    - name: Update PATH
      run: |
        echo "$HOME/.local/bin" >> $GITHUB_PATH
        echo "/opt/homebrew/opt/llvm/bin" >> $GITHUB_PATH

    - name: Install pip dependencies
      # Dependencies go into a virtual environment at ~/.venv, which the
      # "Install Triton" step activates again.
      run: |
        python3 -m venv ~/.venv
        source ~/.venv/bin/activate
        python3 -m pip install --upgrade pip
        python3 -m pip install cython setuptools wheel cmake==3.24 ninja lit pybind11

    - name: Install Triton
      env:
        TRITON_BUILD_WITH_O1: "true"
        # macos-latest has 3 vcpus and 7GB DRAM, to save memory we limit the number of jobs to 3
        # https://docs.github.com/en/actions/using-github-hosted-runners/about-github-hosted-runners/about-github-hosted-runners#standard-github-hosted-runners-for-public-repositories
        MAX_JOBS: "3"
      run: |
        source ~/.venv/bin/activate
        echo "PATH is '$PATH'"
        cd python
        ccache --zero-stats
        python3 -m pip install -v --no-build-isolation .

    - name: CCache Stats
      run: ccache --print-stats

    - name: Inspect cache directories
      run: |
        mkdir -p ~/.triton
        du -h -d 1 ~/.triton
        mkdir -p ~/.ccache
        du -h -d 1 ~/.ccache

    # If we're on branch `main`, save the ccache Triton compilation artifacts
    # to the cache so they can be used by other (non-main) CI runs.
    #
    # (It wouldn't be a problem to save the cache on every run, because github
    # evicts cache entries LRU, but maybe this saves a bit of time in CI.)
    - name: Save ccache and Triton compilation artifacts to cache
      if: github.ref == 'refs/heads/main'
      uses: actions/cache/save@v4
      with:
        path: |
          ~/.triton/cache
          ~/.ccache
        key: triton-artifacts-${{ runner.os }}-${{ runner.arch }}-${{ env.RUNNER_TYPE }}-llvm-${{ steps.cache-key.outputs.llvm }}-${{ steps.cache-key.outputs.datetime }}