Multireduce Kernels - prereq refactor (#4173) #1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Unit Tests | |
env: | |
# increment this when downloads substantially change to avoid the internet | |
DOWNLOAD_CACHE_VERSION: '4' | |
on: | |
push: | |
branches: | |
- master | |
pull_request: | |
workflow_dispatch: | |
jobs: | |
uops: | |
name: uops tests | |
runs-on: ubuntu-latest | |
timeout-minutes: 20 | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v4 | |
- name: Set up Python 3.12 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.12 | |
- name: Cache python packages | |
uses: actions/cache@v4 | |
with: | |
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.12/site-packages | |
key: uops-packages-${{ hashFiles('**/setup.py') }}-3.12 | |
- name: Install dependencies | |
run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
- name: Test IMAGE=2 support | |
run: | | |
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm | |
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d | |
- name: Test emulated METAL tensor cores | |
run: DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm | |
- name: Test emulated HSA tensor cores | |
run: | | |
PYTHONPATH=. DEBUG=2 EMULATE_HSA=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
PYTHONPATH=. DEBUG=2 EMULATE_HSA=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py | |
PYTHONPATH=. DEBUG=2 EMULATE_HSA=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py | |
PYTHONPATH=. DEBUG=2 EMULATE_HSA=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py | |
- name: Test emulated CUDA tensor cores | |
run: DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm | |
- name: Full test tensor cores | |
run: | | |
DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores | |
DEBUG=2 EMULATE_HSA=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores | |
DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores | |
- name: Test dtype with Python emulator | |
run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 test/test_dtype.py | |
- name: Test ops with Python emulator | |
run: DEBUG=2 PYTHON=1 python3 -m pytest test/test_ops.py -k "not (test_split or test_simple_cumsum or test_cumsum or test_einsum or test_dot or test_dot_1d or test_big_gemm or test_broadcastdot or test_multidot or test_var_axis or test_std_axis or test_broadcast_full or test_broadcast_partial or test_simple_conv3d or test_dilated_conv_transpose2d or test_simple_conv_transpose3d or test_large_input_conv2d or test_maxpool2d or test_maxpool2d_simple or test_maxpool2d_bigger_stride or test_avgpool2d or test_cat or test_scaled_product_attention or test_scaled_product_attention_causal or test_slice_fancy_indexing_dim_inject_none or test_slice_fancy_indexing_list_indices or test_slice_fancy_indexing_no_dim_collapse or test_slice_fancy_indexing_tuple_indices or test_slice_fancy_indexing_list_with_tensors or test_slice_fancy_indexing_dim_collapse_int)" --durations=20 | |
- name: Test uops with Python emulator | |
run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20 | |
- name: Test symbolic with Python emulator | |
run: PYTHONPATH=. PYTHON=1 python3 test/test_symbolic_ops.py | |
- name: test_linearizer_failures with Python emulator | |
run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1 | |
linter: | |
name: Linters | |
runs-on: ubuntu-latest | |
timeout-minutes: 20 | |
# TODO: run the pre-commit hook to replace a lot of this | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v4 | |
- name: Set up Python 3.8 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.8 | |
- name: Cache python packages | |
uses: actions/cache@v4 | |
with: | |
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.8/site-packages | |
key: linting-packages-${{ hashFiles('**/setup.py') }}-3.8 | |
- name: Install dependencies | |
run: pip install -e '.[linting,testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
- name: Lint with pylint | |
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' **/*.py | |
- name: Lint with ruff | |
run: | | |
pip3 install --upgrade --force-reinstall ruff | |
python3 -m ruff check . --preview | |
- name: Lint tinygrad with pylint | |
run: python -m pylint tinygrad/ | |
- name: Run mypy | |
run: python -m mypy --strict-equality | |
- name: Test Docs | |
run: | | |
python docs/abstractions2.py | |
- name: Test Quickstart | |
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py | |
- name: Fuzz Test symbolic | |
run: python test/external/fuzz_symbolic.py | |
- name: Fuzz Test shapetracker | |
run: | | |
PYTHONPATH="." python test/external/fuzz_shapetracker.py | |
PYTHONPATH="." python test/external/fuzz_shapetracker_math.py | |
- name: Test to_movement_ops | |
run: PYTHONPATH="." python extra/to_movement_ops.py | |
- name: Use as an external package | |
run: | | |
mkdir $HOME/test_external_dir | |
cd $HOME/test_external_dir | |
python -m venv venv | |
source venv/bin/activate | |
pip install $GITHUB_WORKSPACE | |
python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))" | |
- name: Test DEBUG | |
run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())" | |
- name: Repo line count <7000 lines | |
run: MAX_LINE_COUNT=7000 python sz.py | |
testcpuimagenet: | |
name: ImageNet to C Tests | |
runs-on: ubuntu-latest | |
timeout-minutes: 20 | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v4 | |
- name: Set up Python 3.8 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.8 | |
- name: Cache python packages | |
uses: actions/cache@v4 | |
with: | |
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.8/site-packages | |
key: testing-packages-${{ hashFiles('**/setup.py') }} | |
- name: Cache downloads | |
uses: actions/cache@v4 | |
with: | |
path: ~/.cache/tinygrad/downloads/ | |
key: downloads-cache-cpu-${{ env.DOWNLOAD_CACHE_VERSION }} | |
- name: Install Dependencies | |
run: pip install -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
#- name: Run Pytest | |
# run: python -m pytest -n=auto test/ -k "not (test_efficientnet and models/test_train.py)" --durations=20 | |
- name: Compile EfficientNet to C | |
run: PYTHONPATH="." CLANG=1 python examples/compile_efficientnet.py > recognize.c | |
- name: Compile C to native | |
run: clang -O2 recognize.c -lm -o recognize | |
- name: Test EfficientNet | |
run: cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock | |
testopencl: | |
strategy: | |
fail-fast: false | |
matrix: | |
task: [optimage, openpilot, onnx] | |
name: ${{ matrix.task=='optimage'&&'GPU OPT and IMAGE Tests' || matrix.task=='openpilot'&&'openpilot (OpenCL) Tests' || matrix.task=='onnx'&&'ONNX+Optimization Tests' }} | |
runs-on: ubuntu-20.04 | |
timeout-minutes: 20 | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v4 | |
- name: Install OpenCL | |
run: | | |
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel | |
echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list | |
sudo apt update | |
sudo apt install --allow-unauthenticated -y --no-install-recommends \ | |
intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \ | |
intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \ | |
intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16 | |
- name: Set up Python 3.11 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.11 | |
- name: Cache python packages | |
uses: actions/cache@v4 | |
with: | |
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
key: testing-packages-${{ hashFiles('**/setup.py') }} | |
- name: Cache downloads | |
uses: actions/cache@v4 | |
with: | |
path: ~/.cache/tinygrad/downloads/ | |
key: downloads-cache-${{ matrix.task }}-${{ env.DOWNLOAD_CACHE_VERSION }} | |
- name: Install Dependencies | |
run: pip install -e '.[testing,testing_tf]' --extra-index-url https://download.pytorch.org/whl/cpu | |
- if: ${{ matrix.task == 'optimage' }} | |
name: Run Optimizer Test (OPT 2 and 3) | |
run: | | |
PYTHONPATH="." OPT=2 GPU=1 python -m pytest -n=auto test/external/external_test_opt.py | |
PYTHONPATH="." OPT=3 GPU=1 python -m pytest -n=auto test/external/external_test_opt.py | |
- if: ${{ matrix.task == 'optimage'}} | |
name: Test WINO=1 | |
run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d | |
- if: ${{ matrix.task == 'optimage'}} | |
name: Test GPU IMAGE=1 ops | |
run: GPU=1 IMAGE=1 python -m pytest -n=auto test/test_ops.py | |
- if: ${{ matrix.task == 'optimage'}} | |
name: Test GPU IMAGE=2 ops | |
run: GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py | |
- if: ${{ matrix.task == 'openpilot' }} | |
name: Test openpilot model compile and size | |
run: | | |
DEBUG=2 ALLOWED_KERNEL_COUNT=208 FLOAT16=1 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py | |
#python -c 'import os; assert os.path.getsize("/tmp/output.thneed") < 100_000_000' | |
- if: ${{ matrix.task == 'openpilot' }} | |
name: Test openpilot model correctness (float32) | |
run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py | |
- if: ${{ matrix.task == 'openpilot' }} | |
name: Test openpilot alt model correctness (float32) | |
run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx | |
- if: ${{ matrix.task == 'openpilot' }} | |
name: Test openpilot fastvits model correctness (float32) | |
run: FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python openpilot/compile2.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx | |
- if: ${{ matrix.task == 'onnx' }} | |
name: Test ONNX (GPU) | |
run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
- if: ${{ matrix.task == 'onnx' }} | |
name: Test ONNX (CLANG) | |
run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
- if: ${{ matrix.task == 'onnx' }} | |
name: Test Action Space | |
run: PYTHONPATH="." GPU=1 python3 extra/optimization/get_action_space.py | |
- if: ${{ matrix.task == 'onnx' }} | |
name: Test Beam Search | |
run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py | |
- if: ${{ matrix.task == 'onnx' }} | |
name: Test MLPerf optimizers | |
run: GPU=1 python -m pytest -n=auto test/external/external_test_optim.py --durations=20 | |
- if: ${{ matrix.task == 'onnx' }} | |
name: Test THREEFRY | |
run: PYTHONPATH=. THREEFRY=1 GPU=1 python3 -m pytest test/test_randomness.py test/test_jit.py | |
#testwebgpu: | |
# name: WebGPU Tests | |
# runs-on: macos-13 | |
# timeout-minutes: 20 | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v4 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v5 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: /Users/runner/Library/Python/3.11/lib/python/site-packages | |
# key: webgpu-testing-user3-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install Dependencies | |
# run: pip install --user -e '.[webgpu,testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Cache downloads | |
# uses: actions/cache@v4 | |
# with: | |
# path: ~/Library/Caches/tinygrad/downloads/ | |
# key: downloads-cache-webgpu-${{ env.DOWNLOAD_CACHE_VERSION }} | |
# - name: Check Device.DEFAULT (WEBGPU) and print some source | |
# run: | | |
# WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT" | |
# WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add | |
#- name: Run webgpu pytest | |
# run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto | |
# - name: Run selected webgpu tests | |
# run: | | |
# WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m pytest -n=auto test/test_ops.py test/test_dtype.py \ | |
# test/test_jit.py test/test_symbolic_ops.py test/test_symbolic_jit.py test/test_linearizer.py \ | |
# test/test_linearizer_failures.py test/test_nn.py | |
# - name: Build WEBGPU Efficientnet | |
# run: WEBGPU=1 WGPU_BACKEND_TYPE=Metal python -m examples.compile_efficientnet | |
# - name: Install Puppeteer | |
# run: npm install puppeteer | |
# - name: Run WEBGPU Efficientnet | |
# run: node test/web/test_webgpu.js | |
testmetal: | |
name: Metal Tests | |
runs-on: macos-14 | |
timeout-minutes: 20 | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v4 | |
- name: Set up Python 3.11 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.11 | |
- name: Cache python packages | |
uses: actions/cache@v4 | |
with: | |
path: /Users/runner/Library/Python/3.11/lib/python/site-packages | |
key: metal-m1-testing-user3-packages-${{ hashFiles('**/setup.py') }} | |
- name: Install Dependencies | |
run: pip install --user -e '.[testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
- name: Cache downloads | |
uses: actions/cache@v4 | |
with: | |
path: ~/Library/Caches/tinygrad/downloads/ | |
key: downloads-cache-metal-only-${{ env.DOWNLOAD_CACHE_VERSION }} | |
- name: Check Device.DEFAULT (METAL) and print some source | |
run: | | |
METAL=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'METAL', Device.DEFAULT" | |
METAL=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add | |
- name: Run metal test | |
run: JIT=2 METAL=1 python -m pytest -n=auto test/ --ignore=test/external --ignore=test/models --durations=20 | |
- name: Run real world test | |
run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20 | |
- name: Run ONNX | |
run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
- name: Test tensor core ops (fake) | |
run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm | |
- name: Test tensor core ops (real) | |
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm | |
- name: Test LLaMA compile speed | |
run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py | |
- name: Test Beam Search | |
run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py | |
- name: Fuzz Test linearizer | |
run: PYTHONPATH="." METAL=1 CACHELEVEL=0 FUZZ_ALL_ACTIONS=1 DEPTH=2 FUZZ_N=48 FUZZ_MAX_SIZE=10000000 python test/external/fuzz_linearizer.py | |
# testwebgl: | |
# name: WebGL Tests | |
# runs-on: ubuntu-latest | |
# timeout-minutes: 20 | |
# | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v3 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v4 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
# key: webgl-testing-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install Dependencies | |
# run: pip install -e '.[webgl,testing]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Cache downloads | |
# uses: actions/cache@v4 | |
# with: | |
# path: ~/Library/Caches/tinygrad/downloads/ | |
# key: downloads-cache-webgl-${{ env.DOWNLOAD_CACHE_VERSION }} | |
# - name: Prepare | |
# run: | | |
# sudo apt-get -y install xvfb | |
# sudo /usr/bin/Xvfb :0 -screen 0 4096x4096x24+32 & | |
# - name: Run selected webgl tests | |
# run: WEBGL=1 python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_jit.py | |
# - name: Build WebGL Efficientnet | |
# run: WEBGL=1 python -m examples.compile_efficientnet | |
tests: | |
strategy: | |
fail-fast: false | |
matrix: | |
backend: [llvm, clang, gpu, cuda, hip, ptx] #, triton] | |
name: Tests on (${{ matrix.backend }}) | |
runs-on: ubuntu-latest | |
timeout-minutes: 20 | |
steps: | |
- name: Checkout Code | |
uses: actions/checkout@v4 | |
- name: Set up Python 3.11 | |
uses: actions/setup-python@v5 | |
with: | |
python-version: 3.11 | |
- name: Cache python packages | |
uses: actions/cache@v4 | |
with: | |
path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
key: ${{ matrix.backend }}-packages-${{ hashFiles('**/setup.py') }} | |
- name: Cache downloads | |
uses: actions/cache@v4 | |
with: | |
path: ~/.cache/tinygrad/downloads/ | |
key: downloads-cache-${{ matrix.backend }}-${{ env.DOWNLOAD_CACHE_VERSION }} | |
- name: Set env | |
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'cuda' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\n' || matrix.backend == 'PTX' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nPTX=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nCUDACPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'hip' && 'RHIP=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV | |
- name: Install OpenCL | |
if: matrix.backend == 'gpu' | |
run: | | |
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel | |
echo "deb [ allow-insecure=yes ] https://apt.repos.intel.com/oneapi all main" | sudo tee /etc/apt/sources.list.d/oneAPI.list | |
sudo apt update | |
sudo apt install --allow-unauthenticated -y --no-install-recommends opencl-headers \ | |
intel-oneapi-runtime-openmp=2023.2.1-16 intel-oneapi-runtime-compilers-common=2023.2.1-16 intel-oneapi-runtime-compilers=2023.2.1-16 \ | |
intel-oneapi-runtime-dpcpp-sycl-opencl-cpu=2023.2.1-16 intel-oneapi-runtime-tbb-common=2021.10.0-49541 \ | |
intel-oneapi-runtime-tbb=2021.10.0-49541 intel-oneapi-runtime-opencl=2023.2.1-16 | |
- name: Install packages (cuda) | |
if: matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton' | |
run: | | |
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel | |
sudo apt update -y | |
sudo apt install -y --no-install-recommends git g++ cmake ninja-build llvm-15-dev zlib1g-dev libglew-dev \ | |
flex bison libfl-dev libboost-thread-dev libboost-filesystem-dev nvidia-cuda-toolkit-gcc libzstd-dev | |
- name: Cache gpuocelot | |
if: matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton' | |
id: cache-build | |
uses: actions/cache@v4 | |
env: | |
cache-name: cache-gpuocelot-build | |
with: | |
path: ${{ github.workspace }}/gpuocelot/ocelot | |
key: ubuntu22.04-gpuocelot-18401f4245b27ca4b3af433196583cc81ef84480-rebuild-5 | |
- name: Clone/compile gpuocelot | |
if: (matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton') && steps.cache-build.outputs.cache-hit != 'true' | |
run: | | |
git clone --recurse-submodules https://github.com/gpuocelot/gpuocelot.git ${{ github.workspace }}/gpuocelot | |
cd ${{ github.workspace }}/gpuocelot/ocelot | |
git checkout 18401f4245b27ca4b3af433196583cc81ef84480 | |
mkdir build | |
cd build | |
cmake .. -Wno-dev -G Ninja -DOCELOT_BUILD_TOOLS=OFF -DCMAKE_BUILD_ALWAYS=0 -DBUILD_TESTS_CUDA=OFF | |
ninja | |
- name: Install gpuocelot | |
if: matrix.backend == 'cuda' || matrix.backend == 'ptx' || matrix.backend == 'triton' | |
run: | | |
cd ${{ github.workspace }}/gpuocelot/ocelot/build | |
sudo ninja install -d explain | |
- name: Install packages (hip) | |
if: matrix.backend == 'hip' | |
run: | | |
echo 'Acquire::http::Pipeline-Depth "5";' | sudo tee -a /etc/apt/apt.conf.d/99parallel | |
wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null | |
sudo tee /etc/apt/sources.list.d/rocm.list <<'EOF' | |
deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/6.0.2 jammy main | |
EOF | |
echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' | sudo tee /etc/apt/preferences.d/rocm-pin-600 | |
sudo apt update | |
sudo apt install --no-install-recommends --allow-unauthenticated -y hsa-rocr comgr rocm-llvm hsa-rocr-dev | |
curl -s https://api.github.com/repos/Qazalin/remu/releases/latest | \ | |
jq -r '.assets[] | select(.name == "libremu.so").browser_download_url' | \ | |
sudo xargs curl -L -o /usr/local/lib/libremu.so | |
- name: Install dependencies | |
run: pip install -e '.[testing${{matrix.backend=='llvm'&&',llvm'||matrix.backend=='cuda'&&',cuda'||matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}]' --extra-index-url https://download.pytorch.org/whl/cpu --extra-index-url https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/Triton-Nightly/pypi/simple/ | |
- name: Check Device.DEFAULT and print some source | |
run: | | |
python -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','RHIP'], Device.DEFAULT" | |
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add | |
- name: Verify OpenCL autogen | |
if: matrix.backend == 'gpu' | |
run: | | |
cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak | |
./autogen_stubs.sh opencl | |
diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py | |
- name: Verify CUDA autogen | |
if: matrix.backend == 'cuda' | |
run: | | |
cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak | |
./autogen_stubs.sh cuda | |
diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py | |
- name: Verify HIP autogen | |
if: matrix.backend == 'hip' | |
run: | | |
cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak | |
cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak | |
./autogen_stubs.sh hsa | |
./autogen_stubs.sh comgr | |
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py | |
diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py | |
- name: Run pytest (not cuda or hip) | |
if: matrix.backend!='cuda' && matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'hip' | |
run: python -m pytest -n=auto test/ --durations=20 | |
- name: Run ONNX (only LLVM) | |
if: matrix.backend == 'llvm' | |
run: python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20 | |
- name: Run pytest (cuda) | |
if: matrix.backend=='cuda'||matrix.backend=='ptx'||matrix.backend=='triton' | |
run: python -m pytest -n=auto test/ -k 'not (half or test_efficientnet_safetensors)' --ignore=test/external --ignore=test/models --durations=20 | |
- name: Run pytest (hip) | |
if: matrix.backend=='hip' | |
run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/external/external_test_hip_compile.py --durations=20 | |
#testunicorn: | |
# name: ARM64 unicorn Test | |
# runs-on: ubuntu-latest | |
# timeout-minutes: 20 | |
# steps: | |
# - name: Checkout Code | |
# uses: actions/checkout@v4 | |
# - name: Set up Python 3.11 | |
# uses: actions/setup-python@v5 | |
# with: | |
# python-version: 3.11 | |
# - name: Cache python packages | |
# uses: actions/cache@v4 | |
# with: | |
# path: ${{ env.Python3_ROOT_DIR }}/lib/python3.11/site-packages | |
# key: testing-arm-packages-${{ hashFiles('**/setup.py') }} | |
# - name: Install cross-assembler | |
# run: | | |
# sudo apt update -y | |
# sudo apt install -y --no-install-recommends gcc-aarch64-linux-gnu | |
# - name: Install dependencies | |
# run: pip install -e '.[testing,arm]' --extra-index-url https://download.pytorch.org/whl/cpu | |
# - name: Test arm | |
# run: CI=1 ARM64=1 CLANG=1 python -m pytest -n=auto test/ -k 'not (test_nn.py and (test_conv_transpose2d or test_conv2d))' --ignore=test/models --ignore=test/test_speed_v_torch.py --ignore=test/test_net_speed.py --ignore=test/test_specific_conv.py --ignore=test/unit/test_disk_tensor.py |