# swap order in rsqrt #6089
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow header: display name, workflow-wide environment, and triggers.
name: Unit Tests
env:
  # increment this when downloads substantially change to avoid the internet
  DOWNLOAD_CACHE_VERSION: '9'
  CAPTURE_PROCESS_REPLAY: 1
  GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
# Triggers: pushes to master, every pull request, and manual dispatch.
on:
  push:
    branches:
      - master
  pull_request:
  workflow_dispatch:
jobs:
# Schedule benchmark plus the torch speed comparison, run with LLVM=1 LLVMOPT=1.
llvmspeed:
  name: LLVM Speed
  runs-on: ubuntu-24.04
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: llvm-speed
        deps: testing_minimal
        llvm: 'true'
    - name: External Benchmark Schedule
      run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py
    # The same speed script is run twice: once as-is, once with BEAM=2 set.
    - name: Speed Test
      run: LLVM=1 LLVMOPT=1 python3 test/test_speed_v_torch.py
    - name: Speed Test (BEAM=2)
      run: BEAM=2 LLVM=1 LLVMOPT=1 python3 test/test_speed_v_torch.py
# Documentation job: packaging smoke tests, mkdocs build, doc scripts, and a compiled EfficientNet check.
docs:
  name: Docs
  runs-on: ubuntu-22.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        deps: docs
        pydeps: "capstone"
    # Installs the checkout into a fresh venv under $HOME, then imports it and type-checks a snippet with mypy.
    - name: Use as an external package
      run: |
        mkdir $HOME/test_external_dir
        cd $HOME/test_external_dir
        python -m venv venv
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
        pip install mypy
        mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
    # Second fresh venv, this time inside the workspace; runs the copied example for 10 steps at batch size 2.
    - name: Run beautiful_mnist with tinygrad only
      run: |
        mkdir $GITHUB_WORKSPACE/test_dir
        cd $GITHUB_WORKSPACE/test_dir
        python -m venv venv
        source venv/bin/activate
        pip install $GITHUB_WORKSPACE
        cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py .
        PYTHONPATH=$GITHUB_WORKSPACE BS=2 STEPS=10 python beautiful_mnist.py
    - name: Test Docs Build
      run: python -m mkdocs build --strict
    - name: Test Docs
      run: |
        python docs/abstractions2.py
        python docs/abstractions3.py
    # awk extracts the ```python fenced blocks from the quickstart into a script, which is then executed.
    - name: Test Quickstart
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
    - name: Test DEBUG
      run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
    # The final grep makes the step fail unless the compiled binary's output contains "cock".
    - name: Compile EfficientNet to C and test it
      run: |
        CLANG=1 PYTHONPATH="." python examples/compile_efficientnet.py > recognize.c
        clang -O2 recognize.c -lm -o recognize
        cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock
# Autogen drift check. Every step follows the same pattern: back up the
# checked-in autogen module(s) to /tmp, regenerate them with autogen_stubs.sh,
# then diff backup against regenerated output (diff exits non-zero on any change).
autogen:
  name: Autogen
  runs-on: ubuntu-22.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        opencl: 'true'
        amd: 'true'
        cuda: 'true'
        webgpu: 'true'
        llvm: 'true'
    - name: Verify OpenCL autogen
      run: |
        cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
        ./autogen_stubs.sh opencl
        diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
    - name: Verify CUDA autogen
      run: |
        cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
        cp tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
        ./autogen_stubs.sh cuda
        ./autogen_stubs.sh nv
        diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
        diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
    - name: Verify AMD autogen
      run: |
        cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
        cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
        cp tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
        ./autogen_stubs.sh hsa
        ./autogen_stubs.sh comgr
        ./autogen_stubs.sh amd
        diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
        diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
        diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
    - name: Verify Linux autogen
      run: |
        cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
        cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
        ./autogen_stubs.sh libc
        ./autogen_stubs.sh io_uring
        diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
        diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
    - name: Verify WebGPU autogen
      run: |
        cp tinygrad/runtime/autogen/webgpu.py /tmp/webgpu.py.bak
        ./autogen_stubs.sh webgpu
        diff /tmp/webgpu.py.bak tinygrad/runtime/autogen/webgpu.py
    - name: Verify LLVM autogen
      run: |
        cp tinygrad/runtime/autogen/llvm.py /tmp/llvm.py.bak
        ./autogen_stubs.sh llvm
        diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py
# Tensor core tests. Every command in this job sets PYTHON=1; most also select
# a target through an EMULATE_* flag (the AMX commands add AMX=1).
tc:
  name: Tensor Core tests
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: uops-minimal
        deps: testing_minimal
    - name: Test IMAGE=2 support
      run: |
        IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
        IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d
    - name: Test emulated METAL tensor cores
      run: |
        DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Test emulated AMX tensor cores
      run: PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
    # simple_matmul is swept over N in {16, 64} and ACC_HALF in {0, 1}, always with HALF=1.
    - name: Test emulated AMD tensor cores
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Test emulated CUDA tensor cores
      run: |
        DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
        DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
        DEBUG=2 EMULATE_CUDA_SM75=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
        PYTHONPATH="." DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
    - name: Test emulated INTEL OpenCL tensor cores
      run: DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py
    # One TestLinearizer.test_tensor_cores invocation per emulated target.
    - name: Full test tensor cores
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
        PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
    - name: Test tensor cores (TC=3)
      run: |
        TC=3 DEBUG=3 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
        TC=3 PYTHONPATH=. DEBUG=3 EMULATE_AMD=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
        TC=3 DEBUG=3 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
        TC=3 PYTHONPATH=. DEBUG=3 EMULATE_INTEL=1 PYTHON=1 N=16 HALF=1 python3 ./extra/gemm/simple_matmul.py
        TC=3 PYTHONPATH=. DEBUG=3 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
    - name: Test device flop counts
      run: |
        PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
        PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
# Runs dtype, ops, uops, symbolic and one linearizer-failure test with PYTHON=1.
bepython:
  name: Python Backend
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: be-minimal
        deps: testing_minimal
    - name: Test dtype with Python emulator
      run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
    # The -k expression deselects a fixed list of test_ops tests; everything else in test_ops.py runs.
    - name: Test ops with Python emulator
      run: DEBUG=2 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py -k "not (test_split or test_simple_cumsum or test_cumsum or test_einsum or test_dot or test_dot_1d or test_big_gemm or test_broadcastdot or test_multidot or test_var_axis or test_std_axis or test_broadcast_full or test_broadcast_partial or test_simple_conv3d or test_dilated_conv_transpose2d or test_simple_conv_transpose3d or test_large_input_conv2d or test_max_pool2d or test_max_pool2d_simple or test_max_pool2d_bigger_stride or test_avg_pool2d or test_cat or test_scaled_product_attention or test_scaled_product_attention_causal or test_slice_fancy_indexing_dim_inject_none or test_slice_fancy_indexing_list_indices or test_slice_fancy_indexing_no_dim_collapse or test_slice_fancy_indexing_tuple_indices or test_slice_fancy_indexing_list_with_tensors or test_slice_fancy_indexing_dim_collapse_int or test_interpolate_bilinear or test_interpolate_bilinear_corners_aligned or test_scaled_dot_product_attention or test_cummax)" --durations=20
    - name: Test uops with Python emulator
      run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20
    - name: Test symbolic with Python emulator
      run: PYTHONPATH=. PYTHON=1 python3 test/test_symbolic_ops.py
    - name: test_linearizer_failures with Python emulator
      run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1
# Static analysis: pylint (whitespace-only pass, then tinygrad/), ruff, and mypy.
linter:
  name: Linters
  runs-on: ubuntu-latest
  timeout-minutes: 10
  # TODO: run the pre-commit hook to replace a lot of this
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: linting-only
        python-version: '3.10'
        deps: linting
    # Only W0311 (bad-indentation) and C0303 (trailing-whitespace) are enabled for this pass.
    # NOTE(review): --indent-string appears here as a single space; confirm it matches the
    # repository's indent width (whitespace may have been collapsed in this copy of the file).
    - name: Lint bad-indentation and trailing-whitespace with pylint
      run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string=' ' --recursive=y .
    # ruff is force-reinstalled at its newest release on every run, so the ruff version is not pinned here.
    - name: Lint with ruff
      run: |
        pip3 install --upgrade --force-reinstall ruff
        python3 -m ruff check .
    - name: Lint tinygrad with pylint
      run: python -m pylint tinygrad/
    - name: Run mypy
      run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt
# Unit tests, an executable-README check, and the repository line-count limit.
unittest:
  name: Unit Tests
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: unittest-12
        deps: testing
    # awk extracts the ```python fenced blocks from README.md into README.py, which is then executed.
    - name: Test README
      run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
    - name: Run unit tests
      run: PYTHONPATH="." python -m pytest -n=auto test/unit/
    # Keep the number in the step name in sync with MAX_LINE_COUNT.
    - name: Repo line count < 11200 lines
      run: MAX_LINE_COUNT=11200 python sz.py
# Runs the symbolic and shapetracker fuzz scripts from test/external.
fuzzing:
  name: Fuzzing
  runs-on: ubuntu-latest
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: fuzzing-minimal
        deps: testing_minimal
    - name: Fuzz Test symbolic
      run: python test/external/fuzz_symbolic.py
    - name: Fuzz Test shapetracker
      run: |
        PYTHONPATH="." python test/external/fuzz_shapetracker.py
        PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
# GPU=1 tests with IMAGE=2: kernel-count test, a WINO=1 conv, test_ops, and an end-to-end MNIST test.
testgpuimage:
  name: 'GPU IMAGE Tests'
  # Was ubuntu-20.04: that GitHub-hosted runner image has been retired, so jobs
  # targeting it no longer get a runner. ubuntu-22.04 is what the other OpenCL
  # jobs in this workflow (testopencl, testmodels) already use.
  # NOTE(review): confirm the kernel-count expectations still hold with the
  # OpenCL stack that setup-tinygrad installs on 22.04.
  runs-on: ubuntu-22.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: gpu-image
        deps: testing_minimal
        opencl: 'true'
    - name: Run Kernel Count Test
      run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
    - name: Test WINO=1
      run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d
    - name: Test GPU IMAGE=2 ops + training
      run: |
        PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
        PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# Compiles three pinned openpilot supercombo.onnx revisions with GPU=1 IMAGE=2 FLOAT16=0.
testopenpilot:
  name: 'openpilot Compile Tests'
  # Was ubuntu-20.04: that GitHub-hosted runner image has been retired, so jobs
  # targeting it no longer get a runner. ubuntu-22.04 is what the other OpenCL
  # jobs in this workflow (testopencl, testmodels) already use.
  # NOTE(review): confirm the ALLOWED_* limits below still hold with the
  # OpenCL stack that setup-tinygrad installs on 22.04.
  runs-on: ubuntu-22.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: openpilot-compile
        deps: testing
        opencl: 'true'
    # The ALLOWED_* variables pass the kernel-count and image-read limits to compile3.py.
    - name: Test openpilot model kernel count and gate usage
      run: |
        PYTHONPATH="." ALLOWED_KERNEL_COUNT=209 ALLOWED_READ_IMAGE=2105 ALLOWED_GATED_READ_IMAGE=29 FLOAT16=0 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx
    - name: Test openpilot alt model correctness (float32)
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx
    - name: Test openpilot fastvits model correctness (float32)
      run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# ONNX backend tests on GPU/CLANG/LLVM, CLOUD=1 smoke tests, and the optimization tooling.
testopencl:
  name: 'ONNX+Optimization Tests'
  runs-on: ubuntu-22.04
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: onnxoptl
        deps: testing,testing_tf
        python-version: '3.11'
        opencl: 'true'
        llvm: 'true'
    # The same ONNX backend test file is run once per device flag.
    - name: Test ONNX (GPU)
      run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test ONNX (CLANG)
      run: CLANG=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test ONNX (LLVM)
      run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Run CLOUD=1 Test
      run: |
        CLOUDDEV=CLANG CLOUD=1 python3 test/test_tiny.py
        CLOUDDEV=GPU CLOUD=1 python3 test/test_tiny.py
        CLOUDDEV=GPU IMAGE=2 CLOUD=1 python3 test/test_tiny.py
    - name: Test Optimization Helpers
      run: PYTHONPATH="." DEBUG=1 python3 extra/optimization/test_helpers.py
    - name: Test Action Space
      run: PYTHONPATH="." DEBUG=1 GPU=1 python3 extra/optimization/get_action_space.py
    - name: Test Beam Search
      run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
    - name: Test MLPerf stuff
      run: GPU=1 python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20
    - name: Run handcode_opt
      run: PYTHONPATH=. MODEL=resnet GPU=1 DEBUG=1 BS=4 HALF=0 python3 examples/handcode_opt.py
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# Runs test/models three times, once each with LLVM=1, GPU=1 and CLANG=1.
testmodels:
  name: Models (llvm+clang+gpu)
  runs-on: ubuntu-22.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: models
        deps: testing
        opencl: 'true'
        llvm: 'true'
    - name: Test models (llvm)
      run: LLVM=1 python -m pytest -n=auto test/models --durations=20
    - name: Test models (gpu)
      run: GPU=1 python -m pytest -n=auto test/models --durations=20
    - name: Test models (clang)
      run: CLANG=1 python -m pytest -n=auto test/models --durations=20
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# DSP=1 tests against a locally built qemu-hexagon image, plus an LLVM=1 DEVECTORIZE=0 run.
testdsp:
  name: Linux (DSP)
  runs-on: ubuntu-24.04
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: dsp-minimal
        deps: testing_minimal
        pydeps: "onnx==1.16.0 onnxruntime"
        llvm: "true"
    - name: Set up Docker Buildx
      uses: docker/setup-buildx-action@v3
    # Builds extra/dsp/Dockerfile, loads the image into the local daemon as
    # qemu-hexagon:latest (push is off), and uses the GitHub Actions cache for layers.
    # NOTE(review): build-push-action is pinned to v4 while setup-buildx-action is on v3;
    # consider moving to a newer major after checking its release notes.
    - name: Build QEMU Docker with cache
      uses: docker/build-push-action@v4
      with:
        file: extra/dsp/Dockerfile
        push: false
        load: true
        tags: qemu-hexagon:latest
        cache-from: type=gha
        cache-to: type=gha,mode=min
    - name: Run test_tiny on DSP
      run: DEBUG=2 DSP=1 python test/test_tiny.py
    - name: Test quantize onnx
      run: PYTHONPATH="." DEBUG=2 DSP=1 python3 test/test_quantize_onnx.py
    - name: Test LLVM=1 DEVECTORIZE=0
      run: LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
    # Disabled CLANG counterpart of the step above:
    # - name: Test CLANG=1 DEVECTORIZE=0
    #   run: CLANG=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
# WEBGPU=1 on Linux: device sanity check, then the test/ tree minus an explicit ignore list.
testwebgpu:
  name: Linux (WebGPU)
  runs-on: ubuntu-22.04
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: webgpu-minimal
        deps: testing_minimal
        python-version: '3.11'
        webgpu: 'true'
    # Asserts that WEBGPU=1 really selects the WEBGPU device before running anything else.
    - name: Check Device.DEFAULT (WEBGPU) and print some source
      run: |
        WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
        WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    - name: Run selected webgpu tests
      run: |
        WEBGPU=1 python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit \
          --ignore=test/test_copy_speed.py --ignore=test/test_rearrange_einops.py --ignore=test/test_speed_v_torch.py \
          --ignore=test/test_fuzz_shape_ops.py --ignore=test/test_linearizer_failures.py --durations=20
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# Linux backend matrix. "Set env" writes the per-backend environment variables
# to $GITHUB_ENV so that every later step in the job inherits them.
tests:
  strategy:
    fail-fast: false
    matrix:
      backend: [llvm, clang, gpu, ptx, amd, nv] #, triton]
  name: Linux (${{ matrix.backend }})
  runs-on: ubuntu-22.04
  timeout-minutes: 20
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: ${{ matrix.backend }}-minimal
        deps: testing_minimal${{matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}
        opencl: ${{ matrix.backend == 'gpu' && 'true' }}
        amd: ${{ matrix.backend == 'amd' && 'true' }}
        cuda: ${{ (matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv') && 'true' }}
    # The expression picks one newline-separated VAR=value list per backend; printf expands the \n escapes.
    # 'ptx' is spelled lowercase like the matrix value (expression string comparison ignores case,
    # so this is a consistency fix only). The trailing \n terminates the last variable so that any
    # later append to $GITHUB_ENV starts on its own line.
    - name: Set env
      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'ptx' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nPTX=1\nMOCKGPU=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nMOCKGPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}\n" >> "$GITHUB_ENV"
    - name: Check Device.DEFAULT and print some source
      run: |
        PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CLANG','CUDA','GPU','AMD','NV'], Device.DEFAULT"
        DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
    # Exactly one of the three pytest steps below runs for any given backend.
    - name: Run pytest (not cuda or amd)
      if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
      run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
    - name: Run pytest (cuda)
      if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv'
      run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20
    - name: Run pytest (amd)
      if: matrix.backend=='amd'
      run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/test_hcq.py test/external/external_test_am.py --durations=20
    - name: Run TRANSCENDENTAL math
      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# ****** macOS Tests ******
# macOS job: METAL=1 model/ONNX/tensor-core/beam/fuzz tests, then MOCKGPU runs for AMD and PTX.
testmetal2:
  name: MacOS (unit)
  runs-on: macos-14
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: metal2
        deps: testing
        python-version: '3.11'
        amd: 'true'
        cuda: 'true'
    - name: Run real world test
      run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
    - name: Test models (Metal)
      run: JIT=2 METAL=1 python -m pytest -n=auto test/models --durations=20
    - name: Run ONNX
      run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
    - name: Test tensor core ops (fake)
      run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
    - name: Test tensor core ops (real)
      run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
    - name: Test LLaMA compile speed
      run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
    - name: Test Beam Search
      run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
    - name: Fuzz Test linearizer
      run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
    # Disabled schedule-fuzzing step:
    # - name: Fuzz Test models schedule
    #   run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
    - name: Run TRANSCENDENTAL math
      run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
    # The two steps below select their device through step-level env rather than inline variables.
    - name: Run pytest (amd)
      env:
        MOCKGPU: 1
        AMD: 1
        FORWARD_ONLY: 1
      run: |
        python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
    - name: Run pytest (ptx)
      env:
        MOCKGPU: 1
        PTX: 1
        NV: 1
        FORWARD_ONLY: 1
      run: |
        python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# macOS WebGPU job: builds the EfficientNet example with WEBGPU=1, then runs a Node test script.
osxwebgpu:
  name: MacOS (WebGPU)
  runs-on: macos-14
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: osx-webgpu
        webgpu: 'true'
    - name: Build WEBGPU Efficientnet
      run: WEBGPU=1 python3 -m examples.compile_efficientnet
    # Puppeteer is installed into a freshly cleaned npm cache before the test script runs.
    - name: Clean npm cache
      run: npm cache clean --force
    - name: Install Puppeteer
      run: npm install puppeteer
    - name: Run WEBGPU Efficientnet
      run: node test/web/test_webgpu.js
# macOS backend matrix. "Set env" writes the per-backend variables to $GITHUB_ENV
# so that the later steps in the job inherit them.
osxtests:
  strategy:
    fail-fast: false
    matrix:
      backend: [metal, llvm, clang]
  name: MacOS (${{ matrix.backend }})
  runs-on: macos-15
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: macos-${{ matrix.backend }}-minimal
        deps: testing_minimal
        llvm: ${{ matrix.backend == 'llvm' && 'true' }}
    # printf expands the \n escapes in the selected string. The trailing \n terminates the last
    # variable so that any later append to $GITHUB_ENV starts on its own line.
    - name: Set env
      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1' || matrix.backend == 'metal' && 'METAL=1\nJIT=2'}}\n" >> "$GITHUB_ENV"
    # The expected device name is the matrix backend upper-cased.
    - name: Check Device.DEFAULT and print some source
      run: |
        python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
        DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
    - name: Run pytest (${{ matrix.backend }})
      run: python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
    - name: Run process replay tests
      uses: ./.github/actions/process-replay
# ****** Windows Tests ******
# Windows backend matrix (llvm, clang). Both run steps use bash explicitly.
wintests:
  strategy:
    fail-fast: false
    matrix:
      backend: [llvm, clang]
  name: Windows (${{ matrix.backend }})
  runs-on: windows-latest
  timeout-minutes: 10
  steps:
    - name: Checkout Code
      uses: actions/checkout@v4
    # NOTE(review): unlike the Linux and macOS matrix jobs, this key does not include
    # matrix.backend, so both matrix legs share it; confirm that is intended.
    - name: Setup Environment
      uses: ./.github/actions/setup-tinygrad
      with:
        key: windows-minimal
        deps: testing_minimal
    # The trailing \n terminates the variable so that any later append to $GITHUB_ENV
    # starts on its own line.
    - name: Set env
      shell: bash
      run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'clang' && 'CLANG=1'}}\n" >> "$GITHUB_ENV"
    # The expected device name is the matrix backend upper-cased.
    - name: Run pytest (${{ matrix.backend }})
      shell: bash
      run: |
        python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
        python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20