# Workflow file for run #6211: "reorder expand (#9051)"
name: Unit Tests
env:
# increment this when downloads substantially change; the cache exists to avoid hitting the internet
DOWNLOAD_CACHE_VERSION: '9'
CAPTURE_PROCESS_REPLAY: 1
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
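# The setup action presumably folds DOWNLOAD_CACHE_VERSION into its cache key, so bumping
# the number invalidates every cached download at once. A minimal sketch of that pattern
# (the path and key here are illustrative, not the action's actual values):
#   - uses: actions/cache@v4
#     with:
#       path: ~/.cache/tinygrad
#       key: downloads-${{ env.DOWNLOAD_CACHE_VERSION }}-${{ runner.os }}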
on:
push:
branches:
- master
pull_request:
workflow_dispatch:
jobs:
llvmspeed:
name: LLVM Speed
runs-on: ubuntu-24.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: llvm-speed
deps: testing_minimal
llvm: 'true'
- name: External Benchmark Schedule
run: PYTHONPATH="." python3 test/external/external_benchmark_schedule.py
- name: Speed Test
run: LLVM=1 LLVMOPT=1 python3 test/test_speed_v_torch.py
- name: Speed Test (BEAM=2)
run: BEAM=2 LLVM=1 LLVMOPT=1 python3 test/test_speed_v_torch.py
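# BEAM=2 turns on beam search over kernel optimizations (the value is the beam width),
# trading compile time for faster kernels; the plain run above is the unoptimized baseline.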
docs:
name: Docs
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
deps: docs
pydeps: "capstone"
- name: Use as an external package
run: |
mkdir $HOME/test_external_dir
cd $HOME/test_external_dir
python -m venv venv
source venv/bin/activate
pip install $GITHUB_WORKSPACE
python -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
# Test using VIZ=1 after package installation
VIZ=1 python -c "from tinygrad.tensor import Tensor; Tensor([1,2,3,4,5]).realize()" & VIZ_PID=$!
echo "started VIZ server at: $(ps -p "$VIZ_PID" -o pid=)"
i=0; while ((i++ < 10)); do curl -sSf localhost:8000 > /dev/null && break || { echo "VIZ verification attempt $i/10"; sleep 1; }; done; ((i > 10)) && echo "Could not verify VIZ server" && exit 1
kill $VIZ_PID
pip install mypy
mypy -c "from tinygrad.tensor import Tensor; print(Tensor([1,2,3,4,5]))"
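# The block above smoke-tests the installed package from a clean venv: import check,
# then the VIZ web server started in the background and polled with curl (up to 10
# tries, 1s apart; the code assumes VIZ serves on localhost:8000), then a snippet
# type-checked with `mypy -c`.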
- name: Run beautiful_mnist with tinygrad only
run: |
mkdir $GITHUB_WORKSPACE/test_dir
cd $GITHUB_WORKSPACE/test_dir
python -m venv venv
source venv/bin/activate
pip install $GITHUB_WORKSPACE
cp $GITHUB_WORKSPACE/examples/beautiful_mnist.py .
PYTHONPATH=$GITHUB_WORKSPACE BS=2 STEPS=10 python beautiful_mnist.py
- name: Test Docs Build
run: python -m mkdocs build --strict
- name: Test Docs
run: |
python docs/abstractions2.py
python docs/abstractions3.py
- name: Test Quickstart
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' docs/quickstart.md > quickstart.py && PYTHONPATH=. python quickstart.py
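# The awk program extracts fenced ```python blocks: `flag` turns on at an opening fence
# (skipping that line via `next`), off at any closing fence, and the bare `flag` pattern
# prints every line in between, yielding a runnable quickstart.py.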
- name: Test DEBUG
run: DEBUG=100 python3 -c "from tinygrad import Tensor; N = 1024; a, b = Tensor.rand(N, N), Tensor.rand(N, N); c = (a.reshape(N, 1, N) * b.T.reshape(1, N, N)).sum(axis=2); print((c.numpy() - (a.numpy() @ b.numpy())).mean())"
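# The one-liner checks a matmul built from broadcasting: a.reshape(N,1,N) * b.T.reshape(1,N,N)
# has element [i,j,k] = a[i,k]*b[k,j], so sum(axis=2) is exactly a @ b; the printed mean
# difference against numpy's @ should be ~0.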
- name: Compile EfficientNet to C and test it
run: |
CPU=1 PYTHONPATH="." python examples/compile_efficientnet.py > recognize.c
clang -O2 recognize.c -lm -o recognize
cat test/models/efficientnet/Chicken.jpg | ./recognize | grep cock
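# compile_efficientnet.py emits the whole model as standalone C; `recognize` reads raw
# image bytes on stdin and prints the predicted class, and the grep passes because "cock"
# (ImageNet's label for a male chicken) is the expected top label for the test image.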
autogen:
name: Autogen
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
opencl: 'true'
amd: 'true'
cuda: 'true'
webgpu: 'true'
llvm: 'true'
- name: Verify OpenCL autogen
run: |
cp tinygrad/runtime/autogen/opencl.py /tmp/opencl.py.bak
./autogen_stubs.sh opencl
diff /tmp/opencl.py.bak tinygrad/runtime/autogen/opencl.py
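# Every autogen check uses the same pattern: back up the committed file, regenerate it
# with autogen_stubs.sh, and `diff` against the backup. A nonzero diff exit code fails
# the step, so stale generated stubs can't land. The CUDA/AMD/Linux/WebGPU/LLVM steps
# below repeat this for their respective files.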
- name: Verify CUDA autogen
run: |
cp tinygrad/runtime/autogen/cuda.py /tmp/cuda.py.bak
cp tinygrad/runtime/autogen/nv_gpu.py /tmp/nv_gpu.py.bak
./autogen_stubs.sh cuda
./autogen_stubs.sh nv
diff /tmp/cuda.py.bak tinygrad/runtime/autogen/cuda.py
diff /tmp/nv_gpu.py.bak tinygrad/runtime/autogen/nv_gpu.py
- name: Verify AMD autogen
run: |
cp tinygrad/runtime/autogen/hsa.py /tmp/hsa.py.bak
cp tinygrad/runtime/autogen/comgr.py /tmp/comgr.py.bak
cp tinygrad/runtime/autogen/amd_gpu.py /tmp/amd_gpu.py.bak
./autogen_stubs.sh hsa
./autogen_stubs.sh comgr
./autogen_stubs.sh amd
diff /tmp/hsa.py.bak tinygrad/runtime/autogen/hsa.py
diff /tmp/comgr.py.bak tinygrad/runtime/autogen/comgr.py
diff /tmp/amd_gpu.py.bak tinygrad/runtime/autogen/amd_gpu.py
- name: Verify Linux autogen
run: |
cp tinygrad/runtime/autogen/libc.py /tmp/libc.py.bak
cp tinygrad/runtime/autogen/io_uring.py /tmp/io_uring.py.bak
./autogen_stubs.sh libc
./autogen_stubs.sh io_uring
diff /tmp/libc.py.bak tinygrad/runtime/autogen/libc.py
diff /tmp/io_uring.py.bak tinygrad/runtime/autogen/io_uring.py
- name: Verify WebGPU autogen
run: |
cp tinygrad/runtime/autogen/webgpu.py /tmp/webgpu.py.bak
./autogen_stubs.sh webgpu
diff /tmp/webgpu.py.bak tinygrad/runtime/autogen/webgpu.py
- name: Verify LLVM autogen
run: |
cp tinygrad/runtime/autogen/llvm.py /tmp/llvm.py.bak
./autogen_stubs.sh llvm
diff /tmp/llvm.py.bak tinygrad/runtime/autogen/llvm.py
torchbackend:
name: Torch Backend Tests
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: torch-backend-pillow-torchvision
deps: testing_minimal
pydeps: "pillow torchvision"
- name: Install ninja
run: |
sudo apt update || true
sudo apt install -y --no-install-recommends ninja-build
- name: Test one op
run: PYTHONPATH=. FORWARD_ONLY=1 TINY_BACKEND=1 python3 test/test_ops.py TestOps.test_add
- name: Test ResNet-18
run: PYTHONPATH=. DEBUG=2 python3 extra/torch_backend/example.py
- name: Test Ops with TINY_BACKEND (expect failure)
run: PYTHONPATH=. TINY_BACKEND=1 python3 -m pytest test/test_ops.py || true
- name: Test beautiful_mnist in torch with TINY_BACKEND (expect failure)
run: PYTHONPATH=. TORCH_DEBUG=1 TINY_BACKEND=1 python3 examples/other_mnist/beautiful_mnist_torch.py || true
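# TINY_BACKEND=1 routes torch ops through tinygrad's torch backend (extra/torch_backend).
# The last two steps run with `|| true` because full torch test parity is not expected
# yet; they are informational, while the single-op and ResNet-18 steps must pass.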
tc:
name: Tensor Core tests
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: uops-minimal
deps: testing_minimal
- name: Test IMAGE=2 support
run: |
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
IMAGE=2 PYTHON=1 python3 test/test_ops.py TestOps.test_simple_conv2d
- name: Test emulated METAL tensor cores
run: |
DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_big_gemm
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
- name: Test emulated AMX tensor cores
run: PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
- name: Test emulated AMD tensor cores
run: |
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=16 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 N=64 HALF=1 ACC_HALF=1 python3 ./extra/gemm/simple_matmul.py
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
- name: Test emulated CUDA tensor cores
run: |
DEBUG=2 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
DEBUG=2 EMULATE_CUDA_SM75=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
PYTHONPATH="." DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_linearizer.py TestLinearizer.test_tensor_cores TestLinearizer.test_tensor_cores_padded
- name: Test emulated INTEL OpenCL tensor cores
run: DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 HALF=1 N=64 python3 ./extra/gemm/simple_matmul.py
- name: Full test tensor cores
run: |
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 ALLOW_TF32=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 ./test/test_linearizer.py TestLinearizer.test_tensor_cores
- name: Test tensor cores (TC=3)
run: |
TC=3 DEBUG=3 EMULATE_METAL=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
TC=3 PYTHONPATH=. DEBUG=3 EMULATE_AMD=1 PYTHON=1 N=16 HALF=1 ACC_HALF=0 python3 ./extra/gemm/simple_matmul.py
TC=3 DEBUG=3 EMULATE_CUDA=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm_fp16
TC=3 PYTHONPATH=. DEBUG=3 EMULATE_INTEL=1 PYTHON=1 N=16 HALF=1 python3 ./extra/gemm/simple_matmul.py
TC=3 PYTHONPATH=. DEBUG=3 AMX=1 EMULATE_AMX=1 FORWARD_ONLY=1 PYTHON=1 python3 test/test_ops.py TestOps.test_gemm
- name: Test device flop counts
run: |
PYTHONPATH=. DEBUG=2 EMULATE_METAL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_AMD=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_CUDA=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 EMULATE_INTEL=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStatsMatmulHalf
PYTHONPATH=. DEBUG=2 AMX=1 EMULATE_AMX=1 PYTHON=1 python3 ./test/test_uops_stats.py TestUOpsStats.test_simple_matmul
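# EMULATE_METAL / EMULATE_AMD / EMULATE_CUDA / EMULATE_INTEL / EMULATE_AMX make the
# PYTHON=1 interpreter report that device's tensor cores, so the tensor-core codegen
# paths (and the flop accounting above) get exercised on plain CPU runners without
# the real hardware.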
bepython:
name: Python Backend
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: be-minimal
deps: testing_minimal
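# PYTHON=1 selects the pure-Python emulator backend, which interprets the generated ops
# directly; it is slow but device-independent, which is why the ops run below deselects
# the heaviest tests with -k.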
- name: Test dtype with Python emulator
run: DEBUG=1 PYTHONPATH=. PYTHON=1 python3 -m pytest -n=auto test/test_dtype.py test/test_dtype_alu.py
- name: Test ops with Python emulator
run: DEBUG=2 PYTHON=1 python3 -m pytest -n=auto test/test_ops.py -k "not (test_split or test_simple_cumsum or test_cumsum or test_einsum or test_dot or test_dot_1d or test_big_gemm or test_broadcastdot or test_multidot or test_var_axis or test_std_axis or test_broadcast_full or test_broadcast_partial or test_simple_conv3d or test_dilated_conv_transpose2d or test_simple_conv_transpose3d or test_large_input_conv2d or test_max_pool2d or test_max_pool2d_simple or test_max_pool2d_bigger_stride or test_avg_pool2d or test_cat or test_scaled_product_attention or test_scaled_product_attention_causal or test_slice_fancy_indexing_dim_inject_none or test_slice_fancy_indexing_list_indices or test_slice_fancy_indexing_no_dim_collapse or test_slice_fancy_indexing_tuple_indices or test_slice_fancy_indexing_list_with_tensors or test_slice_fancy_indexing_dim_collapse_int or test_interpolate_bilinear or test_interpolate_bilinear_corners_aligned or test_scaled_dot_product_attention or test_cummax)" --durations=20
- name: Test uops with Python emulator
run: PYTHON=1 python3 -m pytest test/test_uops.py --durations=20
- name: Test symbolic with Python emulator
run: PYTHONPATH=. PYTHON=1 python3 test/test_symbolic_ops.py
- name: test_linearizer_failures with Python emulator
run: PYTHONPATH=. PYTHON=1 python3 -m pytest -rA test/test_linearizer_failures.py::TestLinearizerFailures::test_failure_1
linter:
name: Linters
runs-on: ubuntu-latest
timeout-minutes: 10
# TODO: run the pre-commit hook to replace a lot of this
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: linting-only
python-version: '3.10'
deps: linting
- name: Lint bad-indentation and trailing-whitespace with pylint
run: python -m pylint --disable=all -e W0311 -e C0303 --jobs=0 --indent-string='  ' --recursive=y .
- name: Lint with ruff
run: |
pip3 install --upgrade --force-reinstall ruff
python3 -m ruff check .
- name: Lint tinygrad with pylint
run: python -m pylint tinygrad/
- name: Run mypy
run: python -m mypy --strict-equality --lineprecision-report . && cat lineprecision.txt
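# --lineprecision-report writes lineprecision.txt (per-module counts of precisely-typed
# lines) into the given directory; catting it publishes the stats in the job log.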
unittest:
name: Unit Tests
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: unittest-12
deps: testing_unit
- name: Test README
run: awk '/```python/{flag=1;next}/```/{flag=0}flag' README.md > README.py && PYTHONPATH=. python README.py
- name: Run unit tests
run: PYTHONPATH="." python -m pytest -n=auto test/unit/
- name: Repo line count < 11300 lines
run: MAX_LINE_COUNT=11300 python sz.py
fuzzing:
name: Fuzzing
runs-on: ubuntu-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: fuzzing-minimal
deps: testing_minimal
- name: Fuzz Test symbolic
run: python test/external/fuzz_symbolic.py
- name: Fuzz Test shapetracker
run: |
PYTHONPATH="." python test/external/fuzz_shapetracker.py
PYTHONPATH="." python test/external/fuzz_shapetracker_math.py
testgpuimage:
name: 'GPU IMAGE Tests'
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: gpu-image
deps: testing_minimal
opencl: 'true'
- name: Run Kernel Count Test
run: PYTHONPATH="." GPU=1 python -m pytest -n=auto test/external/external_test_opt.py
- name: Test WINO=1
run: GPU=1 DEBUG=2 WINO=1 python3 test/test_ops.py TestOps.test_simple_conv2d
- name: Test GPU IMAGE=2 ops + training
run: |
PYTHONPATH="." GPU=1 IMAGE=2 python -m pytest -n=auto test/test_ops.py --durations=20
PYTHONPATH="." GPU=1 IMAGE=2 python3 test/models/test_end2end.py TestEnd2End.test_linear_mnist
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopenpilot:
name: 'openpilot Compile Tests'
runs-on: ubuntu-20.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: openpilot-compile
deps: testing
opencl: 'true'
- name: Test openpilot model kernel count and gate usage
run: |
PYTHONPATH="." ALLOWED_KERNEL_COUNT=209 ALLOWED_READ_IMAGE=2105 ALLOWED_GATED_READ_IMAGE=29 FLOAT16=0 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/v0.9.4/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot alt model correctness (float32)
run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/3799fe46b3a629e491d4b8498b8ae83e4c88c304/selfdrive/modeld/models/supercombo.onnx
- name: Test openpilot fastvits model correctness (float32)
run: PYTHONPATH="." FLOAT16=0 DEBUGCL=1 GPU=1 IMAGE=2 python examples/openpilot/compile3.py https://github.com/commaai/openpilot/raw/9118973ed03c1ae1d40cf69a29507ec2cc78efd7/selfdrive/modeld/models/supercombo.onnx
- name: Run process replay tests
uses: ./.github/actions/process-replay
testopencl:
name: 'ONNX+Optimization Tests'
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: onnxoptl
deps: testing,testing_tf
python-version: '3.11'
opencl: 'true'
llvm: 'true'
- name: Test ONNX (GPU)
run: GPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (CPU)
run: CPU=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test ONNX (LLVM)
run: LLVM=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Run CLOUD=1 Test
run: |
CLOUDDEV=CPU CLOUD=1 python3 test/test_tiny.py
CLOUDDEV=GPU CLOUD=1 python3 test/test_tiny.py
CLOUDDEV=GPU IMAGE=2 CLOUD=1 python3 test/test_tiny.py
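# CLOUD=1 runs tensors against tinygrad's remote device, with CLOUDDEV (by the look of
# these invocations) picking the device the cloud server executes on; test_tiny.py is
# the minimal end-to-end check.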
- name: Test Optimization Helpers
run: PYTHONPATH="." DEBUG=1 python3 extra/optimization/test_helpers.py
- name: Test Action Space
run: PYTHONPATH="." DEBUG=1 GPU=1 python3 extra/optimization/get_action_space.py
- name: Test Beam Search
run: PYTHONPATH="." GPU=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Test MLPerf stuff
run: GPU=1 python -m pytest -n=auto test/external/external_test_optim.py test/external/external_test_losses.py test/external/external_test_metrics.py test/external/external_test_datasets.py --durations=20
- name: Run handcode_opt
run: PYTHONPATH=. MODEL=resnet GPU=1 DEBUG=1 BS=4 HALF=0 python3 examples/handcode_opt.py
- name: Run process replay tests
uses: ./.github/actions/process-replay
testmodels:
name: Models (llvm+cpu+gpu)
runs-on: ubuntu-22.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: models
deps: testing
opencl: 'true'
llvm: 'true'
- name: Test models (llvm)
run: LLVM=1 python -m pytest -n=auto test/models --durations=20
- name: Test models (gpu)
run: GPU=1 python -m pytest -n=auto test/models --durations=20
- name: Test models (cpu)
run: CPU=1 python -m pytest -n=auto test/models --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
testdsp:
name: Linux (DSP)
runs-on: ubuntu-24.04
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: dsp-minimal
deps: testing_minimal
pydeps: "onnx==1.16.0 onnxruntime"
llvm: "true"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v3
- name: Build QEMU Docker with cache
uses: docker/build-push-action@v4
with:
file: extra/dsp/Dockerfile
push: false
load: true
tags: qemu-hexagon:latest
cache-from: type=gha
cache-to: type=gha,mode=min
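# cache-from/cache-to with type=gha store the Docker layer cache in the GitHub Actions
# cache service; mode=min exports only the final image's layers, keeping cache size down
# at the cost of rebuilding intermediate stages on a miss.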
- name: Run test_tiny on DSP
run: DEBUG=2 DSP=1 python test/test_tiny.py
- name: Test quantize onnx
run: PYTHONPATH="." DEBUG=2 DSP=1 python3 test/test_quantize_onnx.py
- name: Test LLVM=1 DEVECTORIZE=0
run: LLVM=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
- name: Test CPU=1 DEVECTORIZE=0
run: CPU=1 DEVECTORIZE=0 python3 -m pytest -n auto test/test_tiny.py test/test_ops.py -k "not test_avg_pool3d_failure"
testwebgpu:
name: Linux (WebGPU)
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: webgpu-minimal
deps: testing_minimal
python-version: '3.11'
webgpu: 'true'
- name: Check Device.DEFAULT (WEBGPU) and print some source
run: |
WEBGPU=1 python -c "from tinygrad import Device; assert Device.DEFAULT == 'WEBGPU', Device.DEFAULT"
WEBGPU=1 DEBUG=4 FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run selected webgpu tests
run: |
WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Vulkan" python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit \
--ignore=test/test_copy_speed.py --ignore=test/test_rearrange_einops.py \
--ignore=test/test_fuzz_shape_ops.py --ignore=test/test_linearizer_failures.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
tests:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu, gpu, ptx, amd, nv] #, triton]
name: Linux (${{ matrix.backend }})
runs-on: ubuntu-22.04
timeout-minutes: 20
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: ${{ matrix.backend }}-minimal
deps: testing_minimal${{matrix.backend=='ptx'&&',cuda'||matrix.backend=='triton'&&',triton'||''}}
opencl: ${{ matrix.backend == 'gpu' && 'true' }}
amd: ${{ matrix.backend == 'amd' && 'true' }}
cuda: ${{ (matrix.backend == 'ptx' || matrix.backend == 'triton' || matrix.backend == 'nv') && 'true' }}
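# GitHub expressions have no ternary operator; the `cond && 'a' || 'b'` chains here
# emulate one (safe because the truthy branches are non-empty strings). E.g. deps
# resolves to "testing_minimal,cuda" for ptx and plain "testing_minimal" otherwise.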
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'gpu' && 'GPU=1' || matrix.backend == 'ptx' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nCUDA=1\nPTX=1\nMOCKGPU=1' || matrix.backend == 'triton' && 'FORWARD_ONLY=1\nJIT=1\nOPT=2\nNV=1\nMOCKGPU=1\nTRITON=1\nTRITON_PTXAS_PATH=/usr/bin/ptxas' || matrix.backend == 'amd' && 'AMD=1\nMOCKGPU=1\nFORWARD_ONLY=1' || matrix.backend == 'nv' && 'NV=1\nMOCKGPU=1\nFORWARD_ONLY=1' }}" >> $GITHUB_ENV
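# Appending KEY=VALUE lines to $GITHUB_ENV exports them to all later steps in the job;
# printf expands the \n escapes so one expression can set several variables, e.g.:
#   printf 'CUDA=1\nPTX=1\nMOCKGPU=1' >> "$GITHUB_ENV"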
- name: Check Device.DEFAULT and print some source
run: |
PYTHONPATH=${{ github.workspace }} python3 -c "from tinygrad import Device; assert Device.DEFAULT in ['LLVM','CPU','CUDA','GPU','AMD','NV'], Device.DEFAULT"
DEBUG=5 PYTHONPATH=${{ github.workspace }} FORWARD_ONLY=1 python3 test/test_ops.py TestOps.test_add
- name: Run pytest (not cuda or amd)
if: matrix.backend!='ptx' && matrix.backend!='triton' && matrix.backend != 'amd' && matrix.backend != 'nv'
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run pytest (cuda)
if: matrix.backend=='ptx'||matrix.backend=='triton'||matrix.backend=='nv'
run: python -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --ignore test/test_gc.py --durations=20
- name: Run pytest (amd)
if: matrix.backend=='amd'
run: python -m pytest -n=auto test/test_ops.py test/test_dtype.py test/test_dtype_alu.py test/test_linearizer.py test/test_randomness.py test/imported/test_indexing.py test/test_hcq.py test/external/external_test_am.py --durations=20
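# MOCKGPU=1 (set for ptx/amd/nv above) backs the driver-level AMD/NV runtimes with an
# emulator so their queue/command paths run on stock CPU runners; FORWARD_ONLY=1 skips
# backward passes to keep these emulated runs within the timeout.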
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
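# TRANSCENDENTAL=2 forces sin/exp/log and friends through tinygrad's software polynomial
# approximations instead of backend intrinsics, so these accuracy tests cover that code
# path on every backend.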
- name: Run process replay tests
uses: ./.github/actions/process-replay
# ****** OSX Tests ******
testmetal2:
name: MacOS (unit)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: metal2
deps: testing
python-version: '3.11'
amd: 'true'
cuda: 'true'
- name: Run real world test
run: JIT=2 METAL=1 python -m pytest -n=auto test/models/test_real_world.py --durations=20
- name: Test models (Metal)
run: JIT=2 METAL=1 python -m pytest -n=auto test/models --durations=20
- name: Run ONNX
run: JIT=2 METAL=1 python -m pytest -n=auto test/external/external_test_onnx_backend.py --durations=20
- name: Test tensor core ops (fake)
run: TC=2 METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_gemm
- name: Test tensor core ops (real)
run: METAL=1 DEBUG=3 python test/test_ops.py TestOps.test_big_gemm
- name: Test LLaMA compile speed
run: PYTHONPATH="." METAL=1 python test/external/external_test_speed_llama.py
- name: Test Beam Search
run: PYTHONPATH="." METAL=1 IGNORE_BEAM_CACHE=1 python3 -m pytest extra/optimization/test_beam_search.py
- name: Fuzz Test linearizer
run: PYTHONPATH="." METAL=1 DEPTH=4 FUZZ_N=50 FUZZ_MAX_SIZE=1000000 python test/external/fuzz_linearizer.py
# - name: Fuzz Test models schedule
# run: FUZZ_SCHEDULE=1 FUZZ_SCHEDULE_MAX_PATHS=5 python -m pytest test/models/test_train.py test/models/test_end2end.py
- name: Run TRANSCENDENTAL math
run: TRANSCENDENTAL=2 python -m pytest -n=auto test/test_ops.py::TestOps::test_sin test/test_ops.py::TestOps::test_cos test/test_ops.py::TestOps::test_tan test/test_ops.py::TestOps::test_exp test/test_ops.py::TestOps::test_log --durations=20
- name: Run pytest (amd)
env:
MOCKGPU: 1
AMD: 1
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
- name: Run pytest (ptx)
env:
MOCKGPU: 1
PTX: 1
NV: 1
FORWARD_ONLY: 1
run: |
python3 -m pytest -n=auto test/test_hcq.py test/test_tiny.py --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
osxwebgpu:
name: MacOS (WebGPU)
runs-on: macos-14
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: osx-webgpu
webgpu: 'true'
- name: Build WEBGPU Efficientnet
run: WEBGPU=1 WEBGPU_BACKEND="WGPUBackendType_Metal" python3 -m examples.compile_efficientnet
- name: Clean npm cache
run: npm cache clean --force
- name: Install Puppeteer
run: npm install puppeteer
- name: Run WEBGPU Efficientnet
run: node test/web/test_webgpu.js
osxtests:
strategy:
fail-fast: false
matrix:
backend: [metal, llvm, cpu]
name: MacOS (${{ matrix.backend }})
runs-on: macos-15
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: macos-${{ matrix.backend }}-minimal
deps: testing_minimal
llvm: ${{ matrix.backend == 'llvm' && 'true' }}
- name: Set env
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1' || matrix.backend == 'metal' && 'METAL=1\nJIT=2'}}" >> $GITHUB_ENV
- name: Check Device.DEFAULT and print some source
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
DEBUG=4 python3 test/test_tiny.py TestTiny.test_plus
- name: Run pytest (${{ matrix.backend }})
run: python3 -m pytest -n=auto test/ --ignore=test/models --ignore=test/unit --durations=20
- name: Run process replay tests
uses: ./.github/actions/process-replay
# ****** Windows Tests ******
wintests:
strategy:
fail-fast: false
matrix:
backend: [llvm, cpu]
name: Windows (${{ matrix.backend }})
runs-on: windows-latest
timeout-minutes: 10
steps:
- name: Checkout Code
uses: actions/checkout@v4
- name: Setup Environment
uses: ./.github/actions/setup-tinygrad
with:
key: windows-minimal
deps: testing_unit
- name: Set env
shell: bash
run: printf "${{ matrix.backend == 'llvm' && 'LLVM=1' || matrix.backend == 'cpu' && 'CPU=1'}}" >> $GITHUB_ENV
- name: Run unit tests
if: matrix.backend=='llvm'
run: python -m pytest -n=auto test/unit/ --ignore=test/unit/test_disk_tensor.py --ignore=test/unit/test_elf.py --ignore=test/unit/test_tar.py
- name: Run pytest (${{ matrix.backend }})
shell: bash
run: |
python -c "from tinygrad import Device; assert Device.DEFAULT == '${{ matrix.backend }}'.upper(), Device.DEFAULT"
python -m pytest -n=auto test/test_tiny.py test/test_ops.py --durations=20