diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6d488597..a6d13a85 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -170,4 +170,6 @@ steps: - "Boltz" env: + RETESTITEMS_NWORKERS: 4 + RETESTITEMS_NWORKER_THREADS: 2 SECRET_CODECOV_TOKEN: "wMpDLaAVEHe6EJAc+LZBl4jF3wADVN6F+15vr/ONJHOv/XXbtYovuc1PCQwhz0AzZjWpSO12IDTyKfwVgYvqaGYfQ9yGyplJtSu2MiL2k44B/IY+wEZhsfkBIhXlG89si5A/I+/f8T8QuwxBqBLh8fYq7oxC+gNzKhbj8vIT4n5hCusvYYGufgKRC2U9P4ij0Sf40egQ5B+StaTykqJNq1163UARjNBypHIVDbYE0HUHiF7WB4eI5LxBBzlcHmsUkuGp6ZlqAu/8C83k65lwDnyHDfjvBM24q9GQTDFA5r7RUfYKHElQEBPk3GhoJn7XGIfD2pC0VNcw5jYCwsX2mw==;U2FsdGVkX1+euKMib66zno5Kkw7OxXo6v4RnkAA/HElJM46qfX17VgZ9iVLg45jOOWRgghmyYuy2WQ8RcVbuOg==" diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 9b52f3e8..92a52376 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -38,9 +38,15 @@ jobs: - uses: julia-actions/julia-runtest@v1 env: GROUP: "CPU" + RETESTITEMS_NWORKERS: 4 + RETESTITEMS_NWORKER_THREADS: 2 - uses: julia-actions/julia-processcoverage@v1 with: directories: src,ext - uses: codecov/codecov-action@v4 with: files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + fail_ci_if_error: true + diff --git a/.github/workflows/Downgrade.yml b/.github/workflows/Downgrade.yml new file mode 100644 index 00000000..afeac18b --- /dev/null +++ b/.github/workflows/Downgrade.yml @@ -0,0 +1,41 @@ +name: Downgrade +on: + pull_request: + branches: + - main + paths-ignore: + - 'docs/**' + push: + branches: + - master + paths-ignore: + - 'docs/**' +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + version: ['1.9'] + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: ${{ matrix.version }} + - uses: cjdoris/julia-downgrade-compat-action@v1 + with: + skip: Pkg,TOML + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + env: + GROUP: "CPU" + RETESTITEMS_NWORKERS: 4 + RETESTITEMS_NWORKER_THREADS: 2 + - uses: julia-actions/julia-processcoverage@v1 + with: + directories: src,ext + - uses: codecov/codecov-action@v4 + with: + files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + fail_ci_if_error: true diff --git a/.github/workflows/Downstream.yml b/.github/workflows/Downstream.yml index edd131d1..16223f28 100644 --- a/.github/workflows/Downstream.yml +++ b/.github/workflows/Downstream.yml @@ -54,9 +54,15 @@ jobs: @info "Not compatible with this release. No problem." 
exception=err exit(0) # Exit immediately, as a success end + env: + RETESTITEMS_NWORKERS: 4 + RETESTITEMS_NWORKER_THREADS: 2 - uses: julia-actions/julia-processcoverage@v1 with: directories: src,ext - uses: codecov/codecov-action@v4 with: - files: lcov.info \ No newline at end of file + files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} + verbose: true + fail_ci_if_error: true \ No newline at end of file diff --git a/test/LocalPreferences.toml b/LocalPreferences.toml similarity index 100% rename from test/LocalPreferences.toml rename to LocalPreferences.toml diff --git a/Project.toml b/Project.toml index 38f0ed20..5797e3bf 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "LuxLib" uuid = "82251201-b29d-42c6-8e01-566dec8acb11" authors = ["Avik Pal and contributors"] -version = "0.3.9" +version = "0.3.10" [deps] ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" @@ -27,22 +27,43 @@ LuxLibReverseDiffExt = "ReverseDiff" LuxLibTrackerExt = "Tracker" [compat] -ChainRulesCore = "1" +Aqua = "0.8" +ChainRulesCore = "1.20" +ComponentArrays = "0.15" ForwardDiff = "0.10" -KernelAbstractions = "0.9" -LuxCUDA = "0.2, 0.3" -Markdown = "1" -NNlib = "0.8, 0.9" -PrecompileTools = "1" -Random = "1" +KernelAbstractions = "0.9.2" +LuxAMDGPU = "0.2" +LuxCUDA = "0.3" +LuxTestUtils = "0.1.15" +Markdown = "1.9" +NNlib = "0.9" +PrecompileTools = "1.2" +Random = "1.9" +ReTestItems = "1" Reexport = "1" ReverseDiff = "1" -Statistics = "1" +StableRNGs = "1" +Statistics = "1.9" +Test = "1.9" Tracker = "0.2" +Zygote = "0.6" julia = "1.9" [extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" +ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" +LuxAMDGPU = "83120cb1-ca15-4f04-bf3b-6967d2e6b60b" LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" -ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" -Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c" +LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823" +Reexport = "189a3867-3050-52da-a836-e630ba90ab69" +StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[targets] +test = ["Aqua", "ChainRulesCore", "ComponentArrays", "ForwardDiff", "LuxAMDGPU", "LuxCUDA", "LuxTestUtils", "Random", "ReTestItems", "Reexport", "StableRNGs", "Statistics", "Test", "Zygote"] diff --git a/ext/LuxLibForwardDiffExt.jl b/ext/LuxLibForwardDiffExt.jl index e6c52330..36818419 100644 --- a/ext/LuxLibForwardDiffExt.jl +++ b/ext/LuxLibForwardDiffExt.jl @@ -5,9 +5,7 @@ import ForwardDiff: Dual import LuxLib: AA # dropout -function LuxLib._dropout_fptype(x::AA{<:Dual}) - return ForwardDiff.valtype(eltype(x)) -end +LuxLib._dropout_fptype(x::AA{<:Dual}) = ForwardDiff.valtype(eltype(x)) # Convolutions: We might want to capture these furthur down in `conv!` # NOTE: In principle we can concatenate all of the partials along the batch dimension @@ -45,10 +43,14 @@ for op in [:conv, :depthwiseconv] y = $(op)(x_, w_, cdims; kwargs...) 
- dys₁ = ntuple(_ -> similar(x_, Vₓ, NNlib.output_size(cdims)..., - NNlib.channels_out(cdims), size(x, N)), P) - dys₂ = ntuple(_ -> similar(x_, Vₓ, NNlib.output_size(cdims)..., - NNlib.channels_out(cdims), size(x, N)), P) + dys₁ = ntuple( + _ -> similar(x_, Vₓ, NNlib.output_size(cdims)..., + NNlib.channels_out(cdims), size(x, N)), + P) + dys₂ = ntuple( + _ -> similar(x_, Vₓ, NNlib.output_size(cdims)..., + NNlib.channels_out(cdims), size(x, N)), + P) for i in 1:P $(op!)(dys₁[i], ForwardDiff.partials.(x, i), w_, cdims; kwargs...) $(op!)(dys₂[i], x_, ForwardDiff.partials.(w, i), cdims; kwargs...) diff --git a/ext/LuxLibLuxCUDAExt/LuxLibLuxCUDAExt.jl b/ext/LuxLibLuxCUDAExt/LuxLibLuxCUDAExt.jl index 78c347d1..e388950f 100644 --- a/ext/LuxLibLuxCUDAExt/LuxLibLuxCUDAExt.jl +++ b/ext/LuxLibLuxCUDAExt/LuxLibLuxCUDAExt.jl @@ -2,9 +2,8 @@ module LuxLibLuxCUDAExt using LuxCUDA, LuxLib import ChainRulesCore as CRC -import LuxLib: batchnorm, - batchnorm_cudnn, ∇batchnorm_cudnn, _get_batchnorm_statistics, - FP_32_64, ∂∅ +import LuxLib: batchnorm, batchnorm_cudnn, ∇batchnorm_cudnn, _get_batchnorm_statistics, + FP_32_64, ∂∅ include("batchnorm.jl") diff --git a/ext/LuxLibLuxCUDAExt/batchnorm.jl b/ext/LuxLibLuxCUDAExt/batchnorm.jl index dd4c68c2..14e9de58 100644 --- a/ext/LuxLibLuxCUDAExt/batchnorm.jl +++ b/ext/LuxLibLuxCUDAExt/batchnorm.jl @@ -1,8 +1,9 @@ using LuxCUDA using .cuDNN: CUDNN_BN_MIN_EPSILON, cudnnBatchNormalizationBackward, - cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL, - cudnnBatchNormalizationForwardTraining, cudnnTensorDescriptor, CUDNN_TENSOR_NCHW, - cudnnDataType, dim4, scalingParameter, handle + cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL, + cudnnBatchNormalizationForwardTraining, cudnnTensorDescriptor, + CUDNN_TENSOR_NCHW, + cudnnDataType, dim4, scalingParameter, handle import LuxLib: FP_32_64 # NOTE: This can be upstreamed to LuxCUDA once we drop support for v1.6 @@ -169,7 +170,8 @@ function cudnnBNBackward!(∂g::DenseCuArray{T}, g::DenseCuArray{T}, ∂b::Dense xd = cudnnTensorDescriptor(x) ∂yd = cudnnTensorDescriptor(∂y) ∂xd = cudnnTensorDescriptor(∂x) - gd = cudnnTensorDescriptor(CUDNN_TENSOR_NCHW, cudnnDataType(T), Cint(length(_wsize(x))), + gd = cudnnTensorDescriptor( + CUDNN_TENSOR_NCHW, cudnnDataType(T), Cint(length(_wsize(x))), dim4(_wsize(x), Val(CUDNN_TENSOR_NCHW))) xmean = xmean === nothing ? 
CU_NULL : xmean diff --git a/ext/LuxLibLuxCUDATrackerExt.jl b/ext/LuxLibLuxCUDATrackerExt.jl index 06f45a8a..782f0c08 100644 --- a/ext/LuxLibLuxCUDATrackerExt.jl +++ b/ext/LuxLibLuxCUDATrackerExt.jl @@ -2,9 +2,9 @@ module LuxLibLuxCUDATrackerExt using LuxCUDA, LuxLib, Tracker import Tracker: @grad, - data, nobacksies, track, TrackedArray, TrackedVector, TrackedReal + data, nobacksies, track, TrackedArray, TrackedVector, TrackedReal import LuxLib: AA, AV, batchnorm_cudnn, ∇batchnorm_cudnn, _get_batchnorm_statistics, - FP_32_64, ∂∅, __is_tracked + FP_32_64, ∂∅, __is_tracked # api/batchnorm.jl const TR_CUDNN_BN_ARRAY_TYPE = Union{TrackedArray{<:Any, <:Any, <:CuArray{<:FP_32_64, 2}}, diff --git a/ext/LuxLibReverseDiffExt.jl b/ext/LuxLibReverseDiffExt.jl index 129282cd..d9ae9088 100644 --- a/ext/LuxLibReverseDiffExt.jl +++ b/ext/LuxLibReverseDiffExt.jl @@ -3,8 +3,8 @@ module LuxLibReverseDiffExt using ChainRulesCore, LuxLib, ReverseDiff import ChainRulesCore as CRC import LuxLib: AA, __is_tracked -import ReverseDiff: TrackedArray, - TrackedReal, decrement_deriv!, increment_deriv!, value, @grad_from_chainrules +import ReverseDiff: TrackedArray, TrackedReal, decrement_deriv!, increment_deriv!, value, + @grad_from_chainrules # Patches: Needs upstreaming @inline function increment_deriv!(t::Union{TrackedArray, TrackedReal}, ::NoTangent, i) diff --git a/src/LuxLib.jl b/src/LuxLib.jl index 799f4ed3..b4068fdf 100644 --- a/src/LuxLib.jl +++ b/src/LuxLib.jl @@ -23,7 +23,7 @@ include("api/groupnorm.jl") include("api/instancenorm.jl") include("api/layernorm.jl") -export batchnorm, groupnorm, instancenorm, layernorm -export alpha_dropout, dropout +export batchnorm, groupnorm, instancenorm, layernorm, + alpha_dropout, dropout end diff --git a/src/impl/groupnorm.jl b/src/impl/groupnorm.jl index facbf38d..fcf96c15 100644 --- a/src/impl/groupnorm.jl +++ b/src/impl/groupnorm.jl @@ -1,7 +1,7 @@ # Low-Level Kernels ## Original Implementation: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/group_norm_op.cu -@kernel function _compute_fused_params_kernel!(scale, bias, @Const(C), @Const(K), @Const(μ), - @Const(σ⁻¹), @Const(γ), @Const(β)) +@kernel function _compute_fused_params_kernel!(scale, bias, @Const(C), @Const(K), + @Const(μ), @Const(σ⁻¹), @Const(γ), @Const(β)) idx = @index(Global) ng = _div_idx(idx, K) c = _mod_idx(idx, C) @@ -27,8 +27,8 @@ end @inbounds dY_dscale[idx] = γ[c] * σ⁻¹[ng] end -@kernel function _groupnorm_xscale_and_bias_kernel!(X_scale, bias, @Const(alpha), @Const(μ), - @Const(σ⁻¹), @Const(ds_sum), @Const(db_sum)) +@kernel function _groupnorm_xscale_and_bias_kernel!(X_scale, bias, @Const(alpha), + @Const(μ), @Const(σ⁻¹), @Const(ds_sum), @Const(db_sum)) idx = @index(Global) @inbounds x = (db_sum[idx] * μ[idx] - ds_sum[idx]) * (σ⁻¹[idx]^3) * alpha @inbounds X_scale[idx] = x diff --git a/test/Project.toml b/test/Project.toml deleted file mode 100644 index 892c199a..00000000 --- a/test/Project.toml +++ /dev/null @@ -1,18 +0,0 @@ -[deps] -Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" -ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" -ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66" -ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" -JET = "c3a54625-cd67-489e-a8e7-0a5a0ff4e31b" -LuxAMDGPU = "83120cb1-ca15-4f04-bf3b-6967d2e6b60b" -LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda" -LuxLib = "82251201-b29d-42c6-8e01-566dec8acb11" -LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531" -Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -Random = 
"9a3f8284-a2c9-5f02-9a11-845980a1fd5c" -SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" -StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3" -Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" -TestSetExtensions = "98d24dd4-01ad-11ea-1b02-c9a08f80db04" -Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" diff --git a/test/api/batchnorm.jl b/test/api/batchnorm.jl deleted file mode 100644 index cc739f69..00000000 --- a/test/api/batchnorm.jl +++ /dev/null @@ -1,56 +0,0 @@ -using LuxLib, Test - -include("../test_utils.jl") - -rng = get_stable_rng(12345) - -function _setup_batchnorm(aType, T, sz; affine::Bool=true, track_stats::Bool) - x = randn(T, sz) |> aType - scale = affine ? aType(randn(T, sz[end - 1])) : nothing - bias = affine ? aType(randn(T, sz[end - 1])) : nothing - - if track_stats - running_mean = randn(T, sz[end - 1]) |> aType - running_var = abs2.(randn(T, sz[end - 1])) |> aType - return x, scale, bias, running_mean, running_var - else - return x, scale, bias, nothing, nothing - end -end - -@testset "$mode: Batch Normalization" for (mode, aType, on_gpu) in MODES - for T in (Float16, Float32, Float64), - sz in ((4, 4, 6, 2), (8, 2), (4, 4, 4, 3, 2)), - training in (Val(true), Val(false)), - affine in (true, false), - track_stats in (true, false) - - T === Float16 && mode == "AMDGPU" && continue - - _f = (args...) -> batchnorm(args...; epsilon, training, momentum=T(0.9)) - - epsilon = T(1e-5) - x, scale, bias, rm, rv = _setup_batchnorm(aType, T, sz; track_stats, affine) - - y, nt = batchnorm(x, scale, bias, rm, rv; epsilon, training, momentum=T(0.9)) - - @inferred batchnorm(x, scale, bias, rm, rv; epsilon, training, momentum=T(0.9)) - - @jet _f(x, scale, bias, rm, rv) - - @test y isa aType{T, length(sz)} - @test size(y) == sz - - if rm !== nothing - @test size(nt.running_mean) == (size(x, length(sz) - 1),) - @test size(nt.running_var) == (size(x, length(sz) - 1),) - end - - if __istraining(training) && affine - fp16 = T == Float16 - __f = (args...) -> sum(first(batchnorm(x, args..., rm, rv; epsilon, - training, momentum=T(0.9)))) - @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2 - end - end -end diff --git a/test/api/batchnorm_tests.jl b/test/api/batchnorm_tests.jl new file mode 100644 index 00000000..581e1a59 --- /dev/null +++ b/test/api/batchnorm_tests.jl @@ -0,0 +1,54 @@ +@testitem "Batch Normalization" setup=[SharedTestSetup] begin + rng = get_stable_rng(12345) + + function _setup_batchnorm(aType, T, sz; affine::Bool=true, track_stats::Bool) + x = randn(T, sz) |> aType + scale = affine ? aType(randn(T, sz[end - 1])) : nothing + bias = affine ? aType(randn(T, sz[end - 1])) : nothing + + if track_stats + running_mean = randn(T, sz[end - 1]) |> aType + running_var = abs2.(randn(T, sz[end - 1])) |> aType + return x, scale, bias, running_mean, running_var + else + return x, scale, bias, nothing, nothing + end + end + + @testset "$mode" for (mode, aType, on_gpu) in MODES + for T in (Float16, Float32, Float64), + sz in ((4, 4, 6, 2), (8, 2), (4, 4, 4, 3, 2)), + training in (Val(true), Val(false)), + affine in (true, false), + track_stats in (true, false) + + T === Float16 && mode == "AMDGPU" && continue + + _f = (args...) 
-> batchnorm(args...; epsilon, training, momentum=T(0.9)) + + epsilon = T(1e-5) + x, scale, bias, rm, rv = _setup_batchnorm(aType, T, sz; track_stats, affine) + + y, nt = batchnorm(x, scale, bias, rm, rv; epsilon, training, momentum=T(0.9)) + + @inferred batchnorm(x, scale, bias, rm, rv; epsilon, training, momentum=T(0.9)) + + @jet _f(x, scale, bias, rm, rv) + + @test y isa aType{T, length(sz)} + @test size(y) == sz + + if rm !== nothing + @test size(nt.running_mean) == (size(x, length(sz) - 1),) + @test size(nt.running_var) == (size(x, length(sz) - 1),) + end + + if __istraining(training) && affine + fp16 = T == Float16 + __f = (args...) -> sum(first(batchnorm(x, args..., rm, rv; epsilon, + training, momentum=T(0.9)))) + @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2 + end + end + end +end diff --git a/test/api/dropout.jl b/test/api/dropout.jl deleted file mode 100644 index 34bba846..00000000 --- a/test/api/dropout.jl +++ /dev/null @@ -1,156 +0,0 @@ -using Statistics, Test, LuxLib - -include("../test_utils.jl") - -rng = get_stable_rng(12345) - -@testset "$mode: Dropout" for (mode, aType, on_gpu) in MODES - for T in (Float16, Float32, Float64), - x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) - - T === Float16 && mode == "AMDGPU" && continue - - x = randn(rng, T, x_shape) |> aType - - @inferred dropout(rng, x, T(0.5), Val(true); dims=Colon()) - - y, mask_, rng_ = dropout(rng, x, T(0.5), Val(true); dims=Colon()) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test mask_ isa aType{T, length(x_shape)} - @test size(mask_) == x_shape - @test rng != rng_ - - __f = x -> sum(first(dropout(rng, x, T(0.5), Val(true); dims=Colon()))) - - fp16 = T == Float16 - @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu - @jet sum(first(dropout(rng, x, T(0.5), Val(true); dims=Colon()))) - - @inferred dropout(rng, x, T(0.5), Val(true); dims=Colon()) - - y, mask_, rng_ = dropout(rng, x, T(0.5), Val(false); dims=Colon()) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test rng == rng_ - @test y == x - end -end - -@testset "$mode: Dropout with Preset Mask" for (mode, aType, on_gpu) in MODES - for T in (Float16, Float32, Float64), - x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) - - T === Float16 && mode == "AMDGPU" && continue - - x = randn(rng, T, x_shape) |> aType - mask = rand(T, x_shape) |> aType - - # Update mask - @inferred dropout(rng, x, mask, T(0.5), Val(true), Val(true); dims=Colon()) - - y, mask_, rng_ = dropout(rng, x, mask, T(0.5), Val(true), Val(true); dims=Colon()) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test mask_ isa aType{T, length(x_shape)} - @test size(mask_) == x_shape - @test rng != rng_ - @test mask != mask_ - - __f = x -> sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(true); - dims=Colon()))) - - fp16 = T == Float16 - @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu - @jet sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(true); dims=Colon()))) - - # Try using mask if possible (possible!!) 
- @inferred dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) - - y, mask_, rng_ = dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test mask_ isa aType{T, length(x_shape)} - @test size(mask_) == x_shape - @test rng == rng_ - @test mask == mask_ - - __f = x -> sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(false); - dims=Colon()))) - - fp16 = T == Float16 - @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu - @jet sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()))) - - mask = rand(T, (x_shape[1:(end - 1)]..., 13)) |> aType - - # Try using mask if possible (not possible!!) - @inferred dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) - - y, mask_, rng_ = dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test mask_ isa aType{T, length(x_shape)} - @test size(mask_) == x_shape - @test rng != rng_ - @test mask != mask_ - - __f = x -> sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(false); - dims=Colon()))) - - fp16 = T == Float16 - @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu - @jet sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()))) - - # Testing Mode - @inferred dropout(rng, x, mask, T(0.5), Val(false), Val(false); dims=Colon()) - - y, mask_, rng_ = dropout(rng, x, mask, T(0.5), Val(false), Val(false); dims=Colon()) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test mask_ isa aType{T, length(x_shape)} - @test mask_ == mask - @test rng == rng_ - end -end - -@testset "$mode: Alpha Dropout" for (mode, aType, on_gpu) in MODES - for T in (Float16, Float32, Float64), - x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) - - T === Float16 && mode == "AMDGPU" && continue - - x = randn(rng, T, x_shape) |> aType - - @inferred alpha_dropout(rng, x, T(0.5), Val(true)) - - y, rng_ = alpha_dropout(rng, x, T(0.5), Val(true)) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test rng != rng_ - - @test_broken isapprox(std(y), std(x); atol=1.0f-2, rtol=1.0f-2) - - __f = x -> sum(first(alpha_dropout(rng, x, T(0.5), Val(true)))) - - fp16 = T == Float16 - @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu - @jet sum(first(alpha_dropout(rng, x, T(0.5), Val(true)))) - - @inferred alpha_dropout(rng, x, T(0.5), Val(false)) - - y, rng_ = alpha_dropout(rng, x, T(0.5), Val(false)) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - @test rng == rng_ - @test y == x - end -end diff --git a/test/api/dropout_tests.jl b/test/api/dropout_tests.jl new file mode 100644 index 00000000..816156b8 --- /dev/null +++ b/test/api/dropout_tests.jl @@ -0,0 +1,171 @@ +@testitem "Dropout" setup=[SharedTestSetup] begin + using Statistics + + rng = get_stable_rng(12345) + + @testset "$mode" for (mode, aType, on_gpu) in MODES + for T in (Float16, Float32, Float64), + x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) + + T === Float16 && mode == "AMDGPU" && continue + + x = randn(rng, T, x_shape) |> aType + + @inferred dropout(rng, x, T(0.5), Val(true); dims=Colon()) + + y, mask_, rng_ = dropout(rng, x, T(0.5), Val(true); dims=Colon()) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test mask_ isa aType{T, length(x_shape)} + @test 
size(mask_) == x_shape + @test rng != rng_ + + __f = x -> sum(first(dropout(rng, x, T(0.5), Val(true); dims=Colon()))) + + fp16 = T == Float16 + @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu + @jet sum(first(dropout(rng, x, T(0.5), Val(true); dims=Colon()))) + + @inferred dropout(rng, x, T(0.5), Val(true); dims=Colon()) + + y, mask_, rng_ = dropout(rng, x, T(0.5), Val(false); dims=Colon()) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test rng == rng_ + @test y == x + end + end +end + +@testitem "Dropout with Preset Mask" setup=[SharedTestSetup] begin + using Statistics + + rng = get_stable_rng(12345) + + @testset "$mode" for (mode, aType, on_gpu) in MODES + for T in (Float16, Float32, Float64), + x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) + + T === Float16 && mode == "AMDGPU" && continue + + x = randn(rng, T, x_shape) |> aType + mask = rand(T, x_shape) |> aType + + # Update mask + @inferred dropout(rng, x, mask, T(0.5), Val(true), Val(true); dims=Colon()) + + y, mask_, rng_ = dropout( + rng, x, mask, T(0.5), Val(true), Val(true); dims=Colon()) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test mask_ isa aType{T, length(x_shape)} + @test size(mask_) == x_shape + @test rng != rng_ + @test mask != mask_ + + __f = x -> sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(true); + dims=Colon()))) + + fp16 = T == Float16 + @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu + @jet sum(first(dropout( + rng, x, mask, T(0.5), Val(true), Val(true); dims=Colon()))) + + # Try using mask if possible (possible!!) + @inferred dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) + + y, mask_, rng_ = dropout( + rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test mask_ isa aType{T, length(x_shape)} + @test size(mask_) == x_shape + @test rng == rng_ + @test mask == mask_ + + __f = x -> sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(false); + dims=Colon()))) + fp16 = T == Float16 + @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu + @jet sum(first(dropout( + rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()))) + mask = rand(T, (x_shape[1:(end - 1)]..., 13)) |> aType + + # Try using mask if possible (not possible!!) 
+ @inferred dropout(rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) + + y, mask_, rng_ = dropout( + rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test mask_ isa aType{T, length(x_shape)} + @test size(mask_) == x_shape + @test rng != rng_ + @test mask != mask_ + + __f = x -> sum(first(dropout(rng, x, mask, T(0.5), Val(true), Val(false); + dims=Colon()))) + fp16 = T == Float16 + @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu + @jet sum(first(dropout( + rng, x, mask, T(0.5), Val(true), Val(false); dims=Colon()))) + # Testing Mode + @inferred dropout(rng, x, mask, T(0.5), Val(false), Val(false); dims=Colon()) + + y, mask_, rng_ = dropout( + rng, x, mask, T(0.5), Val(false), Val(false); dims=Colon()) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test mask_ isa aType{T, length(x_shape)} + @test mask_ == mask + @test rng == rng_ + end + end +end + +@testitem "Alpha Dropout" setup=[SharedTestSetup] begin + using Statistics + + rng = get_stable_rng(12345) + + @testset "$mode" for (mode, aType, on_gpu) in MODES + for T in (Float16, Float32, Float64), + x_shape in ((2, 3), (2, 2, 3), (2, 2, 3, 1), (2, 2, 1, 3, 1)) + + T === Float16 && mode == "AMDGPU" && continue + + x = randn(rng, T, x_shape) |> aType + + @inferred alpha_dropout(rng, x, T(0.5), Val(true)) + + y, rng_ = alpha_dropout(rng, x, T(0.5), Val(true)) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test rng != rng_ + + @test_broken isapprox(std(y), std(x); atol=1.0f-2, rtol=1.0f-2) + + __f = x -> sum(first(alpha_dropout(rng, x, T(0.5), Val(true)))) + + fp16 = T == Float16 + @eval @test_gradients $__f $x atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 gpu_testing=$on_gpu + @jet sum(first(alpha_dropout(rng, x, T(0.5), Val(true)))) + + @inferred alpha_dropout(rng, x, T(0.5), Val(false)) + + y, rng_ = alpha_dropout(rng, x, T(0.5), Val(false)) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + @test rng == rng_ + @test y == x + end + end +end diff --git a/test/api/groupnorm.jl b/test/api/groupnorm.jl deleted file mode 100644 index 55931fe8..00000000 --- a/test/api/groupnorm.jl +++ /dev/null @@ -1,89 +0,0 @@ -using LuxLib, Test - -include("../test_utils.jl") - -function _setup_groupnorm(aType, T, sz, groups) - x = randn(T, sz) |> aType - scale = randn(T, sz[end - 1]) |> aType - bias = randn(T, sz[end - 1]) |> aType - return x, scale, bias -end - -function _groupnorm_generic_fallback(x, scale, bias, epsilon, groups) - sz = size(x) - N = ndims(x) - x_reshaped = reshape(x, sz[1:(N - 2)]..., sz[N - 1] ÷ groups, groups, sz[N]) - x_, xmean, xvar = LuxLib._normalization(x_reshaped, nothing, nothing, scale, bias, - Val(Tuple(collect(1:(N - 1)))), Val(false), nothing, epsilon) - - return reshape(x_, sz) -end - -@testset "$mode: GroupNorm KernelAbstractions" for (mode, aType, on_gpu) in MODES - @testset "eltype $T, size $sz, ngroups $groups" for T in (Float32, - Float64), sz in ((16, 16, 6, 4), (32, 32, 6, 4), (64, 64, 12, 4)), - groups in (2, 3) - - T === Float16 && mode == "AMDGPU" && continue - - _f = (args...) 
-> groupnorm(args...; groups, epsilon) - - epsilon = T(1e-5) - x, scale, bias = _setup_groupnorm(aType, T, sz, groups) - - y = _f(x, scale, bias) - - gs_x, gs_scale, gs_bias = Zygote.gradient(sum ∘ _f, x, scale, bias) - - @inferred groupnorm(x, scale, bias; groups, epsilon) - - # @jet _f(x, scale, bias) # test_call throws exception - LuxTestUtils.JET.@test_opt target_modules=(LuxLib,) _f(x, scale, bias) - - @test y isa aType{T, length(sz)} - @test size(y) == sz - - # Use the generic implementation to compare against - __f = (args...) -> _groupnorm_generic_fallback(args..., epsilon, groups) - - y_ = __f(x, scale, bias) - - gs_x_, gs_scale_, gs_bias_ = Zygote.gradient(sum ∘ __f, x, scale, bias) - - # The KA implementation reorders operations manually for maximal - # performance. Hence equality cannot be guaranteed. - @test check_approx(y, y_; atol=1.0f-3, rtol=1.0f-3) - @test check_approx(gs_x, gs_x_; atol=1.0f-3, rtol=1.0f-3) - @test check_approx(gs_scale, gs_scale_; atol=1.0f-3, rtol=1.0f-3) - @test check_approx(gs_bias, gs_bias_; atol=1.0f-3, rtol=1.0f-3) - - fp16 = T == Float16 - __f = (args...) -> sum(groupnorm(x, args...; groups, epsilon)) - @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-3 rtol=1.0f-3 soft_fail=$fp16 - end -end - -@testset "$mode: GroupNorm Generic Fallback" for (mode, aType, on_gpu) in MODES - @testset "eltype $T, size $sz, ngroups $groups" for T in (Float16, - Float32, Float64), sz in ((4, 6, 2), (8, 8, 8, 6, 2), (3, 16, 16, 12, 2)), - groups in (2, 3) - - T === Float16 && mode == "AMDGPU" && continue - - _f = (args...) -> groupnorm(args...; groups, epsilon) - - epsilon = T(1e-5) - x, scale, bias = _setup_groupnorm(aType, T, sz, groups) - y = _f(x, scale, bias) - - @inferred groupnorm(x, scale, bias; groups, epsilon) - @jet _f(x, scale, bias) - - @test y isa aType{T, length(sz)} - @test size(y) == sz - - fp16 = T == Float16 - __f = (args...) -> sum(groupnorm(x, args...; groups, epsilon)) - @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 - end -end diff --git a/test/api/groupnorm_tests.jl b/test/api/groupnorm_tests.jl new file mode 100644 index 00000000..64fdc2fe --- /dev/null +++ b/test/api/groupnorm_tests.jl @@ -0,0 +1,95 @@ +@testsetup module GroupNormSetup +using LuxLib + +function _setup_groupnorm(aType, T, sz, groups) + x = randn(T, sz) |> aType + scale = randn(T, sz[end - 1]) |> aType + bias = randn(T, sz[end - 1]) |> aType + return x, scale, bias +end + +function _groupnorm_generic_fallback(x, scale, bias, epsilon, groups) + sz = size(x) + N = ndims(x) + x_reshaped = reshape(x, sz[1:(N - 2)]..., sz[N - 1] ÷ groups, groups, sz[N]) + x_, xmean, xvar = LuxLib._normalization(x_reshaped, nothing, nothing, scale, bias, + Val(Tuple(collect(1:(N - 1)))), Val(false), nothing, epsilon) + + return reshape(x_, sz) +end + +export _setup_groupnorm, _groupnorm_generic_fallback +end + +@testitem "Group Normalization KernelAbstractions" setup=[SharedTestSetup, GroupNormSetup] begin + @testset "$mode" for (mode, aType, on_gpu) in MODES + @testset "eltype $T, size $sz, ngroups $groups" for T in (Float32, Float64), + sz in ((16, 16, 6, 4), (32, 32, 6, 4), (64, 64, 12, 4)), + groups in (2, 3) + + T === Float16 && mode == "AMDGPU" && continue + + _f = (args...) 
-> groupnorm(args...; groups, epsilon) + + epsilon = T(1e-5) + x, scale, bias = _setup_groupnorm(aType, T, sz, groups) + + y = _f(x, scale, bias) + + gs_x, gs_scale, gs_bias = Zygote.gradient(sum ∘ _f, x, scale, bias) + + @inferred groupnorm(x, scale, bias; groups, epsilon) + + # @jet _f(x, scale, bias) # test_call throws exception + LuxTestUtils.JET.@test_opt target_modules=(LuxLib,) _f(x, scale, bias) + + @test y isa aType{T, length(sz)} + @test size(y) == sz + + # Use the generic implementation to compare against + __f = (args...) -> _groupnorm_generic_fallback(args..., epsilon, groups) + + y_ = __f(x, scale, bias) + + gs_x_, gs_scale_, gs_bias_ = Zygote.gradient(sum ∘ __f, x, scale, bias) + + # The KA implementation reorders operations manually for maximal + # performance. Hence equality cannot be guaranteed. + @test check_approx(y, y_; atol=1.0f-3, rtol=1.0f-3) + @test check_approx(gs_x, gs_x_; atol=1.0f-3, rtol=1.0f-3) + @test check_approx(gs_scale, gs_scale_; atol=1.0f-3, rtol=1.0f-3) + @test check_approx(gs_bias, gs_bias_; atol=1.0f-3, rtol=1.0f-3) + + fp16 = T == Float16 + __f = (args...) -> sum(groupnorm(x, args...; groups, epsilon)) + @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-3 rtol=1.0f-3 soft_fail=$fp16 + end + end +end + +@testitem "Group Normalization Generic Fallback" setup=[SharedTestSetup, GroupNormSetup] begin + @testset "$mode" for (mode, aType, on_gpu) in MODES + @testset "eltype $T, size $sz, ngroups $groups" for T in (Float16, + Float32, Float64), sz in ((4, 6, 2), (8, 8, 8, 6, 2), (3, 16, 16, 12, 2)), + groups in (2, 3) + + T === Float16 && mode == "AMDGPU" && continue + + _f = (args...) -> groupnorm(args...; groups, epsilon) + + epsilon = T(1e-5) + x, scale, bias = _setup_groupnorm(aType, T, sz, groups) + y = _f(x, scale, bias) + + @inferred groupnorm(x, scale, bias; groups, epsilon) + @jet _f(x, scale, bias) + + @test y isa aType{T, length(sz)} + @test size(y) == sz + + fp16 = T == Float16 + __f = (args...) -> sum(groupnorm(x, args...; groups, epsilon)) + @eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu atol=1.0f-2 rtol=1.0f-2 soft_fail=$fp16 + end + end +end diff --git a/test/api/instancenorm.jl b/test/api/instancenorm.jl deleted file mode 100644 index e318a095..00000000 --- a/test/api/instancenorm.jl +++ /dev/null @@ -1,45 +0,0 @@ -using LuxLib, Statistics, Test - -include("../test_utils.jl") - -rng = get_stable_rng(12345) - -function _setup_instancenorm(aType, T, sz; affine::Bool=true) - x = randn(T, sz) |> aType - scale = affine ? aType(ones(T, sz[end - 1])) : nothing - bias = affine ? aType(zeros(T, sz[end - 1])) : nothing - return x, scale, bias -end - -@testset "$mode: Instance Norm" for (mode, aType, on_gpu) in MODES - for T in (Float16, Float32, Float64), - sz in ((4, 4, 6, 2), (3, 4, 2), (4, 4, 4, 3, 2)), - training in (Val(true), Val(false)), - affine in (true, false) - - T === Float16 && mode == "AMDGPU" && continue - - _f = (args...) -> instancenorm(args...; epsilon, training) - - epsilon = T(1e-5) - x, scale, bias = _setup_instancenorm(aType, T, sz; affine) - - y, nt = instancenorm(x, scale, bias; epsilon, training) - - @inferred instancenorm(x, scale, bias; epsilon, training) - @jet _f(x, scale, bias) - @test y isa aType{T, length(sz)} - @test size(y) == sz - - _target_std = ones(ntuple(_ -> 1, length(sz) - 2)..., size(x)[(end - 1):end]...) 
- @eval @test check_approx(std(Array($y); dims=1:($(length(sz) - 2))), - $_target_std; atol=0.2, rtol=0.2) - @test std(y; dims=1:(length(sz) - 2)) != std(x; dims=1:(length(sz) - 2)) - - if __istraining(training) && affine - fp16 = T == Float16 - __f = (args...) -> sum(first(instancenorm(x, args...; epsilon, training))) - @eval @test_gradients $__f $scale $bias soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2 gpu_testing=$on_gpu - end - end -end diff --git a/test/api/instancenorm_tests.jl b/test/api/instancenorm_tests.jl new file mode 100644 index 00000000..8c8ea1e5 --- /dev/null +++ b/test/api/instancenorm_tests.jl @@ -0,0 +1,45 @@ +@testitem "Instance Normalization" setup=[SharedTestSetup] begin + using Statistics + + rng = get_stable_rng(12345) + + function _setup_instancenorm(aType, T, sz; affine::Bool=true) + x = randn(T, sz) |> aType + scale = affine ? aType(ones(T, sz[end - 1])) : nothing + bias = affine ? aType(zeros(T, sz[end - 1])) : nothing + return x, scale, bias + end + + @testset "$mode" for (mode, aType, on_gpu) in MODES + for T in (Float16, Float32, Float64), + sz in ((4, 4, 6, 2), (3, 4, 2), (4, 4, 4, 3, 2)), + training in (Val(true), Val(false)), + affine in (true, false) + + T === Float16 && mode == "AMDGPU" && continue + + _f = (args...) -> instancenorm(args...; epsilon, training) + + epsilon = T(1e-5) + x, scale, bias = _setup_instancenorm(aType, T, sz; affine) + + y, nt = instancenorm(x, scale, bias; epsilon, training) + + @inferred instancenorm(x, scale, bias; epsilon, training) + @jet _f(x, scale, bias) + @test y isa aType{T, length(sz)} + @test size(y) == sz + + _target_std = ones(ntuple(_ -> 1, length(sz) - 2)..., size(x)[(end - 1):end]...) + @eval @test check_approx(std(Array($y); dims=1:($(length(sz) - 2))), + $_target_std; atol=0.2, rtol=0.2) + @test std(y; dims=1:(length(sz) - 2)) != std(x; dims=1:(length(sz) - 2)) + + if __istraining(training) && affine + fp16 = T == Float16 + __f = (args...) -> sum(first(instancenorm(x, args...; epsilon, training))) + @eval @test_gradients $__f $scale $bias soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2 gpu_testing=$on_gpu + end + end + end +end diff --git a/test/api/layernorm.jl b/test/api/layernorm.jl deleted file mode 100644 index 1e4282e6..00000000 --- a/test/api/layernorm.jl +++ /dev/null @@ -1,48 +0,0 @@ -using LuxLib, Statistics, Test - -include("../test_utils.jl") - -function _setup_layernorm(aType, T, x_size, affine_shape) - x = randn(T, x_size) |> aType - if affine_shape !== nothing - scale = randn(T, affine_shape..., 1) |> aType - bias = randn(T, affine_shape..., 1) |> aType - return x, scale, bias - else - return x, nothing, nothing - end -end - -@testset "$mode: LayerNorm" for (mode, aType, on_gpu) in MODES - for T in (Float16, Float32, Float64), - x_shape in ((3, 3, 2, 1), (2, 2, 2, 1), (2, 3, 2, 2)), - affine_shape in (nothing, x_shape[1:3], (1, 1, 1), (1, 1, x_shape[3])) - - T === Float16 && mode == "AMDGPU" && continue - - dims = Colon() - epsilon = T(1e-5) - _f = (args...) -> layernorm(args...; dims, epsilon) - - x, scale, bias = _setup_layernorm(aType, T, x_shape, affine_shape) - - @inferred _f(x, scale, bias) - @jet _f(x, scale, bias) - - y = _f(x, scale, bias) - - @test y isa aType{T, length(x_shape)} - @test size(y) == x_shape - - if affine_shape === nothing - @test check_approx(mean(y; dims), 0; atol=1e-3, rtol=1e-3) - @test check_approx(std(y; dims), 1; atol=1e-1, rtol=1e-1) - end - - fp16 = T == Float16 - if affine_shape !== nothing - __f = (args...) 
-> sum(_f(x, args...)) - @eval @test_gradients $__f $scale $bias soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2 gpu_testing=$on_gpu - end - end -end diff --git a/test/api/layernorm_tests.jl b/test/api/layernorm_tests.jl new file mode 100644 index 00000000..4cd2d9d4 --- /dev/null +++ b/test/api/layernorm_tests.jl @@ -0,0 +1,48 @@ +@testitem "Layer Normalization" setup=[SharedTestSetup] begin + using Statistics + + function _setup_layernorm(aType, T, x_size, affine_shape) + x = randn(T, x_size) |> aType + if affine_shape !== nothing + scale = randn(T, affine_shape..., 1) |> aType + bias = randn(T, affine_shape..., 1) |> aType + return x, scale, bias + else + return x, nothing, nothing + end + end + + @testset "$mode" for (mode, aType, on_gpu) in MODES + for T in (Float16, Float32, Float64), + x_shape in ((3, 3, 2, 1), (2, 2, 2, 1), (2, 3, 2, 2)), + affine_shape in (nothing, x_shape[1:3], (1, 1, 1), (1, 1, x_shape[3])) + + T === Float16 && mode == "AMDGPU" && continue + + dims = Colon() + epsilon = T(1e-5) + _f = (args...) -> layernorm(args...; dims, epsilon) + + x, scale, bias = _setup_layernorm(aType, T, x_shape, affine_shape) + + @inferred _f(x, scale, bias) + @jet _f(x, scale, bias) + + y = _f(x, scale, bias) + + @test y isa aType{T, length(x_shape)} + @test size(y) == x_shape + + if affine_shape === nothing + @test check_approx(mean(y; dims), 0; atol=1e-3, rtol=1e-3) + @test check_approx(std(y; dims), 1; atol=1e-1, rtol=1e-1) + end + + fp16 = T == Float16 + if affine_shape !== nothing + __f = (args...) -> sum(_f(x, args...)) + @eval @test_gradients $__f $scale $bias soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2 gpu_testing=$on_gpu + end + end + end +end diff --git a/test/aqua.jl b/test/aqua.jl deleted file mode 100644 index efe7d1e8..00000000 --- a/test/aqua.jl +++ /dev/null @@ -1,10 +0,0 @@ -using Aqua, ChainRulesCore, LuxLib, Test - -@testset "All Tests (except Ambiguity)" begin - Aqua.test_all(LuxLib; ambiguities=false) -end - -@testset "Ambiguity Tests" begin - # The exclusions are due to CRC.@nondifferentiable - Aqua.test_ambiguities(LuxLib; exclude=[ChainRulesCore.frule, Core.kwcall]) -end diff --git a/test/aqua_tests.jl b/test/aqua_tests.jl new file mode 100644 index 00000000..f339224a --- /dev/null +++ b/test/aqua_tests.jl @@ -0,0 +1,4 @@ +@testitem "Aqua: Quality Assurance" begin + using Aqua + Aqua.test_all(LuxLib) +end diff --git a/test/ext/LuxLibForwardDiffExt.jl b/test/ext/LuxLibForwardDiffExt.jl deleted file mode 100644 index a76e29be..00000000 --- a/test/ext/LuxLibForwardDiffExt.jl +++ /dev/null @@ -1,17 +0,0 @@ -using LuxLib, ForwardDiff, Test - -include("../test_utils.jl") - -rng = get_stable_rng(12345) - -@testset "$mode: dropout" for (mode, aType, on_gpu) in MODES - x = randn(rng, Float32, 10, 2) |> aType - x_dual = ForwardDiff.Dual.(x) - - @test_nowarn dropout(rng, x_dual, 0.5f0, Val(true); dims=:) - - x_dropout = dropout(rng, x, 0.5f0, Val(true); dims=:)[1] - x_dual_dropout = ForwardDiff.value.(dropout(rng, x_dual, 0.5f0, Val(true); dims=:)[1]) - - @test check_approx(x_dropout, x_dual_dropout) -end diff --git a/test/forwarddiff_tests.jl b/test/forwarddiff_tests.jl new file mode 100644 index 00000000..63139883 --- /dev/null +++ b/test/forwarddiff_tests.jl @@ -0,0 +1,95 @@ +@testitem "Efficient JVPs" setup=[SharedTestSetup] begin + using ForwardDiff, Zygote, ComponentArrays + + struct LuxLibTestTag end + + # Computes (∂f/∂x)u + function jvp_forwarddiff(f, x, u) + uu = reshape(u, axes(x)) + y = ForwardDiff.Dual{ + typeof(ForwardDiff.Tag(LuxLibTestTag(), eltype(x))), eltype(x), + 
1}.(x, ForwardDiff.Partials.(tuple.(uu))) + return vec(ForwardDiff.partials.(vec(f(y)), 1)) + end + + function jvp_forwarddiff(f, x::ComponentArray, u) + xx = getdata(x) + uu = vec(u) + y = ComponentArray( + ForwardDiff.Dual{ + typeof(ForwardDiff.Tag(LuxLibTestTag(), eltype(x))), + eltype(x), 1}.(xx, ForwardDiff.Partials.(tuple.(uu))), + getaxes(x)) + return vec(ForwardDiff.partials.(vec(f(y)), 1)) + end + + ## This exists exclusively for testing. It has horrifying performance implications + function jvp_forwarddiff_concrete(f, x, u) + Jₓ = ForwardDiff.jacobian(f, x) + return Jₓ * vec(u) + end + + function jvp_zygote(f, x, u) + Jₓ = only(Zygote.jacobian(f, x)) + return Jₓ * vec(u) + end + + function test_jvp_computation(f, x, u, on_gpu) + jvp₁ = jvp_forwarddiff(f, x, u) + if !(x isa ComponentArray && on_gpu) + # ComponentArray + ForwardDiff on GPU don't play nice + jvp₂ = jvp_forwarddiff_concrete(f, x, u) + @test check_approx(jvp₁, jvp₂; atol=1e-5, rtol=1e-5) + + jvp₃ = jvp_zygote(f, x, u) + @test check_approx(jvp₁, jvp₃; atol=1e-5, rtol=1e-5) + end + end + + @testset "$(mode): Jacobian Vector Products" for (mode, aType, on_gpu) in MODES + @testset "$(op)(; flipped = $flipped)" for flipped in (true, false), + op in (depthwiseconv, conv) + + op === depthwiseconv && on_gpu && continue + + input_dims = [(2, 4, 2, 1, 3), (4, 4, 1, 3), (4, 4, 3, 2), (4, 1, 3), (4, 3, 2)] + weight_dims = if op === conv + [(2, 2, 2, 1, 4), (3, 3, 1, 4), (3, 3, 3, 2), (3, 1, 4), (3, 3, 2)] + else + [(2, 2, 2, 1, 1), (3, 3, 1, 1), (3, 3, 3, 3), (3, 1, 1), (3, 3, 3)] + end + + @testset "Input Dims: $(in_dims) | Weight Dims: $(w_dims)" for (in_dims, w_dims) in zip( + input_dims, weight_dims) + x = randn(Float32, in_dims...) |> aType + w = randn(Float32, w_dims...) |> aType + ux = randn(Float32, size(x)...) |> aType + uw = randn(Float32, size(w)...) 
|> aType + u = randn(Float32, length(x) + length(w)) |> aType + + test_jvp_computation(x -> op(x, w; flipped), x, ux, on_gpu) + test_jvp_computation(w -> op(x, w; flipped), w, uw, on_gpu) + test_jvp_computation(xw -> op(xw.x, xw.w; flipped), ComponentArray(; x, w), + u, on_gpu) + end + end + end +end + +@testitem "ForwardDiff dropout" setup=[SharedTestSetup] begin + using ForwardDiff + + rng = get_stable_rng(12345) + + @testset "$mode: dropout" for (mode, aType, on_gpu) in MODES + x = randn(rng, Float32, 10, 2) |> aType + x_dual = ForwardDiff.Dual.(x) + + @test_nowarn dropout(rng, x_dual, 0.5f0, Val(true); dims=:) + + x_dropout = dropout(rng, x, 0.5f0, Val(true); dims=:)[1] + x_dual_dropout = ForwardDiff.value.(dropout(rng, x_dual, 0.5f0, Val(true); dims=:)[1]) + + @test check_approx(x_dropout, x_dual_dropout) + end +end diff --git a/test/jvp.jl b/test/jvp.jl deleted file mode 100644 index 17e72363..00000000 --- a/test/jvp.jl +++ /dev/null @@ -1,75 +0,0 @@ -using LuxLib, ForwardDiff, Zygote, Test -using ComponentArrays - -include("test_utils.jl") - -struct LuxLibTestTag end - -# Computes (∂f/∂x)u -function jvp_forwarddiff(f, x, u) - uu = reshape(u, axes(x)) - y = ForwardDiff.Dual{typeof(ForwardDiff.Tag(LuxLibTestTag(), eltype(x))), eltype(x), - 1}.(x, ForwardDiff.Partials.(tuple.(uu))) - return vec(ForwardDiff.partials.(vec(f(y)), 1)) -end - -function jvp_forwarddiff(f, x::ComponentArray, u) - xx = getdata(x) - uu = vec(u) - y = ComponentArray(ForwardDiff.Dual{typeof(ForwardDiff.Tag(LuxLibTestTag(), - eltype(x))), eltype(x), 1}.(xx, ForwardDiff.Partials.(tuple.(uu))), - getaxes(x)) - return vec(ForwardDiff.partials.(vec(f(y)), 1)) -end - -## This exists exclusively for testing. It has horrifying performance implications -function jvp_forwarddiff_concrete(f, x, u) - Jₓ = ForwardDiff.jacobian(f, x) - return Jₓ * vec(u) -end - -function jvp_zygote(f, x, u) - Jₓ = only(Zygote.jacobian(f, x)) - return Jₓ * vec(u) -end - -function test_jvp_computation(f, x, u, on_gpu) - jvp₁ = jvp_forwarddiff(f, x, u) - if !(x isa ComponentArray && on_gpu) - # ComponentArray + ForwardDiff on GPU don't play nice - jvp₂ = jvp_forwarddiff_concrete(f, x, u) - @test check_approx(jvp₁, jvp₂; atol=1e-5, rtol=1e-5) - - jvp₃ = jvp_zygote(f, x, u) - @test check_approx(jvp₁, jvp₃; atol=1e-5, rtol=1e-5) - end -end - -@testset "$mode: Jacobian Vector Products" for (mode, aType, on_gpu) in MODES - @testset "$(op)(; flipped = $flipped)" for flipped in (true, false), - op in (depthwiseconv, conv) - - op === depthwiseconv && on_gpu && continue - - input_dims = [(2, 4, 2, 1, 3), (4, 4, 1, 3), (4, 4, 3, 2), (4, 1, 3), (4, 3, 2)] - weight_dims = if op === conv - [(2, 2, 2, 1, 4), (3, 3, 1, 4), (3, 3, 3, 2), (3, 1, 4), (3, 3, 2)] - else - [(2, 2, 2, 1, 1), (3, 3, 1, 1), (3, 3, 3, 3), (3, 1, 1), (3, 3, 3)] - end - - @testset "Input Dims: $(in_dims) | Weight Dims: $(w_dims)" for (in_dims, w_dims) in zip(input_dims, - weight_dims) - x = randn(Float32, in_dims...) |> aType - w = randn(Float32, w_dims...) |> aType - ux = randn(Float32, size(x)...) |> aType - uw = randn(Float32, size(w)...) 
|> aType - u = randn(Float32, length(x) + length(w)) |> aType - - test_jvp_computation(x -> op(x, w; flipped), x, ux, on_gpu) - test_jvp_computation(w -> op(x, w; flipped), w, uw, on_gpu) - test_jvp_computation(xw -> op(xw.x, xw.w; flipped), ComponentArray(; x, w), u, - on_gpu) - end - end -end diff --git a/test/runtests.jl b/test/runtests.jl index 56b1d384..8ba7978a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,18 +1,3 @@ -using SafeTestsets, Test, TestSetExtensions +using ReTestItems -@testset ExtendedTestSet "LuxLib" begin - @safetestset "Dropout" include("api/dropout.jl") - - @testset "Normalization" begin - @safetestset "BatchNorm" include("api/batchnorm.jl") - @safetestset "GroupNorm" include("api/groupnorm.jl") - @safetestset "InstanceNorm" include("api/instancenorm.jl") - @safetestset "LayerNorm" include("api/layernorm.jl") - end - - @safetestset "ForwardDiff Extension" include("ext/LuxLibForwardDiffExt.jl") - - @safetestset "Efficient Jacobian-Vector-Products" include("jvp.jl") - - @safetestset "Aqua Tests" include("aqua.jl") -end +ReTestItems.runtests(@__DIR__) diff --git a/test/test_utils.jl b/test/shared_testsetup.jl similarity index 67% rename from test/test_utils.jl rename to test/shared_testsetup.jl index f671252a..886b20d6 100644 --- a/test/test_utils.jl +++ b/test/shared_testsetup.jl @@ -1,8 +1,9 @@ -using LuxLib, LuxTestUtils, StableRNGs, Test, Zygote -using LuxCUDA, LuxAMDGPU -using LuxTestUtils: @jet, @test_gradients, check_approx +@testsetup module SharedTestSetup +import Reexport: @reexport -CUDA.allowscalar(false) +using LuxLib, LuxCUDA, LuxAMDGPU +@reexport using LuxTestUtils, StableRNGs, Test, Zygote +import LuxTestUtils: @jet, @test_gradients, check_approx const GROUP = get(ENV, "GROUP", "All") @@ -26,3 +27,7 @@ end get_stable_rng(seed=12345) = StableRNG(seed) __istraining(::Val{training}) where {training} = training + +export cpu_testing, cuda_testing, amdgpu_testing, MODES, get_stable_rng, __istraining, + check_approx, @jet, @test_gradients +end