This repository has been archived by the owner on Nov 4, 2024. It is now read-only.

Migrate to Distributed Testing using ReTestItems.jl
avik-pal committed Feb 11, 2024
1 parent 5dfece0 commit b189239
Showing 31 changed files with 638 additions and 571 deletions.
2 changes: 2 additions & 0 deletions .buildkite/pipeline.yml
@@ -170,4 +170,6 @@ steps:
- "Boltz"

env:
RETESTITEMS_NWORKERS: 4
RETESTITEMS_NWORKER_THREADS: 2
SECRET_CODECOV_TOKEN: "wMpDLaAVEHe6EJAc+LZBl4jF3wADVN6F+15vr/ONJHOv/XXbtYovuc1PCQwhz0AzZjWpSO12IDTyKfwVgYvqaGYfQ9yGyplJtSu2MiL2k44B/IY+wEZhsfkBIhXlG89si5A/I+/f8T8QuwxBqBLh8fYq7oxC+gNzKhbj8vIT4n5hCusvYYGufgKRC2U9P4ij0Sf40egQ5B+StaTykqJNq1163UARjNBypHIVDbYE0HUHiF7WB4eI5LxBBzlcHmsUkuGp6ZlqAu/8C83k65lwDnyHDfjvBM24q9GQTDFA5r7RUfYKHElQEBPk3GhoJn7XGIfD2pC0VNcw5jYCwsX2mw==;U2FsdGVkX1+euKMib66zno5Kkw7OxXo6v4RnkAA/HElJM46qfX17VgZ9iVLg45jOOWRgghmyYuy2WQ8RcVbuOg=="
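
The `RETESTITEMS_NWORKERS` and `RETESTITEMS_NWORKER_THREADS` variables added above (and in the workflows below) are read by ReTestItems.jl and control how many worker processes it spawns and how many Julia threads each worker runs with. As a minimal sketch, assuming a standard ReTestItems setup (the actual `test/runtests.jl` belongs to this commit but is not shown in this listing), the runner reduces to:

```julia
# Minimal sketch of a ReTestItems-based test/runtests.jl (assumed, not shown in
# this diff). ReTestItems discovers *_tests.jl files containing @testitem blocks
# and honors RETESTITEMS_NWORKERS / RETESTITEMS_NWORKER_THREADS from the
# environment, so the CI settings above control test parallelism.
using ReTestItems, LuxLib

runtests(LuxLib)
```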
6 changes: 6 additions & 0 deletions .github/workflows/CI.yml
@@ -38,9 +38,15 @@ jobs:
      - uses: julia-actions/julia-runtest@v1
        env:
          GROUP: "CPU"
          RETESTITEMS_NWORKERS: 4
          RETESTITEMS_NWORKER_THREADS: 2
      - uses: julia-actions/julia-processcoverage@v1
        with:
          directories: src,ext
      - uses: codecov/codecov-action@v4
        with:
          files: lcov.info
          token: ${{ secrets.CODECOV_TOKEN }}
          verbose: true
          fail_ci_if_error: true

41 changes: 41 additions & 0 deletions .github/workflows/Downgrade.yml
@@ -0,0 +1,41 @@
name: Downgrade
on:
  pull_request:
    branches:
      - main
    paths-ignore:
      - 'docs/**'
  push:
    branches:
      - master
    paths-ignore:
      - 'docs/**'
jobs:
  test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        version: ['1.9']
    steps:
      - uses: actions/checkout@v4
      - uses: julia-actions/setup-julia@v1
        with:
          version: ${{ matrix.version }}
      - uses: cjdoris/julia-downgrade-compat-action@v1
        with:
          skip: Pkg,TOML
      - uses: julia-actions/julia-buildpkg@v1
      - uses: julia-actions/julia-runtest@v1
        env:
          GROUP: "CPU"
          RETESTITEMS_NWORKERS: 4
          RETESTITEMS_NWORKER_THREADS: 2
      - uses: julia-actions/julia-processcoverage@v1
        with:
          directories: src,ext
      - uses: codecov/codecov-action@v4
        with:
          files: lcov.info
          token: ${{ secrets.CODECOV_TOKEN }}
          verbose: true
          fail_ci_if_error: true
8 changes: 7 additions & 1 deletion .github/workflows/Downstream.yml
@@ -54,9 +54,15 @@ jobs:
@info "Not compatible with this release. No problem." exception=err
exit(0) # Exit immediately, as a success
end
env:
RETESTITEMS_NWORKERS: 4
RETESTITEMS_NWORKER_THREADS: 2
- uses: julia-actions/julia-processcoverage@v1
with:
directories: src,ext
- uses: codecov/codecov-action@v4
with:
files: lcov.info
          token: ${{ secrets.CODECOV_TOKEN }}
          verbose: true
          fail_ci_if_error: true
File renamed without changes.
43 changes: 32 additions & 11 deletions Project.toml
@@ -1,7 +1,7 @@
name = "LuxLib"
uuid = "82251201-b29d-42c6-8e01-566dec8acb11"
authors = ["Avik Pal <[email protected]> and contributors"]
version = "0.3.9"
version = "0.3.10"

[deps]
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
@@ -27,22 +27,43 @@ LuxLibReverseDiffExt = "ReverseDiff"
LuxLibTrackerExt = "Tracker"

[compat]
ChainRulesCore = "1"
Aqua = "0.8"
ChainRulesCore = "1.20"
ComponentArrays = "0.15"
ForwardDiff = "0.10"
KernelAbstractions = "0.9"
LuxCUDA = "0.2, 0.3"
Markdown = "1"
NNlib = "0.8, 0.9"
PrecompileTools = "1"
Random = "1"
KernelAbstractions = "0.9.2"
LuxAMDGPU = "0.2"
LuxCUDA = "0.3"
LuxTestUtils = "0.1.15"
Markdown = "1.9"
NNlib = "0.9"
PrecompileTools = "1.2"
Random = "1.9"
ReTestItems = "1"
Reexport = "1"
ReverseDiff = "1"
Statistics = "1"
StableRNGs = "1"
Statistics = "1.9"
Test = "1.9"
Tracker = "0.2"
Zygote = "0.6"
julia = "1.9"

[extras]
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
ComponentArrays = "b0b7db55-cfe3-40fc-9ded-d10e2dbeff66"
ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210"
LuxAMDGPU = "83120cb1-ca15-4f04-bf3b-6967d2e6b60b"
LuxCUDA = "d0bbae9a-e099-4d5b-a835-1c6931763bda"
ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267"
Tracker = "9f7883ad-71c0-57eb-9f7f-b5c9e6d3789c"
LuxTestUtils = "ac9de150-d08f-4546-94fb-7472b5760531"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
ReTestItems = "817f1d60-ba6b-4fd5-9520-3cf149f6a823"
Reexport = "189a3867-3050-52da-a836-e630ba90ab69"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[targets]
test = ["Aqua", "ChainRulesCore", "ComponentArrays", "ForwardDiff", "LuxAMDGPU", "LuxCUDA", "LuxTestUtils", "Random", "ReTestItems", "Reexport", "StableRNGs", "Statistics", "Test", "Zygote"]
16 changes: 9 additions & 7 deletions ext/LuxLibForwardDiffExt.jl
@@ -5,9 +5,7 @@ import ForwardDiff: Dual
import LuxLib: AA

# dropout
function LuxLib._dropout_fptype(x::AA{<:Dual})
return ForwardDiff.valtype(eltype(x))
end
LuxLib._dropout_fptype(x::AA{<:Dual}) = ForwardDiff.valtype(eltype(x))

# Convolutions: We might want to capture these further down in `conv!`
# NOTE: In principle we can concatenate all of the partials along the batch dimension
@@ -45,10 +43,14 @@ for op in [:conv, :depthwiseconv]

y = $(op)(x_, w_, cdims; kwargs...)

dys₁ = ntuple(_ -> similar(x_, Vₓ, NNlib.output_size(cdims)...,
NNlib.channels_out(cdims), size(x, N)), P)
dys₂ = ntuple(_ -> similar(x_, Vₓ, NNlib.output_size(cdims)...,
NNlib.channels_out(cdims), size(x, N)), P)
dys₁ = ntuple(
_ -> similar(x_, Vₓ, NNlib.output_size(cdims)...,
NNlib.channels_out(cdims), size(x, N)),
P)
dys₂ = ntuple(
_ -> similar(x_, Vₓ, NNlib.output_size(cdims)...,
NNlib.channels_out(cdims), size(x, N)),
P)
for i in 1:P
$(op!)(dys₁[i], ForwardDiff.partials.(x, i), w_, cdims; kwargs...)
$(op!)(dys₂[i], x_, ForwardDiff.partials.(w, i), cdims; kwargs...)
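
For context, the rewrapped block above implements the product rule for forward-mode AD through a convolution: with `y = conv(x, w)`, the i-th partial is `conv(∂xᵢ, w) + conv(x, ∂wᵢ)`, which is what the `dys₁`/`dys₂` buffers hold before being repackaged into `Dual`s. A scalar illustration of the same rule (an illustrative sketch, not code from this commit):

```julia
# Product rule for a bilinear operation, which is the rule the Dual-number
# overloads of conv/depthwiseconv apply per partial. `bilinear` stands in
# for conv(x, w; cdims).
using ForwardDiff: Dual, value, partials

bilinear(a, b) = a * b

x = Dual(2.0, 1.0, 0.0)   # value 2.0 carrying two partials
w = Dual(3.0, 0.0, 1.0)   # value 3.0 carrying two partials

dy1 = bilinear(partials(x, 1), value(w)) + bilinear(value(x), partials(w, 1))
dy2 = bilinear(partials(x, 2), value(w)) + bilinear(value(x), partials(w, 2))

y = bilinear(x, w)
partials(y, 1) == dy1  # true
partials(y, 2) == dy2  # true
```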
5 changes: 2 additions & 3 deletions ext/LuxLibLuxCUDAExt/LuxLibLuxCUDAExt.jl
@@ -2,9 +2,8 @@ module LuxLibLuxCUDAExt

using LuxCUDA, LuxLib
import ChainRulesCore as CRC
import LuxLib: batchnorm,
batchnorm_cudnn, ∇batchnorm_cudnn, _get_batchnorm_statistics,
FP_32_64, ∂∅
import LuxLib: batchnorm, batchnorm_cudnn, ∇batchnorm_cudnn, _get_batchnorm_statistics,
FP_32_64, ∂∅

include("batchnorm.jl")

10 changes: 6 additions & 4 deletions ext/LuxLibLuxCUDAExt/batchnorm.jl
@@ -1,8 +1,9 @@
using LuxCUDA
using .cuDNN: CUDNN_BN_MIN_EPSILON, cudnnBatchNormalizationBackward,
cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL,
cudnnBatchNormalizationForwardTraining, cudnnTensorDescriptor, CUDNN_TENSOR_NCHW,
cudnnDataType, dim4, scalingParameter, handle
cudnnBatchNormalizationForwardInference, CUDNN_BATCHNORM_SPATIAL,
cudnnBatchNormalizationForwardTraining, cudnnTensorDescriptor,
CUDNN_TENSOR_NCHW,
cudnnDataType, dim4, scalingParameter, handle
import LuxLib: FP_32_64

# NOTE: This can be upstreamed to LuxCUDA once we drop support for v1.6
@@ -169,7 +170,8 @@ function cudnnBNBackward!(∂g::DenseCuArray{T}, g::DenseCuArray{T}, ∂b::Dense
xd = cudnnTensorDescriptor(x)
∂yd = cudnnTensorDescriptor(∂y)
∂xd = cudnnTensorDescriptor(∂x)
gd = cudnnTensorDescriptor(CUDNN_TENSOR_NCHW, cudnnDataType(T), Cint(length(_wsize(x))),
gd = cudnnTensorDescriptor(
CUDNN_TENSOR_NCHW, cudnnDataType(T), Cint(length(_wsize(x))),
dim4(_wsize(x), Val(CUDNN_TENSOR_NCHW)))

xmean = xmean === nothing ? CU_NULL : xmean
4 changes: 2 additions & 2 deletions ext/LuxLibLuxCUDATrackerExt.jl
@@ -2,9 +2,9 @@ module LuxLibLuxCUDATrackerExt

using LuxCUDA, LuxLib, Tracker
import Tracker: @grad,
data, nobacksies, track, TrackedArray, TrackedVector, TrackedReal
data, nobacksies, track, TrackedArray, TrackedVector, TrackedReal
import LuxLib: AA, AV, batchnorm_cudnn, ∇batchnorm_cudnn, _get_batchnorm_statistics,
FP_32_64, ∂∅, __is_tracked
FP_32_64, ∂∅, __is_tracked

# api/batchnorm.jl
const TR_CUDNN_BN_ARRAY_TYPE = Union{TrackedArray{<:Any, <:Any, <:CuArray{<:FP_32_64, 2}},
4 changes: 2 additions & 2 deletions ext/LuxLibReverseDiffExt.jl
@@ -3,8 +3,8 @@ module LuxLibReverseDiffExt
using ChainRulesCore, LuxLib, ReverseDiff
import ChainRulesCore as CRC
import LuxLib: AA, __is_tracked
import ReverseDiff: TrackedArray,
TrackedReal, decrement_deriv!, increment_deriv!, value, @grad_from_chainrules
import ReverseDiff: TrackedArray, TrackedReal, decrement_deriv!, increment_deriv!, value,
@grad_from_chainrules

# Patches: Needs upstreaming
@inline function increment_deriv!(t::Union{TrackedArray, TrackedReal}, ::NoTangent, i)
4 changes: 2 additions & 2 deletions src/LuxLib.jl
@@ -23,7 +23,7 @@ include("api/groupnorm.jl")
include("api/instancenorm.jl")
include("api/layernorm.jl")

export batchnorm, groupnorm, instancenorm, layernorm
export alpha_dropout, dropout
export batchnorm, groupnorm, instancenorm, layernorm,
alpha_dropout, dropout

end
8 changes: 4 additions & 4 deletions src/impl/groupnorm.jl
@@ -1,7 +1,7 @@
# Low-Level Kernels
## Original Implementation: https://github.com/pytorch/pytorch/blob/master/caffe2/operators/group_norm_op.cu
@kernel function _compute_fused_params_kernel!(scale, bias, @Const(C), @Const(K), @Const(μ),
@Const(σ⁻¹), @Const(γ), @Const(β))
@kernel function _compute_fused_params_kernel!(scale, bias, @Const(C), @Const(K),
@Const(μ), @Const(σ⁻¹), @Const(γ), @Const(β))
idx = @index(Global)
ng = _div_idx(idx, K)
c = _mod_idx(idx, C)
@@ -27,8 +27,8 @@ end
@inbounds dY_dscale[idx] = γ[c] * σ⁻¹[ng]
end

@kernel function _groupnorm_xscale_and_bias_kernel!(X_scale, bias, @Const(alpha), @Const(μ),
@Const(σ⁻¹), @Const(ds_sum), @Const(db_sum))
@kernel function _groupnorm_xscale_and_bias_kernel!(X_scale, bias, @Const(alpha),
@Const(μ), @Const(σ⁻¹), @Const(ds_sum), @Const(db_sum))
idx = @index(Global)
@inbounds x = (db_sum[idx] * μ[idx] - ds_sum[idx]) * (σ⁻¹[idx]^3) * alpha
@inbounds X_scale[idx] = x
18 changes: 0 additions & 18 deletions test/Project.toml

This file was deleted.

56 changes: 0 additions & 56 deletions test/api/batchnorm.jl

This file was deleted.

54 changes: 54 additions & 0 deletions test/api/batchnorm_tests.jl
@@ -0,0 +1,54 @@
@testitem "Batch Normalization" setup=[SharedTestSetup] begin
rng = get_stable_rng(12345)

function _setup_batchnorm(aType, T, sz; affine::Bool=true, track_stats::Bool)
x = randn(T, sz) |> aType
scale = affine ? aType(randn(T, sz[end - 1])) : nothing
bias = affine ? aType(randn(T, sz[end - 1])) : nothing

if track_stats
running_mean = randn(T, sz[end - 1]) |> aType
running_var = abs2.(randn(T, sz[end - 1])) |> aType
return x, scale, bias, running_mean, running_var
else
return x, scale, bias, nothing, nothing
end
end

@testset "$mode" for (mode, aType, on_gpu) in MODES
for T in (Float16, Float32, Float64),
sz in ((4, 4, 6, 2), (8, 2), (4, 4, 4, 3, 2)),
training in (Val(true), Val(false)),
affine in (true, false),
track_stats in (true, false)

T === Float16 && mode == "AMDGPU" && continue

_f = (args...) -> batchnorm(args...; epsilon, training, momentum=T(0.9))

epsilon = T(1e-5)
x, scale, bias, rm, rv = _setup_batchnorm(aType, T, sz; track_stats, affine)

y, nt = batchnorm(x, scale, bias, rm, rv; epsilon, training, momentum=T(0.9))

@inferred batchnorm(x, scale, bias, rm, rv; epsilon, training, momentum=T(0.9))

@jet _f(x, scale, bias, rm, rv)

@test y isa aType{T, length(sz)}
@test size(y) == sz

if rm !== nothing
@test size(nt.running_mean) == (size(x, length(sz) - 1),)
@test size(nt.running_var) == (size(x, length(sz) - 1),)
end

if __istraining(training) && affine
fp16 = T == Float16
__f = (args...) -> sum(first(batchnorm(x, args..., rm, rv; epsilon,
training, momentum=T(0.9))))
@eval @test_gradients $__f $scale $bias gpu_testing=$on_gpu soft_fail=$fp16 atol=1.0f-2 rtol=1.0f-2
end
end
end
end
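
The `setup=[SharedTestSetup]` in the test item above refers to a `@testsetup` module shared by all test items; it is part of this commit but does not appear in this truncated listing. A hypothetical sketch of what it provides — `MODES`, `get_stable_rng`, the `aType` array constructors, and the `on_gpu` flag are assumptions inferred from their usage above:

```julia
# Hypothetical sketch of the SharedTestSetup referenced above; the real file is
# part of this commit but not shown here. Names and structure are inferred from
# how the test item uses them.
@testsetup module SharedTestSetup
using LuxCUDA, LuxAMDGPU, StableRNGs

get_stable_rng(seed=12345) = StableRNG(seed)

# Each mode is (name, array constructor, runs-on-gpu flag), consumed by the
# `@testset "$mode" for (mode, aType, on_gpu) in MODES` loop.
const MODES = begin
    modes = [("CPU", Array, false)]
    LuxCUDA.functional() && push!(modes, ("CUDA", CuArray, true))
    LuxAMDGPU.functional() && push!(modes, ("AMDGPU", ROCArray, true))
    modes
end

export MODES, get_stable_rng
end
```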
