MultiHeadAttention implementation #2146

Merged · 21 commits · Mar 11, 2023
add cuda tests
CarloLucibello committed Mar 5, 2023
commit 2ecf19ba1d4a0381e50f5b3d8c176eb6e171ef28
26 changes: 26 additions & 0 deletions test/cuda/layers.jl
@@ -338,3 +338,29 @@ end
    @test eltype(pool(reshape(gx,3,4,1))) == Float16
  end
end

@testset "MultiHeadAttention" begin
dim = 4; nheads = 2; len = 3; batch_size = 5
mha_cpu = MultiHeadAttention(dim; nheads)
x_cpu = rand(Float32, (dim, len, batch_size))
y_cpu, α_cpu = mha_cpu(x_cpu, withscores=true)

mha_gpu = mha_cpu |> gpu
x_gpu = x_cpu |> gpu
y_gpu, α_gpu = mha_gpu(x_gpu, withscores=true)
@test y_gpu isa CuArray{Float32}
@test α_gpu isa CuArray{Float32}
@test Array(y_gpu) ≈ y_cpu atol=1e-4
@test Array(α_gpu) ≈ α_cpu atol=1e-4

gm_cpu, gx_cpu = gradient(mha_cpu, x_cpu) do mha, x
y, α = mha(x, withscores=true)
return sum(y.^2) + sum(α.^2)
end
gm_gpu, gx_gpu = gradient(mha_gpu, x_gpu) do mha, x
y, α = mha(x, withscores=true)
return sum(y.^2) + sum(α.^2)
end
test_grad_equal(gm_gpu, gm_cpu)
test_grad_equal(gx_gpu, gx_cpu)
end
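For reference, here is a minimal CPU-only sketch of the call pattern this testset exercises. The constructor, the `withscores=true` keyword, and the `(features, length, batch)` input layout are taken directly from the tests above; the `(len, len, nheads, batch_size)` shape of `α` is an assumption based on NNlib's attention-score convention, not something asserted in this commit.

```julia
using Flux  # assumes the MultiHeadAttention layer added by this PR is available

dim, nheads, len, batch_size = 4, 2, 3, 5
mha = MultiHeadAttention(dim; nheads)
x = rand(Float32, dim, len, batch_size)

# Self-attention: the single input serves as query, key, and value.
y, α = mha(x, withscores=true)
size(y)  # (dim, len, batch_size) -- same layout as the input
size(α)  # assumed (len, len, nheads, batch_size), per NNlib's score layout
```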
1 change: 1 addition & 0 deletions test/runtests.jl
@@ -33,6 +33,7 @@ Random.seed!(0)
end

@testset "Layers" begin
include("layers/attention.jl")
include("layers/basic.jl")
include("layers/normalisation.jl")
include("layers/stateless.jl")
13 changes: 13 additions & 0 deletions test/test_utils.jl
@@ -96,3 +96,16 @@ function test_grad_type(g::NamedTuple, x::T) where T
    test_grad_type(g[f], getfield(x, f))
  end
end

# Gradients of non-differentiable fields are `nothing` on both devices.
test_grad_equal(g1::Nothing, g2::Nothing) = nothing

# Compare a GPU gradient against its CPU counterpart elementwise.
function test_grad_equal(g1::AnyCuArray{T}, g2::Array{T}; atol=1e-4) where T
  @test Array(g1) ≈ g2 atol=atol
end

# Structured gradients (e.g. for a layer) come back as NamedTuples:
# check that the fields match, then recurse into each one.
function test_grad_equal(g1::T1, g2::T2) where {T1 <: NamedTuple, T2 <: NamedTuple}
  @test fieldnames(T1) == fieldnames(T2)
  for f in fieldnames(T1)
    test_grad_equal(g1[f], g2[f])
  end
end
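To see how these three methods cooperate, here is a hypothetical usage sketch (the `Dense` layer and the loss are illustrative, not part of the PR): Zygote returns the gradient of a layer as a NamedTuple, so the NamedTuple method recurses field by field until it bottoms out at arrays, handled by the `AnyCuArray` method, or at `nothing` for non-differentiable fields such as the activation function.

```julia
using CUDA, Flux, Zygote

m_cpu = Dense(3 => 2)
m_gpu = m_cpu |> gpu
x_cpu = rand(Float32, 3, 4)
x_gpu = x_cpu |> gpu

g_cpu, = gradient(m -> sum(abs2, m(x_cpu)), m_cpu)
g_gpu, = gradient(m -> sum(abs2, m(x_gpu)), m_gpu)

# g_* is a NamedTuple (weight = ..., bias = ..., σ = nothing); the recursion
# dispatches to the array method for weight/bias and the Nothing method for σ.
test_grad_equal(g_gpu, g_cpu)
```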