From e59d534de4279add4b4eb0032d58b9305df18be0 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Fri, 15 Dec 2023 10:57:38 -0600 Subject: [PATCH 1/5] Add Enzyme GPU support Fix examples tests with CUDA backend Add synchronize rule --- examples/utils.jl | 2 +- ext/EnzymeExt.jl | 459 ++++++++++++++++++++++++++++---------- src/KernelAbstractions.jl | 12 + src/cpu.jl | 4 +- src/reflection.jl | 3 - test/Project.toml | 1 + test/examples.jl | 1 + test/extensions/enzyme.jl | 15 +- test/runtests.jl | 4 + 9 files changed, 376 insertions(+), 125 deletions(-) diff --git a/examples/utils.jl b/examples/utils.jl index ea3d3db3..20eb4d3a 100644 --- a/examples/utils.jl +++ b/examples/utils.jl @@ -1,5 +1,5 @@ # EXCLUDE FROM TESTING -if Base.find_package("CUDA") !== nothing +if backend_str == "CUDA" && Base.find_package("CUDA") !== nothing using CUDA using CUDA.CUDAKernels const backend = CUDABackend() diff --git a/ext/EnzymeExt.jl b/ext/EnzymeExt.jl index 16bace15..04eecd87 100644 --- a/ext/EnzymeExt.jl +++ b/ext/EnzymeExt.jl @@ -1,151 +1,382 @@ module EnzymeExt - if isdefined(Base, :get_extension) - using EnzymeCore - using EnzymeCore.EnzymeRules - else - using ..EnzymeCore - using ..EnzymeCore.EnzymeRules - end - import KernelAbstractions: Kernel, StaticSize, launch_config, __groupsize, __groupindex, blocks, mkcontext, CompilerMetadata, CPU, Backend +if isdefined(Base, :get_extension) + using EnzymeCore + using EnzymeCore.EnzymeRules +else + using ..EnzymeCore + using ..EnzymeCore.EnzymeRules +end - function EnzymeCore.compiler_job_from_backend(b::Backend, @nospecialize(F::Type), @nospecialize(TT::Type)) - error("EnzymeCore.compiler_job_from_backend is not yet implemented for $(typeof(b)), please file an issue.") - end +import KernelAbstractions: + Kernel, + StaticSize, + launch_config, + allocate, + blocks, + mkcontext, + CompilerMetadata, + CPU, + GPU, + argconvert, + supports_enzyme, + __fake_compiler_job, + backend, + __index_Group_Cartesian, + __index_Global_Linear, + __groupsize, + __groupindex, + Backend, + synchronize - EnzymeRules.inactive(::Type{StaticSize}, x...) = nothing +function EnzymeCore.compiler_job_from_backend( + b::Backend, + @nospecialize(F::Type), + @nospecialize(TT::Type) +) + error( + "EnzymeCore.compiler_job_from_backend is not yet implemented for $(typeof(b)), please file an issue.", + ) +end - # https://github.com/EnzymeAD/Enzyme.jl/issues/1516 - # On the CPU `autodiff_deferred` can deadlock. - function fwd(ctx, f, args...) - EnzymeCore.autodiff_deferred(Forward, Const(f), Const{Nothing}, Const(ctx), args...) - return nothing - end +EnzymeRules.inactive(::Type{StaticSize}, x...) = nothing - function aug_fwd(ctx, f::FT, ::Val{ModifiedBetween}, subtape, args...) where {ModifiedBetween, FT} - TapeType = EnzymeCore.tape_type(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), Const{Core.Typeof(f)}, Const{Nothing}, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) - forward, _ = EnzymeCore.autodiff_deferred_thunk(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), TapeType, Const{Core.Typeof(f)}, Const{Nothing}, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) - subtape[__groupindex(ctx)] = forward(Const(f), Const(ctx), args...)[1] - return nothing - end +# https://github.com/EnzymeAD/Enzyme.jl/issues/1516 +# On the CPU `autodiff_deferred` can deadlock. +# Hence a specialized CPU version +function cpu_fwd(ctx, f, args...) + EnzymeCore.autodiff(Forward, Const(f), Const{Nothing}, Const(ctx), args...) 
+ return nothing +end - function rev(ctx, f::FT, ::Val{ModifiedBetween}, subtape, args...) where {ModifiedBetween, FT} - TapeType = EnzymeCore.tape_type(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), Const{Core.Typeof(f)}, Const{Nothing}, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) - _, reverse = EnzymeCore.autodiff_deferred_thunk(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), TapeType, Const{Core.Typeof(f)}, Const{Nothing}, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) - tp = subtape[__groupindex(ctx)] - reverse(Const(f), Const(ctx), args..., tp) - return nothing - end +function gpu_fwd(ctx, f, args...) + EnzymeCore.autodiff_deferred(Forward, Const(f), Const{Nothing}, Const(ctx), args...) + return nothing +end - function fwd_cpu(ctx, f, args...) - EnzymeCore.autodiff(Forward, Const(f), Const{Nothing}, Const(ctx), args...) - return nothing - end +function EnzymeRules.forward( + func::Const{<:Kernel{CPU}}, + ::Type{Const{Nothing}}, + args...; + ndrange = nothing, + workgroupsize = nothing, +) + kernel = func.val + f = kernel.f + fwd_kernel = similar(kernel, cpu_fwd) - function aug_fwd_cpu(ctx, f::FT, ::Val{ModifiedBetween}, subtape, args...) where {ModifiedBetween, FT} - forward, _ = EnzymeCore.autodiff_thunk(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), Const{Core.Typeof(f)}, Const{Nothing}, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) - subtape[__groupindex(ctx)] = forward(Const(f), Const(ctx), args...)[1] - return nothing - end + fwd_kernel(f, args...; ndrange, workgroupsize) +end - function rev_cpu(ctx, f::FT, ::Val{ModifiedBetween}, subtape, args...) where {ModifiedBetween, FT} - _, reverse = EnzymeCore.autodiff_thunk(ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), Const{Core.Typeof(f)}, Const{Nothing}, Const{Core.Typeof(ctx)}, map(Core.Typeof, args)...) 
- tp = subtape[__groupindex(ctx)] - reverse(Const(f), Const(ctx), args..., tp) - return nothing - end +function EnzymeRules.forward( + func::Const{<:Kernel{<:GPU}}, + ::Type{Const{Nothing}}, + args...; + ndrange = nothing, + workgroupsize = nothing, +) + kernel = func.val + f = kernel.f + fwd_kernel = similar(kernel, gpu_fwd) - function EnzymeRules.forward(func::Const{<:Kernel}, ::Type{Const{Nothing}}, args...; ndrange=nothing, workgroupsize=nothing) - kernel = func.val - f = kernel.f - fwd_kernel = similar(kernel, fwd) + fwd_kernel(f, args...; ndrange, workgroupsize) +end - fwd_kernel(f, args...; ndrange, workgroupsize) - end +_enzyme_mkcontext(kernel::Kernel{CPU}, ndrange, iterspace, dynamic) = + mkcontext(kernel, first(blocks(iterspace)), ndrange, iterspace, dynamic) +_enzyme_mkcontext(kernel::Kernel{<:GPU}, ndrange, iterspace, dynamic) = + mkcontext(kernel, ndrange, iterspace) - function EnzymeRules.forward(func::Const{<:Kernel{CPU}}, ::Type{Const{Nothing}}, args...; ndrange=nothing, workgroupsize=nothing) - kernel = func.val - f = kernel.f - fwd_kernel = similar(kernel, fwd_cpu) +_augmented_return(::Kernel{CPU}, subtape, arg_refs, tape_type) = + AugmentedReturn{Nothing,Nothing,Tuple{Array,typeof(arg_refs),typeof(tape_type)}}( + nothing, + nothing, + (subtape, arg_refs, tape_type), + ) +_augmented_return(::Kernel{<:GPU}, subtape, arg_refs, tape_type) = + AugmentedReturn{Nothing,Nothing,Any}(nothing, nothing, (subtape, arg_refs, tape_type)) - fwd_kernel(f, args...; ndrange, workgroupsize) - end +function _create_tape_kernel( + kernel::Kernel{CPU}, + ModifiedBetween, + FT, + ctxTy, + ndrange, + iterspace, + args2..., +) + TapeType = EnzymeCore.tape_type( + ReverseSplitModified(ReverseSplitWithPrimal, ModifiedBetween), + FT, + Const{Nothing}, + Const{ctxTy}, + map(Core.Typeof, args2)..., + ) + subtape = Array{TapeType}(undef, size(blocks(iterspace))) + aug_kernel = similar(kernel, cpu_aug_fwd) + return TapeType, subtape, aug_kernel +end - function EnzymeRules.augmented_primal(config::Config, func::Const{<:Kernel{CPU}}, ::Type{Const{Nothing}}, args::Vararg{Any, N}; ndrange=nothing, workgroupsize=nothing) where N - kernel = func.val - f = kernel.f +function _create_tape_kernel( + kernel::Kernel{<:GPU}, + ModifiedBetween, + FT, + ctxTy, + ndrange, + iterspace, + args2..., +) + # For peeking at the TapeType we need to first construct a correct compilation job + # this requires the use of the device side representation of arguments. + # So we convert the arguments here, this is a bit wasteful since the `aug_kernel` call + # will later do the same. 
+ dev_args2 = ((argconvert(kernel, a) for a in args2)...,) + dev_TT = map(Core.Typeof, dev_args2) - ndrange, workgroupsize, iterspace, dynamic = launch_config(kernel, ndrange, workgroupsize) - block = first(blocks(iterspace)) + job = + EnzymeCore.compiler_job_from_backend(backend(kernel), typeof(() -> return), Tuple{}) + TapeType = EnzymeCore.tape_type( + job, + ReverseSplitModified(ReverseSplitWithPrimal, ModifiedBetween), + FT, + Const{Nothing}, + Const{ctxTy}, + dev_TT..., + ) - ctx = mkcontext(kernel, block, ndrange, iterspace, dynamic) - ctxTy = Core.Typeof(ctx) # CompilerMetadata{ndrange(kernel), Core.Typeof(dynamic)} + # Allocate per thread + subtape = allocate(backend(kernel), TapeType, prod(ndrange)) - # TODO autodiff_deferred on the func.val - ModifiedBetween = Val((overwritten(config)[1], false, overwritten(config)[2:end]...)) + aug_kernel = similar(kernel, gpu_aug_fwd) + return TapeType, subtape, aug_kernel +end - FT = Const{Core.Typeof(f)} +_create_rev_kernel(kernel::Kernel{CPU}) = similar(kernel, cpu_rev) +_create_rev_kernel(kernel::Kernel{<:GPU}) = similar(kernel, gpu_rev) - arg_refs = ntuple(Val(N)) do i - Base.@_inline_meta - if args[i] isa Active - Ref(EnzymeCore.make_zero(args[i].val)) - else - nothing - end - end - args2 = ntuple(Val(N)) do i - Base.@_inline_meta - if args[i] isa Active - EnzymeCore.MixedDuplicated(args[i].val, arg_refs[i]) - else - args[i] - end - end +function cpu_aug_fwd( + ctx, + f::FT, + ::Val{ModifiedBetween}, + subtape, + ::Val{TapeType}, + args..., +) where {ModifiedBetween,FT,TapeType} + # A2 = Const{Nothing} -- since f->Nothing + forward, _ = EnzymeCore.autodiff_thunk( + ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), + Const{Core.Typeof(f)}, + Const{Nothing}, + Const{Core.Typeof(ctx)}, + map(Core.Typeof, args)..., + ) - # TODO in KA backends like CUDAKernels, etc have a version with a parent job type - TapeType = EnzymeCore.tape_type(ReverseSplitModified(ReverseSplitWithPrimal, ModifiedBetween), FT, Const{Nothing}, Const{ctxTy}, map(Core.Typeof, args2)...) 
+ # On the CPU: F is a per block function + # On the CPU: subtape::Vector{Vector} + I = __index_Group_Cartesian(ctx, CartesianIndex(1, 1)) #=fake=# + subtape[I] = forward(Const(f), Const(ctx), args...)[1] + return nothing +end +function cpu_rev( + ctx, + f::FT, + ::Val{ModifiedBetween}, + subtape, + ::Val{TapeType}, + args..., +) where {ModifiedBetween,FT,TapeType} + _, reverse = EnzymeCore.autodiff_thunk( + ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), + Const{Core.Typeof(f)}, + Const{Nothing}, + Const{Core.Typeof(ctx)}, + map(Core.Typeof, args)..., + ) + I = __index_Group_Cartesian(ctx, CartesianIndex(1, 1)) #=fake=# + tp = subtape[I] + reverse(Const(f), Const(ctx), args..., tp) + return nothing +end - subtape = Array{TapeType}(undef, size(blocks(iterspace))) +# GPU support +function gpu_aug_fwd( + ctx, + f::FT, + ::Val{ModifiedBetween}, + subtape, + ::Val{TapeType}, + args..., +) where {ModifiedBetween,FT,TapeType} + # A2 = Const{Nothing} -- since f->Nothing + forward, _ = EnzymeCore.autodiff_deferred_thunk( + ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), + TapeType, + Const{Core.Typeof(f)}, + Const{Nothing}, + Const{Core.Typeof(ctx)}, + map(Core.Typeof, args)..., + ) - aug_kernel = similar(kernel, aug_fwd_cpu) + # On the GPU: F is a per thread function + # On the GPU: subtape::Vector + I = __index_Global_Linear(ctx) + subtape[I] = forward(Const(f), Const(ctx), args...)[1] + return nothing +end - aug_kernel(f, ModifiedBetween, subtape, args2...; ndrange, workgroupsize) +function gpu_rev( + ctx, + f::FT, + ::Val{ModifiedBetween}, + subtape, + ::Val{TapeType}, + args..., +) where {ModifiedBetween,FT,TapeType} + # XXX: TapeType and A2 as args to autodiff_deferred_thunk + _, reverse = EnzymeCore.autodiff_deferred_thunk( + ReverseSplitModified(ReverseSplitWithPrimal, Val(ModifiedBetween)), + TapeType, + Const{Core.Typeof(f)}, + Const{Nothing}, + Const{Core.Typeof(ctx)}, + map(Core.Typeof, args)..., + ) + I = __index_Global_Linear(ctx) + tp = subtape[I] + reverse(Const(f), Const(ctx), args..., tp) + return nothing +end - # TODO the fact that ctxTy is type unstable means this is all type unstable. - # Since custom rules require a fixed return type, explicitly cast to Any, rather - # than returning a AugmentedReturn{Nothing, Nothing, T} where T. 
+function EnzymeRules.augmented_primal( + config::Config, + func::Const{<:Kernel}, + ::Type{Const{Nothing}}, + args::Vararg{Any,N}; + ndrange = nothing, + workgroupsize = nothing, +) where {N} + kernel = func.val + f = kernel.f - res = AugmentedReturn{Nothing, Nothing, Tuple{Array, typeof(arg_refs)}}(nothing, nothing, (subtape, arg_refs)) - return res - end + ndrange, workgroupsize, iterspace, dynamic = + launch_config(kernel, ndrange, workgroupsize) + ctx = _enzyme_mkcontext(kernel, ndrange, iterspace, dynamic) + ctxTy = Core.Typeof(ctx) # CompilerMetadata{ndrange(kernel), Core.Typeof(dynamic)} + # TODO autodiff_deferred on the func.val + ModifiedBetween = Val((overwritten(config)[1], false, overwritten(config)[2:end]...)) - function EnzymeRules.reverse(config::Config, func::Const{<:Kernel{CPU}}, ::Type{<:EnzymeCore.Annotation}, tape, args::Vararg{Any, N}; ndrange=nothing, workgroupsize=nothing) where N - subtape, arg_refs = tape + FT = Const{Core.Typeof(f)} - args2 = ntuple(Val(N)) do i - Base.@_inline_meta - if args[i] isa Active - EnzymeCore.MixedDuplicated(args[i].val, arg_refs[i]) + arg_refs = ntuple(Val(N)) do i + Base.@_inline_meta + if args[i] isa Active + if func.val isa Kernel{<:GPU} + error("Active kernel arguments not supported on GPU") else - args[i] + Ref(EnzymeCore.make_zero(args[i].val)) end + else + nothing end + end + args2 = ntuple(Val(N)) do i + Base.@_inline_meta + if args[i] isa Active + MixedDuplicated(args[i].val, arg_refs[i]) + else + args[i] + end + end - kernel = func.val - f = kernel.f + TapeType, subtape, aug_kernel = _create_tape_kernel( + kernel, + ModifiedBetween, + FT, + ctxTy, + ndrange, + iterspace, + args2..., + ) + aug_kernel(f, ModifiedBetween, subtape, Val(TapeType), args2...; ndrange, workgroupsize) - ModifiedBetween = Val((overwritten(config)[1], false, overwritten(config)[2:end]...)) + # TODO the fact that ctxTy is type unstable means this is all type unstable. + # Since custom rules require a fixed return type, explicitly cast to Any, rather + # than returning a AugmentedReturn{Nothing, Nothing, T} where T. + return _augmented_return(kernel, subtape, arg_refs, TapeType) +end - rev_kernel = similar(func.val, rev_cpu) - rev_kernel(f, ModifiedBetween, subtape, args2...; ndrange, workgroupsize) - return ntuple(Val(N)) do i - Base.@_inline_meta - if args[i] isa Active - arg_refs[i][] - else - nothing - end +function EnzymeRules.reverse( + config::Config, + func::Const{<:Kernel}, + ::Type{<:EnzymeCore.Annotation}, + tape, + args::Vararg{Any,N}; + ndrange = nothing, + workgroupsize = nothing, +) where {N} + subtape, arg_refs, tape_type = tape + + args2 = ntuple(Val(N)) do i + Base.@_inline_meta + if args[i] isa Active + MixedDuplicated(args[i].val, arg_refs[i]) + else + args[i] + end + end + + kernel = func.val + f = kernel.f + + ModifiedBetween = Val((overwritten(config)[1], false, overwritten(config)[2:end]...)) + + rev_kernel = _create_rev_kernel(kernel) + rev_kernel( + f, + ModifiedBetween, + subtape, + Val(tape_type), + args2...; + ndrange, + workgroupsize, + ) + res = ntuple(Val(N)) do i + Base.@_inline_meta + if args[i] isa Active + arg_refs[i][] + else + nothing end end + # Reverse synchronization right after the kernel launch + synchronize(backend(kernel)) + return res +end + +# Synchronize rules +# TODO: Right now we do the synchronization as part of the kernel launch in the augmented primal +# and reverse rules. 
This is not ideal, as we would want to launch the kernel in the reverse +# synchronize rule and then synchronize where the launch was. However, with the current +# kernel semantics this ensures correctness for now. +function EnzymeRules.augmented_primal( + config::Config, + func::Const{typeof(synchronize)}, + ::Type{Const{Nothing}}, + backend::T, +) where {T<:EnzymeCore.Annotation} + synchronize(backend.val) + return AugmentedReturn(nothing, nothing, nothing) +end + +function EnzymeRules.reverse( + config::Config, + func::Const{typeof(synchronize)}, + ::Type{Const{Nothing}}, + tape, + backend, +) + # noop for now + return (nothing,) +end + end diff --git a/src/KernelAbstractions.jl b/src/KernelAbstractions.jl index efec0c4d..3cc263c0 100644 --- a/src/KernelAbstractions.jl +++ b/src/KernelAbstractions.jl @@ -698,6 +698,18 @@ end __size(args::Tuple) = Tuple{args...} __size(i::Int) = Tuple{i} +""" + argconvert(::Kernel, arg) + +Convert arguments to the device side representation. +""" +argconvert(k::Kernel{T}, arg) where T = + error("Don't know how to convert arguments for Kernel{$T}") + +# Enzyme support +supports_enzyme(::Backend) = false +function __fake_compiler_job end + ### # Extras # - LoopInfo diff --git a/src/cpu.jl b/src/cpu.jl index 8c3e8afd..9779c79f 100644 --- a/src/cpu.jl +++ b/src/cpu.jl @@ -191,4 +191,6 @@ end end # Argument conversion -KernelAbstractions.argconvert(k::Kernel{CPU}, arg) = arg +argconvert(k::Kernel{CPU}, arg) = arg + +supports_enzyme(::CPU) = true diff --git a/src/reflection.jl b/src/reflection.jl index 3ab8080c..e0be71c6 100644 --- a/src/reflection.jl +++ b/src/reflection.jl @@ -1,9 +1,6 @@ import InteractiveUtils export @ka_code_typed, @ka_code_llvm -argconvert(k::Kernel{T}, arg) where T = - error("Don't know how to convert arguments for Kernel{$T}") - using UUIDs const Cthulhu = Base.PkgId(UUID("f68482b8-f384-11e8-15f7-abe071a5a75f"), "Cthulhu") diff --git a/test/Project.toml b/test/Project.toml index b407094b..1fe0211e 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,5 +1,6 @@ [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" diff --git a/test/examples.jl b/test/examples.jl index e92aa804..9b1d48f7 100644 --- a/test/examples.jl +++ b/test/examples.jl @@ -20,6 +20,7 @@ function examples_testsuite(backend_str) @testset "$(basename(example))" for example in examples @eval module $(gensym()) + backend_str = $backend_str include($example) end @test true diff --git a/test/extensions/enzyme.jl b/test/extensions/enzyme.jl index 39e6916a..3d8a5082 100644 --- a/test/extensions/enzyme.jl +++ b/test/extensions/enzyme.jl @@ -38,20 +38,23 @@ function enzyme_testsuite(backend, ArrayT, supports_reverse=true) Enzyme.autodiff(Reverse, square_caller, Duplicated(A, dA), Const(backend())) @test all(dA .≈ (2:2:128)) + # active arguments not support for GPU kernels + if backend == CPU + A .= (1:1:64) + dA .= 1 - A .= (1:1:64) - dA .= 1 - - _, dB, _ = Enzyme.autodiff(Reverse, mul_caller, Duplicated(A, dA), Active(1.2), Const(backend()))[1] + _, dB, _ = Enzyme.autodiff(Reverse, mul_caller, Duplicated(A, dA), Active(1.2), Const(backend()))[1] - @test all(dA .≈ 1.2) - @test dB ≈ sum(1:1:64) + @test all(dA .≈ 1.2) + @test dB ≈ sum(1:1:64) + end end A .= (1:1:64) dA .= 1 Enzyme.autodiff(Forward, square_caller, Duplicated(A, dA), Const(backend())) + 
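+    # GPU forward-mode launches are asynchronous; synchronize before checking dA below.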
KernelAbstractions.synchronize(backend()) @test all(dA .≈ 2:2:128) end diff --git a/test/runtests.jl b/test/runtests.jl index 15c33e27..a87181c2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,5 +74,9 @@ include("extensions/enzyme.jl") @static if VERSION >= v"1.7.0" @testset "Enzyme" begin enzyme_testsuite(CPU, Array) + # Requires CUDA + # if CUDA.functional() && CUDA.has_cuda_gpu() + # enzyme_testsuite(CUDABackend, CuArray) + # end end end From 4797541ca34f8aee15e82e891578315317c89a6a Mon Sep 17 00:00:00 2001 From: William Moses Date: Tue, 18 Jun 2024 16:37:20 -0400 Subject: [PATCH 2/5] Update Project.toml --- test/Project.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Project.toml b/test/Project.toml index 1fe0211e..b407094b 100644 --- a/test/Project.toml +++ b/test/Project.toml @@ -1,6 +1,5 @@ [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" -CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" InteractiveUtils = "b77e0a4c-d291-57a0-90e8-8db25a27a240" LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" From d8516891ba0a4ccd07c6ac165c5f72cfc96ce877 Mon Sep 17 00:00:00 2001 From: William Moses Date: Tue, 18 Jun 2024 16:37:41 -0400 Subject: [PATCH 3/5] Update utils.jl --- examples/utils.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/utils.jl b/examples/utils.jl index 20eb4d3a..ea3d3db3 100644 --- a/examples/utils.jl +++ b/examples/utils.jl @@ -1,5 +1,5 @@ # EXCLUDE FROM TESTING -if backend_str == "CUDA" && Base.find_package("CUDA") !== nothing +if Base.find_package("CUDA") !== nothing using CUDA using CUDA.CUDAKernels const backend = CUDABackend() From 775eb1e3fefed2b74ef2f5eaf55cb217fb3da5d3 Mon Sep 17 00:00:00 2001 From: Michel Schanen Date: Thu, 27 Jun 2024 14:53:42 -0500 Subject: [PATCH 4/5] Buildkite for CUDA reverse --- .buildkite/pipeline.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 6a132136..2e0e553d 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -50,7 +50,7 @@ steps: julia -e 'println("+++ :julia: Running tests") using CUDA include("test/extensions/enzyme.jl") - enzyme_testsuite(CUDABackend, CuArray, false)' + enzyme_testsuite(CUDABackend, CuArray, true)' agents: queue: "juliagpu" cuda: "*" From d6b9be9c4c364710530c22fbba80cf3a9f90c6ce Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Sat, 6 Jul 2024 05:05:41 -0400 Subject: [PATCH 5/5] Update test/runtests.jl --- test/runtests.jl | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index a87181c2..15c33e27 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -74,9 +74,5 @@ include("extensions/enzyme.jl") @static if VERSION >= v"1.7.0" @testset "Enzyme" begin enzyme_testsuite(CPU, Array) - # Requires CUDA - # if CUDA.functional() && CUDA.has_cuda_gpu() - # enzyme_testsuite(CUDABackend, CuArray) - # end end end
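
For reference, a minimal end-to-end sketch of how the rules in this series are exercised,
modeled on the square_caller pattern in test/extensions/enzyme.jl; the kernel and caller
definitions below are illustrative assumptions, not code taken from the patches:

    using KernelAbstractions, Enzyme

    @kernel function square!(A)
        I = @index(Global, Linear)
        @inbounds A[I] *= A[I]
    end

    function square_caller(A, backend)
        kernel = square!(backend)
        kernel(A; ndrange = size(A))
        # Covered by the synchronize rules added above, so the call stays differentiable.
        KernelAbstractions.synchronize(backend)
        return nothing
    end

    A  = collect(Float64, 1:64)
    dA = ones(Float64, 64)
    # Reverse mode drives the augmented_primal/reverse kernel rules; per the accompanying
    # test, dA seeded with ones comes back ≈ 2 .* (1:64).
    Enzyme.autodiff(Reverse, square_caller, Duplicated(A, dA), Const(CPU()))

On a CUDA device the same call with Const(CUDABackend()) takes the Kernel{<:GPU} path,
which builds the tape type from a compiler job for the backend and allocates the
per-thread tape with allocate, as added in patch 1/5.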