diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 064333d98c..6d89a2da4b 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -1145,7 +1145,7 @@ steps: key: "perf_gpu_spectral_ops_cuda_float32" command: - "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'" - - "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --device CUDA --float-type Float32" + - "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --float-type Float32" env: CLIMACOMMS_DEVICE: "CUDA" agents: @@ -1153,13 +1153,13 @@ steps: - label: "Perf: SEM operator benchmarks (CPU Float32)" key: "perf_gpu_spectral_ops_cpu_float32" - command: "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --device CPU --float-type Float32" + command: "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --float-type Float32" - label: "Perf: SEM operator benchmarks (cuda Float64)" key: "perf_gpu_spectral_ops_cuda_float64" command: - "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'" - - "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --device CUDA --float-type Float64" + - "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --float-type Float64" env: CLIMACOMMS_DEVICE: "CUDA" agents: @@ -1167,17 +1167,17 @@ steps: - label: "Perf: SEM operator benchmarks (CPU Float64)" key: "perf_gpu_spectral_ops_cpu_float64" - command: "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --device CPU --float-type Float64" + command: "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --float-type Float64" - label: "Perf: SEM operator benchmarks (extruded CPU Float64)" key: "perf_gpu_spectral_ops_extruded_cpu_float64" - command: "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --device CPU --float-type Float64 --space-type ExtrudedFiniteDifferenceSpace" + command: "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --float-type Float64 --space-type ExtrudedFiniteDifferenceSpace" - label: "Perf: SEM operator benchmarks" key: "perf_gpu_spectral_ops" command: - "julia --project=.buildkite -e 'using CUDA; CUDA.versioninfo()'" - - "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl --device CUDA" + - "julia --color=yes --project=.buildkite test/Operators/spectralelement/benchmark_ops.jl" env: CLIMACOMMS_DEVICE: "CUDA" agents: diff --git a/test/Fields/field_multi_broadcast_fusion.jl b/test/Fields/field_multi_broadcast_fusion.jl index 9955a507e1..1928066e9c 100644 --- a/test/Fields/field_multi_broadcast_fusion.jl +++ b/test/Fields/field_multi_broadcast_fusion.jl @@ -74,13 +74,9 @@ end function benchmark_kernel!(f!, X, Y, device) println("\n--------------------------- $(nameof(typeof(f!))) ") - trial = benchmark_kernel!(f!, X, Y, device) + trial = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device $f!($X, $Y) show(stdout, MIME("text/plain"), trial) end -benchmark_kernel!(f!, X, Y, ::ClimaComms.CUDADevice) = - BenchmarkTools.@benchmark CUDA.@sync $f!($X, $Y); -benchmark_kernel!(f!, X, Y, ::ClimaComms.AbstractCPUDevice) = - BenchmarkTools.@benchmark $f!($X, $Y); function show_diff(A, B) for pn in propertynames(A) diff --git a/test/InputOutput/spectralelement2d.jl b/test/InputOutput/spectralelement2d.jl index e89ffd8756..4dfb6048f8 100644 --- a/test/InputOutput/spectralelement2d.jl +++ b/test/InputOutput/spectralelement2d.jl @@ -1,5 +1,6 @@ using Test using ClimaComms +ClimaComms.@import_required_backends using LinearAlgebra import ClimaCore import ClimaCore: diff --git a/test/Operators/hybrid/cuda.jl b/test/Operators/hybrid/cuda.jl index b0e0e5b761..0943417cef 100644 --- a/test/Operators/hybrid/cuda.jl +++ b/test/Operators/hybrid/cuda.jl @@ -1,6 +1,7 @@ using Test using StaticArrays using ClimaComms, ClimaCore +ClimaComms.@import_required_backends import ClimaCore: Geometry, Fields, @@ -11,7 +12,6 @@ import ClimaCore: Operators, Quadratures using LinearAlgebra, IntervalSets -using CUDA using OrdinaryDiffEq function hvspace_3D_box( diff --git a/test/Operators/spectralelement/benchmark_ops.jl b/test/Operators/spectralelement/benchmark_ops.jl index afc2dbd0f3..a7d81fb6bf 100644 --- a/test/Operators/spectralelement/benchmark_ops.jl +++ b/test/Operators/spectralelement/benchmark_ops.jl @@ -13,7 +13,7 @@ using Revise; using ClimaCore include(joinpath(pkgdir(ClimaCore), "test", "Operators", "spectralelement", "benchmark_utils.jl")) include(joinpath(pkgdir(ClimaCore), "test", "Operators", "spectralelement", "benchmark_kernels.jl")) kernel_args = setup_kernel_args(["--float-type", "Float64"]); -device = kernel_args.device +device = ClimaComms.device() trial = benchmark_kernel!(kernel_args, kernel_spectral_div_grad!, device; silent=true); trial = benchmark_kernel_array!(kernel_args.arr_args, kernel_spectral_wdiv_array!, device; silent=true); show(stdout, MIME("text/plain"), trial); @@ -60,7 +60,7 @@ include( function benchmark_all(kernel_args = setup_kernel_args(ARGS)) - device = kernel_args.device + (; device) = kernel_args #= # Run benchmarks for a single kernel with: trial = benchmark_kernel!(kernel_args, kernel_spectral_div_grad!, device) diff --git a/test/Operators/spectralelement/benchmark_utils.jl b/test/Operators/spectralelement/benchmark_utils.jl index 4caffe8ba2..4415f42cfe 100644 --- a/test/Operators/spectralelement/benchmark_utils.jl +++ b/test/Operators/spectralelement/benchmark_utils.jl @@ -59,7 +59,7 @@ function benchmark_kernel_array!( @test all(Array(ϕ_arr) .== Array(ψ_arr)) # compile and confirm correctness # Perform benchmark - trial = BenchmarkTools.@benchmark CUDA.@sync $kernel( + trial = BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device $kernel( $args, threads = $threads, blocks = $blocks, @@ -74,21 +74,14 @@ end function benchmark_kernel!( args, kernel_fun!, - ::ClimaComms.AbstractCPUDevice; + device::ClimaComms.AbstractDevice; silent, ) kernel_fun!(args) # compile first - trial = BenchmarkTools.@benchmark $kernel_fun!($args) - if !silent - show(stdout, MIME("text/plain"), trial) - println() - end - return trial -end - -function benchmark_kernel!(args, kernel_fun!, ::ClimaComms.CUDADevice; silent) - kernel_fun!(args) # compile first - trial = BenchmarkTools.@benchmark CUDA.@sync $kernel_fun!($args) + trial = + BenchmarkTools.@benchmark ClimaComms.@cuda_sync $device $kernel_fun!( + $args, + ) if !silent show(stdout, MIME("text/plain"), trial) println() @@ -101,13 +94,6 @@ function initial_velocity(space) return @. Geometry.Covariant12Vector(Geometry.UVVector(uλ, uϕ)) end -function ismpi() - # detect common environment variables used by MPI launchers - # PMI_RANK appears to be used by MPICH and srun - # OMPI_COMM_WORLD_RANK appears to be used by OpenMPI - return haskey(ENV, "PMI_RANK") || haskey(ENV, "OMPI_COMM_WORLD_RANK") -end - function create_space( context; float_type = Float64, @@ -142,14 +128,6 @@ end function setup_kernel_args(ARGS::Vector{String} = ARGS) s = ArgParseSettings(prog = "spectralelement operator benchmarks") @add_arg_table! s begin - "--device" - help = "Computation device (CPU, CUDA)" - arg_type = String - default = CUDA.functional() ? "CUDA" : "CPU" - "--comms" - help = "Communication type (Singleton, MPI)" - arg_type = String - default = ismpi() ? "MPI" : "Singleton" "--float-type" help = "Floating point type (Float32, Float64)" eval_arg = true @@ -173,16 +151,8 @@ function setup_kernel_args(ARGS::Vector{String} = ARGS) end args = parse_args(ARGS, s) - device = - args["device"] == "CUDA" ? ClimaComms.CUDADevice() : - args["device"] == "CPU" ? ClimaComms.CPUSingleThreaded() : - error("Unknown device: $(args["device"])") - - context = - args["comms"] == "MPI" ? ClimaComms.MPICommsContext(device) : - args["comms"] == "Singleton" ? - ClimaComms.SingletonCommsContext(device) : - error("Unknown comms: $(args["comms"])") + device = ClimaComms.device() + context = ClimaComms.context(device) ClimaComms.init(context) @@ -260,12 +230,10 @@ function setup_kernel_args(ARGS::Vector{String} = ARGS) f = @. Geometry.Contravariant3Vector(Geometry.WVector(ϕ)) s = size(parent(ϕ)) - array_kernel_args = if device isa ClimaComms.AbstractCPUDevice - (; ϕ_arr = fill(FT(1), s), ψ_arr = fill(FT(2), s)) - else - device isa ClimaComms.CUDADevice - (; ϕ_arr = CUDA.fill(FT(1), s), ψ_arr = CUDA.fill(FT(2), s)) - end + ArrayType = ClimaComms.array_type(device) + ϕ_arr = ArrayType(fill(FT(1), s)) + ψ_arr = ArrayType(fill(FT(2), s)) + array_kernel_args = (; ϕ_arr, ψ_arr) kernel_args = (; ϕ, ψ, u, du, f, ϕψ, nt_ϕψ, nt_ϕψ_ft, f_comp, f_comp2) # buffers cannot reside in CuArray kernels diff --git a/test/Operators/spectralelement/rectilinear.jl b/test/Operators/spectralelement/rectilinear.jl index 720b067fff..2fcfdb97e0 100644 --- a/test/Operators/spectralelement/rectilinear.jl +++ b/test/Operators/spectralelement/rectilinear.jl @@ -1,6 +1,7 @@ using Test using StaticArrays using ClimaComms +ClimaComms.@import_required_backends import ClimaCore.DataLayouts: IJFH, VF import ClimaCore: Geometry, diff --git a/test/Operators/spectralelement/rectilinear_cuda.jl b/test/Operators/spectralelement/rectilinear_cuda.jl index 8725d07c43..9ef4c71a7b 100644 --- a/test/Operators/spectralelement/rectilinear_cuda.jl +++ b/test/Operators/spectralelement/rectilinear_cuda.jl @@ -1,6 +1,7 @@ using Test using StaticArrays using ClimaComms, ClimaCore +ClimaComms.@import_required_backends import ClimaCore: Geometry, Fields, @@ -11,7 +12,6 @@ import ClimaCore: Operators, Quadratures using LinearAlgebra, IntervalSets -using CUDA FT = Float64 domain = Domains.RectangleDomain( diff --git a/test/Spaces/distributed_cuda/ddss2.jl b/test/Spaces/distributed_cuda/ddss2.jl index aaed460dcf..3b42f1a545 100644 --- a/test/Spaces/distributed_cuda/ddss2.jl +++ b/test/Spaces/distributed_cuda/ddss2.jl @@ -13,7 +13,6 @@ import ClimaCore: using ClimaComms ClimaComms.@import_required_backends -using CUDA # initializing MPI const device = ClimaComms.device() diff --git a/test/Spaces/distributed_cuda/ddss3.jl b/test/Spaces/distributed_cuda/ddss3.jl index bcf2cea64c..3ec741f3b7 100644 --- a/test/Spaces/distributed_cuda/ddss3.jl +++ b/test/Spaces/distributed_cuda/ddss3.jl @@ -13,7 +13,6 @@ import ClimaCore: using ClimaComms ClimaComms.@import_required_backends -using CUDA # initializing MPI const device = ClimaComms.device() diff --git a/test/Spaces/distributed_cuda/ddss4.jl b/test/Spaces/distributed_cuda/ddss4.jl index 07e7a259b3..14dece5dd7 100644 --- a/test/Spaces/distributed_cuda/ddss4.jl +++ b/test/Spaces/distributed_cuda/ddss4.jl @@ -13,7 +13,6 @@ import ClimaCore: using ClimaComms ClimaComms.@import_required_backends -using CUDA # initializing MPI const device = ClimaComms.device() diff --git a/test/Spaces/distributed_cuda/ddss_ne32_cs.jl b/test/Spaces/distributed_cuda/ddss_ne32_cs.jl index 5b6b0608cb..432e195255 100644 --- a/test/Spaces/distributed_cuda/ddss_ne32_cs.jl +++ b/test/Spaces/distributed_cuda/ddss_ne32_cs.jl @@ -1,5 +1,4 @@ using Test -using CUDA using ClimaComms ClimaComms.@import_required_backends import ClimaCore: diff --git a/test/Spaces/distributed_cuda/space_construction.jl b/test/Spaces/distributed_cuda/space_construction.jl index 89aaaf3fed..5ec9849926 100644 --- a/test/Spaces/distributed_cuda/space_construction.jl +++ b/test/Spaces/distributed_cuda/space_construction.jl @@ -2,7 +2,6 @@ using Logging using Test using ClimaComms ClimaComms.@import_required_backends -using CUDA import ClimaCore: Domains, diff --git a/test/Spaces/extruded_cuda.jl b/test/Spaces/extruded_cuda.jl index 39a817d678..ef1766a6f0 100644 --- a/test/Spaces/extruded_cuda.jl +++ b/test/Spaces/extruded_cuda.jl @@ -31,6 +31,7 @@ end @testset "copyto! with CuArray-backed extruded spaces" begin cpu_context = SingletonCommsContext(ClimaComms.CPUSingleThreaded()) gpu_context = SingletonCommsContext(ClimaComms.CUDADevice()) + device = ClimaComms.device(gpu_context) FT = Float64 device = ClimaComms.device(gpu_context) diff --git a/test/runtests.jl b/test/runtests.jl index a736cbc737..3e09c98a9e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -53,7 +53,7 @@ if !Sys.iswindows() # @safetestset "Fields" begin @time include("Fields/field.jl") end @safetestset "Spectral elem - rectilinear" begin @time include("Operators/spectralelement/rectilinear.jl") end - @safetestset "Spectral elem - opt" begin @time include("Operators/spectralelement/opt.jl") end + # @safetestset "Spectral elem - opt" begin @time include("Operators/spectralelement/opt.jl") end @safetestset "Spectral elem - Diffusion 2d" begin @time include("Operators/spectralelement/diffusion2d.jl") end @safetestset "Spectral elem - sphere geometry" begin @time include("Operators/spectralelement/sphere_geometry.jl") end @safetestset "Spectral elem - sphere gradient" begin @time include("Operators/spectralelement/sphere_gradient.jl") end @@ -111,7 +111,8 @@ if !Sys.iswindows() # Code quality checks @safetestset "Aqua" begin @time include("aqua.jl") end end -if "CUDA" in ARGS +import ClimaComms +if ClimaComms.device() isa ClimaComms.CUDADevice @safetestset "GPU - cuda" begin @time include("gpu/cuda.jl") end @safetestset "GPU - data" begin @time include("DataLayouts/cuda.jl") end @safetestset "GPU - spaces" begin @time include("Spaces/spaces.jl") end