From 36a975e1c3ca71924c8829422cfa31b0b59e99b0 Mon Sep 17 00:00:00 2001 From: Charles Kawczynski Date: Mon, 10 Jun 2024 19:55:26 -0400 Subject: [PATCH] Use ClimaComms allowscalar --- .../matrix_fields_multiple_field_solve.jl | 3 - src/MatrixFields/MatrixFields.jl | 3 +- src/MatrixFields/field2arrays.jl | 6 +- src/interface.jl | 3 - test/DataLayouts/unit_fill.jl | 2 +- test/Fields/field.jl | 6 +- test/InputOutput/spectralelement2d.jl | 8 +- test/Limiters/limiter.jl | 7 +- test/Operators/hybrid/extruded_sphere_cuda.jl | 2 +- test/Operators/integrals.jl | 11 ++- .../spectralelement/rectilinear_cuda.jl | 1 - test/Spaces/ddss1.jl | 6 +- test/Spaces/distributed/ddss2.jl | 5 +- test/Spaces/distributed/ddss3.jl | 5 +- test/Spaces/distributed/ddss4.jl | 2 - test/Spaces/distributed_cuda/ddss2.jl | 4 +- test/Spaces/distributed_cuda/ddss3.jl | 4 +- test/Spaces/extruded_cuda.jl | 87 ++++++++++--------- 18 files changed, 75 insertions(+), 90 deletions(-) diff --git a/ext/cuda/matrix_fields_multiple_field_solve.jl b/ext/cuda/matrix_fields_multiple_field_solve.jl index 3b1d2d9ec2..b22f233441 100644 --- a/ext/cuda/matrix_fields_multiple_field_solve.jl +++ b/ext/cuda/matrix_fields_multiple_field_solve.jl @@ -6,11 +6,8 @@ import ClimaCore.MatrixFields import ClimaCore.MatrixFields: _single_field_solve! import ClimaCore.MatrixFields: multiple_field_solve! import ClimaCore.MatrixFields: is_CuArray_type -import ClimaCore: allow_scalar import ClimaCore.Utilities.UnrolledFunctions: unrolled_map -allow_scalar(f, ::ClimaComms.CUDADevice, args...) = CUDA.@allowscalar f(args...) - is_CuArray_type(::Type{T}) where {T <: CUDA.CuArray} = true NVTX.@annotate function multiple_field_solve!( diff --git a/src/MatrixFields/MatrixFields.jl b/src/MatrixFields/MatrixFields.jl index 7bf359a024..5d9ab429c8 100644 --- a/src/MatrixFields/MatrixFields.jl +++ b/src/MatrixFields/MatrixFields.jl @@ -64,7 +64,6 @@ import ..Spaces import ..Spaces: local_geometry_type import ..Fields import ..Operators -import ..allow_scalar using ..Utilities.UnrolledFunctions @@ -120,7 +119,7 @@ function Base.show(io::IO, field::ColumnwiseBandMatrixField) end column_field = Fields.column(field, 1, 1, 1) io = IOContext(io, :compact => true, :limit => true) - allow_scalar(ClimaComms.device(field)) do + ClimaComms.allowscalar(ClimaComms.device(field)) do Base.print_array(io, column_field2array_view(column_field)) end else diff --git a/src/MatrixFields/field2arrays.jl b/src/MatrixFields/field2arrays.jl index 31015ef7b6..4a8d6069d0 100644 --- a/src/MatrixFields/field2arrays.jl +++ b/src/MatrixFields/field2arrays.jl @@ -53,16 +53,16 @@ function column_field2array(field::Fields.FiniteDifferenceField) last_row = matrix_d < n_cols - n_rows ? n_rows : n_cols - matrix_d diagonal_data_view = view(diagonal_data, first_row:last_row) - allow_scalar(ClimaComms.device(field)) do + ClimaComms.allowscalar(ClimaComms.device(field)) do copyto!(matrix_diagonal, diagonal_data_view) end - allow_scalar(ClimaComms.device(field)) do + ClimaComms.allowscalar(ClimaComms.device(field)) do copyto!(matrix_diagonal, diagonal_data_view) end end return matrix else # field represents a vector - return allow_scalar(ClimaComms.device(field)) do + return ClimaComms.allowscalar(ClimaComms.device(field)) do Array(column_field2array_view(field)) end end diff --git a/src/interface.jl b/src/interface.jl index 4c7b14a7cf..ceb5217eda 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -41,6 +41,3 @@ Base.@propagate_inbounds column_args(args::Tuple{Any}, inds...) = Base.@propagate_inbounds column_args(args::Tuple{}, inds...) = () function level end - -# TODO: move to ClimaComms -allow_scalar(f, ::ClimaComms.AbstractDevice, args...) = f(args...) diff --git a/test/DataLayouts/unit_fill.jl b/test/DataLayouts/unit_fill.jl index 4cf3e4c59c..703c6ec15f 100644 --- a/test/DataLayouts/unit_fill.jl +++ b/test/DataLayouts/unit_fill.jl @@ -1,6 +1,6 @@ #= julia --project -using Revise; include(joinpath("test", "DataLayouts", "fill.jl")) +using Revise; include(joinpath("test", "DataLayouts", "unit_fill.jl")) =# using Test using ClimaCore.DataLayouts diff --git a/test/Fields/field.jl b/test/Fields/field.jl index cb81b68977..5b008aae23 100644 --- a/test/Fields/field.jl +++ b/test/Fields/field.jl @@ -346,7 +346,7 @@ end end function call_getcolumn(fv, colidx, device) - ClimaCore.allow_scalar(device) do + ClimaComms.allowscalar(device) do fvcol = fv[colidx] end nothing @@ -364,7 +364,7 @@ end colidx = Fields.ColumnIndex((1, 1), 1) # arbitrary index device = ClimaComms.device() - ClimaCore.allow_scalar(device) do + ClimaComms.allowscalar(device) do @test all(parent(fv.c.a[colidx]) .== Float32(1)) @test all(parent(fv.f.y[colidx]) .== Float32(2)) @test propertynames(fv) == propertynames(fv[colidx]) @@ -825,7 +825,7 @@ convergence_rate(err, Δh) = zcf = Fields.coordinate_field(Y.y).z Δz = Fields.Δz_field(axes(zcf)) Δz_col = Δz[Fields.ColumnIndex((1, 1), 1)] - Δz_1 = ClimaCore.allow_scalar(device) do + Δz_1 = ClimaComms.allowscalar(device) do parent(Δz_col)[1] end key = zelem diff --git a/test/InputOutput/spectralelement2d.jl b/test/InputOutput/spectralelement2d.jl index 4ab617ba21..e89ffd8756 100644 --- a/test/InputOutput/spectralelement2d.jl +++ b/test/InputOutput/spectralelement2d.jl @@ -14,9 +14,6 @@ import ClimaCore: DataLayouts, InputOutput -using CUDA -CUDA.allowscalar(false) - function init_state(local_geometry, p) coord = local_geometry.coordinates x, y = coord.x, coord.y @@ -86,6 +83,7 @@ end reader = InputOutput.HDF5Reader(filename, context) restart_Y = InputOutput.read_field(reader, "Y") # read fieldvector from hdf5 file close(reader) - CUDA.allowscalar(true) - @test restart_Y == Y # test if restart is exact + ClimaComms.allowscalar(device) do + @test restart_Y == Y # test if restart is exact + end end diff --git a/test/Limiters/limiter.jl b/test/Limiters/limiter.jl index 5037de6180..8b679420e2 100644 --- a/test/Limiters/limiter.jl +++ b/test/Limiters/limiter.jl @@ -2,9 +2,8 @@ julia --project=test using Revise; include(joinpath("test", "Limiters", "limiter.jl")) =# -import CUDA -CUDA.allowscalar(false) using ClimaComms +ClimaComms.@import_required_backends using ClimaCore: DataLayouts, Fields, @@ -139,7 +138,7 @@ end S = map(Iterators.product(1:n1, 1:n2)) do (h1, h2) (h1, h2, slab(limiter.q_bounds, h1 + n1 * (h2 - 1))) end - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do @test all(map(T -> T[3][1].x ≈ 2 * (T[1] - 1), S)) # q_min @test all(map(T -> T[3][1].y ≈ 3 * (T[2] - 1), S)) # q_min @test all(map(T -> T[3][2].x ≈ 2 * T[1], S)) # q_max @@ -150,7 +149,7 @@ end SN = map(Iterators.product(1:n1, 1:n2)) do (h1, h2) (h1, h2, slab(limiter.q_bounds_nbr, h1 + n1 * (h2 - 1))) end - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do @test all(map(T -> T[3][1].x ≈ 2 * max(T[1] - 2, 0), SN)) # q_min @test all(map(T -> T[3][1].y ≈ 3 * max(T[2] - 2, 0), SN)) # q_min @test all(map(T -> T[3][2].x ≈ 2 * min(T[1] + 1, n1), SN)) # q_max diff --git a/test/Operators/hybrid/extruded_sphere_cuda.jl b/test/Operators/hybrid/extruded_sphere_cuda.jl index 1b62b207d0..7dfa78d649 100644 --- a/test/Operators/hybrid/extruded_sphere_cuda.jl +++ b/test/Operators/hybrid/extruded_sphere_cuda.jl @@ -125,7 +125,7 @@ end 2 .* cos.(coords_cpu.long .+ coords_cpu.lat), ) x_gpu = Geometry.UVWVector.(cosd.(coords_gpu.lat), 0.0, 0.0) - CUDA.allowscalar(false) + f_gpu = sin.(coords_gpu.lat .+ 2 .* coords_gpu.long) g_gpu = Geometry.UVVector.( diff --git a/test/Operators/integrals.jl b/test/Operators/integrals.jl index e7d8c71a33..fd05aa1f8f 100644 --- a/test/Operators/integrals.jl +++ b/test/Operators/integrals.jl @@ -1,8 +1,7 @@ using Test using JET -import CUDA -CUDA.allowscalar(false) import ClimaComms +ClimaComms.@import_required_backends import ClimaCore import ClimaCore: Spaces, Fields, Operators import ClimaCore.RecursiveApply: rmax @@ -38,8 +37,10 @@ function test_column_integral_definite!(center_space, alloc_lim) ᶠz = Fields.coordinate_field(face_space).z z_top = Fields.level(ᶠz, Operators.right_idx(face_space)) ᶜu = map(z -> (; one = one(z), powers = (z, z^2, z^3)), ᶜz) - CUDA.@allowscalar ∫u_ref = + device = ClimaComms.device(ᶜu) + ∫u_ref = ClimaComms.allowscalar(device) do map(z -> (; one = z, powers = (z^2 / 2, z^3 / 3, z^4 / 4)), z_top) + end ∫u_test = similar(∫u_ref) column_integral_definite!(∫u_test, ᶜu) @@ -118,8 +119,10 @@ function test_column_mapreduce!(space, alloc_lim) z_top_field = Fields.level(z_field, Operators.right_idx(space)) sin_field = @. sin(pi * z_field / z_top_field) square_and_sin(z, sin_value) = (; square = z^2, sin = sin_value) - CUDA.@allowscalar reduced_field_ref = + device = ClimaComms.device(z_field) + reduced_field_ref = ClimaComms.allowscalar(device) do map(z -> (; square = z^2, sin = one(z)), z_top_field) + end reduced_field_test = similar(reduced_field_ref) args = (square_and_sin, rmax, reduced_field_test, z_field, sin_field) diff --git a/test/Operators/spectralelement/rectilinear_cuda.jl b/test/Operators/spectralelement/rectilinear_cuda.jl index 394be93f2c..8725d07c43 100644 --- a/test/Operators/spectralelement/rectilinear_cuda.jl +++ b/test/Operators/spectralelement/rectilinear_cuda.jl @@ -48,7 +48,6 @@ grid_topology = Topologies.Topology2D( grid_space = Spaces.SpectralElementSpace2D(grid_topology, quad) coords = Fields.coordinate_field(grid_space) -CUDA.allowscalar(false) f = sin.(coords.x .+ 2 .* coords.y) g = Geometry.UVVector.(sin.(coords.x), 2 .* cos.(coords.y .+ coords.x)) diff --git a/test/Spaces/ddss1.jl b/test/Spaces/ddss1.jl index 2db17405d7..4a41674342 100644 --- a/test/Spaces/ddss1.jl +++ b/test/Spaces/ddss1.jl @@ -4,8 +4,6 @@ using Revise; include(joinpath("test", "Spaces", "ddss1.jl")) =# using Logging using Test -import CUDA -CUDA.allowscalar(false) import ClimaCore: Domains, @@ -70,11 +68,11 @@ init_state_vector(local_geometry, p) = Geometry.Covariant12Vector(1.0, -1.0) @testset "4x1 element mesh with periodic boundaries on 1 process" begin Nq = 3 space, comms_ctx = distributed_space((4, 1), (true, true), (Nq, 1, 1)) - + device = ClimaComms.device(comms_ctx) @test Topologies.nlocalelems(Spaces.topology(space)) == 4 - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do @test Topologies.local_neighboring_elements( Spaces.topology(space), 1, diff --git a/test/Spaces/distributed/ddss2.jl b/test/Spaces/distributed/ddss2.jl index 3e1950dfb3..7254df22ad 100644 --- a/test/Spaces/distributed/ddss2.jl +++ b/test/Spaces/distributed/ddss2.jl @@ -1,5 +1,3 @@ -import CUDA -CUDA.allowscalar(false) include("ddss_setup.jl") #= @@ -16,10 +14,11 @@ include("ddss_setup.jl") @testset "4x1 element mesh with periodic boundaries on 2 processes" begin Nq = 3 space, comms_ctx = distributed_space((4, 1), (true, true), (Nq, 1, 1)) + device = ClimaComms.device(comms_ctx) @test Topologies.nlocalelems(Spaces.topology(space)) == 2 - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do @test Topologies.local_neighboring_elements( Spaces.topology(space), 1, diff --git a/test/Spaces/distributed/ddss3.jl b/test/Spaces/distributed/ddss3.jl index 13cffeaf95..f482abaabc 100644 --- a/test/Spaces/distributed/ddss3.jl +++ b/test/Spaces/distributed/ddss3.jl @@ -1,5 +1,3 @@ -import CUDA -CUDA.allowscalar(false) include("ddss_setup.jl") #= @@ -37,9 +35,10 @@ partition numbers @testset "4x4 element mesh with non-periodic boundaries on 3 processes" begin Nq = 3 space, comms_ctx = distributed_space((4, 4), (false, false), (Nq, 1, 1)) + device = ClimaComms.device(comms_ctx) @test Topologies.nlocalelems(Spaces.topology(space)) == (pid == 1 ? 6 : 5) - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do if pid == 1 # gidx 1 @test Topologies.local_neighboring_elements( diff --git a/test/Spaces/distributed/ddss4.jl b/test/Spaces/distributed/ddss4.jl index 0e1f08f5c6..bbfacf4a3d 100644 --- a/test/Spaces/distributed/ddss4.jl +++ b/test/Spaces/distributed/ddss4.jl @@ -1,5 +1,3 @@ -import CUDA -CUDA.allowscalar(false) include("ddss_setup.jl") #= diff --git a/test/Spaces/distributed_cuda/ddss2.jl b/test/Spaces/distributed_cuda/ddss2.jl index 5276983836..aaed460dcf 100644 --- a/test/Spaces/distributed_cuda/ddss2.jl +++ b/test/Spaces/distributed_cuda/ddss2.jl @@ -1,5 +1,3 @@ -import CUDA -CUDA.allowscalar(false) using Logging using Test @@ -69,7 +67,7 @@ pid, nprocs = ClimaComms.init(context) @test Topologies.nlocalelems(Spaces.topology(space)) == 2 - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do @test Topologies.local_neighboring_elements( Spaces.topology(space), 1, diff --git a/test/Spaces/distributed_cuda/ddss3.jl b/test/Spaces/distributed_cuda/ddss3.jl index 43c84a3e11..bcf2cea64c 100644 --- a/test/Spaces/distributed_cuda/ddss3.jl +++ b/test/Spaces/distributed_cuda/ddss3.jl @@ -1,5 +1,3 @@ -import CUDA -CUDA.allowscalar(false) using Logging using Test @@ -88,7 +86,7 @@ partition numbers space = Spaces.SpectralElementSpace2D(topology, quad) @test Topologies.nlocalelems(Spaces.topology(space)) == (pid == 1 ? 6 : 5) - CUDA.@allowscalar begin + ClimaComms.allowscalar(device) do if pid == 1 # gidx 1 @test Topologies.local_neighboring_elements( diff --git a/test/Spaces/extruded_cuda.jl b/test/Spaces/extruded_cuda.jl index 4529ec7c0d..773da1e705 100644 --- a/test/Spaces/extruded_cuda.jl +++ b/test/Spaces/extruded_cuda.jl @@ -34,53 +34,56 @@ end gpu_context = SingletonCommsContext(ClimaComms.CUDADevice()) FT = Float64 - CUDA.allowscalar(true) - # TODO: add support and test for all spaces - cpuspace = TU.CenterExtrudedFiniteDifferenceSpace(FT; context = cpu_context) - gpuspace = TU.CenterExtrudedFiniteDifferenceSpace(FT; context = gpu_context) + device = ClimaComms.device(gpu_context) + ClimaComms.allowscalar(device) do + # TODO: add support and test for all spaces + cpuspace = + TU.CenterExtrudedFiniteDifferenceSpace(FT; context = cpu_context) + gpuspace = + TU.CenterExtrudedFiniteDifferenceSpace(FT; context = gpu_context) - # Test that all geometries match with CPU version: - @test compare( - cpuspace, - gpuspace, - x -> Spaces.local_geometry_data(Spaces.grid(x), Grids.CellCenter()), - ) - @test compare( - cpuspace, - gpuspace, - x -> Spaces.local_geometry_data(Spaces.grid(x), Grids.CellFace()), - ) + # Test that all geometries match with CPU version: + @test compare( + cpuspace, + gpuspace, + x -> Spaces.local_geometry_data(Spaces.grid(x), Grids.CellCenter()), + ) + @test compare( + cpuspace, + gpuspace, + x -> Spaces.local_geometry_data(Spaces.grid(x), Grids.CellFace()), + ) - space = gpuspace - Y = Fields.Field(typeof((; v = FT(0))), space) - X = Fields.Field(typeof((; v = FT(0))), space) - @. Y.v = 0 - @. X.v = 2 - @test all(parent(Y.v) .== 0) - @test all(parent(X.v) .== 2) - CUDA.allowscalar(false) - @. X.v = Y.v - CUDA.allowscalar(true) - @test all(parent(Y.v) .== parent(X.v)) + space = gpuspace + Y = Fields.Field(typeof((; v = FT(0))), space) + X = Fields.Field(typeof((; v = FT(0))), space) + @. Y.v = 0 + @. X.v = 2 + @test all(parent(Y.v) .== 0) + @test all(parent(X.v) .== 2) + end + @. X.v = Y.v + ClimaComms.allowscalar(device) do + @test all(parent(Y.v) .== parent(X.v)) + # TODO: add support and test for all spaces + cpuspace = TU.SpectralElementSpace2D(FT; context = cpu_context) + gpuspace = TU.SpectralElementSpace2D(FT; context = gpu_context) - CUDA.allowscalar(true) - # TODO: add support and test for all spaces - cpuspace = TU.SpectralElementSpace2D(FT; context = cpu_context) - gpuspace = TU.SpectralElementSpace2D(FT; context = gpu_context) + # Test that all geometries match with CPU version: + @test compare(cpuspace, gpuspace, x -> Spaces.local_geometry_data(x)) - # Test that all geometries match with CPU version: - @test compare(cpuspace, gpuspace, x -> Spaces.local_geometry_data(x)) + space = gpuspace + Y = Fields.Field(typeof((; v = FT(0))), space) + X = Fields.Field(typeof((; v = FT(0))), space) + @. Y.v = 0 + @. X.v = 2 + @test all(parent(Y.v) .== 0) + @test all(parent(X.v) .== 2) + end - space = gpuspace - Y = Fields.Field(typeof((; v = FT(0))), space) - X = Fields.Field(typeof((; v = FT(0))), space) - @. Y.v = 0 - @. X.v = 2 - @test all(parent(Y.v) .== 0) - @test all(parent(X.v) .== 2) - CUDA.allowscalar(false) @. X.v = Y.v - CUDA.allowscalar(true) - @test all(parent(Y.v) .== parent(X.v)) + ClimaComms.allowscalar(device) do + @test all(parent(Y.v) .== parent(X.v)) + end end