From db0159279feb5f3816a7fd0af47020724b272d18 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Mon, 3 Feb 2025 12:09:32 -0800 Subject: [PATCH 1/3] Add GPU support for extruded 1D spaces --- .buildkite/pipeline.yml | 10 + ext/cuda/data_layouts_mapreduce.jl | 12 +- ext/cuda/data_layouts_threadblock.jl | 24 +- ext/cuda/operators_sem_shmem.jl | 294 ++++++++++++++++------ ext/cuda/operators_spectral_element.jl | 260 +++++++++++++++---- ext/cuda/topologies_dss.jl | 82 ++++-- src/Grids/spectralelement.jl | 12 +- src/MatrixFields/matrix_multiplication.jl | 2 + src/MatrixFields/operator_matrices.jl | 3 + src/Operators/finitedifference.jl | 35 ++- src/Operators/operator2stencil.jl | 3 + src/Operators/pointwisestencil.jl | 2 + src/Spaces/dss.jl | 13 +- src/Spaces/extruded.jl | 11 +- src/Topologies/dss.jl | 121 ++++----- test/DataLayouts/unit_mapreduce.jl | 17 ++ test/Operators/hybrid/unit_2d.jl | 6 +- test/Operators/hybrid/utils_2d.jl | 1 + test/Operators/unit_operators_examples.jl | 17 +- test/Spaces/opt_spaces.jl | 4 +- test/Spaces/unit_spaces.jl | 11 +- 21 files changed, 656 insertions(+), 284 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index a43b659021..3221c902c8 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -609,6 +609,16 @@ steps: - "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/hybrid/unit_2d.jl" - "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/hybrid/convergence_2d.jl" + - label: "Unit: hyb ops 2d CUDA" + key: unit_hyb_ops_2d_cuda + command: + - "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/hybrid/unit_2d.jl" + - "julia --color=yes --check-bounds=yes --project=.buildkite test/Operators/hybrid/convergence_2d.jl" + env: + CLIMACOMMS_DEVICE: "CUDA" + agents: + slurm_gpus: 1 + - label: "Unit: hyb ops 3d" key: unit_hyb_ops_3d command: diff --git a/ext/cuda/data_layouts_mapreduce.jl b/ext/cuda/data_layouts_mapreduce.jl index 551b8d451d..74783b6d78 100644 --- a/ext/cuda/data_layouts_mapreduce.jl +++ b/ext/cuda/data_layouts_mapreduce.jl @@ -24,13 +24,7 @@ end function mapreduce_cuda( f, op, - data::Union{ - DataLayouts.VF, - DataLayouts.IJFH, - DataLayouts.IJHF, - DataLayouts.VIJFH, - DataLayouts.VIJHF, - }; + data::DataLayouts.AbstractData; weighted_jacobian = OnesArray(parent(data)), opargs..., ) @@ -132,9 +126,9 @@ function mapreduce_cuda_kernel!( gidx = _get_gidx(tidx, bidx, effective_blksize) reduction = CUDA.CuStaticSharedArray(T, shmemsize) reduction[tidx] = 0 - (Nij, _, _, Nv, Nh) = DataLayouts.universal_size(us) + (Ni, Nj, _, Nv, Nh) = DataLayouts.universal_size(us) Nf = 1 # a view into `fidx` always gives a size of Nf = 1 - nitems = Nv * Nij * Nij * Nf * Nh + nitems = Nv * Ni * Nj * Nf * Nh # load shmem if gidx ≤ nitems diff --git a/ext/cuda/data_layouts_threadblock.jl b/ext/cuda/data_layouts_threadblock.jl index 91cd0191fb..43f64319af 100644 --- a/ext/cuda/data_layouts_threadblock.jl +++ b/ext/cuda/data_layouts_threadblock.jl @@ -213,15 +213,15 @@ end us::DataLayouts.UniversalSize, n_max_threads::Integer, ) - (Nij, _, _, _, Nh) = DataLayouts.universal_size(us) + (Ni, Nj, _, _, Nh) = DataLayouts.universal_size(us) Nh_thread = min( - Int(fld(n_max_threads, Nij * Nij)), + Int(fld(n_max_threads, Ni * Nj)), maximum_allowable_threads()[3], Nh, ) Nh_blocks = cld(Nh, Nh_thread) - @assert prod((Nij, Nij, Nh_thread)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nij, Nij, Nh_thread))),$n_max_threads)" - return (; threads = (Nij, Nij, Nh_thread), blocks = (Nh_blocks,)) + @assert prod((Ni, Nj, Nh_thread)) ≤ n_max_threads "threads,n_max_threads=($(prod((Ni, Nj, Nh_thread))),$n_max_threads)" + return (; threads = (Ni, Nj, Nh_thread), blocks = (Nh_blocks,)) end @inline function columnwise_universal_index(us::UniversalSize) (i, j, th) = CUDA.threadIdx() @@ -241,9 +241,9 @@ end n_max_threads::Integer; Nnames, ) - (Nij, _, _, _, Nh) = DataLayouts.universal_size(us) - @assert prod((Nij, Nij, Nnames)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nij, Nij, Nnames))),$n_max_threads)" - return (; threads = (Nij, Nij, Nnames), blocks = (Nh,)) + (Ni, Nj, _, _, Nh) = DataLayouts.universal_size(us) + @assert prod((Ni, Nj, Nnames)) ≤ n_max_threads "threads,n_max_threads=($(prod((Ni, Nj, Nnames))),$n_max_threads)" + return (; threads = (Ni, Nj, Nnames), blocks = (Nh,)) end @inline function multiple_field_solve_universal_index(us::UniversalSize) (i, j, iname) = CUDA.threadIdx() @@ -258,12 +258,12 @@ end us::DataLayouts.UniversalSize, n_max_threads::Integer = 256; ) - (Nq, _, _, Nv, Nh) = DataLayouts.universal_size(us) - Nvthreads = min(fld(n_max_threads, Nq * Nq), maximum_allowable_threads()[3]) + (Ni, Nj, _, Nv, Nh) = DataLayouts.universal_size(us) + Nvthreads = min(fld(n_max_threads, Ni * Nj), maximum_allowable_threads()[3]) Nvblocks = cld(Nv, Nvthreads) - @assert prod((Nq, Nq, Nvthreads)) ≤ n_max_threads "threads,n_max_threads=($(prod((Nq, Nq, Nvthreads))),$n_max_threads)" - @assert Nq * Nq ≤ n_max_threads - return (; threads = (Nq, Nq, Nvthreads), blocks = (Nh, Nvblocks), Nvthreads) + @assert prod((Ni, Nj, Nvthreads)) ≤ n_max_threads "threads,n_max_threads=($(prod((Ni, Nj, Nvthreads))),$n_max_threads)" + @assert Ni * Nj ≤ n_max_threads + return (; threads = (Ni, Nj, Nvthreads), blocks = (Nh, Nvblocks), Nvthreads) end @inline function spectral_universal_index(space::Spaces.AbstractSpace) i = threadIdx().x diff --git a/ext/cuda/operators_sem_shmem.jl b/ext/cuda/operators_sem_shmem.jl index d7af370671..f5132b9dd7 100644 --- a/ext/cuda/operators_sem_shmem.jl +++ b/ext/cuda/operators_sem_shmem.jl @@ -4,6 +4,20 @@ import ClimaCore.Operators: Divergence, WeakDivergence, Gradient, WeakGradient, Curl, WeakCurl import ClimaCore.Operators: operator_return_eltype, get_local_geometry +Base.@propagate_inbounds function operator_shmem( + space, + ::Val{Nvt}, + op::Operators.Divergence{(1,)}, + arg, +) where {Nvt} + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + # allocate temp output + RT = Operators.operator_return_eltype(op, eltype(arg)) + Jv¹ = CUDA.CuStaticSharedArray(RT, (Nq, Nvt)) + return (Jv¹,) +end Base.@propagate_inbounds function operator_shmem( space, ::Val{Nvt}, @@ -20,6 +34,23 @@ Base.@propagate_inbounds function operator_shmem( return (Jv¹, Jv²) end +Base.@propagate_inbounds function operator_fill_shmem!( + op::Divergence{(1,)}, + (Jv¹,), + space, + ij, + slabidx, + arg, +) + vt = threadIdx().z + local_geometry = get_local_geometry(space, ij, slabidx) + i, _ = ij.I + Jv¹[i, vt] = + local_geometry.J ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant1(v, local_geometry), + arg, + ) +end Base.@propagate_inbounds function operator_fill_shmem!( op::Divergence{(1, 2)}, (Jv¹, Jv²), @@ -44,6 +75,21 @@ Base.@propagate_inbounds function operator_fill_shmem!( ) end +Base.@propagate_inbounds function operator_shmem( + space, + ::Val{Nvt}, + op::WeakDivergence{(1,)}, + arg, +) where {Nvt} + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + # allocate temp output + RT = operator_return_eltype(op, eltype(arg)) + Nf = DataLayouts.typesize(FT, RT) + WJv¹ = CUDA.CuStaticSharedArray(RT, (Nq, Nvt)) + return (WJv¹,) +end Base.@propagate_inbounds function operator_shmem( space, ::Val{Nvt}, @@ -61,6 +107,23 @@ Base.@propagate_inbounds function operator_shmem( return (WJv¹, WJv²) end +Base.@propagate_inbounds function operator_fill_shmem!( + op::WeakDivergence{(1,)}, + (WJv¹,), + space, + ij, + slabidx, + arg, +) + vt = threadIdx().z + local_geometry = get_local_geometry(space, ij, slabidx) + i, _ = ij.I + WJv¹[i, vt] = + local_geometry.WJ ⊠ RecursiveApply.rmap( + v -> Geometry.contravariant1(v, local_geometry), + arg, + ) +end Base.@propagate_inbounds function operator_fill_shmem!( op::WeakDivergence{(1, 2)}, (WJv¹, WJv²), @@ -85,46 +148,39 @@ Base.@propagate_inbounds function operator_fill_shmem!( ) end +Base.@propagate_inbounds function operator_shmem( + space, + ::Val{Nvt}, + op::Gradient{(1,)}, + arg, +) where {Nvt} + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + return CUDA.CuStaticSharedArray(eltype(arg), (Nq, Nvt)) +end Base.@propagate_inbounds function operator_shmem( space, ::Val{Nvt}, op::Gradient{(1, 2)}, arg, ) where {Nvt} - FT = Spaces.undertype(space) QS = Spaces.quadrature_style(space) Nq = Quadratures.degrees_of_freedom(QS) - IT = eltype(arg) - ET = eltype(IT) - RT = operator_return_eltype(op, IT) - RT12 = Geometry.AxisTensor{ - ET, - 2, - Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2)}}, - SMatrix{2, 2, ET, 4}, - } - RT123 = Geometry.AxisTensor{ - ET, - 2, - Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2, 3)}}, - SMatrix{2, 3, ET, 6}, - } - if RT <: Geometry.Covariant12Vector - # allocate temp output - input = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) - return (input,) - elseif RT <: RT12 - v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) - v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) - return (v₁, v₂) - elseif RT <: RT123 - v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) - v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) - v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) - return (v₁, v₂, v₃) - end + return CUDA.CuStaticSharedArray(eltype(arg), (Nq, Nq, Nvt)) end +Base.@propagate_inbounds function operator_fill_shmem!( + op::Gradient{(1,)}, + input, + space, + ij, + slabidx, + arg, +) + vt = threadIdx().z + i, _ = ij.I + input[i, vt] = arg +end Base.@propagate_inbounds function operator_fill_shmem!( op::Gradient{(1, 2)}, input, @@ -135,37 +191,23 @@ Base.@propagate_inbounds function operator_fill_shmem!( ) vt = threadIdx().z i, j = ij.I - local_geometry = get_local_geometry(space, ij, slabidx) - RT = operator_return_eltype(op, typeof(arg)) - ET = eltype(eltype(arg)) - RT12 = Geometry.AxisTensor{ - ET, - 2, - Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2)}}, - SMatrix{2, 2, ET, 4}, - } - RT123 = Geometry.AxisTensor{ - ET, - 2, - Tuple{Geometry.CovariantAxis{(1, 2)}, Geometry.LocalAxis{(1, 2, 3)}}, - SMatrix{2, 3, ET, 6}, - } - if RT <: Geometry.Covariant12Vector - (v,) = input - v[i, j, vt] = arg - elseif RT <: RT12 - v₁, v₂ = input - v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u - v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v - elseif RT <: RT123 - v₁, v₂, v₃ = input - v₁[i, j, vt] = Geometry.LocalVector(arg, local_geometry).u - v₂[i, j, vt] = Geometry.LocalVector(arg, local_geometry).v - v₃[i, j, vt] = Geometry.LocalVector(arg, local_geometry).w - end + input[i, j, vt] = arg end - +Base.@propagate_inbounds function operator_shmem( + space, + ::Val{Nvt}, + op::WeakGradient{(1,)}, + arg, +) where {Nvt} + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + # allocate temp output + IT = eltype(arg) + Wf = CUDA.CuStaticSharedArray(IT, (Nq, Nvt)) + return (Wf,) +end Base.@propagate_inbounds function operator_shmem( space, ::Val{Nvt}, @@ -181,6 +223,20 @@ Base.@propagate_inbounds function operator_shmem( return (Wf,) end +Base.@propagate_inbounds function operator_fill_shmem!( + op::WeakGradient{(1,)}, + (Wf,), + space, + ij, + slabidx, + arg, +) + vt = threadIdx().z + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ * local_geometry.invJ + i, _ = ij.I + Wf[i, vt] = W ⊠ arg +end Base.@propagate_inbounds function operator_fill_shmem!( op::WeakGradient{(1, 2)}, (Wf,), @@ -196,7 +252,33 @@ Base.@propagate_inbounds function operator_fill_shmem!( Wf[i, j, vt] = W ⊠ arg end - +Base.@propagate_inbounds function operator_shmem( + space, + ::Val{Nvt}, + op::Curl{(1,)}, + arg, +) where {Nvt} + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + IT = eltype(arg) + ET = eltype(IT) + RT = operator_return_eltype(op, IT) + # allocate temp output + if RT <: Geometry.Contravariant3Vector # input is a Covariant12Vector + v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + return (nothing, v₂) + elseif RT <: Geometry.Contravariant2Vector # input is a Covariant3Vector + v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + return (v₃,) + elseif RT <: Geometry.Contravariant23Vector # input is a Covariant123Vector + v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + return (nothing, v₂, v₃) + else + error("invalid return type") + end +end Base.@propagate_inbounds function operator_shmem( space, ::Val{Nvt}, @@ -210,15 +292,14 @@ Base.@propagate_inbounds function operator_shmem( ET = eltype(IT) RT = operator_return_eltype(op, IT) # allocate temp output - if RT <: Geometry.Contravariant3Vector - # input data is a Covariant12Vector field + if RT <: Geometry.Contravariant3Vector # input is a Covariant12Vector v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) return (v₁, v₂) - elseif RT <: Geometry.Contravariant12Vector + elseif RT <: Geometry.Contravariant12Vector # input is a Covariant3Vector v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) return (v₃,) - elseif RT <: Geometry.Contravariant123Vector + elseif RT <: Geometry.Contravariant123Vector # input is a Covariant123Vector v₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) v₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) v₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) @@ -228,6 +309,30 @@ Base.@propagate_inbounds function operator_shmem( end end +Base.@propagate_inbounds function operator_fill_shmem!( + op::Curl{(1,)}, + work, + space, + ij, + slabidx, + arg, +) + vt = threadIdx().z + i, _ = ij.I + local_geometry = get_local_geometry(space, ij, slabidx) + RT = operator_return_eltype(op, typeof(arg)) + if RT <: Geometry.Contravariant3Vector + _, v₂ = work + v₂[i, vt] = Geometry.covariant2(arg, local_geometry) + elseif RT <: Geometry.Contravariant2Vector + (v₃,) = work + v₃[i, vt] = Geometry.covariant3(arg, local_geometry) + else + _, v₂, v₃ = work + v₂[i, vt] = Geometry.covariant2(arg, local_geometry) + v₃[i, vt] = Geometry.covariant3(arg, local_geometry) + end +end Base.@propagate_inbounds function operator_fill_shmem!( op::Curl{(1, 2)}, work, @@ -255,8 +360,33 @@ Base.@propagate_inbounds function operator_fill_shmem!( end end - - +Base.@propagate_inbounds function operator_shmem( + space, + ::Val{Nvt}, + op::WeakCurl{(1,)}, + arg, +) where {Nvt} + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + IT = eltype(arg) + ET = eltype(IT) + RT = operator_return_eltype(op, IT) + # allocate temp output + if RT <: Geometry.Contravariant3Vector # input is a Covariant12Vector + Wv₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + return (nothing, Wv₂) + elseif RT <: Geometry.Contravariant2Vector # input is a Covariant3Vector + Wv₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + return (Wv₃,) + elseif RT <: Geometry.Contravariant23Vector # input is a Covariant123Vector + Wv₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + Wv₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nvt)) + return (nothing, Wv₂, Wv₃) + else + error("invalid return type") + end +end Base.@propagate_inbounds function operator_shmem( space, ::Val{Nvt}, @@ -270,15 +400,14 @@ Base.@propagate_inbounds function operator_shmem( ET = eltype(IT) RT = operator_return_eltype(op, IT) # allocate temp output - if RT <: Geometry.Contravariant3Vector - # input data is a Covariant12Vector field + if RT <: Geometry.Contravariant3Vector # input is a Covariant12Vector Wv₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) Wv₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) return (Wv₁, Wv₂) - elseif RT <: Geometry.Contravariant12Vector + elseif RT <: Geometry.Contravariant12Vector # input is a Covariant3Vector Wv₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) return (Wv₃,) - elseif RT <: Geometry.Contravariant123Vector + elseif RT <: Geometry.Contravariant123Vector # input is a Covariant123Vector Wv₁ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) Wv₂ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) Wv₃ = CUDA.CuStaticSharedArray(ET, (Nq, Nq, Nvt)) @@ -288,6 +417,31 @@ Base.@propagate_inbounds function operator_shmem( end end +Base.@propagate_inbounds function operator_fill_shmem!( + op::WeakCurl{(1,)}, + work, + space, + ij, + slabidx, + arg, +) + vt = threadIdx().z + i, _ = ij.I + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ * local_geometry.invJ + RT = operator_return_eltype(op, typeof(arg)) + if RT <: Geometry.Contravariant3Vector + _, Wv₂ = work + Wv₂[i, vt] = W ⊠ Geometry.covariant2(arg, local_geometry) + elseif RT <: Geometry.Contravariant2Vector + (Wv₃,) = work + Wv₃[i, vt] = W ⊠ Geometry.covariant3(arg, local_geometry) + else + _, Wv₂, Wv₃ = work + Wv₂[i, vt] = W ⊠ Geometry.covariant2(arg, local_geometry) + Wv₃[i, vt] = W ⊠ Geometry.covariant3(arg, local_geometry) + end +end Base.@propagate_inbounds function operator_fill_shmem!( op::WeakCurl{(1, 2)}, work, diff --git a/ext/cuda/operators_spectral_element.jl b/ext/cuda/operators_spectral_element.jl index d22ce173a7..141e17f82a 100644 --- a/ext/cuda/operators_spectral_element.jl +++ b/ext/cuda/operators_spectral_element.jl @@ -177,6 +177,29 @@ Base.@propagate_inbounds function resolve_shmem!(obj, ij, slabidx) nothing end +Base.@propagate_inbounds function operator_evaluate( + op::Divergence{(1,)}, + (Jv¹,), + space, + ij, + slabidx, +) + vt = threadIdx().z + i, _ = ij.I + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + local_geometry = get_local_geometry(space, ij, slabidx) + + DJv = D[i, 1] ⊠ Jv¹[1, vt] + for k in 2:Nq + DJv = DJv ⊞ D[i, k] ⊠ Jv¹[k, vt] + end + return RecursiveApply.rmul(DJv, local_geometry.invJ) +end Base.@propagate_inbounds function operator_evaluate( op::Divergence{(1, 2)}, (Jv¹, Jv²), @@ -204,6 +227,29 @@ Base.@propagate_inbounds function operator_evaluate( return RecursiveApply.rmul(DJv, local_geometry.invJ) end +Base.@propagate_inbounds function operator_evaluate( + op::WeakDivergence{(1,)}, + (WJv¹,), + space, + ij, + slabidx, +) + vt = CUDA.threadIdx().z + i, _ = ij.I + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + local_geometry = get_local_geometry(space, ij, slabidx) + + Dᵀ₁WJv¹ = D[1, i] ⊠ WJv¹[1, vt] + for k in 2:Nq + Dᵀ₁WJv¹ = Dᵀ₁WJv¹ ⊞ D[k, i] ⊠ WJv¹[k, vt] + end + return ⊟(RecursiveApply.rdiv(Dᵀ₁WJv¹, local_geometry.WJ)) +end Base.@propagate_inbounds function operator_evaluate( op::WeakDivergence{(1, 2)}, (WJv¹, WJv²), @@ -230,6 +276,37 @@ Base.@propagate_inbounds function operator_evaluate( return ⊟(RecursiveApply.rdiv(Dᵀ₁WJv¹ ⊞ Dᵀ₂WJv², local_geometry.WJ)) end +Base.@propagate_inbounds function operator_evaluate( + op::Gradient{(1,)}, + input, + space, + ij, + slabidx, +) + vt = threadIdx().z + i, _ = ij.I + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + @inbounds begin + ∂f∂ξ₁ = D[i, 1] ⊠ input[1, vt] + for k in 2:Nq + ∂f∂ξ₁ = ∂f∂ξ₁ ⊞ D[i, k] ⊠ input[k, vt] + end + end + if eltype(input) <: Number + return Geometry.Covariant1Vector(∂f∂ξ₁) + elseif eltype(input) <: Geometry.AxisVector + tensor_axes = (Geometry.Covariant1Axis(), axes(eltype(input))[1]) + tensor_components = hcat(Geometry.components(∂f∂ξ₁))' + return Geometry.AxisTensor(tensor_axes, tensor_components) + else + error("Unsupported input type for gradient operator: $(eltype(input))") + end +end Base.@propagate_inbounds function operator_evaluate( op::Gradient{(1, 2)}, input, @@ -245,61 +322,50 @@ Base.@propagate_inbounds function operator_evaluate( Nq = Quadratures.degrees_of_freedom(QS) D = Quadratures.differentiation_matrix(FT, QS) - if length(input) == 1 # check types - (v₁,) = input - @inbounds begin - ∂f∂ξ₁ = D[i, 1] ⊠ v₁[1, j, vt] - ∂f∂ξ₂ = D[j, 1] ⊠ v₁[i, 1, vt] - for k in 2:Nq - ∂f∂ξ₁ = ∂f∂ξ₁ ⊞ D[i, k] ⊠ v₁[k, j, vt] - ∂f∂ξ₂ = ∂f∂ξ₂ ⊞ D[j, k] ⊠ v₁[i, k, vt] - end + @inbounds begin + ∂f∂ξ₁ = D[i, 1] ⊠ input[1, j, vt] + ∂f∂ξ₂ = D[j, 1] ⊠ input[i, 1, vt] + for k in 2:Nq + ∂f∂ξ₁ = ∂f∂ξ₁ ⊞ D[i, k] ⊠ input[k, j, vt] + ∂f∂ξ₂ = ∂f∂ξ₂ ⊞ D[j, k] ⊠ input[i, k, vt] end + end + if eltype(input) <: Number return Geometry.Covariant12Vector(∂f∂ξ₁, ∂f∂ξ₂) - elseif length(input) == 2 - # Update `shmem` - v₁, v₂ = input - @inbounds begin - ∂f₁∂ξ₁ = D[i, 1] ⊠ v₁[1, j, vt] - ∂f₁∂ξ₂ = D[j, 1] ⊠ v₁[i, 1, vt] - ∂f₂∂ξ₁ = D[i, 1] ⊠ v₂[1, j, vt] - ∂f₂∂ξ₂ = D[j, 1] ⊠ v₂[i, 1, vt] - @simd for k in 2:Nq - ∂f₁∂ξ₁ = ∂f₁∂ξ₁ ⊞ D[i, k] ⊠ v₁[k, j, vt] - ∂f₁∂ξ₂ = ∂f₁∂ξ₂ ⊞ D[j, k] ⊠ v₁[i, k, vt] - ∂f₂∂ξ₁ = ∂f₂∂ξ₁ ⊞ D[i, k] ⊠ v₂[k, j, vt] - ∂f₂∂ξ₂ = ∂f₂∂ξ₂ ⊞ D[j, k] ⊠ v₂[i, k, vt] - end - end - return Geometry.AxisTensor( - (Geometry.Covariant12Axis(), Geometry.UVAxis()), - (∂f₁∂ξ₁, ∂f₁∂ξ₂, ∂f₂∂ξ₁, ∂f₂∂ξ₂), - ) + elseif eltype(input) <: Geometry.AxisVector + tensor_axes = (Geometry.Covariant12Axis(), axes(eltype(input))[1]) + tensor_components = + hcat(Geometry.components(∂f∂ξ₁), Geometry.components(∂f∂ξ₂))' + return Geometry.AxisTensor(tensor_axes, tensor_components) else - v₁, v₂, v₃ = input - @inbounds begin - ∂f₁∂ξ₁ = D[i, 1] ⊠ v₁[1, j, vt] - ∂f₁∂ξ₂ = D[j, 1] ⊠ v₁[i, 1, vt] - ∂f₂∂ξ₁ = D[i, 1] ⊠ v₂[1, j, vt] - ∂f₂∂ξ₂ = D[j, 1] ⊠ v₂[i, 1, vt] - ∂f₃∂ξ₁ = D[i, 1] ⊠ v₃[1, j, vt] - ∂f₃∂ξ₂ = D[j, 1] ⊠ v₃[i, 1, vt] - @simd for k in 2:Nq - ∂f₁∂ξ₁ = ∂f₁∂ξ₁ ⊞ D[i, k] ⊠ v₁[k, j, vt] - ∂f₁∂ξ₂ = ∂f₁∂ξ₂ ⊞ D[j, k] ⊠ v₁[i, k, vt] - ∂f₂∂ξ₁ = ∂f₂∂ξ₁ ⊞ D[i, k] ⊠ v₂[k, j, vt] - ∂f₂∂ξ₂ = ∂f₂∂ξ₂ ⊞ D[j, k] ⊠ v₂[i, k, vt] - ∂f₃∂ξ₁ = ∂f₃∂ξ₁ ⊞ D[i, k] ⊠ v₃[k, j, vt] - ∂f₃∂ξ₂ = ∂f₃∂ξ₂ ⊞ D[j, k] ⊠ v₃[i, k, vt] - end - end - return Geometry.AxisTensor( - (Geometry.Covariant12Axis(), Geometry.UVWAxis()), - (∂f₁∂ξ₁, ∂f₁∂ξ₂, ∂f₂∂ξ₁, ∂f₂∂ξ₂, ∂f₃∂ξ₁, ∂f₃∂ξ₂), - ) + error("Unsupported input type for gradient operator: $(eltype(input))") end end +Base.@propagate_inbounds function operator_evaluate( + op::WeakGradient{(1,)}, + (Wf,), + space, + ij, + slabidx, +) + vt = threadIdx().z + i, _ = ij.I + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + + local_geometry = get_local_geometry(space, ij, slabidx) + W = local_geometry.WJ * local_geometry.invJ + + Dᵀ₁Wf = D[1, i] ⊠ Wf[1, vt] + for k in 2:Nq + Dᵀ₁Wf = Dᵀ₁Wf ⊞ D[k, i] ⊠ Wf[k, vt] + end + return Geometry.Covariant1Vector(⊟(RecursiveApply.rdiv(Dᵀ₁Wf, W))) +end Base.@propagate_inbounds function operator_evaluate( op::WeakGradient{(1, 2)}, (Wf,), @@ -330,6 +396,54 @@ Base.@propagate_inbounds function operator_evaluate( ) end +Base.@propagate_inbounds function operator_evaluate( + op::Curl{(1,)}, + work, + space, + ij, + slabidx, +) + vt = threadIdx().z + i, _ = ij.I + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + local_geometry = get_local_geometry(space, ij, slabidx) + + if length(work) == 2 + _, v₂ = work + D₁v₂ = D[i, 1] ⊠ v₂[1, vt] + for k in 2:Nq + D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, vt] + end + return Geometry.Contravariant3Vector( + RecursiveApply.rmul(D₁v₂, local_geometry.invJ), + ) + elseif length(work) == 1 + (v₃,) = work + D₁v₃ = D[i, 1] ⊠ v₃[1, vt] + for k in 2:Nq + D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, vt] + end + return Geometry.Contravariant2Vector( + ⊟(RecursiveApply.rmul(D₁v₃, local_geometry.invJ)), + ) + else + _, v₂, v₃ = work + D₁v₂ = D[i, 1] ⊠ v₂[1, vt] + D₁v₃ = D[i, 1] ⊠ v₃[1, vt] + @simd for k in 2:Nq + D₁v₂ = D₁v₂ ⊞ D[i, k] ⊠ v₂[k, vt] + D₁v₃ = D₁v₃ ⊞ D[i, k] ⊠ v₃[k, vt] + end + return Geometry.Contravariant23Vector( + ⊟(RecursiveApply.rmul(D₁v₃, local_geometry.invJ)), + RecursiveApply.rmul(D₁v₂, local_geometry.invJ), + ) + end +end Base.@propagate_inbounds function operator_evaluate( op::Curl{(1, 2)}, work, @@ -389,6 +503,54 @@ Base.@propagate_inbounds function operator_evaluate( end end +Base.@propagate_inbounds function operator_evaluate( + op::WeakCurl{(1,)}, + work, + space, + ij, + slabidx, +) + vt = threadIdx().z + i, _ = ij.I + + FT = Spaces.undertype(space) + QS = Spaces.quadrature_style(space) + Nq = Quadratures.degrees_of_freedom(QS) + D = Quadratures.differentiation_matrix(FT, QS) + local_geometry = get_local_geometry(space, ij, slabidx) + + if length(work) == 2 + _, Wv₂ = work + Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, vt] + for k in 2:Nq + Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, vt] + end + return Geometry.Contravariant3Vector( + RecursiveApply.rdiv(⊟(Dᵀ₁Wv₂), local_geometry.WJ), + ) + elseif length(work) == 1 + (Wv₃,) = work + Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, vt] + for k in 2:Nq + Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, vt] + end + return Geometry.Contravariant2Vector( + RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), + ) + else + _, Wv₂, Wv₃ = work + Dᵀ₁Wv₂ = D[1, i] ⊠ Wv₂[1, vt] + Dᵀ₁Wv₃ = D[1, i] ⊠ Wv₃[1, vt] + @simd for k in 2:Nq + Dᵀ₁Wv₂ = Dᵀ₁Wv₂ ⊞ D[k, i] ⊠ Wv₂[k, vt] + Dᵀ₁Wv₃ = Dᵀ₁Wv₃ ⊞ D[k, i] ⊠ Wv₃[k, vt] + end + return Geometry.Contravariant23Vector( + RecursiveApply.rdiv(Dᵀ₁Wv₃, local_geometry.WJ), + RecursiveApply.rdiv(⊟(Dᵀ₁Wv₂), local_geometry.WJ), + ) + end +end Base.@propagate_inbounds function operator_evaluate( op::WeakCurl{(1, 2)}, work, diff --git a/ext/cuda/topologies_dss.jl b/ext/cuda/topologies_dss.jl index e3fa7df9d7..25dd5caa89 100644 --- a/ext/cuda/topologies_dss.jl +++ b/ext/cuda/topologies_dss.jl @@ -2,7 +2,7 @@ import ClimaCore: DataLayouts, Topologies, Spaces, Fields import ClimaCore.DataLayouts: CartesianFieldIndex using CUDA import ClimaCore.Topologies -import ClimaCore.Topologies: DSSDataTypes, DSSPerimeterTypes +import ClimaCore.Topologies: DSSTypes1D, DSSTypes2D, DSSPerimeterTypes import ClimaCore.Topologies: perimeter_vertex_node_index _max_threads_cuda() = 256 @@ -19,7 +19,7 @@ _configure_threadblock(nitems) = function Topologies.dss_load_perimeter_data!( dev::ClimaComms.CUDADevice, dss_buffer::Topologies.DSSBuffer, - data::DSSDataTypes, + data::DSSTypes2D, perimeter::Topologies.Perimeter2D, ) (; perimeter_data) = dss_buffer @@ -40,7 +40,7 @@ end function dss_load_perimeter_data_kernel!( perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, + data::DSSTypes2D, perimeter::Topologies.Perimeter2D{Nq}, ) where {Nq} gidx = threadIdx().x + (blockIdx().x - Int32(1)) * blockDim().x @@ -60,7 +60,7 @@ end function Topologies.dss_unload_perimeter_data!( dev::ClimaComms.CUDADevice, - data::DSSDataTypes, + data::DSSTypes2D, dss_buffer::Topologies.DSSBuffer, perimeter, ) @@ -81,7 +81,7 @@ function Topologies.dss_unload_perimeter_data!( end function dss_unload_perimeter_data_kernel!( - data::DSSDataTypes, + data::DSSTypes2D, perimeter_data::DSSPerimeterTypes, perimeter::Topologies.Perimeter2D{Nq}, ) where {Nq} @@ -195,10 +195,10 @@ end function Topologies.dss_transform!( device::ClimaComms.CUDADevice, perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, + data::DSSTypes2D, perimeter::Topologies.Perimeter2D, - local_geometry::DSSDataTypes, - dss_weights::DSSDataTypes, + local_geometry::DSSTypes2D, + dss_weights::DSSTypes2D, localelems::AbstractVector{Int}, ) nlocalelems = length(localelems) @@ -240,10 +240,10 @@ end function dss_transform_kernel!( perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, + data::DSSTypes2D, perimeter::Topologies.Perimeter2D, - local_geometry::DSSDataTypes, - dss_weights::DSSDataTypes, + local_geometry::DSSTypes2D, + dss_weights::DSSTypes2D, localelems::AbstractVector{Int}, ::Val{nlocalelems}, ) where {nlocalelems} @@ -271,8 +271,8 @@ end function Topologies.dss_untransform!( device::ClimaComms.CUDADevice, perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, - local_geometry::DSSDataTypes, + data::DSSTypes2D, + local_geometry::DSSTypes2D, perimeter::Topologies.Perimeter2D, localelems::AbstractVector{Int}, ) @@ -312,8 +312,8 @@ end function dss_untransform_kernel!( perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, - local_geometry::DSSDataTypes, + data::DSSTypes2D, + local_geometry::DSSTypes2D, perimeter::Topologies.Perimeter2D, localelems::AbstractVector{Int}, ::Val{nlocalelems}, @@ -582,3 +582,55 @@ function dss_ghost_kernel!( end return nothing end + +function Topologies.dss_1d!( + ::ClimaComms.CUDADevice, + data::DSSTypes1D, + topology::Topologies.IntervalTopology, + local_geometry = nothing, + dss_weights = nothing, +) + (_, _, _, Nv, Nh) = DataLayouts.universal_size(data) + nfaces = Topologies.isperiodic(topology) ? Nh : Nh - 1 + nitems = Nv * nfaces + threads = _max_threads_cuda() + p = linear_partition(nitems, threads) + args = (data, local_geometry, dss_weights, nfaces) + auto_launch!( + dss_1d_kernel!, + args; + threads_s = p.threads, + blocks_s = p.blocks, + ) + return nothing +end + +function dss_1d_kernel!(data, local_geometry, dss_weights, nfaces) + T = eltype(data) + (Ni, _, _, Nv, Nh) = DataLayouts.universal_size(data) + gidx = threadIdx().x + (blockIdx().x - 1) * blockDim().x + if gidx ≤ Nv * nfaces + left_face_elem = cld(gidx, Nv) + level = gidx - (left_face_elem - 1) * Nv + right_face_elem = left_face_elem == Nh ? 1 : left_face_elem + 1 + left_idx = CartesianIndex(Ni, 1, 1, level, left_face_elem) + right_idx = CartesianIndex(1, 1, 1, level, right_face_elem) + val = + Topologies.dss_transform( + data, + local_geometry, + dss_weights, + left_idx, + ) ⊞ Topologies.dss_transform( + data, + local_geometry, + dss_weights, + right_idx, + ) + data[left_idx] = + Topologies.dss_untransform(T, val, local_geometry, left_idx) + data[right_idx] = + Topologies.dss_untransform(T, val, local_geometry, right_idx) + end + return nothing +end diff --git a/src/Grids/spectralelement.jl b/src/Grids/spectralelement.jl index 24e583b7fc..deae263d6c 100644 --- a/src/Grids/spectralelement.jl +++ b/src/Grids/spectralelement.jl @@ -63,6 +63,7 @@ function _SpectralElementGrid1D( ::Val{Nh}, ::Type{horizontal_layout_type}, ) where {Nh, horizontal_layout_type} + DA = ClimaComms.array_type(topology) global_geometry = Geometry.CartesianGlobalGeometry() CoordType = Topologies.coordinate_type(topology) AIdx = Geometry.coordinate_axis(CoordType) @@ -103,12 +104,13 @@ function _SpectralElementGrid1D( end end + device_local_geometry = DataLayouts.rebuild(local_geometry, DA) return SpectralElementGrid1D( topology, quadrature_style, global_geometry, - local_geometry, - compute_dss_weights(local_geometry, topology, quadrature_style), + device_local_geometry, + compute_dss_weights(device_local_geometry, topology, quadrature_style), ) end @@ -584,11 +586,7 @@ function compute_dss_weights(local_geometry, topology, quadrature_style) # Although the weights are defined as WJ / Σ collocated WJ, we can use J # instead of WJ if the weights are symmetric across element boundaries. dss_weights = copy(local_geometry.J) - if topology isa Topologies.IntervalTopology - is_dss_required && Topologies.dss_1d!(topology, dss_weights) - else - is_dss_required && Topologies.dss!(dss_weights, topology) - end + is_dss_required && Topologies.dss!(dss_weights, topology) @. dss_weights = local_geometry.J / dss_weights return dss_weights end diff --git a/src/MatrixFields/matrix_multiplication.jl b/src/MatrixFields/matrix_multiplication.jl index 8ad5cb3395..5df6eb9a83 100644 --- a/src/MatrixFields/matrix_multiplication.jl +++ b/src/MatrixFields/matrix_multiplication.jl @@ -201,6 +201,8 @@ This means that we can express the bounds on the interior values of ``i`` as struct MultiplyColumnwiseBandMatrixField <: Operators.FiniteDifferenceOperator end const ⋅ = MultiplyColumnwiseBandMatrixField() +Operators.strip_space(op::MultiplyColumnwiseBandMatrixField, _) = op + struct TopLeftMatrixCorner <: Operators.AbstractBoundaryCondition end struct BottomRightMatrixCorner <: Operators.AbstractBoundaryCondition end diff --git a/src/MatrixFields/operator_matrices.jl b/src/MatrixFields/operator_matrices.jl index 7eaa097968..e87f098b3d 100644 --- a/src/MatrixFields/operator_matrices.jl +++ b/src/MatrixFields/operator_matrices.jl @@ -90,6 +90,9 @@ function FDOperatorMatrix(op::O) where {O} return FDOperatorMatrix{O}(op) end +Operators.strip_space(op::FDOperatorMatrix, parent_space) = + FDOperatorMatrix(Operators.strip_space(op.op, parent_space)) + struct LazyOneArgFDOperatorMatrix{O <: OneArgFDOperator} <: AbstractLazyOperator op::O end diff --git a/src/Operators/finitedifference.jl b/src/Operators/finitedifference.jl index 2efae3f784..861004f7bb 100644 --- a/src/Operators/finitedifference.jl +++ b/src/Operators/finitedifference.jl @@ -80,6 +80,10 @@ Subtypes should define: """ abstract type AbstractBoundaryCondition end +strip_space(bc::AbstractBoundaryCondition, parent_space) = + hasproperty(bc, :val) ? + unionall_type(typeof(bc))(strip_space(bc.val, parent_space)) : bc + """ NullBoundaryCondition() @@ -204,6 +208,12 @@ has_boundary( ::RightBoundaryWindow{name}, ) where {name} = hasproperty(op.bcs, name) +strip_space(op::FiniteDifferenceOperator, parent_space) = + unionall_type(typeof(op))( + NamedTuple{keys(op.bcs)}( + strip_space_args(values(op.bcs), parent_space), + ), + ) abstract type AbstractStencilStyle <: Fields.AbstractFieldStyle end @@ -272,7 +282,15 @@ function Base.Broadcast.instantiate( return Base.Broadcast.Broadcasted{Style}(bc.f, args, axes) end - +function strip_space(sbc::StencilBroadcasted{Style}, parent_space) where {Style} + current_space = axes(sbc) + new_space = placeholder_space(current_space, parent_space) + return StencilBroadcasted{Style}( + strip_space(sbc.op, current_space), + strip_space_args(sbc.args, current_space), + new_space, + ) +end """ return_eltype(::Op, fields...) @@ -1356,6 +1374,11 @@ struct MonotoneLocalExtrema <: LimiterConstraint end LinVanLeerC2F(; constraint, kwargs...) = LinVanLeerC2F(NamedTuple(kwargs), constraint) +strip_space(op::LinVanLeerC2F, parent_space) = LinVanLeerC2F( + NamedTuple{keys(op.bcs)}(strip_space_args(values(op.bcs), parent_space)), + op.constraint, +) + return_eltype(::LinVanLeerC2F, V, A, dt) = Geometry.Contravariant3Vector{eltype(eltype(V))} @@ -3802,16 +3825,6 @@ function _threaded_copyto!(field_out::Field, bc, Ni::Int, Nj::Int, Nh::Int) return field_out end -function strip_space(bc::StencilBroadcasted{Style}, parent_space) where {Style} - current_space = axes(bc) - new_space = placeholder_space(current_space, parent_space) - return StencilBroadcasted{Style}( - bc.op, - strip_space_args(bc.args, current_space), - new_space, - ) -end - function Base.copyto!( field_out::Field, bc::Union{ diff --git a/src/Operators/operator2stencil.jl b/src/Operators/operator2stencil.jl index 111a7e4f25..9a4715e126 100644 --- a/src/Operators/operator2stencil.jl +++ b/src/Operators/operator2stencil.jl @@ -53,6 +53,9 @@ struct Operator2Stencil{O <: FiniteDifferenceOperator} <: op::O end +strip_space(op::Operator2Stencil, parent_space) = + Operator2Stencil(strip_space(op.op, parent_space)) + extrapolation_increases_bandwidth_error(op_type::Type) = throw( ArgumentError( "Operator2Stencil currently only supports operators whose stencils \ diff --git a/src/Operators/pointwisestencil.jl b/src/Operators/pointwisestencil.jl index 4060e5e335..ace5897fb3 100644 --- a/src/Operators/pointwisestencil.jl +++ b/src/Operators/pointwisestencil.jl @@ -285,6 +285,8 @@ Bandwidths of Matrix-Matrix Product abstract type PointwiseStencilOperator <: FiniteDifferenceOperator end +strip_space(op::PointwiseStencilOperator, _) = op + struct LeftStencilBoundary <: AbstractBoundaryCondition end struct RightStencilBoundary <: AbstractBoundaryCondition end diff --git a/src/Spaces/dss.jl b/src/Spaces/dss.jl index 96caa127f5..2f0ab42c60 100644 --- a/src/Spaces/dss.jl +++ b/src/Spaces/dss.jl @@ -12,7 +12,7 @@ import ..Topologies: fill_send_buffer!, load_from_recv_buffer!, DSSTypesAll, - DSSDataTypes, + DSSTypes2D, DSSPerimeterTypes @@ -102,7 +102,7 @@ function weighted_dss_prepare!(data, space, dss_buffer::Nothing) end function weighted_dss_prepare!( - data::DSSDataTypes, + data::DSSTypes2D, space::Union{ Spaces.SpectralElementSpace2D, Spaces.ExtrudedFiniteDifferenceSpace, @@ -169,7 +169,7 @@ representative ghost vertices which store result of "ghost local" DSS are loaded 4). Start DSS communication with neighboring processes """ function weighted_dss_start!( - data::DSSDataTypes, + data::DSSTypes2D, space::Union{ Spaces.SpectralElementSpace2D, Spaces.ExtrudedFiniteDifferenceSpace, @@ -227,15 +227,16 @@ function weighted_dss_internal!( ) assert_same_eltype(data, dss_buffer) length(parent(data)) == 0 && return nothing + device = ClimaComms.device(topology(hspace)) if hspace isa SpectralElementSpace1D dss_1d!( - topology(hspace), + device, data, + topology(hspace), local_geometry_data(space), dss_weights(space), ) else - device = ClimaComms.device(topology(hspace)) dss_transform!( device, dss_buffer, @@ -303,7 +304,7 @@ weighted_dss_ghost!( function weighted_dss_ghost!( - data::DSSDataTypes, + data::DSSTypes2D, space::Union{AbstractSpectralElementSpace, ExtrudedFiniteDifferenceSpace}, hspace::SpectralElementSpace2D, dss_buffer::DSSBuffer, diff --git a/src/Spaces/extruded.jl b/src/Spaces/extruded.jl index ced77f28d7..cd9e6e88f5 100644 --- a/src/Spaces/extruded.jl +++ b/src/Spaces/extruded.jl @@ -220,15 +220,10 @@ column(space::ExtrudedFiniteDifferenceSpace, i, j, h) = column(space::ExtrudedFiniteDifferenceSpace, i, h) = column(space, Grids.ColumnIndex((i,), h)) -level(space::CenterExtrudedFiniteDifferenceSpace2D, v::Integer) = +level(space::ExtrudedFiniteDifferenceSpace, v) = + space isa ExtrudedFiniteDifferenceSpace3D ? + SpectralElementSpace2D(level(grid(space), v)) : SpectralElementSpace1D(level(grid(space), v)) -level(space::FaceExtrudedFiniteDifferenceSpace2D, v::PlusHalf) = - SpectralElementSpace1D(level(grid(space), v)) -level(space::CenterExtrudedFiniteDifferenceSpace3D, v::Integer) = - SpectralElementSpace2D(level(grid(space), v)) -level(space::FaceExtrudedFiniteDifferenceSpace3D, v::PlusHalf) = - SpectralElementSpace2D(level(grid(space), v)) - nlevels(space::ExtrudedFiniteDifferenceSpace) = size(local_geometry_data(space), 4) diff --git a/src/Topologies/dss.jl b/src/Topologies/dss.jl index 0b2c389d38..35fbd5e1d7 100644 --- a/src/Topologies/dss.jl +++ b/src/Topologies/dss.jl @@ -1,21 +1,14 @@ using .DataLayouts: CartesianFieldIndex -const DSSTypesAll = Union{ - DataLayouts.IFH, - DataLayouts.IHF, - DataLayouts.VIFH, - DataLayouts.VIHF, - DataLayouts.IJFH, - DataLayouts.IJHF, - DataLayouts.VIJFH, - DataLayouts.VIJHF, -} -const DSSDataTypes = Union{ +const DSSTypes1D = + Union{DataLayouts.IFH, DataLayouts.IHF, DataLayouts.VIFH, DataLayouts.VIHF} +const DSSTypes2D = Union{ DataLayouts.IJFH, DataLayouts.IJHF, DataLayouts.VIJFH, DataLayouts.VIJHF, } +const DSSTypesAll = Union{DSSTypes1D, DSSTypes2D} const DSSPerimeterTypes = Union{DataLayouts.VIFH, DataLayouts.VIHF} """ @@ -68,10 +61,10 @@ end Creates a [`DSSBuffer`](@ref) for the field data corresponding to `data` """ create_dss_buffer( - data::DSSDataTypes, + data::DSSTypes2D, topology::Topology2D, - local_geometry::Union{DSSDataTypes, Nothing} = nothing, - dss_weights::Union{DSSDataTypes, Nothing} = nothing, + local_geometry::Union{DSSTypes2D, Nothing} = nothing, + dss_weights::Union{DSSTypes2D, Nothing} = nothing, ) = create_dss_buffer( data, topology, @@ -81,11 +74,11 @@ create_dss_buffer( ) function create_dss_buffer( - data::DSSDataTypes, + data::DSSTypes2D, topology::Topology2D, ::Type{PerimeterLayout}, - local_geometry::Union{DSSDataTypes, Nothing} = nothing, - dss_weights::Union{DSSDataTypes, Nothing} = nothing, + local_geometry::Union{DSSTypes2D, Nothing} = nothing, + dss_weights::Union{DSSTypes2D, Nothing} = nothing, ) where {PerimeterLayout} S = eltype(data) Nij = DataLayouts.get_Nij(data) @@ -210,9 +203,9 @@ Part of [`ClimaCore.Spaces.weighted_dss!`](@ref). function dss_transform!( device::ClimaComms.AbstractDevice, dss_buffer::DSSBuffer, - data::DSSDataTypes, - local_geometry::DSSDataTypes, - dss_weights::DSSDataTypes, + data::DSSTypes2D, + local_geometry::DSSTypes2D, + dss_weights::DSSTypes2D, perimeter::Perimeter2D, localelems::AbstractVector{Int}, ) @@ -279,10 +272,10 @@ Part of [`ClimaCore.Spaces.weighted_dss!`](@ref). function dss_transform!( ::ClimaComms.AbstractCPUDevice, perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, + data::DSSTypes2D, perimeter::Perimeter2D{Nq}, - local_geometry::DSSDataTypes, - dss_weights::DSSDataTypes, + local_geometry::DSSTypes2D, + dss_weights::DSSTypes2D, localelems::Vector{Int}, ) where {Nq} (_, _, _, nlevels, _) = DataLayouts.universal_size(perimeter_data) @@ -331,8 +324,8 @@ Part of [`ClimaCore.Spaces.weighted_dss!`](@ref). function dss_untransform!( device::ClimaComms.AbstractDevice, dss_buffer::DSSBuffer, - data::DSSDataTypes, - local_geometry::DSSDataTypes, + data::DSSTypes2D, + local_geometry::DSSTypes2D, perimeter::Perimeter2D, localelems::AbstractVector{Int}, ) @@ -373,8 +366,8 @@ Part of [`ClimaCore.Spaces.weighted_dss!`](@ref). function dss_untransform!( ::ClimaComms.AbstractCPUDevice, perimeter_data::DSSPerimeterTypes, - data::DSSDataTypes, - local_geometry::DSSDataTypes, + data::DSSTypes2D, + local_geometry::DSSTypes2D, perimeter::Perimeter2D, localelems::Vector{Int}, ) @@ -397,7 +390,7 @@ end function dss_load_perimeter_data!( ::ClimaComms.AbstractCPUDevice, dss_buffer::DSSBuffer, - data::DSSDataTypes, + data::DSSTypes2D, perimeter::Perimeter2D, ) (; perimeter_data) = dss_buffer @@ -414,7 +407,7 @@ end function dss_unload_perimeter_data!( ::ClimaComms.AbstractCPUDevice, - data::DSSDataTypes, + data::DSSTypes2D, dss_buffer::DSSBuffer, perimeter::Perimeter2D, ) @@ -684,7 +677,14 @@ end Computed unweighted/pure DSS of `data`. """ -function dss!(data::DSSDataTypes, topology::Topology2D) +function dss!(data::DSSTypes1D, topology::IntervalTopology) + Nij = DataLayouts.get_Nij(data) + length(parent(data)) == 0 && return nothing + device = ClimaComms.device(topology) + dss_1d!(device, data, topology) + return nothing +end +function dss!(data::DSSTypes2D, topology::Topology2D) Nij = DataLayouts.get_Nij(data) length(parent(data)) == 0 && return nothing device = ClimaComms.device(topology) @@ -713,52 +713,23 @@ function dss!(data::DSSDataTypes, topology::Topology2D) end function dss_1d!( - htopology::AbstractTopology, - data, - local_geometry_data = nothing, + ::ClimaComms.AbstractCPUDevice, + data::DSSTypes1D, + topology::IntervalTopology, + local_geometry = nothing, dss_weights = nothing, ) - Nq = size(data, 1) - Nv = size(data, 4) - idx1 = CartesianIndex(1, 1, 1, 1, 1) - idx2 = CartesianIndex(Nq, 1, 1, 1, 1) - @inbounds for (elem1, face1, elem2, face2, reversed) in - interior_faces(htopology) - for level in 1:Nv - @assert face1 == 1 && face2 == 2 && !reversed - local_geometry_slab1 = slab(local_geometry_data, level, elem1) - weight_slab1 = slab(dss_weights, level, elem1) - data_slab1 = slab(data, level, elem1) - - local_geometry_slab2 = slab(local_geometry_data, level, elem2) - weight_slab2 = slab(dss_weights, level, elem2) - data_slab2 = slab(data, level, elem2) - val = - dss_transform( - data_slab1, - local_geometry_slab1, - weight_slab1, - idx1, - ) ⊞ dss_transform( - data_slab2, - local_geometry_slab2, - weight_slab2, - idx2, - ) - - data_slab1[idx1] = dss_untransform( - eltype(data_slab1), - val, - local_geometry_slab1, - idx1, - ) - data_slab2[idx2] = dss_untransform( - eltype(data_slab2), - val, - local_geometry_slab2, - idx2, - ) - end + T = eltype(data) + (Ni, _, _, Nv, Nh) = DataLayouts.universal_size(data) + nfaces = isperiodic(topology) ? Nh : Nh - 1 + @inbounds for left_face_elem in 1:nfaces, level in 1:Nv + right_face_elem = left_face_elem == Nh ? 1 : left_face_elem + 1 + left_idx = CartesianIndex(Ni, 1, 1, level, left_face_elem) + right_idx = CartesianIndex(1, 1, 1, level, right_face_elem) + val = + dss_transform(data, local_geometry, dss_weights, left_idx) ⊞ + dss_transform(data, local_geometry, dss_weights, right_idx) + data[left_idx] = dss_untransform(T, val, local_geometry, left_idx) + data[right_idx] = dss_untransform(T, val, local_geometry, right_idx) end - return data end diff --git a/test/DataLayouts/unit_mapreduce.jl b/test/DataLayouts/unit_mapreduce.jl index 511b92dc7b..7ee1271f00 100644 --- a/test/DataLayouts/unit_mapreduce.jl +++ b/test/DataLayouts/unit_mapreduce.jl @@ -163,6 +163,23 @@ end # data = DataLayouts.IH1JH2{S}(ArrayType{FT}, zeros; Nij); test_mapreduce_2!(context, data_view(data)) # TODO: test end +@testset "mapreduce extruded 1D data" begin + ArrayType = ClimaComms.array_type(device) + FT = Float64 + S = Tuple{FT, FT} + Nv = 4 + Ni = 3 + Nh = 5 + data = IFH{S}(ArrayType{FT}, zeros; Ni, Nh) + test_mapreduce_2!(context, data) + data = IHF{S}(ArrayType{FT}, zeros; Ni, Nh) + test_mapreduce_2!(context, data) + data = VIFH{S}(ArrayType{FT}, zeros; Nv, Ni, Nh) + test_mapreduce_2!(context, data) + data = VIHF{S}(ArrayType{FT}, zeros; Nv, Ni, Nh) + test_mapreduce_2!(context, data) +end + @testset "mapreduce with space with some non-round blocks" begin # https://github.com/CliMA/ClimaCore.jl/issues/2097 space = ClimaCore.CommonSpaces.RectangleXYSpace(; diff --git a/test/Operators/hybrid/unit_2d.jl b/test/Operators/hybrid/unit_2d.jl index c0432afc82..3ee66c0e80 100644 --- a/test/Operators/hybrid/unit_2d.jl +++ b/test/Operators/hybrid/unit_2d.jl @@ -33,8 +33,10 @@ end face_field = sin.(Fields.coordinate_field(hv_face_space).z) for npoints in (3, 8) - M_center = Operators.matrix_interpolate(center_field, npoints) - M_face = Operators.matrix_interpolate(face_field, npoints) + center_field_cpu = Adapt.adapt(Array, center_field) + face_field_cpu = Adapt.adapt(Array, face_field) + M_center = Operators.matrix_interpolate(center_field_cpu, npoints) + M_face = Operators.matrix_interpolate(face_field_cpu, npoints) @test size(M_center) == (10, 10 * npoints) @test size(M_face) == (10 + 1, 10 * npoints) end diff --git a/test/Operators/hybrid/utils_2d.jl b/test/Operators/hybrid/utils_2d.jl index 8ff92e7cdc..b8f508adb3 100644 --- a/test/Operators/hybrid/utils_2d.jl +++ b/test/Operators/hybrid/utils_2d.jl @@ -15,6 +15,7 @@ using Test using StaticArrays, IntervalSets, LinearAlgebra +import Adapt import ClimaComms ClimaComms.@import_required_backends diff --git a/test/Operators/unit_operators_examples.jl b/test/Operators/unit_operators_examples.jl index 534b94855a..3770fb84fc 100644 --- a/test/Operators/unit_operators_examples.jl +++ b/test/Operators/unit_operators_examples.jl @@ -57,21 +57,12 @@ grid = Grids.ExtrudedFiniteDifferenceGrid( ) center_space = Spaces.CenterExtrudedFiniteDifferenceSpace(grid) face_space = Spaces.FaceExtrudedFiniteDifferenceSpace(grid) - -# Create fields and show that it fails ᶜgradᵥ = Operators.GradientF2C() level_field = Fields.level(Fields.Field(Float64, face_space), Utilities.half) ᶠscalar_field = Fields.Field(Float64, face_space) -# Does not work: -using Test -@testset "Broken broadcast expression on GPUs" begin - if device isa ClimaComms.CUDADevice - @test_broken begin - @. ᶜgradᵥ(level_field + ᶠscalar_field) - end - else - @. ᶜgradᵥ(level_field + ᶠscalar_field) - end -end +using Test +@testset "Example broadcast expression on GPUs" begin + @test !isnothing(@. ᶜgradᵥ(level_field + ᶠscalar_field)) +end # Formerly broken; fixed in CliMA/ClimaCore.jl#2172 diff --git a/test/Spaces/opt_spaces.jl b/test/Spaces/opt_spaces.jl index 192b956bae..7003c9deb5 100644 --- a/test/Spaces/opt_spaces.jl +++ b/test/Spaces/opt_spaces.jl @@ -35,7 +35,7 @@ end #! format: off if ClimaComms.device(context) isa ClimaComms.CUDADevice test_n_failures(91, TU.PointSpace, context) - test_n_failures(141, TU.SpectralElementSpace1D, context) + test_n_failures(825, TU.SpectralElementSpace1D, context) test_n_failures(1141, TU.SpectralElementSpace2D, context) test_n_failures(3, TU.ColumnCenterFiniteDifferenceSpace, context) test_n_failures(4, TU.ColumnFaceFiniteDifferenceSpace, context) @@ -44,7 +44,7 @@ end test_n_failures(1146, TU.FaceExtrudedFiniteDifferenceSpace, context) else test_n_failures(0, TU.PointSpace, context) - test_n_failures(137, TU.SpectralElementSpace1D, context) + test_n_failures(150, TU.SpectralElementSpace1D, context) test_n_failures(310, TU.SpectralElementSpace2D, context) test_n_failures(4, TU.ColumnCenterFiniteDifferenceSpace, context) test_n_failures(5, TU.ColumnFaceFiniteDifferenceSpace, context) diff --git a/test/Spaces/unit_spaces.jl b/test/Spaces/unit_spaces.jl index 4a691db084..b1f2e91d43 100644 --- a/test/Spaces/unit_spaces.jl +++ b/test/Spaces/unit_spaces.jl @@ -53,13 +53,14 @@ on_gpu = ClimaComms.device() isa ClimaComms.CUDADevice @test eltype(coord_data) == Geometry.XPoint{Float64} @test DataLayouts.farray_size(Spaces.coordinates_data(space)) == (4, 1, 1) - coord_slab = slab(Spaces.coordinates_data(space), 1) + coord_slab = Adapt.adapt(Array, slab(Spaces.coordinates_data(space), 1)) @test coord_slab[slab_index(1)] == Geometry.XPoint{FT}(-3) @test typeof(coord_slab[slab_index(4)]) == Geometry.XPoint{FT} @test coord_slab[slab_index(4)].x ≈ FT(5) - local_geometry_slab = slab(Spaces.local_geometry_data(space), 1) - dss_weights_slab = slab(space.grid.dss_weights, 1) + local_geometry_slab = + Adapt.adapt(Array, slab(Spaces.local_geometry_data(space), 1)) + dss_weights_slab = Adapt.adapt(Array, slab(space.grid.dss_weights, 1)) for i in 1:4 @test Geometry.components(local_geometry_slab[slab_index(i)].∂x∂ξ) ≈ @@ -79,8 +80,8 @@ on_gpu = ClimaComms.device() isa ClimaComms.CUDADevice point_space = Spaces.column(space, 1, 1) @test point_space isa Spaces.PointSpace - @test Spaces.coordinates_data(point_space)[] == - Spaces.column(coord_data, 1, 1)[] + @test parent(Spaces.coordinates_data(point_space)) == + parent(Spaces.column(coord_data, 1, 1)) @test Spaces.local_geometry_type(typeof(point_space)) <: Geometry.LocalGeometry end From 6ab800c9de3409a95e284f9d88caf80658362710 Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Tue, 4 Feb 2025 23:35:19 -0800 Subject: [PATCH 2/3] Additional bugfixes --- ext/cuda/remapping_distributed.jl | 8 +++----- src/Hypsography/Hypsography.jl | 28 +++++++++++++++------------- 2 files changed, 18 insertions(+), 18 deletions(-) diff --git a/ext/cuda/remapping_distributed.jl b/ext/cuda/remapping_distributed.jl index 4d9c1f6625..9658e4e458 100644 --- a/ext/cuda/remapping_distributed.jl +++ b/ext/cuda/remapping_distributed.jl @@ -130,9 +130,7 @@ function set_interpolated_values_kernel!( out[i, j, k] = 0 for t in 1:Nq out[i, j, k] += - I[i, t] * - I[i, s] * - ( + I[i, t] * ( A * field_values[k][CI(t, 1, 1, v_lo, h)] + B * field_values[k][CI(t, 1, 1, v_hi, h)] ) @@ -275,9 +273,9 @@ function set_interpolated_values_kernel!( h = local_horiz_indices[i] out[i, k] = 0 - for t in 1:Nq, s in 1:Nq + for t in 1:Nq out[i, k] += - I[i, i] * field_values[k][CartesianIndex(t, 1, 1, 1, h)] + I[i, t] * field_values[k][CartesianIndex(t, 1, 1, 1, h)] end end return nothing diff --git a/src/Hypsography/Hypsography.jl b/src/Hypsography/Hypsography.jl index a8a6949bb8..d007d7eb74 100644 --- a/src/Hypsography/Hypsography.jl +++ b/src/Hypsography/Hypsography.jl @@ -77,10 +77,12 @@ end Locate the levels by linear interpolation between the surface field and the top of the domain, using the method of [GalChen1975](@cite). """ -struct LinearAdaption{F <: Fields.Field} <: HypsographyAdaption +struct LinearAdaption{F} <: HypsographyAdaption surface::F end +strip_field(::LinearAdaption) = LinearAdaption(nothing) + Adapt.adapt_structure(to, adaption::LinearAdaption) = LinearAdaption(Adapt.adapt(to, adaption.surface)) @@ -114,20 +116,19 @@ bounds represent the domain bottom and top respectively. `s` governs the decay r If the decay-scale is poorly specified (i.e., `s * zₜ` is lower than the maximum surface elevation), a warning is thrown and `s` is adjusted such that it `szₜ > maximum(z_surface)`. """ -struct SLEVEAdaption{F <: Fields.Field, FT <: Real} <: HypsographyAdaption +struct SLEVEAdaption{F, FT} <: HypsographyAdaption surface::F ηₕ::FT s::FT - function SLEVEAdaption( - surface::Fields.Field, - ηₕ::FT, - s::FT, - ) where {FT <: Real} - @assert 0 <= ηₕ <= 1 - @assert s >= 0 - new{typeof(surface), FT}(surface, ηₕ, s) - end end +function SLEVEAdaption(surface, ηₕ, s) + @assert 0 <= ηₕ <= 1 + @assert s >= 0 + SLEVEAdaption{typeof(surface), FT}(surface, ηₕ, s) +end + +strip_field(adaption::SLEVEAdaption) = + SLEVEAdaption(nothing, adaption.ηₕ, adaption.s) Adapt.adapt_structure(to, adaption::SLEVEAdaption) = SLEVEAdaption(Adapt.adapt(to, adaption.surface), adaption.ηₕ, adaption.s) @@ -180,10 +181,11 @@ function _ExtrudedFiniteDifferenceGrid( vertical_domain = Topologies.domain(vertical_grid) z_top = vertical_domain.coord_max + adaption_ref = Ref(strip_field(adaption)) center_z = - ref_z_to_physical_z.(Ref(adaption), center_z_ref, z_surface, Ref(z_top)) + ref_z_to_physical_z.(adaption_ref, center_z_ref, z_surface, Ref(z_top)) face_z = - ref_z_to_physical_z.(Ref(adaption), face_z_ref, z_surface, Ref(z_top)) + ref_z_to_physical_z.(adaption_ref, face_z_ref, z_surface, Ref(z_top)) return _ExtrudedFiniteDifferenceGrid( horizontal_grid, From 128d4d16205798706bf3b1470b836c755814ec3d Mon Sep 17 00:00:00 2001 From: Dennis Yatunin Date: Thu, 6 Feb 2025 18:24:03 -0800 Subject: [PATCH 3/3] Address reviewer comments --- .buildkite/pipeline.yml | 2 - src/Grids/spectralelement.jl | 7 +- src/Hypsography/Hypsography.jl | 75 ++++---- src/Quadratures/Quadratures.jl | 8 + src/Remapping/interpolate_array.jl | 98 ----------- src/Spaces/dss.jl | 9 +- src/Topologies/dss.jl | 5 +- test/Hypsography/2d.jl | 16 +- test/Remapping/distributed_remapping.jl | 223 ++++++++++++------------ 9 files changed, 163 insertions(+), 280 deletions(-) diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index 3221c902c8..e50f3a6109 100755 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -358,8 +358,6 @@ steps: - label: "Unit: distributed remapping with CUDA (1 process)" key: distributed_remapping_gpu_1proc command: "julia --color=yes --check-bounds=yes --project=.buildkite test/Remapping/distributed_remapping.jl" - env: - CLIMACOMMS_DEVICE: "CUDA" env: CLIMACOMMS_DEVICE: "CUDA" agents: diff --git a/src/Grids/spectralelement.jl b/src/Grids/spectralelement.jl index deae263d6c..26a99c9e48 100644 --- a/src/Grids/spectralelement.jl +++ b/src/Grids/spectralelement.jl @@ -580,13 +580,12 @@ function compute_surface_geometry( end function compute_dss_weights(local_geometry, topology, quadrature_style) - is_dss_required = - Quadratures.unique_degrees_of_freedom(quadrature_style) < - Quadratures.degrees_of_freedom(quadrature_style) + Quadratures.requires_dss(quadrature_style) || return nothing + # Although the weights are defined as WJ / Σ collocated WJ, we can use J # instead of WJ if the weights are symmetric across element boundaries. dss_weights = copy(local_geometry.J) - is_dss_required && Topologies.dss!(dss_weights, topology) + Topologies.dss!(dss_weights, topology) @. dss_weights = local_geometry.J / dss_weights return dss_weights end diff --git a/src/Hypsography/Hypsography.jl b/src/Hypsography/Hypsography.jl index d007d7eb74..3d22174c85 100644 --- a/src/Hypsography/Hypsography.jl +++ b/src/Hypsography/Hypsography.jl @@ -24,7 +24,7 @@ using StaticArrays, LinearAlgebra """ - ref_z_to_physical_z(adaption::HypsographyAdaption, z_ref::ZPoint, z_surface::ZPoint, z_top::ZPoint) :: ZPoint + ref_z_to_physical_z(adaption::HypsographyAdaption, z_ref::ZPoint, z_top::ZPoint) :: ZPoint Convert reference `z`s to physical `z`s as prescribed by the given adaption. @@ -33,12 +33,11 @@ This function has to be the inverse of `physical_z_to_ref_z`. function ref_z_to_physical_z( adaption::HypsographyAdaption, z_ref::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) end """ - physical_z_to_ref_z(adaption::HypsographyAdaption, z_ref::ZPoint, z_surface::ZPoint, z_top::ZPoint) :: ZPoint + physical_z_to_ref_z(adaption::HypsographyAdaption, z_ref::ZPoint, z_top::ZPoint) :: ZPoint Convert physical `z`s to reference `z`s as prescribed by the given adaption. @@ -47,7 +46,6 @@ This function has to be the inverse of `ref_z_to_physical_z`. function physical_z_to_ref_z( adaption::HypsographyAdaption, z_phys::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) end @@ -56,7 +54,6 @@ function physical_z_to_ref_z( function ref_z_to_physical_z( ::Flat, z_ref::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) return z_ref @@ -65,54 +62,55 @@ end function physical_z_to_ref_z( ::Flat, z_physical::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) return z_physical end """ - LinearAdaption(surface::Field) + LinearAdaption(surface) -Locate the levels by linear interpolation between the surface field and the top -of the domain, using the method of [GalChen1975](@cite). +Locate the levels by linear interpolation between the surface and the top of the +domain, using the method of [GalChen1975](@cite). The surface can be specified +as a `ZPoint` or a `Field` of `ZPoint`s. """ struct LinearAdaption{F} <: HypsographyAdaption surface::F end -strip_field(::LinearAdaption) = LinearAdaption(nothing) - Adapt.adapt_structure(to, adaption::LinearAdaption) = LinearAdaption(Adapt.adapt(to, adaption.surface)) # This method is invoked by the ExtrudedFiniteDifferenceGrid constructor function ref_z_to_physical_z( - ::LinearAdaption, + adaption::LinearAdaption, z_ref::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) - Geometry.ZPoint(z_ref.z + (1 - z_ref.z / z_top.z) * z_surface.z) + Geometry.ZPoint(z_ref.z + (1 - z_ref.z / z_top.z) * adaption.surface.z) end # This method is used for remapping function physical_z_to_ref_z( - ::LinearAdaption, + adaption::LinearAdaption, z_physical::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) - Geometry.ZPoint((z_physical.z - z_surface.z) / (1 - z_surface.z / z_top.z)) + Geometry.ZPoint( + (z_physical.z - adaption.surface.z) / + (1 - adaption.surface.z / z_top.z), + ) end """ - SLEVEAdaption(surface::Field, ηₕ::FT, s::FT) + SLEVEAdaption(surface, ηₕ, s) + +Locate vertical levels using an exponential function between the surface and the +top of the domain, using the method of [Schar2002](@cite). The surface can be +specified as a `ZPoint` or a `Field` of `ZPoint`s. -Locate vertical levels using an exponential function between the surface field and the top -of the domain, using the method of [Schar2002](@cite). This method is modified -such no warping is applied above some user defined parameter 0 ≤ ηₕ < 1.0, where the lower and upper -bounds represent the domain bottom and top respectively. `s` governs the decay rate. +This method is modified such no warping is applied above the generalized +coordinate `ηₕ`, where `0 ≤ ηₕ < 1`. `s` governs the decay rate. If the decay-scale is poorly specified (i.e., `s * zₜ` is lower than the maximum surface elevation), a warning is thrown and `s` is adjusted such that it `szₜ > maximum(z_surface)`. """ @@ -121,14 +119,6 @@ struct SLEVEAdaption{F, FT} <: HypsographyAdaption ηₕ::FT s::FT end -function SLEVEAdaption(surface, ηₕ, s) - @assert 0 <= ηₕ <= 1 - @assert s >= 0 - SLEVEAdaption{typeof(surface), FT}(surface, ηₕ, s) -end - -strip_field(adaption::SLEVEAdaption) = - SLEVEAdaption(nothing, adaption.ηₕ, adaption.s) Adapt.adapt_structure(to, adaption::SLEVEAdaption) = SLEVEAdaption(Adapt.adapt(to, adaption.surface), adaption.ηₕ, adaption.s) @@ -136,11 +126,12 @@ Adapt.adapt_structure(to, adaption::SLEVEAdaption) = function ref_z_to_physical_z( adaption::SLEVEAdaption, z_ref::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) - (; ηₕ, s) = adaption - if s * z_top.z <= z_surface.z + (; surface, ηₕ, s) = adaption + @assert 0 <= ηₕ <= 1 + @assert s >= 0 + if s * z_top.z <= adaption.surface.z error("Decay scale (s*z_top) must be higher than max surface elevation") end @@ -148,7 +139,7 @@ function ref_z_to_physical_z( if η <= ηₕ return Geometry.ZPoint( η * z_top.z + - z_surface.z * (sinh((ηₕ - η) / s / ηₕ)) / (sinh(1 / s)), + adaption.surface.z * (sinh((ηₕ - η) / s / ηₕ)) / (sinh(1 / s)), ) else return Geometry.ZPoint(η * z_top.z) @@ -158,12 +149,17 @@ end function physical_z_to_ref_z( adaption::SLEVEAdaption, z_physical::Geometry.ZPoint, - z_surface::Geometry.ZPoint, z_top::Geometry.ZPoint, ) error("This method is not implemented") end +function lazy_data_broadcast(adaption::T) where {T} + n_args = Val(fieldcount(T)) + data_args = ntuple(i -> Fields.todata(getfield(adaption, i)), n_args) + return Base.Broadcast.broadcasted(Operators.unionall_type(T), data_args...) +end # Should this be defined in Fields? It can also be extended to nested structs. + # can redefine this constructor for e.g. multi-arg SLEVE function _ExtrudedFiniteDifferenceGrid( horizontal_grid::Grids.AbstractGrid, @@ -172,7 +168,6 @@ function _ExtrudedFiniteDifferenceGrid( global_geometry::Geometry.AbstractGlobalGeometry, ) @assert Spaces.grid(axes(adaption.surface)) == horizontal_grid - z_surface = Fields.field_values(adaption.surface) center_z_ref = Grids.local_geometry_data(vertical_grid, Grids.CellCenter()).coordinates @@ -181,11 +176,9 @@ function _ExtrudedFiniteDifferenceGrid( vertical_domain = Topologies.domain(vertical_grid) z_top = vertical_domain.coord_max - adaption_ref = Ref(strip_field(adaption)) - center_z = - ref_z_to_physical_z.(adaption_ref, center_z_ref, z_surface, Ref(z_top)) - face_z = - ref_z_to_physical_z.(adaption_ref, face_z_ref, z_surface, Ref(z_top)) + adaption_data = lazy_data_broadcast(adaption) + center_z = ref_z_to_physical_z.(adaption_data, center_z_ref, Ref(z_top)) + face_z = ref_z_to_physical_z.(adaption_data, face_z_ref, Ref(z_top)) return _ExtrudedFiniteDifferenceGrid( horizontal_grid, diff --git a/src/Quadratures/Quadratures.jl b/src/Quadratures/Quadratures.jl index 9cb2e053ea..66a58b335c 100644 --- a/src/Quadratures/Quadratures.jl +++ b/src/Quadratures/Quadratures.jl @@ -33,6 +33,14 @@ Returns the degrees_of_freedom of the `QuadratureStyle` concrete type """ @inline degrees_of_freedom(::QuadratureStyle{Nq}) where {Nq} = Nq +""" + requires_dss(QuadratureStyle) + +Determines whether the `QuadratureStyle` requires direct stiffness summation. +""" +requires_dss(quadstyle) = + unique_degrees_of_freedom(quadstyle) < degrees_of_freedom(quadstyle) + """ points, weights = quadrature_points(::Type{FT}, quadrature_style) diff --git a/src/Remapping/interpolate_array.jl b/src/Remapping/interpolate_array.jl index 67d41df3d8..c339a9cdd4 100644 --- a/src/Remapping/interpolate_array.jl +++ b/src/Remapping/interpolate_array.jl @@ -363,101 +363,3 @@ function interpolation_weights( WI1 = Quadratures.interpolation_matrix(SVector(ξ1), quad_points) return (WI1,) end - -""" - interpolate_column(field, zpts, weights, gidx) - -Interpolate the given `field` on the given points assuming the given interpolation_matrix -and global index in the topology. - -The coefficients `weights` are computed with `Quadratures.interpolation_matrix`. -See also `interpolate_array`. - -Keyword arguments -================== - -- `physical_z`: When `true`, the given `zpts` are interpreted as "from mean sea - level" (instead of "from surface") and hypsography is - interpolated. `NaN`s are returned for values that are below the - surface. - -""" -function interpolate_column( - field::Fields.ExtrudedFiniteDifferenceField, - zpts, - Is, - gidx; - physical_z = false, - fill_value = Spaces.undertype(axes(field))(NaN), -) - space = axes(field) - - # When we don't have hypsography, there is no notion of "interpolating hypsography". In - # this case, the reference vertical points coincide with the physical ones. Setting - # physical_z = false ensures that zpts_ref = zpts - hypsography = Spaces.grid(space).hypsography - if hypsography isa Grids.Flat - physical_z = false - end - - output_array = zeros(Spaces.undertype(space), length(zpts)) - - # If we have physical_z, we have to move the z coordinates from physical to reference - # ones. We also have to deal with the fact that the output_array is not going to be - # fully obtained through interpolation, and the head of it will be simply a collection - # of `fill_values` (up to the surface). When we have topography, we compute the - # reference z and take the positive ones (above the surface). Then, we the top of - # `output_array` with `fill_value` and the rest with interpolated ones. To achieve this, - # we have to make sure that we are passing around views of the same array (as opposed to - # copied of it). - if physical_z - FT = Spaces.undertype(axes(field)) - - # interpolate_slab! takes a vector - z_surface = [zero(FT)] - interpolate_slab!( - z_surface, - hypsography.surface.z, - [Fields.SlabIndex(nothing, gidx)], - [Is], - ) - z_surface = z_surface[] - z_top = Spaces.z_max(space) - - all_zpts_ref = [ - Hypsography.physical_z_to_ref_z( - hypsography, - z_ref, - Geometry.ZPoint(z_surface), - Geometry.ZPoint(z_top), - ) for z_ref in zpts - ] - - # Remove points below the surface - zpts_ref = [z_ref for z_ref in all_zpts_ref if z_ref.z > 0] - - # Edge case check: when zpts = zpts_ref, all the points are above the surface - num_points_below_surface = length(zpts) - length(zpts_ref) - - fill!( - (@view output_array[1:(1 + num_points_below_surface)]), - fill_value, - ) - else - zpts_ref = zpts - num_points_below_surface = 0 - end - - vertical_indices_ref_coordinates = - [vertical_indices_ref_coordinate(space, zcoord) for zcoord in zpts_ref] - - interpolate_slab_level!( - (@view output_array[(1 + num_points_below_surface):end]), - field, - gidx, - Is, - vertical_indices_ref_coordinates, - ) - - return output_array -end diff --git a/src/Spaces/dss.jl b/src/Spaces/dss.jl index 2f0ab42c60..2c117eace8 100644 --- a/src/Spaces/dss.jl +++ b/src/Spaces/dss.jl @@ -110,7 +110,6 @@ function weighted_dss_prepare!( dss_buffer::DSSBuffer, ) assert_same_eltype(data, dss_buffer) - length(parent(data)) == 0 && return nothing device = ClimaComms.device(topology(space)) hspace = horizontal_space(space) dss_transform!( @@ -176,6 +175,8 @@ function weighted_dss_start!( }, dss_buffer::DSSBuffer, ) + Quadratures.requires_dss(quadrature_style(space)) || return nothing + sizeof(eltype(data)) > 0 || return nothing device = ClimaComms.device(topology(space)) weighted_dss_prepare!(data, space, dss_buffer) cuda_synchronize(device; blocking = true) @@ -225,8 +226,9 @@ function weighted_dss_internal!( hspace::AbstractSpectralElementSpace, dss_buffer::Union{DSSBuffer, Nothing}, ) + Quadratures.requires_dss(quadrature_style(space)) || return nothing + sizeof(eltype(data)) > 0 || return nothing assert_same_eltype(data, dss_buffer) - length(parent(data)) == 0 && return nothing device = ClimaComms.device(topology(hspace)) if hspace isa SpectralElementSpace1D dss_1d!( @@ -309,9 +311,10 @@ function weighted_dss_ghost!( hspace::SpectralElementSpace2D, dss_buffer::DSSBuffer, ) + Quadratures.requires_dss(quadrature_style(space)) || return data + sizeof(eltype(data)) > 0 || return data assert_same_eltype(data, dss_buffer) ClimaComms.finish(dss_buffer.graph_context) - length(parent(data)) == 0 && return data device = ClimaComms.device(topology(hspace)) load_from_recv_buffer!(device, dss_buffer) dss_ghost!( diff --git a/src/Topologies/dss.jl b/src/Topologies/dss.jl index 35fbd5e1d7..bf2ac51099 100644 --- a/src/Topologies/dss.jl +++ b/src/Topologies/dss.jl @@ -678,15 +678,14 @@ end Computed unweighted/pure DSS of `data`. """ function dss!(data::DSSTypes1D, topology::IntervalTopology) - Nij = DataLayouts.get_Nij(data) - length(parent(data)) == 0 && return nothing + sizeof(eltype(data)) > 0 || return nothing device = ClimaComms.device(topology) dss_1d!(device, data, topology) return nothing end function dss!(data::DSSTypes2D, topology::Topology2D) + sizeof(eltype(data)) > 0 || return nothing Nij = DataLayouts.get_Nij(data) - length(parent(data)) == 0 && return nothing device = ClimaComms.device(topology) perimeter = Perimeter2D(Nij) # create dss buffer diff --git a/test/Hypsography/2d.jl b/test/Hypsography/2d.jl index 9d613654ac..0f76469c69 100644 --- a/test/Hypsography/2d.jl +++ b/test/Hypsography/2d.jl @@ -76,26 +76,16 @@ z_ref1 = Geometry.ZPoint{FT}(40.0) z_top1 = Geometry.ZPoint{FT}(400.0) z_surface1 = Geometry.ZPoint{FT}(10.0) -point_space = Spaces.PointSpace(z_surface1) -z_surface_point = Fields.coordinate_field(point_space) - @test Hypsography.physical_z_to_ref_z( Grids.Flat(), - Hypsography.ref_z_to_physical_z(Grids.Flat(), z_ref1, z_surface1, z_top1), - z_surface1, + Hypsography.ref_z_to_physical_z(Grids.Flat(), z_ref1, z_top1), z_top1, ).z ≈ z_ref1.z # Linear adaption -linear_adaption = Hypsography.LinearAdaption(z_surface_point) +linear_adaption = Hypsography.LinearAdaption(z_surface1) @test Hypsography.physical_z_to_ref_z( linear_adaption, - Hypsography.ref_z_to_physical_z( - linear_adaption, - z_ref1, - z_surface1, - z_top1, - ), - z_surface1, + Hypsography.ref_z_to_physical_z(linear_adaption, z_ref1, z_top1), z_top1, ).z ≈ z_ref1.z diff --git a/test/Remapping/distributed_remapping.jl b/test/Remapping/distributed_remapping.jl index e3c5fdc6b5..64f97f4423 100644 --- a/test/Remapping/distributed_remapping.jl +++ b/test/Remapping/distributed_remapping.jl @@ -39,110 +39,102 @@ end @test Remapping.batched_ranges(3, 2) == [1:2, 3:3] end -on_gpu = device isa ClimaComms.CUDADevice with_mpi = context isa ClimaComms.MPICommsContext -if !on_gpu - @testset "2D extruded" begin - vertdomain = Domains.IntervalDomain( - Geometry.ZPoint(0.0), - Geometry.ZPoint(1000.0); - boundary_names = (:bottom, :top), - ) +@testset "2D extruded" begin + vertdomain = Domains.IntervalDomain( + Geometry.ZPoint(0.0), + Geometry.ZPoint(1000.0); + boundary_names = (:bottom, :top), + ) - vertmesh = Meshes.IntervalMesh(vertdomain, nelems = 30) - verttopo = Topologies.IntervalTopology( - ClimaComms.SingletonCommsContext(device), - vertmesh, - ) - vert_center_space = Spaces.CenterFiniteDifferenceSpace(verttopo) + vertmesh = Meshes.IntervalMesh(vertdomain, nelems = 30) + verttopo = Topologies.IntervalTopology( + ClimaComms.SingletonCommsContext(device), + vertmesh, + ) + vert_center_space = Spaces.CenterFiniteDifferenceSpace(verttopo) - horzdomain = Domains.IntervalDomain( - Geometry.XPoint(-500.0) .. Geometry.XPoint(500.0), - periodic = true, - ) + horzdomain = Domains.IntervalDomain( + Geometry.XPoint(-500.0) .. Geometry.XPoint(500.0), + periodic = true, + ) - quad = Quadratures.GLL{4}() - horzmesh = Meshes.IntervalMesh(horzdomain, nelems = 10) - horztopology = Topologies.IntervalTopology( - ClimaComms.SingletonCommsContext(device), - horzmesh, - ) - horzspace = Spaces.SpectralElementSpace1D(horztopology, quad) + quad = Quadratures.GLL{4}() + horzmesh = Meshes.IntervalMesh(horzdomain, nelems = 10) + horztopology = Topologies.IntervalTopology( + ClimaComms.SingletonCommsContext(device), + horzmesh, + ) + horzspace = Spaces.SpectralElementSpace1D(horztopology, quad) - hv_center_space = - Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space) + hv_center_space = + Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space) - coords = Fields.coordinate_field(hv_center_space) + coords = Fields.coordinate_field(hv_center_space) - xpts = range(-500.0, 500.0, length = 21) - zpts = range(0.0, 1000.0, length = 21) - hcoords = [Geometry.XPoint(x) for x in xpts] - zcoords = [Geometry.ZPoint(z) for z in zpts] + xpts = range(-500.0, 500.0, length = 21) + zpts = range(0.0, 1000.0, length = 21) + hcoords = [Geometry.XPoint(x) for x in xpts] + zcoords = [Geometry.ZPoint(z) for z in zpts] - remapper = Remapping.Remapper( - hv_center_space, - hcoords, - zcoords, - buffer_length = 2, - ) + remapper = + Remapping.Remapper(hv_center_space, hcoords, zcoords, buffer_length = 2) - interp_x = Remapping.interpolate(remapper, coords.x) - interp_x2 = Remapping.interpolate(coords.x, hcoords, zcoords) - if ClimaComms.iamroot(context) - @test Array(interp_x) ≈ [x for x in xpts, z in zpts] - @test Array(interp_x2) ≈ [x for x in xpts, z in zpts] - end + interp_x = Remapping.interpolate(remapper, coords.x) + interp_x2 = Remapping.interpolate(coords.x, hcoords, zcoords) + if ClimaComms.iamroot(context) + @test Array(interp_x) ≈ [x for x in xpts, z in zpts] + @test Array(interp_x2) ≈ [x for x in xpts, z in zpts] + end - interp_z = Remapping.interpolate(remapper, coords.z) - expected_z = [z for x in xpts, z in zpts] - if ClimaComms.iamroot(context) - @test Array(interp_z[:, 2:(end - 1)]) ≈ expected_z[:, 2:(end - 1)] - @test Array(interp_z[:, 1]) ≈ - [1000.0 * (0 / 30 + 1 / 30) / 2 for x in xpts] - @test Array(interp_z[:, end]) ≈ - [1000.0 * (29 / 30 + 30 / 30) / 2 for x in xpts] - end + interp_z = Remapping.interpolate(remapper, coords.z) + expected_z = [z for x in xpts, z in zpts] + if ClimaComms.iamroot(context) + @test Array(interp_z[:, 2:(end - 1)]) ≈ expected_z[:, 2:(end - 1)] + @test Array(interp_z[:, 1]) ≈ + [1000.0 * (0 / 30 + 1 / 30) / 2 for x in xpts] + @test Array(interp_z[:, end]) ≈ + [1000.0 * (29 / 30 + 30 / 30) / 2 for x in xpts] + end - # Remapping two fields - interp_xx = Remapping.interpolate(remapper, [coords.x, coords.x]) - if ClimaComms.iamroot(context) - @test interp_x ≈ interp_xx[:, :, 1] - @test interp_x ≈ interp_xx[:, :, 2] - end + # Remapping two fields + interp_xx = Remapping.interpolate(remapper, [coords.x, coords.x]) + if ClimaComms.iamroot(context) + @test interp_x ≈ interp_xx[:, :, 1] + @test interp_x ≈ interp_xx[:, :, 2] + end - # Remapping three fields (more than the buffer length) - interp_xxx = - Remapping.interpolate(remapper, [coords.x, coords.x, coords.x]) - if ClimaComms.iamroot(context) - @test interp_x ≈ interp_xxx[:, :, 1] - @test interp_x ≈ interp_xxx[:, :, 2] - @test interp_x ≈ interp_xxx[:, :, 3] - end + # Remapping three fields (more than the buffer length) + interp_xxx = Remapping.interpolate(remapper, [coords.x, coords.x, coords.x]) + if ClimaComms.iamroot(context) + @test interp_x ≈ interp_xxx[:, :, 1] + @test interp_x ≈ interp_xxx[:, :, 2] + @test interp_x ≈ interp_xxx[:, :, 3] + end - # Remapping in-place one field - dest = ArrayType(zeros(21, 21)) - Remapping.interpolate!(dest, remapper, coords.x) - if ClimaComms.iamroot(context) - @test interp_x ≈ dest - end + # Remapping in-place one field + dest = ArrayType(zeros(21, 21)) + Remapping.interpolate!(dest, remapper, coords.x) + if ClimaComms.iamroot(context) + @test interp_x ≈ dest + end - # Two fields - dest = ArrayType(zeros(21, 21, 2)) - Remapping.interpolate!(dest, remapper, [coords.x, coords.x]) - if ClimaComms.iamroot(context) - @test interp_x ≈ dest[:, :, 1] - @test interp_x ≈ dest[:, :, 2] - end + # Two fields + dest = ArrayType(zeros(21, 21, 2)) + Remapping.interpolate!(dest, remapper, [coords.x, coords.x]) + if ClimaComms.iamroot(context) + @test interp_x ≈ dest[:, :, 1] + @test interp_x ≈ dest[:, :, 2] + end - # Three fields (more than buffer length) - dest = ArrayType(zeros(21, 21, 3)) - Remapping.interpolate!(dest, remapper, [coords.x, coords.x, coords.x]) - if ClimaComms.iamroot(context) - @test interp_x ≈ dest[:, :, 1] - @test interp_x ≈ dest[:, :, 2] - @test interp_x ≈ dest[:, :, 3] - end + # Three fields (more than buffer length) + dest = ArrayType(zeros(21, 21, 3)) + Remapping.interpolate!(dest, remapper, [coords.x, coords.x, coords.x]) + if ClimaComms.iamroot(context) + @test interp_x ≈ dest[:, :, 1] + @test interp_x ≈ dest[:, :, 2] + @test interp_x ≈ dest[:, :, 3] end end @@ -863,36 +855,35 @@ end ) # 2D slice space - if !on_gpu - horzdomain = Domains.IntervalDomain( - Geometry.XPoint(-500.0) .. Geometry.XPoint(500.0), - periodic = true, - ) - quad = Quadratures.GLL{4}() - horzmesh = Meshes.IntervalMesh(horzdomain, nelems = 10) - horztopology = Topologies.IntervalTopology( - ClimaComms.SingletonCommsContext(device), - horzmesh, - ) - horzspace = Spaces.SpectralElementSpace1D(horztopology, quad) - hv_center_space = - Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space) + horzdomain = Domains.IntervalDomain( + Geometry.XPoint(-500.0) .. Geometry.XPoint(500.0), + periodic = true, + ) + quad = Quadratures.GLL{4}() + horzmesh = Meshes.IntervalMesh(horzdomain, nelems = 10) + horztopology = Topologies.IntervalTopology( + ClimaComms.SingletonCommsContext(device), + horzmesh, + ) + horzspace = Spaces.SpectralElementSpace1D(horztopology, quad) - @test all( - Remapping.default_target_hcoords(hv_center_space) .≈ - [Geometry.XPoint(x) for x in range(-500.0, 500.0, length = 180)], - ) + hv_center_space = + Spaces.ExtrudedFiniteDifferenceSpace(horzspace, vert_center_space) - @test all( - Remapping.default_target_zcoords(hv_center_space) .≈ - Geometry.ZPoint.(range(0.0, 1000; length = 50)), - ) + @test all( + Remapping.default_target_hcoords(hv_center_space) .≈ + [Geometry.XPoint(x) for x in range(-500.0, 500.0, length = 180)], + ) - # Purely horizontal 1D space - @test all( - Remapping.default_target_hcoords(horzspace) .≈ - [Geometry.XPoint(x) for x in range(-500.0, 500.0, length = 180)], - ) - end + @test all( + Remapping.default_target_zcoords(hv_center_space) .≈ + Geometry.ZPoint.(range(0.0, 1000; length = 50)), + ) + + # Purely horizontal 1D space + @test all( + Remapping.default_target_hcoords(horzspace) .≈ + [Geometry.XPoint(x) for x in range(-500.0, 500.0, length = 180)], + ) end