CliMA · simone-silvestri · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/kernel_maps.jl b/kernel_maps.jl
diff --git a/src/BoundaryConditions/fill_halo_regions.jl b/src/BoundaryConditions/fill_halo_regions.jl
@@ -46,7 +46,11 @@ const MaybeTupledData = Union{OffsetArray, NTuple{<:Any, OffsetArray}}
 
 "Fill halo regions in ``x``, ``y``, and ``z`` for a given field's data."
 function fill_halo_regions!(c::MaybeTupledData, boundary_conditions, indices, loc, grid, args...; 
-                            fill_boundary_normal_velocities = true, kwargs...)
+                            fill_boundary_normal_velocities = true, 
+                            only_local_halos = false,  # Only valid for `DistributedGrids`, we throw it away here
+                            async = false,  # Only valid for `DistributedGrids`, we throw it away here
+                            kwargs...)
+
     arch = architecture(grid)
 
     if fill_boundary_normal_velocities

diff --git a/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/compute_hydrostatic_free_surface_tendencies.jl
@@ -96,7 +96,6 @@ function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_
                 compute_hydrostatic_free_surface_Gc!,
                 c_tendency,
                 grid,
-                active_cells_map,
                 args;
                 active_cells_map)
     end
@@ -161,12 +160,12 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para
 
     launch!(arch, grid, kernel_parameters,
             compute_hydrostatic_free_surface_Gu!, model.timestepper.Gⁿ.u, grid, 
-            active_cells_map, u_kernel_args;
+            u_kernel_args;
             active_cells_map)
 
     launch!(arch, grid, kernel_parameters,
             compute_hydrostatic_free_surface_Gv!, model.timestepper.Gⁿ.v, grid, 
-            active_cells_map, v_kernel_args;
+            v_kernel_args;
             active_cells_map)
 
     compute_free_surface_tendency!(grid, model, :xy)
@@ -200,45 +199,27 @@ end
 #####
 
 """ Calculate the right-hand-side of the u-velocity equation. """
-@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, ::Nothing, args)
+@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, args)
     i, j, k = @index(Global, NTuple)
     @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, active_cells_map, args)
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, active_cells_map)
-    @inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...)
-end
-
 """ Calculate the right-hand-side of the v-velocity equation. """
-@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, ::Nothing, args)
+@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, args)
     i, j, k = @index(Global, NTuple)
     @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, active_cells_map, args)
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, active_cells_map)
-    @inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...)
-end
-
 #####
 ##### Tendency calculators for tracers
 #####
 
 """ Calculate the right-hand-side of the tracer advection-diffusion equation. """
-@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, ::Nothing, args)
+@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, args)
     i, j, k = @index(Global, NTuple)
     @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, active_cells_map, args)
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, active_cells_map)
-    @inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...)
-end
-
 #####
 ##### Tendency calculators for an explicit free surface
 #####

diff --git a/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl b/src/Models/HydrostaticFreeSurfaceModels/hydrostatic_free_surface_ab2_step.jl
@@ -1,7 +1,6 @@
 using Oceananigans.Fields: location
 using Oceananigans.TimeSteppers: ab2_step_field!
 using Oceananigans.TurbulenceClosures: implicit_step!
-using Oceananigans.ImmersedBoundaries: retrieve_interior_active_cells_map, retrieve_surface_active_cells_map
 
 import Oceananigans.TimeSteppers: ab2_step!
 

diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl
@@ -133,7 +133,7 @@ end
 
 # Barotropic Model Kernels
 # u_Δz = u * Δz
-@kernel function _barotropic_mode_kernel!(U, V, grid, ::Nothing, u, v)
+@kernel function _barotropic_mode_kernel!(U, V, grid, u, v)
     i, j  = @index(Global, NTuple)
     k_top = grid.Nz+1
 
@@ -146,26 +146,9 @@ end
     end
 end
 
-# Barotropic Model Kernels
-# u_Δz = u * Δz
-@kernel function _barotropic_mode_kernel!(U, V, grid, active_cells_map, u, v)
-    idx = @index(Global, Linear)
-    i, j = active_linear_index_to_tuple(idx, active_cells_map)
-    k_top = grid.Nz+1
-
-    @inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1]
-    @inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1]
-
-    for k in 2:grid.Nz
-        @inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k]
-        @inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k]
-    end
-end
-
 @inline function compute_barotropic_mode!(U, V, grid, u, v)
     active_cells_map = retrieve_surface_active_cells_map(grid)
-
-    launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, active_cells_map, u, v; active_cells_map)
+    launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; active_cells_map)
 
     return nothing
 end

diff --git a/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl b/src/Models/HydrostaticFreeSurfaceModels/store_hydrostatic_free_surface_tendencies.jl
@@ -4,7 +4,6 @@ using Oceananigans.TimeSteppers: store_field_tendencies!
 
 using Oceananigans: prognostic_fields
 using Oceananigans.Grids: AbstractGrid
-using Oceananigans.ImmersedBoundaries: retrieve_interior_active_cells_map
 
 using Oceananigans.Utils: launch!
 

diff --git a/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl b/src/Models/NonhydrostaticModels/compute_nonhydrostatic_tendencies.jl
@@ -103,15 +103,15 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; acti
 
     exclude_periphery = true
     launch!(arch, grid, kernel_parameters, compute_Gu!, 
-            tendencies.u, grid, active_cells_map, u_kernel_args;
+            tendencies.u, grid, u_kernel_args;
             active_cells_map, exclude_periphery)
 
     launch!(arch, grid, kernel_parameters, compute_Gv!, 
-            tendencies.v, grid, active_cells_map, v_kernel_args;
+            tendencies.v, grid, v_kernel_args;
             active_cells_map, exclude_periphery)
 
     launch!(arch, grid, kernel_parameters, compute_Gw!, 
-            tendencies.w, grid, active_cells_map, w_kernel_args;
+            tendencies.w, grid, w_kernel_args;
             active_cells_map, exclude_periphery)
 
     start_tracer_kernel_args = (advection, closure)
@@ -131,7 +131,7 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; acti
                      forcing, clock)
 
         launch!(arch, grid, kernel_parameters, compute_Gc!, 
-                c_tendency, grid, active_cells_map, args;
+                c_tendency, grid, args;
                 active_cells_map)
     end
 
@@ -143,57 +143,34 @@ end
 #####
 
 """ Calculate the right-hand-side of the u-velocity equation. """
-@kernel function compute_Gu!(Gu, grid, ::Nothing, args) 
+@kernel function compute_Gu!(Gu, grid, args) 
     i, j, k = @index(Global, NTuple)
     @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_Gu!(Gu, grid, interior_map, args) 
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, interior_map)
-    @inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...)
-end
-
 """ Calculate the right-hand-side of the v-velocity equation. """
-@kernel function compute_Gv!(Gv, grid, ::Nothing, args) 
+@kernel function compute_Gv!(Gv, grid, args) 
     i, j, k = @index(Global, NTuple)
     @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_Gv!(Gv, grid, interior_map, args) 
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, interior_map)
-    @inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...)
-end
-
 """ Calculate the right-hand-side of the w-velocity equation. """
-@kernel function compute_Gw!(Gw, grid, ::Nothing, args) 
+@kernel function compute_Gw!(Gw, grid, args) 
     i, j, k = @index(Global, NTuple)
     @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_Gw!(Gw, grid, interior_map, args)
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, interior_map)
-    @inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...)
-end
 
 #####
 ##### Tracer(s)
 #####
 
 """ Calculate the right-hand-side of the tracer advection-diffusion equation. """
-@kernel function compute_Gc!(Gc, grid, ::Nothing, args)
+@kernel function compute_Gc!(Gc, grid, args)
     i, j, k = @index(Global, NTuple)
     @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...)
 end
 
-@kernel function compute_Gc!(Gc, grid, interior_map, args) 
-    idx = @index(Global, Linear)
-    i, j, k = active_linear_index_to_tuple(idx, interior_map)
-    @inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...)
-end
-
 #####
 ##### Boundary contributions to tendencies due to user-prescribed fluxes
 #####

diff --git a/src/Utils/kernel_launching.jl b/src/Utils/kernel_launching.jl
@@ -6,7 +6,9 @@ using Oceananigans: location
 using Oceananigans.Architectures
 using Oceananigans.Grids
 using Oceananigans.Grids: AbstractGrid
+using Adapt
 using Base: @pure
+using KernelAbstractions: Kernel
 
 import Oceananigans
 import KernelAbstractions: get, expand
@@ -80,6 +82,28 @@ end
 contiguousrange(range::NTuple{N, Int}, offset::NTuple{N, Int}) where N = Tuple(1+o:r+o for (r, o) in zip(range, offset))
 flatten_reduced_dimensions(worksize, dims) = Tuple(d ∈ dims ? 1 : worksize[d] for d = 1:3)
 
+#### 
+#### Internal utility to launch a function mapped on an index_map
+####
+
+struct MappedFunction{F, M} <: Function
+    f::F
+    imap::M
+end
+
+Adapt.adapt_structure(to, m::MappedFunction) = 
+    MappedFunction(Adapt.adapt(to, m.f), Adapt.adapt(to, m.imap))
+
+@inline function (m::MappedFunction)(_ctx_)  
+    m.f(_ctx_)
+    return nothing
+end
+
+@inline function (m::MappedFunction)(_ctx_, args...) 
+    m.f(_ctx_, args...)
+    return nothing
+end
+
 # Support for 1D
 heuristic_workgroup(Wx) = min(Wx, 256)
 
@@ -238,9 +262,20 @@ the architecture `arch`.
 
     dev = Architectures.device(arch)
     loop = kernel!(dev, workgroup, worksize)
+
+    # Map out the function to use active_cells_map
+    # as an index map
+    if !isnothing(active_cells_map)
+        func  = MappedFunction(loop.f, active_cells_map)
+        param = get_kernel_parameters(loop)
+        M     = typeof(func)
+        loop  = Kernel{param..., M}(dev, func)
+    end
+
     return loop, worksize
 end
 
+@inline get_kernel_parameters(k::Kernel{A, B, C}) where {A, B, C} = A, B, C
 
 """
     launch!(arch, grid, workspec, kernel!, kernel_args...; kw...)
@@ -272,10 +307,7 @@ end
 @inline function _launch!(arch, grid, workspec, kernel!, first_kernel_arg, other_kernel_args...;
                           exclude_periphery = false,
                           reduced_dimensions = (),
-                          active_cells_map = nothing,
-                          # TODO: these two kwargs do nothing:
-                          only_local_halos = false,
-                          async = false)
+                          active_cells_map = nothing)
 
     location = Oceananigans.location(first_kernel_arg)
 
@@ -320,6 +352,13 @@ using KernelAbstractions: Kernel
 using KernelAbstractions.NDIteration: _Size, StaticSize
 using KernelAbstractions.NDIteration: NDRange
 
+using KernelAbstractions.NDIteration
+using KernelAbstractions: ndrange, workgroupsize
+import KernelAbstractions: partition
+
+using KernelAbstractions: CompilerMetadata
+import KernelAbstractions: __ndrange, __groupsize
+
 struct OffsetStaticSize{S} <: _Size
     function OffsetStaticSize{S}() where S
         new{S::Tuple{Vararg}}()
@@ -369,13 +408,6 @@ const OffsetNDRange{N} = NDRange{N, <:StaticSize, <:StaticSize, <:Any, <:KernelO
     return CartesianIndex(nI)
 end
 
-using KernelAbstractions.NDIteration
-using KernelAbstractions: ndrange, workgroupsize
-import KernelAbstractions: partition
-
-using KernelAbstractions: CompilerMetadata
-import KernelAbstractions: __ndrange, __groupsize
-
 @inline __ndrange(::CompilerMetadata{NDRange}) where {NDRange<:OffsetStaticSize}  = CartesianIndices(get(NDRange))
 @inline __groupsize(cm::CompilerMetadata{NDRange}) where {NDRange<:OffsetStaticSize} = size(__ndrange(cm))
 
@@ -413,3 +445,58 @@ function partition(kernel::OffsetKernel, inrange, ingroupsize)
     return iterspace, dynamic
 end
 
+#####
+##### Utilities for Mapped kernels
+#####
+
+struct IndexMap{M}
+    imap :: M
-    imap :: M
+    index_map :: M
-    imap :: M
+    index_map :: M
+end
+
+Adapt.adapt_structure(to, m::IndexMap) = IndexMap(Adapt.adapt(to, m.index_map))
+
+const MappedNDRange{N} = NDRange{N, <:StaticSize, <:StaticSize, <:Any, <:IndexMap} where N
+
+# NDRange has been modified to include an index_map in place of workitems: R
+# Remember, dynamic offset kernels are not possible with this extension!!
+# Also, mapped kernels work only with a 1D kernel and a 1D map, it is not possible to launch a ND kernel.
+# TODO: maybe don't do this
+@inline function expand(ndrange::MappedNDRange, groupidx::CartesianIndex, idx::CartesianIndex) 
+    Base.@_inline_meta
+    offsets = workitems(ndrange)
+    stride = size(offsets, 1)
+    gidx = groupidx.I[1]
+    tI = (gidx - 1) * stride + idx.I[1]
+    nI = ndrange.workitems.imap[tI]
+    return CartesianIndex(nI)
+end
+
+const MappedKernel{D} = Kernel{D, <:Any, <:Any, <:MappedFunction} where D
+
+# Override the getproperty to make sure we get the correct properties
+@inline getproperty(k::MappedKernel, prop::Symbol) = get_mapped_property(k, Val(prop))
+
+@inline get_mapped_property(k, ::Val{:imap}) = k.f.imap
+@inline get_mapped_property(k, ::Val{:f}) = k.f.f
+
+# Extending the partition function to include offsets in NDRange: note that in this case the 
+# offsets take the place of the DynamicWorkitems which we assume is not needed in static kernels
+function partition(kernel::MappedKernel, inrange, ingroupsize)
+    static_workgroupsize = workgroupsize(kernel)
+
+    # Calculate the static NDRange and WorkgroupSize
+    index_map = getproperty(kernel, :imap)
+    range = length(index_map)
+    arch  = Oceananigans.Architectures.architecture(index_map)
+    groupsize = get(static_workgroupsize)
+
+    blocks, groupsize, dynamic = NDIteration.partition(range, groupsize)
+
+    static_blocks = StaticSize{blocks}
+    static_workgroupsize = StaticSize{groupsize} # we might have padded workgroupsize
+
+    index_map = Oceananigans.Architectures.convert_args(arch, index_map)
+    iterspace = NDRange{length(range), static_blocks, static_workgroupsize}(blocks, IndexMap(index_map))
+
+    return iterspace, dynamic
+end