Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

support for active_cells_map in kernels #3920

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file added kernel_maps.jl
Empty file.
6 changes: 5 additions & 1 deletion src/BoundaryConditions/fill_halo_regions.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ const MaybeTupledData = Union{OffsetArray, NTuple{<:Any, OffsetArray}}

"Fill halo regions in ``x``, ``y``, and ``z`` for a given field's data."
function fill_halo_regions!(c::MaybeTupledData, boundary_conditions, indices, loc, grid, args...;
fill_boundary_normal_velocities = true, kwargs...)
fill_boundary_normal_velocities = true,
only_local_halos = false, # Only valid for `DistributedGrids`, we throw it away here
async = false, # Only valid for `DistributedGrids`, we throw it away here
kwargs...)

arch = architecture(grid)

if fill_boundary_normal_velocities
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ function compute_hydrostatic_free_surface_tendency_contributions!(model, kernel_
compute_hydrostatic_free_surface_Gc!,
c_tendency,
grid,
active_cells_map,
args;
active_cells_map)
end
Expand Down Expand Up @@ -161,12 +160,12 @@ function compute_hydrostatic_momentum_tendencies!(model, velocities, kernel_para

launch!(arch, grid, kernel_parameters,
compute_hydrostatic_free_surface_Gu!, model.timestepper.Gⁿ.u, grid,
active_cells_map, u_kernel_args;
u_kernel_args;
active_cells_map)

launch!(arch, grid, kernel_parameters,
compute_hydrostatic_free_surface_Gv!, model.timestepper.Gⁿ.v, grid,
active_cells_map, v_kernel_args;
v_kernel_args;
active_cells_map)

compute_free_surface_tendency!(grid, model, :xy)
Expand Down Expand Up @@ -200,45 +199,27 @@ end
#####

""" Calculate the right-hand-side of the u-velocity equation. """
@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, ::Nothing, args)
@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...)
end

@kernel function compute_hydrostatic_free_surface_Gu!(Gu, grid, active_cells_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, active_cells_map)
@inbounds Gu[i, j, k] = hydrostatic_free_surface_u_velocity_tendency(i, j, k, grid, args...)
end

""" Calculate the right-hand-side of the v-velocity equation. """
@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, ::Nothing, args)
@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...)
end

@kernel function compute_hydrostatic_free_surface_Gv!(Gv, grid, active_cells_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, active_cells_map)
@inbounds Gv[i, j, k] = hydrostatic_free_surface_v_velocity_tendency(i, j, k, grid, args...)
end

#####
##### Tendency calculators for tracers
#####

""" Calculate the right-hand-side of the tracer advection-diffusion equation. """
@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, ::Nothing, args)
@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...)
end

@kernel function compute_hydrostatic_free_surface_Gc!(Gc, grid, active_cells_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, active_cells_map)
@inbounds Gc[i, j, k] = hydrostatic_free_surface_tracer_tendency(i, j, k, grid, args...)
end

#####
##### Tendency calculators for an explicit free surface
#####
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
using Oceananigans.Fields: location
using Oceananigans.TimeSteppers: ab2_step_field!
using Oceananigans.TurbulenceClosures: implicit_step!
using Oceananigans.ImmersedBoundaries: retrieve_interior_active_cells_map, retrieve_surface_active_cells_map

import Oceananigans.TimeSteppers: ab2_step!

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ end

# Barotropic Model Kernels
# u_Δz = u * Δz
@kernel function _barotropic_mode_kernel!(U, V, grid, ::Nothing, u, v)
@kernel function _barotropic_mode_kernel!(U, V, grid, u, v)
i, j = @index(Global, NTuple)
k_top = grid.Nz+1

Expand All @@ -146,26 +146,9 @@ end
end
end

# Barotropic Model Kernels
# u_Δz = u * Δz
@kernel function _barotropic_mode_kernel!(U, V, grid, active_cells_map, u, v)
idx = @index(Global, Linear)
i, j = active_linear_index_to_tuple(idx, active_cells_map)
k_top = grid.Nz+1

@inbounds U[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u[i, j, 1]
@inbounds V[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v[i, j, 1]

for k in 2:grid.Nz
@inbounds U[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u[i, j, k]
@inbounds V[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v[i, j, k]
end
end

@inline function compute_barotropic_mode!(U, V, grid, u, v)
active_cells_map = retrieve_surface_active_cells_map(grid)

launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, active_cells_map, u, v; active_cells_map)
launch!(architecture(grid), grid, :xy, _barotropic_mode_kernel!, U, V, grid, u, v; active_cells_map)

return nothing
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ using Oceananigans.TimeSteppers: store_field_tendencies!

using Oceananigans: prognostic_fields
using Oceananigans.Grids: AbstractGrid
using Oceananigans.ImmersedBoundaries: retrieve_interior_active_cells_map

using Oceananigans.Utils: launch!

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,15 +103,15 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; acti

exclude_periphery = true
launch!(arch, grid, kernel_parameters, compute_Gu!,
tendencies.u, grid, active_cells_map, u_kernel_args;
tendencies.u, grid, u_kernel_args;
active_cells_map, exclude_periphery)

launch!(arch, grid, kernel_parameters, compute_Gv!,
tendencies.v, grid, active_cells_map, v_kernel_args;
tendencies.v, grid, v_kernel_args;
active_cells_map, exclude_periphery)

launch!(arch, grid, kernel_parameters, compute_Gw!,
tendencies.w, grid, active_cells_map, w_kernel_args;
tendencies.w, grid, w_kernel_args;
active_cells_map, exclude_periphery)

start_tracer_kernel_args = (advection, closure)
Expand All @@ -131,7 +131,7 @@ function compute_interior_tendency_contributions!(model, kernel_parameters; acti
forcing, clock)

launch!(arch, grid, kernel_parameters, compute_Gc!,
c_tendency, grid, active_cells_map, args;
c_tendency, grid, args;
active_cells_map)
end

Expand All @@ -143,57 +143,34 @@ end
#####

""" Calculate the right-hand-side of the u-velocity equation. """
@kernel function compute_Gu!(Gu, grid, ::Nothing, args)
@kernel function compute_Gu!(Gu, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...)
end

@kernel function compute_Gu!(Gu, grid, interior_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, interior_map)
@inbounds Gu[i, j, k] = u_velocity_tendency(i, j, k, grid, args...)
end

""" Calculate the right-hand-side of the v-velocity equation. """
@kernel function compute_Gv!(Gv, grid, ::Nothing, args)
@kernel function compute_Gv!(Gv, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...)
end

@kernel function compute_Gv!(Gv, grid, interior_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, interior_map)
@inbounds Gv[i, j, k] = v_velocity_tendency(i, j, k, grid, args...)
end

""" Calculate the right-hand-side of the w-velocity equation. """
@kernel function compute_Gw!(Gw, grid, ::Nothing, args)
@kernel function compute_Gw!(Gw, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...)
end

@kernel function compute_Gw!(Gw, grid, interior_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, interior_map)
@inbounds Gw[i, j, k] = w_velocity_tendency(i, j, k, grid, args...)
end

#####
##### Tracer(s)
#####

""" Calculate the right-hand-side of the tracer advection-diffusion equation. """
@kernel function compute_Gc!(Gc, grid, ::Nothing, args)
@kernel function compute_Gc!(Gc, grid, args)
i, j, k = @index(Global, NTuple)
@inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...)
end

@kernel function compute_Gc!(Gc, grid, interior_map, args)
idx = @index(Global, Linear)
i, j, k = active_linear_index_to_tuple(idx, interior_map)
@inbounds Gc[i, j, k] = tracer_tendency(i, j, k, grid, args...)
end

#####
##### Boundary contributions to tendencies due to user-prescribed fluxes
#####
Expand Down
109 changes: 98 additions & 11 deletions src/Utils/kernel_launching.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ using Oceananigans: location
using Oceananigans.Architectures
using Oceananigans.Grids
using Oceananigans.Grids: AbstractGrid
using Adapt
using Base: @pure
using KernelAbstractions: Kernel

import Oceananigans
import KernelAbstractions: get, expand
Expand Down Expand Up @@ -80,6 +82,28 @@ end
contiguousrange(range::NTuple{N, Int}, offset::NTuple{N, Int}) where N = Tuple(1+o:r+o for (r, o) in zip(range, offset))
flatten_reduced_dimensions(worksize, dims) = Tuple(d ∈ dims ? 1 : worksize[d] for d = 1:3)

####
#### Internal utility to launch a function mapped on an index_map
####

struct MappedFunction{F, M} <: Function
f::F
simone-silvestri marked this conversation as resolved.
Show resolved Hide resolved
imap::M
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

index_map?

end

Adapt.adapt_structure(to, m::MappedFunction) =
MappedFunction(Adapt.adapt(to, m.f), Adapt.adapt(to, m.imap))

@inline function (m::MappedFunction)(_ctx_)
m.f(_ctx_)
return nothing
end
simone-silvestri marked this conversation as resolved.
Show resolved Hide resolved

@inline function (m::MappedFunction)(_ctx_, args...)
m.f(_ctx_, args...)
return nothing
end

# Support for 1D
heuristic_workgroup(Wx) = min(Wx, 256)

Expand Down Expand Up @@ -238,9 +262,20 @@ the architecture `arch`.

dev = Architectures.device(arch)
loop = kernel!(dev, workgroup, worksize)

# Map out the function to use active_cells_map
# as an index map
if !isnothing(active_cells_map)
func = MappedFunction(loop.f, active_cells_map)
param = get_kernel_parameters(loop)
M = typeof(func)
loop = Kernel{param..., M}(dev, func)
end
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't it make more sense to put this in the first if-statement in this function?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we need to build the kernel first to be able to retrieve the function


return loop, worksize
end

@inline get_kernel_parameters(k::Kernel{A, B, C}) where {A, B, C} = A, B, C

"""
launch!(arch, grid, workspec, kernel!, kernel_args...; kw...)
Expand Down Expand Up @@ -272,10 +307,7 @@ end
@inline function _launch!(arch, grid, workspec, kernel!, first_kernel_arg, other_kernel_args...;
exclude_periphery = false,
reduced_dimensions = (),
active_cells_map = nothing,
# TODO: these two kwargs do nothing:
only_local_halos = false,
async = false)
active_cells_map = nothing)

location = Oceananigans.location(first_kernel_arg)

Expand Down Expand Up @@ -320,6 +352,13 @@ using KernelAbstractions: Kernel
using KernelAbstractions.NDIteration: _Size, StaticSize
using KernelAbstractions.NDIteration: NDRange

using KernelAbstractions.NDIteration
using KernelAbstractions: ndrange, workgroupsize
import KernelAbstractions: partition

using KernelAbstractions: CompilerMetadata
import KernelAbstractions: __ndrange, __groupsize

struct OffsetStaticSize{S} <: _Size
function OffsetStaticSize{S}() where S
new{S::Tuple{Vararg}}()
Expand Down Expand Up @@ -369,13 +408,6 @@ const OffsetNDRange{N} = NDRange{N, <:StaticSize, <:StaticSize, <:Any, <:KernelO
return CartesianIndex(nI)
end

using KernelAbstractions.NDIteration
using KernelAbstractions: ndrange, workgroupsize
import KernelAbstractions: partition

using KernelAbstractions: CompilerMetadata
import KernelAbstractions: __ndrange, __groupsize

@inline __ndrange(::CompilerMetadata{NDRange}) where {NDRange<:OffsetStaticSize} = CartesianIndices(get(NDRange))
@inline __groupsize(cm::CompilerMetadata{NDRange}) where {NDRange<:OffsetStaticSize} = size(__ndrange(cm))

Expand Down Expand Up @@ -413,3 +445,58 @@ function partition(kernel::OffsetKernel, inrange, ingroupsize)
return iterspace, dynamic
end

#####
##### Utilities for Mapped kernels
#####

struct IndexMap{M}
imap :: M
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
imap :: M
index_map :: M

end

Adapt.adapt_structure(to, m::IndexMap) = IndexMap(Adapt.adapt(to, m.index_map))

const MappedNDRange{N} = NDRange{N, <:StaticSize, <:StaticSize, <:Any, <:IndexMap} where N

# NDRange has been modified to include an index_map in place of workitems: R
# Remember, dynamic offset kernels are not possible with this extension!!
# Also, mapped kernels work only with a 1D kernel and a 1D map, it is not possible to launch a ND kernel.
# TODO: maybe don't do this
@inline function expand(ndrange::MappedNDRange, groupidx::CartesianIndex, idx::CartesianIndex)
Base.@_inline_meta
offsets = workitems(ndrange)
stride = size(offsets, 1)
gidx = groupidx.I[1]
tI = (gidx - 1) * stride + idx.I[1]
nI = ndrange.workitems.imap[tI]
return CartesianIndex(nI)
end

const MappedKernel{D} = Kernel{D, <:Any, <:Any, <:MappedFunction} where D

# Override the getproperty to make sure we get the correct properties
@inline getproperty(k::MappedKernel, prop::Symbol) = get_mapped_property(k, Val(prop))

@inline get_mapped_property(k, ::Val{:imap}) = k.f.imap
@inline get_mapped_property(k, ::Val{:f}) = k.f.f

# Extending the partition function to include offsets in NDRange: note that in this case the
# offsets take the place of the DynamicWorkitems which we assume is not needed in static kernels
function partition(kernel::MappedKernel, inrange, ingroupsize)
static_workgroupsize = workgroupsize(kernel)

# Calculate the static NDRange and WorkgroupSize
index_map = getproperty(kernel, :imap)
range = length(index_map)
arch = Oceananigans.Architectures.architecture(index_map)
groupsize = get(static_workgroupsize)

blocks, groupsize, dynamic = NDIteration.partition(range, groupsize)

static_blocks = StaticSize{blocks}
static_workgroupsize = StaticSize{groupsize} # we might have padded workgroupsize

index_map = Oceananigans.Architectures.convert_args(arch, index_map)
iterspace = NDRange{length(range), static_blocks, static_workgroupsize}(blocks, IndexMap(index_map))

return iterspace, dynamic
end