From 20e41659165ccf3567a4224df8e3e21cf0c4b4c4 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 24 Oct 2024 15:50:09 -0600 Subject: [PATCH 01/29] Add test for hydrostatic turbulence --- test/test_enzyme.jl | 94 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 92 insertions(+), 2 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 45faab48ff..896fefe222 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -106,7 +106,6 @@ function set_initial_condition_via_launch!(model_tracer, amplitude) end @testset "Enzyme + Oceananigans Initialization Broadcast Kernel" begin - Nx = Ny = 64 Nz = 8 @@ -157,7 +156,7 @@ end end end -@testset "Enzyme on advection and diffusion" begin +@testset "Enzyme autodifferentiation of advection and diffusion" begin Nx = Ny = 64 Nz = 8 @@ -234,3 +233,94 @@ end @test rel_error < tol end +function viscous_hydrostatic_turbulence(ν, model, u_init, v_init, Δt, u_truth, v_truth) + # Initialize the model + model.clock.iteration = 0 + model.clock.time = 0 + new_closure = ScalarDiffusivity(; ν, κ=NamedTuple()) + model.closure = new_closure + set!(model, u=u_init, v=v_init) + + # Step it forward + for n = 1:10 + time_step!(model, Δt) + end + + # Compute the sum square error + u, v, w = model.velocities + Nx, Ny, Nz = size(model.grid) + err = 0.0 + for j = 1:Ny, i = 1:Nx + err += @inbounds (u[i, j, 3] - u_truth[i, j, 3])^2 + + (v[i, j, 3] - v_truth[i, j, 3])^2 + end + + return err::Float64 +end + +@testset "Enzyme autodifferentiation of hydrostatic turbulence" begin + Random.seed!(123) + arch = CPU() + Nx = Ny = 32 + Nz = 1 + x = y = (0, 2π) + z = 1 + ν₀ = 1e-2 + + grid = RectilinearGrid(arch, size=(Nx, Ny, 1); x, y, z, topology=(Periodic, Periodic, Bounded)) + closure = ScalarDiffusivity(ν=ν₀) + momentum_advection = Centered(order=2) + + g = 4^2 + c = sqrt(g) + free_surface = ExplicitFreeSurface(gravitational_acceleration=g) + model = HydrostaticFreeSurfaceModel(; grid, momentum_advection, free_surface, closure) + + ϵ(x, y, z) = 2randn() - 1 + set!(model, u=ϵ, v=ϵ) + + u_init = deepcopy(model.velocities.u) + v_init = deepcopy(model.velocities.v) + + Δx = minimum_xspacing(grid) + Δt = 0.01 * Δx / c + for n = 1:10 + time_step!(model, Δt) + end + + u_truth = deepcopy(model.velocities.u) + v_truth = deepcopy(model.velocities.v) + + # Use a manual finite difference to compute a gradient + Δν = 1e-6 + ν1 = 2e-2 + ν2 = ν1 + Δν + e1 = viscous_hydrostatic_turbulence(ν1, model, u_init, v_init, Δt, u_truth, v_truth) + e2 = viscous_hydrostatic_turbulence(ν2, model, u_init, v_init, Δt, u_truth, v_truth) + ΔeΔν = (e2 - e1) / Δν + + @info "Finite difference computed: $ΔeΔν" + + @info "Now with autodiff..." + start_time = time_ns() + + # Use autodiff to compute a gradient + dmodel = Enzyme.make_zero(model) + dedν = autodiff(set_runtime_activity(Enzyme.Reverse), + viscous_hydrostatic_turbulence, + Active(ν1), + Duplicated(model, dmodel), + Const(u_init), + Const(v_init), + Const(Δt), + Const(u_truth), + Const(v_truth)) + + @info "Automatically computed: $dedν." + @info "Elapsed time: " * prettytime(1e-9 * (time_ns() - start_time)) + + tol = 1e-1 + rel_error = abs(dedν[1][3] - ΔeΔν) / abs(ΔeΔν) + @test rel_error < tol +end + From 92b314cd6c70f2b55e907a8831940b3237dc8a71 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 24 Oct 2024 17:02:54 -0600 Subject: [PATCH 02/29] Bypass fill_halo_regions for tuples --- .../update_hydrostatic_free_surface_model_state.jl | 7 +++++-- test/test_enzyme.jl | 11 +++++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index cf6b20eade..599bb56453 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -8,7 +8,7 @@ using Oceananigans.TurbulenceClosures: compute_diffusivities! using Oceananigans.ImmersedBoundaries: mask_immersed_field!, mask_immersed_field_xy!, inactive_node using Oceananigans.Models: update_model_field_time_series! using Oceananigans.Models.NonhydrostaticModels: update_hydrostatic_pressure!, p_kernel_parameters -using Oceananigans.Fields: replace_horizontal_vector_halos! +using Oceananigans.Fields: replace_horizontal_vector_halos!, tupled_fill_halo_regions! import Oceananigans.Models.NonhydrostaticModels: compute_auxiliaries! import Oceananigans.TimeSteppers: update_state! @@ -38,7 +38,10 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp # Update the boundary conditions @apply_regionally update_boundary_condition!(fields(model), model) - fill_halo_regions!(prognostic_fields(model), model.clock, fields(model); async = true) + fill_halo_regions!(model.free_surface.η, model.clock, fields(model), async=true) + prognostic_3d_fields = (model.velocities.u, model.velocities.v, model.tracers...) + tupled_fill_halo_regions!(prognostic_3d_fields, model.grid, model.clock, fields(model); async=true) + @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 896fefe222..51e0442730 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -233,12 +233,19 @@ end @test rel_error < tol end +function set_viscosity!(model, viscosity) + new_closure = ScalarDiffusivity(ν=viscosity) + names = () + new_closure = with_tracers(names, new_closure) + model.closure = new_closure + return nothing +end + function viscous_hydrostatic_turbulence(ν, model, u_init, v_init, Δt, u_truth, v_truth) # Initialize the model model.clock.iteration = 0 model.clock.time = 0 - new_closure = ScalarDiffusivity(; ν, κ=NamedTuple()) - model.closure = new_closure + set_viscosity!(model, ν) set!(model, u=u_init, v=v_init) # Step it forward From b25c2b4ee7d7758620e29fab7eea4de7479a0927 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 24 Oct 2024 17:28:55 -0600 Subject: [PATCH 03/29] Cleanup to interface --- src/Fields/field_tuples.jl | 50 ++++++++----------- ...te_hydrostatic_free_surface_model_state.jl | 4 +- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index debac6aab9..9d8e02913e 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -55,45 +55,35 @@ Fill halo regions for all `fields`. The algorithm: """ function fill_halo_regions!(maybe_nested_tuple::Union{NamedTuple, Tuple}, args...; kwargs...) flattened = flattened_unique_values(maybe_nested_tuple) - - # Sort fields into ReducedField and Field with non-nothing boundary conditions - fields_with_bcs = filter(f -> !isnothing(boundary_conditions(f)), flattened) - reduced_fields = filter(f -> f isa ReducedField, fields_with_bcs) + return tupled_fill_halo_regions!(flattened, args...; kwargs...) +end - for field in reduced_fields - fill_halo_regions!(field, args...; kwargs...) - end - - # MultiRegion fields are considered windowed_fields (indices isa MultiRegionObject)) - windowed_fields = filter(f -> !(f isa FullField), fields_with_bcs) - ordinary_fields = filter(f -> (f isa FullField) && !(f isa ReducedField), fields_with_bcs) - - # Fill halo regions for reduced and windowed fields - for field in windowed_fields - fill_halo_regions!(field, args...; kwargs...) +function tupled_fill_halo_regions!(fields, args...; kwargs...) + + ordinary_fields = Field[] + for fields in fields + if !isnothing(boundary_conditions(f)) + if field isa ReducedField || !(field isa FullField) + # Windowed and reduced fields + fill_halo_regions!(field, args...; kwargs...) + else + push!(ordinary_fields, field) + end + end end - # Fill the rest - if !isempty(ordinary_fields) + if !isempty(ordinary_fields) # ie not reduced, and with default_indices grid = first(ordinary_fields).grid - tupled_fill_halo_regions!(ordinary_fields, grid, args...; kwargs...) + fill_halo_regions!(map(data, fields), + map(boundary_conditions, fields), + default_indices(3), + map(instantiated_location, fields), + grid, args...; kwargs...) end return nothing end -function tupled_fill_halo_regions!(fields, grid, args...; kwargs...) - - # We cannot group windowed fields together, the indices must be (:, :, :)! - indices = default_indices(3) - - return fill_halo_regions!(map(data, fields), - map(boundary_conditions, fields), - indices, - map(instantiated_location, fields), - grid, args...; kwargs...) -end - ##### ##### Tracer names ##### diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index 599bb56453..f8ca935c96 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -38,9 +38,7 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp # Update the boundary conditions @apply_regionally update_boundary_condition!(fields(model), model) - fill_halo_regions!(model.free_surface.η, model.clock, fields(model), async=true) - prognostic_3d_fields = (model.velocities.u, model.velocities.v, model.tracers...) - tupled_fill_halo_regions!(prognostic_3d_fields, model.grid, model.clock, fields(model); async=true) + tupled_fill_halo_regions!(prognostic_fields(model), model.clock, fields(model), async=true) @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) From fd3031cc62fdb8000d946a54e5602bc29410b737 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 24 Oct 2024 17:38:57 -0600 Subject: [PATCH 04/29] Make the test better --- test/test_enzyme.jl | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 51e0442730..687ba04b8b 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -245,8 +245,9 @@ function viscous_hydrostatic_turbulence(ν, model, u_init, v_init, Δt, u_truth, # Initialize the model model.clock.iteration = 0 model.clock.time = 0 + model.clock.last_Δt = Inf set_viscosity!(model, ν) - set!(model, u=u_init, v=v_init) + set!(model, u=u_init, v=v_init, η=0) # Step it forward for n = 1:10 @@ -300,7 +301,7 @@ end # Use a manual finite difference to compute a gradient Δν = 1e-6 - ν1 = 2e-2 + ν1 = ν₀ ν2 = ν1 + Δν e1 = viscous_hydrostatic_turbulence(ν1, model, u_init, v_init, Δt, u_truth, v_truth) e2 = viscous_hydrostatic_turbulence(ν2, model, u_init, v_init, Δt, u_truth, v_truth) From 0047d2f702944e0c5fa9140372694115c43bf1be Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 08:01:54 -0600 Subject: [PATCH 05/29] Updates --- src/Fields/field.jl | 4 +- src/TimeSteppers/quasi_adams_bashforth_2.jl | 72 +++++++++++++-------- 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/Fields/field.jl b/src/Fields/field.jl index ee806db849..c17c120623 100644 --- a/src/Fields/field.jl +++ b/src/Fields/field.jl @@ -404,8 +404,8 @@ Base.checkbounds(f::Field, I...) = Base.checkbounds(f.data, I...) @propagate_inbounds Base.lastindex(f::Field) = lastindex(f.data) @propagate_inbounds Base.lastindex(f::Field, dim) = lastindex(f.data, dim) -Base.fill!(f::Field, val) = fill!(parent(f), val) -Base.parent(f::Field) = parent(f.data) +@inline Base.fill!(f::Field, val) = fill!(parent(f), val) +@inline Base.parent(f::Field) = parent(f.data) Adapt.adapt_structure(to, f::Field) = Adapt.adapt(to, f.data) total_size(f::Field) = total_size(f.grid, location(f), f.indices) diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index ed9f55750b..7836662afa 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -52,6 +52,9 @@ end reset!(timestepper::QuasiAdamsBashforth2TimeStepper) = nothing +@inline zero!(::Nothing) = nothing +@inline zero!(field) = fill!(parent(field), 0) + ##### ##### Time steppping ##### @@ -79,39 +82,23 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt Δt == 0 && @warn "Δt == 0 may cause model blowup!" # Be paranoid and update state at iteration 0 - model.clock.iteration == 0 && update_state!(model, callbacks) - - ab2_timestepper = model.timestepper + model.clock.iteration == 0 && update_state!(model, callbacks; compute_tendencies=true) - # Change the default χ if necessary, which occurs if: + # Take an euler step if: # * We detect that the time-step size has changed. # * We detect that this is the "first" time-step, which means we # need to take an euler step. Note that model.clock.last_Δt is # initialized as Inf # * The user has passed euler=true to time_step! euler = euler || (Δt != model.clock.last_Δt) - - # If euler, then set χ = -0.5 - minus_point_five = convert(eltype(model.grid), -0.5) - χ = ifelse(euler, minus_point_five, ab2_timestepper.χ) - - # Set time-stepper χ (this is used in ab2_step!, but may also be used elsewhere) - χ₀ = ab2_timestepper.χ # Save initial value - ab2_timestepper.χ = χ + euler && @debug "Taking a forward Euler step." - # Ensure zeroing out all previous tendency fields to avoid errors in - # case G⁻ includes NaNs. See https://github.com/CliMA/Oceananigans.jl/issues/2259 + # Full step for tracers, fractional step for velocities. if euler - @debug "Taking a forward Euler step." - for field in ab2_timestepper.G⁻ - !isnothing(field) && @apply_regionally fill!(field, 0) - end + euler_step!(model, Δt) + else + ab2_step!(model, Δt) end - - # Be paranoid and update state at iteration 0 - model.clock.iteration == 0 && update_state!(model, callbacks; compute_tendencies=true) - - ab2_step!(model, Δt) # full step for tracers, fractional step for velocities. tick!(model.clock, Δt) model.clock.last_Δt = Δt @@ -123,9 +110,6 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt update_state!(model, callbacks; compute_tendencies=true) step_lagrangian_particles!(model, Δt) - # Return χ to initial value - ab2_timestepper.χ = χ₀ - return nothing end @@ -175,7 +159,7 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method @kernel function ab2_step_field!(u, Δt, χ, Gⁿ, G⁻) i, j, k = @index(Global, NTuple) - FT = eltype(χ) + FT = typeof(χ) one_point_five = convert(FT, 1.5) oh_point_five = convert(FT, 0.5) @@ -184,3 +168,37 @@ end @kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻) = nothing +function euler_step!(model, Δt) + grid = model.grid + arch = architecture(grid) + model_fields = prognostic_fields(model) + + for (i, field) in enumerate(model_fields) + kernel_args = (field, Δt, model.timestepper.Gⁿ[i]) + launch!(arch, grid, :xyz, euler_step_field!, kernel_args...; exclude_periphery=true) + + # TODO: function tracer_index(model, field_index) = field_index - 3, etc... + tracer_index = Val(i - 3) # assumption + + implicit_step!(field, + model.timestepper.implicit_solver, + model.closure, + model.diffusivity_fields, + tracer_index, + model.clock, + Δt) + end + + return nothing +end + +@kernel function euler_step_field!(u, Δt, Gⁿ) + i, j, k = @index(Global, NTuple) + FT = eltype(u) + @inbounds u[i, j, k] += convert(FT, Δt) * Gⁿ[i, j, k] +end + +@kernel euler_step_field!(::FunctionField, Δt, Gⁿ) = nothing + + + From 1522ae47996a4df9092a27e0d32bfffc1bf53fad Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 14:16:09 -0600 Subject: [PATCH 06/29] Fix z-coord bug --- test/test_enzyme.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 687ba04b8b..58c619d601 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -272,7 +272,7 @@ end Nx = Ny = 32 Nz = 1 x = y = (0, 2π) - z = 1 + z = (0, 1) ν₀ = 1e-2 grid = RectilinearGrid(arch, size=(Nx, Ny, 1); x, y, z, topology=(Periodic, Periodic, Bounded)) From b97190b66b5fec6206bb42de524412c75049a9d6 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 14:30:37 -0600 Subject: [PATCH 07/29] Few more bugs --- test/test_enzyme.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 58c619d601..fa09e5a1f7 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -259,8 +259,8 @@ function viscous_hydrostatic_turbulence(ν, model, u_init, v_init, Δt, u_truth, Nx, Ny, Nz = size(model.grid) err = 0.0 for j = 1:Ny, i = 1:Nx - err += @inbounds (u[i, j, 3] - u_truth[i, j, 3])^2 + - (v[i, j, 3] - v_truth[i, j, 3])^2 + err += @inbounds (u[i, j, 1] - u_truth[i, j, 1])^2 + + (v[i, j, 1] - v_truth[i, j, 1])^2 end return err::Float64 @@ -328,7 +328,7 @@ end @info "Elapsed time: " * prettytime(1e-9 * (time_ns() - start_time)) tol = 1e-1 - rel_error = abs(dedν[1][3] - ΔeΔν) / abs(ΔeΔν) + rel_error = abs(dedν[1][1] - ΔeΔν) / abs(ΔeΔν) @test rel_error < tol end From 08292d2400652b4676e097816fcb2a7f9c7dcdcc Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 15:03:13 -0600 Subject: [PATCH 08/29] Fix bug in field_tuples --- src/Fields/field_tuples.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index 9d8e02913e..048a89011e 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -61,8 +61,8 @@ end function tupled_fill_halo_regions!(fields, args...; kwargs...) ordinary_fields = Field[] - for fields in fields - if !isnothing(boundary_conditions(f)) + for field in fields + if !isnothing(boundary_conditions(field)) if field isa ReducedField || !(field isa FullField) # Windowed and reduced fields fill_halo_regions!(field, args...; kwargs...) @@ -72,6 +72,8 @@ function tupled_fill_halo_regions!(fields, args...; kwargs...) end end + ordinary_fields = tuple(ordinary_fields...) + if !isempty(ordinary_fields) # ie not reduced, and with default_indices grid = first(ordinary_fields).grid fill_halo_regions!(map(data, fields), From 0f487733d25e489a937a6e2d62bd5759c1765ed6 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 15:04:57 -0600 Subject: [PATCH 09/29] Tiny cleanup ; --- src/TimeSteppers/quasi_adams_bashforth_2.jl | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index 7836662afa..04097790fd 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -52,9 +52,6 @@ end reset!(timestepper::QuasiAdamsBashforth2TimeStepper) = nothing -@inline zero!(::Nothing) = nothing -@inline zero!(field) = fill!(parent(field), 0) - ##### ##### Time steppping ##### @@ -99,7 +96,7 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt else ab2_step!(model, Δt) end - + tick!(model.clock, Δt) model.clock.last_Δt = Δt model.clock.last_stage_Δt = Δt # just one stage @@ -200,5 +197,3 @@ end @kernel euler_step_field!(::FunctionField, Δt, Gⁿ) = nothing - - From 9b9575f9b35ef3eff5fe4c8e34e514858ba4dfc6 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 15:08:10 -0600 Subject: [PATCH 10/29] Another bug --- src/Fields/field_tuples.jl | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index 048a89011e..39d3122d53 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -61,13 +61,13 @@ end function tupled_fill_halo_regions!(fields, args...; kwargs...) ordinary_fields = Field[] - for field in fields - if !isnothing(boundary_conditions(field)) - if field isa ReducedField || !(field isa FullField) + for f in fields + if !isnothing(boundary_conditions(f)) + if f isa ReducedField || !(f isa FullField) # Windowed and reduced fields - fill_halo_regions!(field, args...; kwargs...) + fill_halo_regions!(f, args...; kwargs...) else - push!(ordinary_fields, field) + push!(ordinary_fields, f) end end end @@ -76,10 +76,10 @@ function tupled_fill_halo_regions!(fields, args...; kwargs...) if !isempty(ordinary_fields) # ie not reduced, and with default_indices grid = first(ordinary_fields).grid - fill_halo_regions!(map(data, fields), - map(boundary_conditions, fields), + fill_halo_regions!(map(data, ordinary_fields), + map(boundary_conditions, ordinary_fields), default_indices(3), - map(instantiated_location, fields), + map(instantiated_location, ordinary_fields), grid, args...; kwargs...) end From 423281bdacfd05062f7eaed6c46bf869ede03b6d Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 16:44:35 -0600 Subject: [PATCH 11/29] Back to pure ab2 --- .../explicit_free_surface.jl | 11 ++- .../implicit_free_surface.jl | 4 +- .../split_explicit_free_surface_kernels.jl | 72 +++++++++++-------- src/TimeSteppers/quasi_adams_bashforth_2.jl | 56 +++++---------- 4 files changed, 69 insertions(+), 74 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl index 56fd8d0049..7d44fe9b44 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl @@ -32,7 +32,6 @@ on_architecture(to, free_surface::ExplicitFreeSurface) = function materialize_free_surface(free_surface::ExplicitFreeSurface{Nothing}, velocities, grid) η = free_surface_displacement_field(velocities, free_surface, grid) g = convert(eltype(grid), free_surface.gravitational_acceleration) - return ExplicitFreeSurface(η, g) end @@ -62,10 +61,16 @@ explicit_ab2_step_free_surface!(free_surface, model, Δt, χ) = ##### Kernel ##### -@kernel function _explicit_ab2_step_free_surface!(η, Δt, χ::FT, Gηⁿ, Gη⁻, Nz) where FT +@kernel function _explicit_ab2_step_free_surface!(η, Δt, χ, Gηⁿ, Gη⁻, Nz) i, j = @index(Global, NTuple) + FT = typeof(χ) + one_point_five = convert(FT, 1.5) + oh_point_five = convert(FT, 0.5) + not_euler = χ != convert(FT, -0.5) @inbounds begin - η[i, j, Nz+1] += Δt * ((FT(1.5) + χ) * Gηⁿ[i, j, Nz+1] - (FT(0.5) + χ) * Gη⁻[i, j, Nz+1]) + Gη = (one_point_five + χ) * Gηⁿ[i, j, Nz+1] - (oh_point_five + χ) * Gη⁻[i, j, Nz+1] * not_euler + η[i, j, Nz+1] += Δt * Gη end end + diff --git a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl index e54ad5c49c..99d0d4def6 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl @@ -124,9 +124,9 @@ build_implicit_step_solver(::Val{:Default}, grid, settings, gravitational_accele Implicitly step forward η. """ ab2_step_free_surface!(free_surface::ImplicitFreeSurface, model, Δt, χ) = - implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt, χ) + implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt) -function implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt, χ) +function implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt) η = free_surface.η g = free_surface.gravitational_acceleration rhs = free_surface.implicit_step_solver.right_hand_side diff --git a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl index 91ffed889f..9fb98656ce 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/split_explicit_free_surface_kernels.jl @@ -293,14 +293,14 @@ end Explicitly step forward η in substeps. """ ab2_step_free_surface!(free_surface::SplitExplicitFreeSurface, model, Δt, χ) = - split_explicit_free_surface_step!(free_surface, model, Δt, χ) + split_explicit_free_surface_step!(free_surface, model, Δt) function initialize_free_surface!(sefs::SplitExplicitFreeSurface, grid, velocities) @apply_regionally compute_barotropic_mode!(sefs.state.U̅, sefs.state.V̅, grid, velocities.u, velocities.v) fill_halo_regions!((sefs.state.U̅, sefs.state.V̅, sefs.η)) end -function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurface, model, Δt, χ) +function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurface, model, Δt) # Note: free_surface.η.grid != model.grid for DistributedSplitExplicitFreeSurface # since halo_size(free_surface.η.grid) != halo_size(model.grid) @@ -313,14 +313,14 @@ function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurfac settings = free_surface.settings Nsubsteps = calculate_substeps(settings.substepping, Δt) - # barotropic time step as fraction of baroclinic step and averaging weights + # Barotropic time step as fraction of baroclinic step and averaging weights fractional_Δt, weights = calculate_adaptive_settings(settings.substepping, Nsubsteps) Nsubsteps = length(weights) - # barotropic time step in seconds + # Barotropic time step in seconds Δτᴮ = fractional_Δt * Δt - # reset free surface averages + # Reset free surface averages @apply_regionally begin initialize_free_surface_state!(free_surface.state, free_surface.η, settings.timestepper) @@ -334,7 +334,7 @@ function split_explicit_free_surface_step!(free_surface::SplitExplicitFreeSurfac fields_to_fill = (free_surface.state.U̅, free_surface.state.V̅) fill_halo_regions!(fields_to_fill; async = true) - # Preparing velocities for the barotropic correction + # Prepare velocities for the barotropic correction @apply_regionally begin mask_immersed_field!(model.velocities.u) mask_immersed_field!(model.velocities.v) @@ -415,40 +415,54 @@ function iterate_split_explicit!(free_surface, grid, Δτᴮ, weights, ::Val{Nsu end # Calculate RHS for the barotropic time step. -@kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, ::Nothing, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) +@kernel function _compute_integrated_tendencies!(Gᵁ, Gⱽ, grid, ::Nothing, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) i, j = @index(Global, NTuple) k_top = grid.Nz + 1 - @inbounds Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + @inbounds begin + Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u_tendency_increment(i, j, 1, grid, Gu⁻, Guⁿ, χ) + Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v_tendency_increment(i, j, 1, grid, Gv⁻, Gvⁿ, χ) - for k in 2:grid.Nz - @inbounds Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) - end + for k in 2:grid.Nz + Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u_tendency_increment(i, j, k, grid, Gu⁻, Guⁿ, χ) + Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v_tendency_increment(i, j, k, grid, Gv⁻, Gvⁿ, χ) + end + end end # Calculate RHS for the barotropic time step.q -@kernel function _compute_integrated_ab2_tendencies!(Gᵁ, Gⱽ, grid, active_cells_map, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) +@kernel function _compute_integrated_tendencies!(Gᵁ, Gⱽ, grid, active_cells_map, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) idx = @index(Global, Linear) i, j = active_linear_index_to_tuple(idx, active_cells_map) k_top = grid.Nz+1 - @inbounds Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * ab2_step_Gu(i, j, 1, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * ab2_step_Gv(i, j, 1, grid, Gv⁻, Gvⁿ, χ) + @inbounds begin + Gᵁ[i, j, k_top-1] = Δzᶠᶜᶜ(i, j, 1, grid) * u_tendency_increment(i, j, 1, grid, Gu⁻, Guⁿ, χ) + Gⱽ[i, j, k_top-1] = Δzᶜᶠᶜ(i, j, 1, grid) * v_tendency_increment(i, j, 1, grid, Gv⁻, Gvⁿ, χ) - for k in 2:grid.Nz - @inbounds Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * ab2_step_Gu(i, j, k, grid, Gu⁻, Guⁿ, χ) - @inbounds Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * ab2_step_Gv(i, j, k, grid, Gv⁻, Gvⁿ, χ) - end + for k in 2:grid.Nz + Gᵁ[i, j, k_top-1] += Δzᶠᶜᶜ(i, j, k, grid) * u_tendency_increment(i, j, k, grid, Gu⁻, Guⁿ, χ) + Gⱽ[i, j, k_top-1] += Δzᶜᶠᶜ(i, j, k, grid) * v_tendency_increment(i, j, k, grid, Gv⁻, Gvⁿ, χ) + end + end end -@inline ab2_step_Gu(i, j, k, grid, G⁻, Gⁿ, χ::FT) where FT = - @inbounds ifelse(peripheral_node(i, j, k, grid, f, c, c), zero(grid), (convert(FT, 1.5) + χ) * Gⁿ[i, j, k] - G⁻[i, j, k] * (convert(FT, 0.5) + χ)) - -@inline ab2_step_Gv(i, j, k, grid, G⁻, Gⁿ, χ::FT) where FT = - @inbounds ifelse(peripheral_node(i, j, k, grid, c, f, c), zero(grid), (convert(FT, 1.5) + χ) * Gⁿ[i, j, k] - G⁻[i, j, k] * (convert(FT, 0.5) + χ)) +@inline function u_tendency_increment(i, j, k, grid, G⁻, Gⁿ, χ) + FT = typeof(χ) + peripheral = peripheral_node(i, j, k, grid, f, c, c) + not_euler = χ != convert(FT, -0.5) + ΔGu = @inbounds (convert(FT, 1.5) + χ) * Gⁿ[i, j, k] - (convert(FT, 0.5) + χ) * G⁻[i, j, k] * not_euler + return ifelse(peripheral, zero(grid), ΔGu) +end +@inline function v_tendency_increment(i, j, k, grid, G⁻, Gⁿ, χ) + FT = typeof(χ) + peripheral = peripheral_node(i, j, k, grid, c, f, c) + not_euler = χ != convert(FT, -0.5) + ΔGv = @inbounds (convert(FT, 1.5) + χ) * Gⁿ[i, j, k] - (convert(FT, 0.5) + χ) * G⁻[i, j, k] * not_euler + return ifelse(peripheral, zero(grid), ΔGv) +end + # Setting up the RHS for the barotropic step (tendencies of the barotropic velocity components) # This function is called after `calculate_tendency` and before `ab2_step_velocities!` function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) @@ -464,17 +478,15 @@ function setup_free_surface!(model, free_surface::SplitExplicitFreeSurface, χ) @apply_regionally setup_split_explicit_tendency!(auxiliary, model.grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) fields_to_fill = (auxiliary.Gᵁ, auxiliary.Gⱽ) - fill_halo_regions!(fields_to_fill; async = true) + fill_halo_regions!(fields_to_fill; async=true) return nothing end @inline function setup_split_explicit_tendency!(auxiliary, grid, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) active_cells_map = retrieve_surface_active_cells_map(grid) - - launch!(architecture(grid), grid, :xy, _compute_integrated_ab2_tendencies!, auxiliary.Gᵁ, auxiliary.Gⱽ, grid, - active_cells_map, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ; active_cells_map) - + kernel_args = (auxiliary.Gᵁ, auxiliary.Gⱽ, grid, active_cells_map, Gu⁻, Gv⁻, Guⁿ, Gvⁿ, χ) + launch!(architecture(grid), grid, :xy, _compute_integrated_tendencies!, kernel_args...; active_cells_map) return nothing end diff --git a/src/TimeSteppers/quasi_adams_bashforth_2.jl b/src/TimeSteppers/quasi_adams_bashforth_2.jl index 04097790fd..3471132ec3 100644 --- a/src/TimeSteppers/quasi_adams_bashforth_2.jl +++ b/src/TimeSteppers/quasi_adams_bashforth_2.jl @@ -90,12 +90,15 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt euler = euler || (Δt != model.clock.last_Δt) euler && @debug "Taking a forward Euler step." + # If euler, then set χ = -0.5 + minus_point_five = convert(eltype(model.grid), -0.5) + ab2_timestepper = model.timestepper + χ = ifelse(euler, minus_point_five, ab2_timestepper.χ) + χ₀ = ab2_timestepper.χ # Save initial value + ab2_timestepper.χ = χ + # Full step for tracers, fractional step for velocities. - if euler - euler_step!(model, Δt) - else - ab2_step!(model, Δt) - end + ab2_step!(model, Δt) tick!(model.clock, Δt) model.clock.last_Δt = Δt @@ -107,6 +110,9 @@ function time_step!(model::AbstractModel{<:QuasiAdamsBashforth2TimeStepper}, Δt update_state!(model, callbacks; compute_tendencies=true) step_lagrangian_particles!(model, Δt) + # Return χ to initial value + ab2_timestepper.χ = χ₀ + return nothing end @@ -122,7 +128,6 @@ end """ Generic implementation. """ function ab2_step!(model, Δt) - grid = model.grid arch = architecture(grid) model_fields = prognostic_fields(model) @@ -157,43 +162,16 @@ Time step velocity fields via the 2nd-order quasi Adams-Bashforth method i, j, k = @index(Global, NTuple) FT = typeof(χ) + Δt = convert(FT, Δt) one_point_five = convert(FT, 1.5) oh_point_five = convert(FT, 0.5) + not_euler = χ != convert(FT, -0.5) # use to prevent corruption by leftover NaNs in G⁻ - @inbounds u[i, j, k] += convert(FT, Δt) * ((one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k]) -end - -@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻) = nothing - -function euler_step!(model, Δt) - grid = model.grid - arch = architecture(grid) - model_fields = prognostic_fields(model) - - for (i, field) in enumerate(model_fields) - kernel_args = (field, Δt, model.timestepper.Gⁿ[i]) - launch!(arch, grid, :xyz, euler_step_field!, kernel_args...; exclude_periphery=true) - - # TODO: function tracer_index(model, field_index) = field_index - 3, etc... - tracer_index = Val(i - 3) # assumption - - implicit_step!(field, - model.timestepper.implicit_solver, - model.closure, - model.diffusivity_fields, - tracer_index, - model.clock, - Δt) + @inbounds begin + Gu = (one_point_five + χ) * Gⁿ[i, j, k] - (oh_point_five + χ) * G⁻[i, j, k] * not_euler + u[i, j, k] += Δt * Gu end - - return nothing end -@kernel function euler_step_field!(u, Δt, Gⁿ) - i, j, k = @index(Global, NTuple) - FT = eltype(u) - @inbounds u[i, j, k] += convert(FT, Δt) * Gⁿ[i, j, k] -end - -@kernel euler_step_field!(::FunctionField, Δt, Gⁿ) = nothing +@kernel ab2_step_field!(::FunctionField, Δt, χ, Gⁿ, G⁻) = nothing From fe0f9cfc36c21e58dc4558de54b9a86f3f0e8e62 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 16:50:21 -0600 Subject: [PATCH 12/29] Make set! more differentiable --- src/Fields/set!.jl | 1 + .../set_hydrostatic_free_surface_model.jl | 5 +++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index e4b8a29cb6..bbdc65c3e7 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -33,6 +33,7 @@ end set!(u::Field, f::Function) = set_to_function!(u, f) set!(u::Field, a::Union{Array, CuArray, OffsetArray}) = set_to_array!(u, a) set!(u::Field, v::Field) = set_to_field!(u, v) +@inline set!(u::Field, a::Number) = fill!(u, a) function set!(u::Field, v) u .= v # fallback diff --git a/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl b/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl index f216d312c7..a9a2c26006 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/set_hydrostatic_free_surface_model.jl @@ -33,7 +33,7 @@ T₀[T₀ .< 0.5] .= 0 set!(model, u=u₀, v=v₀, T=T₀) -model.velocities.u +@model.velocities.u # output @@ -61,7 +61,8 @@ model.velocities.u end initialize!(model) - update_state!(model; compute_tendencies = false) + update_state!(model; compute_tendencies=false) return nothing end + From e66af6438363971c73b1a5f4e71cfba7a0b4dd30 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 19:18:11 -0600 Subject: [PATCH 13/29] Restore syntax for some reason --- .../HydrostaticFreeSurfaceModels/implicit_free_surface.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl index 99d0d4def6..e54ad5c49c 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/implicit_free_surface.jl @@ -124,9 +124,9 @@ build_implicit_step_solver(::Val{:Default}, grid, settings, gravitational_accele Implicitly step forward η. """ ab2_step_free_surface!(free_surface::ImplicitFreeSurface, model, Δt, χ) = - implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt) + implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt, χ) -function implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt) +function implicit_free_surface_step!(free_surface::ImplicitFreeSurface, model, Δt, χ) η = free_surface.η g = free_surface.gravitational_acceleration rhs = free_surface.implicit_step_solver.right_hand_side From 15b582a1f7d93c159cded91de2cd48d2d2231c96 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 25 Oct 2024 19:20:07 -0600 Subject: [PATCH 14/29] More appropriate test w tolerance --- test/test_enzyme.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index fa09e5a1f7..7136181b1e 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -301,7 +301,7 @@ end # Use a manual finite difference to compute a gradient Δν = 1e-6 - ν1 = ν₀ + ν1 = ν₀ + Δν ν2 = ν1 + Δν e1 = viscous_hydrostatic_turbulence(ν1, model, u_init, v_init, Δt, u_truth, v_truth) e2 = viscous_hydrostatic_turbulence(ν2, model, u_init, v_init, Δt, u_truth, v_truth) From eced1e51f704345fe6dc72a7f803cd985964ae3e Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Thu, 31 Oct 2024 16:03:15 -0600 Subject: [PATCH 15/29] hopefully fix type stability issue with set!(u, ::Number) --- src/Fields/set!.jl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index bbdc65c3e7..a9cff7a0b0 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -33,7 +33,11 @@ end set!(u::Field, f::Function) = set_to_function!(u, f) set!(u::Field, a::Union{Array, CuArray, OffsetArray}) = set_to_array!(u, a) set!(u::Field, v::Field) = set_to_field!(u, v) -@inline set!(u::Field, a::Number) = fill!(u, a) + +function set!(u::Field, a::Number) + fill!(parent(u), a) + return u # return u, not parent(u), for type-stability +end function set!(u::Field, v) u .= v # fallback From 7cbcadf584448aa3337681d485b69e7f2e43b112 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Fri, 1 Nov 2024 22:36:18 -0600 Subject: [PATCH 16/29] Updates --- src/Fields/set!.jl | 2 +- test/test_abstract_operations.jl | 5 ++--- test/test_enzyme.jl | 7 ++++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Fields/set!.jl b/src/Fields/set!.jl index a9cff7a0b0..3958c098fc 100644 --- a/src/Fields/set!.jl +++ b/src/Fields/set!.jl @@ -35,7 +35,7 @@ set!(u::Field, a::Union{Array, CuArray, OffsetArray}) = set_to_array!(u, a) set!(u::Field, v::Field) = set_to_field!(u, v) function set!(u::Field, a::Number) - fill!(parent(u), a) + fill!(interior(u), a) # note all other set! only change interior return u # return u, not parent(u), for type-stability end diff --git a/test/test_abstract_operations.jl b/test/test_abstract_operations.jl index c9942895c8..6594f628f8 100644 --- a/test/test_abstract_operations.jl +++ b/test/test_abstract_operations.jl @@ -77,9 +77,8 @@ function x_derivative_cell(arch) end function times_x_derivative(a, b, location, i, j, k, answer) - a∇b = @at location b * ∂x(a) - - return CUDA.@allowscalar a∇b[i, j, k] == answer + b∇a = @at location b * ∂x(a) + return CUDA.@allowscalar b∇a[i, j, k] == answer end for arch in archs diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 84308b2fd4..c282dd94c3 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -77,7 +77,7 @@ end fwd, rev = Enzyme.autodiff_thunk(ReverseSplitWithPrimal, Const{typeof(f)}, Duplicated, typeof(Const(grid))) tape, primal, shadowp = fwd(Const(f), Const(grid)) - @show tape primal shadowp + # @show tape primal shadowp shadow = if shadowp isa Base.RefValue shadowp[] @@ -215,7 +215,7 @@ end models = [model_no_bc, model_bc] - @show "Advection-diffusion results, first without then with flux BC" + @info "Advection-diffusion results, first without then with flux BC" for i in 1:2 # Compute derivative by hand @@ -262,7 +262,8 @@ function viscous_hydrostatic_turbulence(ν, model, u_init, v_init, Δt, u_truth, model.clock.time = 0 model.clock.last_Δt = Inf set_viscosity!(model, ν) - set!(model, u=u_init, v=v_init, η=0) + set!(model, u=u_init, v=v_init) + fill!(model.free_surface.η, 0) # Step it forward for n = 1:10 From 397abe439eb1a2b962046d6fb1e03b0d3df800f8 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Mon, 4 Nov 2024 14:15:34 -0700 Subject: [PATCH 17/29] Update Enzyme --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 4d80cee5b7..45b005cea6 100644 --- a/Project.toml +++ b/Project.toml @@ -50,7 +50,7 @@ CubedSphere = "0.2, 0.3" Dates = "1.9" Distances = "0.10" DocStringExtensions = "0.8, 0.9" -Enzyme = "0.13.3" +Enzyme = "0.13.14" FFTW = "1" Glob = "1.3" IncompleteLU = "0.2" From 8f65907e1ddad71cf07f5349518c97d88dd589f0 Mon Sep 17 00:00:00 2001 From: Gregory Wagner Date: Mon, 4 Nov 2024 19:44:49 -0700 Subject: [PATCH 18/29] Down Enzyme --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 45b005cea6..29d7df13be 100644 --- a/Project.toml +++ b/Project.toml @@ -50,7 +50,7 @@ CubedSphere = "0.2, 0.3" Dates = "1.9" Distances = "0.10" DocStringExtensions = "0.8, 0.9" -Enzyme = "0.13.14" +Enzyme = "0.13.13" FFTW = "1" Glob = "1.3" IncompleteLU = "0.2" From 7574a4de8650f2fdd7c9cc42195432bd0351aa03 Mon Sep 17 00:00:00 2001 From: jlk9 Date: Wed, 6 Nov 2024 18:15:13 -0600 Subject: [PATCH 19/29] Changed FD computation to central difference instead of forward difference. Central difference FD appears consistent with autodiff --- test/test_enzyme.jl | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index c282dd94c3..321d65eef4 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -319,9 +319,9 @@ end Δν = 1e-6 ν1 = ν₀ + Δν ν2 = ν1 + Δν - e1 = viscous_hydrostatic_turbulence(ν1, model, u_init, v_init, Δt, u_truth, v_truth) + e1 = viscous_hydrostatic_turbulence(ν₀, model, u_init, v_init, Δt, u_truth, v_truth) e2 = viscous_hydrostatic_turbulence(ν2, model, u_init, v_init, Δt, u_truth, v_truth) - ΔeΔν = (e2 - e1) / Δν + ΔeΔν = (e2 - e1) / 2Δν @info "Finite difference computed: $ΔeΔν" @@ -345,6 +345,8 @@ end tol = 1e-1 rel_error = abs(dedν[1][1] - ΔeΔν) / abs(ΔeΔν) + @show dedν + @show ΔeΔν @test rel_error < tol end From f20c900fbeea79783b1ca356033239d4487fd17c Mon Sep 17 00:00:00 2001 From: jlk9 Date: Thu, 7 Nov 2024 09:21:18 -0600 Subject: [PATCH 20/29] Switched named of variables for FD comparison --- test/test_enzyme.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 321d65eef4..fd7e6a980c 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -317,9 +317,9 @@ end # Use a manual finite difference to compute a gradient Δν = 1e-6 - ν1 = ν₀ + Δν - ν2 = ν1 + Δν - e1 = viscous_hydrostatic_turbulence(ν₀, model, u_init, v_init, Δt, u_truth, v_truth) + ν1 = ν₀ - Δν + ν2 = ν₀ + Δν + e1 = viscous_hydrostatic_turbulence(ν1, model, u_init, v_init, Δt, u_truth, v_truth) e2 = viscous_hydrostatic_turbulence(ν2, model, u_init, v_init, Δt, u_truth, v_truth) ΔeΔν = (e2 - e1) / 2Δν @@ -332,7 +332,7 @@ end dmodel = Enzyme.make_zero(model) dedν = autodiff(set_runtime_activity(Enzyme.Reverse), viscous_hydrostatic_turbulence, - Active(ν1), + Active(ν₀), Duplicated(model, dmodel), Const(u_init), Const(v_init), From c35a751d572f4d9d131b42265ff0fbef7cf85dbd Mon Sep 17 00:00:00 2001 From: jlk9 Date: Thu, 7 Nov 2024 11:17:17 -0600 Subject: [PATCH 21/29] =?UTF-8?q?Set=20AD=20test=20to=20be=20at=20=CE=BD1?= =?UTF-8?q?=20=3D=20=CE=BD=E2=82=80=20+=20=CE=94=CE=BD=20to=20avoid=20a=20?= =?UTF-8?q?global=20minimum?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_enzyme.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index fd7e6a980c..869349e42c 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -315,24 +315,25 @@ end u_truth = deepcopy(model.velocities.u) v_truth = deepcopy(model.velocities.v) - # Use a manual finite difference to compute a gradient + # Use a manual finite difference (central difference) to compute a gradient at ν1 = ν₀ + Δν Δν = 1e-6 - ν1 = ν₀ - Δν - ν2 = ν₀ + Δν - e1 = viscous_hydrostatic_turbulence(ν1, model, u_init, v_init, Δt, u_truth, v_truth) + ν0 = ν₀ + ν1 = ν₀ + Δν + ν2 = ν₀ + 2Δν + e0 = viscous_hydrostatic_turbulence(ν0, model, u_init, v_init, Δt, u_truth, v_truth) e2 = viscous_hydrostatic_turbulence(ν2, model, u_init, v_init, Δt, u_truth, v_truth) - ΔeΔν = (e2 - e1) / 2Δν + ΔeΔν = (e2 - e0) / 2Δν @info "Finite difference computed: $ΔeΔν" @info "Now with autodiff..." start_time = time_ns() - # Use autodiff to compute a gradient + # Use autodiff to compute a gradient at ν1 = ν₀ + Δν dmodel = Enzyme.make_zero(model) dedν = autodiff(set_runtime_activity(Enzyme.Reverse), viscous_hydrostatic_turbulence, - Active(ν₀), + Active(ν1), Duplicated(model, dmodel), Const(u_init), Const(v_init), From 9d254fe9454cc50832b7ad6e1154a8fa837da122 Mon Sep 17 00:00:00 2001 From: jlk9 Date: Thu, 7 Nov 2024 12:35:54 -0600 Subject: [PATCH 22/29] Removed extraneous @show statements --- test/test_enzyme.jl | 2 -- 1 file changed, 2 deletions(-) diff --git a/test/test_enzyme.jl b/test/test_enzyme.jl index 869349e42c..acb54187dc 100644 --- a/test/test_enzyme.jl +++ b/test/test_enzyme.jl @@ -346,8 +346,6 @@ end tol = 1e-1 rel_error = abs(dedν[1][1] - ΔeΔν) / abs(ΔeΔν) - @show dedν - @show ΔeΔν @test rel_error < tol end From 11809b0f36588b03d63bf13fcb1d3f9436233103 Mon Sep 17 00:00:00 2001 From: jlk9 Date: Sat, 9 Nov 2024 19:16:56 -0600 Subject: [PATCH 23/29] Testing to see if removing grid from the handle of tupled_fill_halo_regions! is the issue --- src/Fields/field_tuples.jl | 15 +++++++++++++-- ...update_hydrostatic_free_surface_model_state.jl | 2 +- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index 39d3122d53..c9e48ecbed 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -55,10 +55,21 @@ Fill halo regions for all `fields`. The algorithm: """ function fill_halo_regions!(maybe_nested_tuple::Union{NamedTuple, Tuple}, args...; kwargs...) flattened = flattened_unique_values(maybe_nested_tuple) - return tupled_fill_halo_regions!(flattened, args...; kwargs...) + + # Check to find grid: + for f in flattened + if !isnothing(boundary_conditions(f)) + if !(f isa ReducedField) && (f isa FullField) + grid = f.grid + break + end + end + end + + return tupled_fill_halo_regions!(flattened, grid, args...; kwargs...) end -function tupled_fill_halo_regions!(fields, args...; kwargs...) +function tupled_fill_halo_regions!(fields, grid, args...; kwargs...) ordinary_fields = Field[] for f in fields diff --git a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl index f8ca935c96..66e9eca3a1 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/update_hydrostatic_free_surface_model_state.jl @@ -38,7 +38,7 @@ function update_state!(model::HydrostaticFreeSurfaceModel, grid, callbacks; comp # Update the boundary conditions @apply_regionally update_boundary_condition!(fields(model), model) - tupled_fill_halo_regions!(prognostic_fields(model), model.clock, fields(model), async=true) + tupled_fill_halo_regions!(prognostic_fields(model), grid, model.clock, fields(model), async=true) @apply_regionally replace_horizontal_vector_halos!(model.velocities, model.grid) @apply_regionally compute_auxiliaries!(model) From ee6361b6caf8d95090de0f6bec7dd4bdbb116ea4 Mon Sep 17 00:00:00 2001 From: jlk9 Date: Sat, 9 Nov 2024 19:48:23 -0600 Subject: [PATCH 24/29] Another test with grid handles --- src/Fields/field_tuples.jl | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index c9e48ecbed..2a14323d01 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -57,14 +57,7 @@ function fill_halo_regions!(maybe_nested_tuple::Union{NamedTuple, Tuple}, args.. flattened = flattened_unique_values(maybe_nested_tuple) # Check to find grid: - for f in flattened - if !isnothing(boundary_conditions(f)) - if !(f isa ReducedField) && (f isa FullField) - grid = f.grid - break - end - end - end + grid = flattened[1].grid return tupled_fill_halo_regions!(flattened, grid, args...; kwargs...) end From 22d4a9584d755479c34f64ef8551647c7ead8674 Mon Sep 17 00:00:00 2001 From: Joseph Kump Date: Sun, 10 Nov 2024 11:30:29 -0600 Subject: [PATCH 25/29] Update src/Fields/field_tuples.jl Co-authored-by: Gregory L. Wagner --- src/Fields/field_tuples.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index 2a14323d01..78bd008f47 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -56,7 +56,7 @@ Fill halo regions for all `fields`. The algorithm: function fill_halo_regions!(maybe_nested_tuple::Union{NamedTuple, Tuple}, args...; kwargs...) flattened = flattened_unique_values(maybe_nested_tuple) - # Check to find grid: + # Look for grid within the flattened field tuple: grid = flattened[1].grid return tupled_fill_halo_regions!(flattened, grid, args...; kwargs...) From c9d9942933a5b93fc00a9e97c12d6a6b0c76a169 Mon Sep 17 00:00:00 2001 From: jlk9 Date: Mon, 11 Nov 2024 08:16:41 -0600 Subject: [PATCH 26/29] More robust test for grid --- src/Fields/field_tuples.jl | 39 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 37 insertions(+), 2 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index 78bd008f47..c0d7aa6ccf 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -57,11 +57,46 @@ function fill_halo_regions!(maybe_nested_tuple::Union{NamedTuple, Tuple}, args.. flattened = flattened_unique_values(maybe_nested_tuple) # Look for grid within the flattened field tuple: - grid = flattened[1].grid + for f in flattened + if isdefined(f, :grid) + grid = f.grid + return tupled_fill_halo_regions!(flattened, grid, args...; kwargs...) + end + end + + return tupled_fill_halo_regions!(flattened, args...; kwargs...) +end - return tupled_fill_halo_regions!(flattened, grid, args...; kwargs...) +# Version where we find grid amongst ordinary fields: +function tupled_fill_halo_regions!(fields, args...; kwargs...) + + ordinary_fields = Field[] + for f in fields + if !isnothing(boundary_conditions(f)) + if f isa ReducedField || !(f isa FullField) + # Windowed and reduced fields + fill_halo_regions!(f, args...; kwargs...) + else + push!(ordinary_fields, f) + end + end + end + + ordinary_fields = tuple(ordinary_fields...) + + if !isempty(ordinary_fields) # ie not reduced, and with default_indices + grid = first(ordinary_fields).grid + fill_halo_regions!(map(data, ordinary_fields), + map(boundary_conditions, ordinary_fields), + default_indices(3), + map(instantiated_location, ordinary_fields), + grid, args...; kwargs...) + end + + return nothing end +# Version where grid is provided: function tupled_fill_halo_regions!(fields, grid, args...; kwargs...) ordinary_fields = Field[] From 2a239e97c101d806c696d169eb7d49b88c276e5e Mon Sep 17 00:00:00 2001 From: jlk9 Date: Mon, 11 Nov 2024 16:55:45 -0600 Subject: [PATCH 27/29] Modularizing different tupled_fill_halo_regions! methods --- src/Fields/field_tuples.jl | 43 ++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index c0d7aa6ccf..85170fe793 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -70,19 +70,7 @@ end # Version where we find grid amongst ordinary fields: function tupled_fill_halo_regions!(fields, args...; kwargs...) - ordinary_fields = Field[] - for f in fields - if !isnothing(boundary_conditions(f)) - if f isa ReducedField || !(f isa FullField) - # Windowed and reduced fields - fill_halo_regions!(f, args...; kwargs...) - else - push!(ordinary_fields, f) - end - end - end - - ordinary_fields = tuple(ordinary_fields...) + ordinary_fields = produce_ordinary_fields(fields, args...; kwargs) if !isempty(ordinary_fields) # ie not reduced, and with default_indices grid = first(ordinary_fields).grid @@ -99,6 +87,22 @@ end # Version where grid is provided: function tupled_fill_halo_regions!(fields, grid, args...; kwargs...) + ordinary_fields = produce_ordinary_fields(fields, args...; kwargs) + + if !isempty(ordinary_fields) # ie not reduced, and with default_indices + fill_halo_regions!(map(data, ordinary_fields), + map(boundary_conditions, ordinary_fields), + default_indices(3), + map(instantiated_location, ordinary_fields), + grid, args...; kwargs...) + end + + return nothing +end + +# Helper function to create the tuple of ordinary fields: +@inline function produce_ordinary_fields(fields, args...; kwargs) + ordinary_fields = Field[] for f in fields if !isnothing(boundary_conditions(f)) @@ -111,18 +115,7 @@ function tupled_fill_halo_regions!(fields, grid, args...; kwargs...) end end - ordinary_fields = tuple(ordinary_fields...) - - if !isempty(ordinary_fields) # ie not reduced, and with default_indices - grid = first(ordinary_fields).grid - fill_halo_regions!(map(data, ordinary_fields), - map(boundary_conditions, ordinary_fields), - default_indices(3), - map(instantiated_location, ordinary_fields), - grid, args...; kwargs...) - end - - return nothing + return tuple(ordinary_fields...) end ##### From df48234fa4d34282b775c5d904ffb8387b9ebd10 Mon Sep 17 00:00:00 2001 From: Joseph Kump Date: Tue, 12 Nov 2024 12:28:16 -0600 Subject: [PATCH 28/29] Update src/Fields/field_tuples.jl Co-authored-by: Gregory L. Wagner --- src/Fields/field_tuples.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Fields/field_tuples.jl b/src/Fields/field_tuples.jl index 85170fe793..cc75269b65 100644 --- a/src/Fields/field_tuples.jl +++ b/src/Fields/field_tuples.jl @@ -85,7 +85,7 @@ function tupled_fill_halo_regions!(fields, args...; kwargs...) end # Version where grid is provided: -function tupled_fill_halo_regions!(fields, grid, args...; kwargs...) +function tupled_fill_halo_regions!(fields, grid::AbstractGrid, args...; kwargs...) ordinary_fields = produce_ordinary_fields(fields, args...; kwargs) From 8947a3be1bcc43f18f6187966afb4c32240a4921 Mon Sep 17 00:00:00 2001 From: jlk9 Date: Sat, 23 Nov 2024 13:34:22 -0500 Subject: [PATCH 29/29] Fixed variable name conflict error --- .../explicit_free_surface.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl index 260cf553aa..1362417df6 100644 --- a/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl +++ b/src/Models/HydrostaticFreeSurfaceModels/explicit_free_surface.jl @@ -84,12 +84,12 @@ explicit_ab2_step_free_surface!(free_surface, model, Δt, χ) = @inbounds η[i, j, Nz+1] += ζⁿ * η⁻[i, j, k] + γⁿ * (η[i, j, k] + convert(FT, Δt) * Gⁿ[i, j, k]) end -@kernel function _explicit_ab2_step_free_surface!(η, Δt, χ, Gηⁿ, Gη⁻, Nz) where FT +@kernel function _explicit_ab2_step_free_surface!(η, Δt, χ, Gηⁿ, Gη⁻, Nz) i, j = @index(Global, NTuple) - FT = typeof(χ) - one_point_five = convert(FT, 1.5) - oh_point_five = convert(FT, 0.5) - not_euler = χ != convert(FT, -0.5) + FT0 = typeof(χ) + one_point_five = convert(FT0, 1.5) + oh_point_five = convert(FT0, 0.5) + not_euler = χ != convert(FT0, -0.5) @inbounds begin Gη = (one_point_five + χ) * Gηⁿ[i, j, Nz+1] - (oh_point_five + χ) * Gη⁻[i, j, Nz+1] * not_euler η[i, j, Nz+1] += Δt * Gη