Improvements to model performance by reducing allocations #871

Open
ConnectedSystems wants to merge 11 commits into main
152 changes: 39 additions & 113 deletions src/ecosystem/corals/growth.jl
@@ -494,17 +494,20 @@ function _shift_distributions!(
# Weight distributions based on growth rate and cover
# Do from largest size class to size class 2
# (values for size class 1 gets replaced by recruitment process)
prop_growth = MVector{2,F}(0.0, 0.0)
for i in length(growth_rate):-1:2
# Skip size class if nothing is moving up
sum(@view(cover[(i - 1):i])) == 0.0 ? continue : false
sum(view(cover, (i - 1):i)) == 0.0 ? continue : false
Collaborator:

Is there any advantage of using view instead of the macro @view here? (I'm asking because the macro seems easier to read to me.)

Collaborator Author (@ConnectedSystems), Oct 4, 2024:

Now I'm not sure which is better between view() and @view() (from what I understand the difference should be minimal?), but they both beat @views, at least when slicing a single matrix (size: 79x3806).

[Benchmark screenshots for view(), @view(), and @views omitted.]

https://www.juliabloggers.com/the-view-and-views-macros-are-you-sure-you-know-how-they-work/

https://discourse.julialang.org/t/difference-between-view-and-view/40798
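
For readers skimming the thread, here is a minimal sketch of the three forms being compared, using the matrix size quoted above. The helper names and the BenchmarkTools usage are illustrative and not part of the PR:

```julia
using BenchmarkTools

A = rand(79, 3806)  # matrix size taken from the comment above

# view() is an ordinary function call returning a non-copying SubArray.
slice_view(A) = sum(view(A, :, 1:100))

# @view rewrites the single indexing expression A[:, 1:100] into the call above.
slice_at_view(A) = sum(@view A[:, 1:100])

# @views rewrites every indexing expression inside the wrapped expression.
slice_at_views(A) = @views sum(A[:, 1:100])

@btime slice_view($A);
@btime slice_at_view($A);
@btime slice_at_views($A);
```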


prop_growth = @views (cover[(i - 1):i] ./ sum(cover[(i - 1):i])) .*
prop_growth .= @views (cover[(i - 1):i] ./ sum(cover[(i - 1):i])) .*
(growth_rate[(i - 1):i] ./ sum(growth_rate[(i - 1):i]))
if sum(prop_growth) == 0.0
continue
end

dist_t[i] = sum(@view(dist_t[(i - 1):i]), Weights(prop_growth ./ sum(prop_growth)))
# Weighted sum
dist_t[i] =
(dist_t[i - 1] * prop_growth[1] + dist_t[i] * prop_growth[2]) / sum(prop_growth)
end

return nothing
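
The hunk above replaces a per-iteration slice-plus-Weights reduction with a preallocated two-element MVector and an explicit weighted mean. A sketch of the before/after pattern under that reading (helper names are illustrative, not from the codebase):

```julia
using StaticArrays, StatsBase

# Before: allocates a temporary normalised vector and a Weights wrapper per call.
weighted_pair(d, w) = sum(d, Weights(w ./ sum(w)))

# After: reuse a small fixed-size buffer and expand the two-term mean by hand,
# avoiding the temporary arrays entirely.
function weighted_pair!(buf::MVector{2,Float64}, d1, d2, w1, w2)
    buf[1] = w1
    buf[2] = w2
    return (d1 * buf[1] + d2 * buf[2]) / sum(buf)
end

buf = MVector{2,Float64}(0.0, 0.0)
weighted_pair([1.0, 2.0], [0.3, 0.7])    # 1.7
weighted_pair!(buf, 1.0, 2.0, 0.3, 0.7)  # 1.7
```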
@@ -577,86 +580,6 @@ function adjust_DHW_distribution!(
return nothing
end

"""
settler_DHW_tolerance!(c_mean_t_1::Matrix{F}, c_mean_t::Matrix{F}, k_area::Vector{F}, tp::AbstractMatrix{F}, settlers::Matrix{F}, fec_params_per_m²::Vector{F}, h²::F)::Nothing where {F<:Float64}

Update DHW tolerance means of newly settled corals into the distributions for the sink
locations.

# Arguments
- `c_mean_t_1` : DHW tolerance distribution for previous timestep (t - 1)
- `c_mean_t` : DHW tolerance distribution for current timestep (t)
- `k_area` : Absolute coral habitable area
- `tp` : Transition Probability matrix indicating the proportion of larvae that reaches a
sink location (columns) from a given source location (rows)
Columns should sum to 1
- `settlers` : recruited corals for each sink location
- `fec_params_per_m²` : Fecundity parameters for each species/size class combination
- `h²` : narrow-sense heritability
"""
function settler_DHW_tolerance!(
c_mean_t_1::Matrix{F},
c_mean_t::Matrix{F},
k_area::Vector{F},
tp::AbstractMatrix{F},
settlers::Matrix{F},
fec_params_per_m²::Vector{F},
h²::F,
n_sizes::Int64
)::Nothing where {F<:Float64}
# Potential sink locations (TODO: pass in later)
sink_loc_ids::Vector{Int64} = findall(k_area .> 0.0)

source_locs::BitVector = falses(length(k_area)) # cache for source locations
reproductive_sc::BitVector = falses(n_sizes) # cache for reproductive size classes

settler_sc::StepRange = 1:n_sizes:length(fec_params_per_m²)
for sink_loc in sink_loc_ids
if sum(@views(settlers[:, sink_loc])) .== 0.0
# Only update locations where recruitment occurred
continue
end

# Note: source locations can include the sink location (self-seeding)
source_locs .= @view(tp[:, sink_loc]) .> 0.0

# Calculate contribution to cover to determine weights for each species/group
w = @views settlers[:, sink_loc]' .* tp[source_locs, sink_loc].data
w_per_group = w ./ sum(w; dims=1)
replace!(w_per_group, NaN => 0.0)

# Determine new distribution mean for each species at all locations
for (sp, sc1) in enumerate(settler_sc)
sc1_end::UnitRange{Int64} = sc1:(sc1 + (n_sizes - 1))

# Get distribution mean of reproductive size classes at source locations
# recalling that source locations may include the sink location due to
# self-seeding.
reproductive_sc .= @view(fec_params_per_m²[sc1_end]) .> 0.0
settler_means::SubArray{Float64} = @view(
c_mean_t_1[sc1_end[reproductive_sc], source_locs]
)

# Determine weights based on contribution to recruitment.
# This weights the recruited corals by the size classes and source locations
# which contributed to recruitment.
if sum(w_per_group[:, sp]) > 0.0
ew::Vector{Float64} = repeat(
w_per_group[:, sp]; inner=count(reproductive_sc)
)

# Determine combined mean
# https://en.wikipedia.org/wiki/Mixture_distribution#Properties
recruit_μ::Float64 = sum(settler_means, Weights(ew ./ sum(ew)))

# Mean for generation t is determined through Breeder's equation
c_mean_t[sc1, sink_loc] = breeders(c_mean_t_1[sc1, sink_loc], recruit_μ, h²)
end
end
end

return nothing
end
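
The comments in both versions of this method point to the mixture-distribution mean and the Breeder's equation. A compact worked sketch of those two steps follows, assuming `breeders` takes the textbook form; the helper below is illustrative and not the package's actual definition:

```julia
using StatsBase

# Combined mean of the source distributions, weighted by their contribution to
# recruitment (https://en.wikipedia.org/wiki/Mixture_distribution#Properties).
recruit_mean(means, ew) = sum(means, Weights(ew ./ sum(ew)))

# Textbook Breeder's equation: the new mean shifts toward the recruit mean in
# proportion to narrow-sense heritability h².
breeders_sketch(parent_μ, recruit_μ, h²) = parent_μ + h² * (recruit_μ - parent_μ)

recruit_μ = recruit_mean([5.0, 8.0], [0.25, 0.75])  # 7.25
breeders_sketch(6.0, recruit_μ, 0.3)                # 6.375
```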
function settler_DHW_tolerance!(
c_mean_t_1::AbstractArray{F,3},
c_mean_t::AbstractArray{F,3},
@@ -669,47 +592,48 @@ function settler_DHW_tolerance!(
groups, sizes, _ = axes(c_mean_t_1)

sink_loc_ids::Vector{Int64} = findall(k_area .> 0.0)

source_locs::BitVector = falses(length(k_area)) # cache for source locations
reproductive_sc::BitVector = falses(length(sizes)) # cache for reproductive size classes

for sink_loc in sink_loc_ids
if sum(@views(settlers[:, sink_loc])) .== 0.0
# Number of reproductive size classes for each group
n_reproductive = sum(view(fec_params_per_m², :, :) .> 0.0; dims=2)
reproductive_sc::BitVector = falses(sizes) # cache for reproductive size classes

@inbounds for sink_loc in sink_loc_ids
sink_settlers = view(settlers, :, sink_loc)'
if sum(sink_settlers) .== 0.0
# Only update locations where recruitment occurred
continue
end

# Find sources for each sink
# Note: source locations can include the sink location (self-seeding)
source_locs .= @view(tp[:, sink_loc]) .> 0.0
source_locs .= view(tp.data, :, sink_loc) .> 0.0

# Calculate contribution to cover to determine weights for each species/group
w = @views settlers[:, sink_loc]' .* tp.data[source_locs, sink_loc]
w_per_group = w ./ sum(w; dims=1)
replace!(w_per_group, NaN => 0.0)
# Calculate contribution to cover to determine weights for each functional group
w::Matrix{Float64} = sink_settlers .* view(tp.data, source_locs, sink_loc)
Collaborator:

Would it make a relevant difference to cache this?

Collaborator Author:

The tricky thing is the number of sources can change.

But maybe there is a way of preallocating and then resizing. Maybe it will avoid excessive triggering of the GC... let me have a think.

Collaborator Author (@ConnectedSystems), Oct 5, 2024:

Couldn't think of a way to cache the weights matrix because the number of source locations changes, as I said, and making the cache matrix dynamically resizable involved a few more intermediate steps, which would increase overall allocations (at least the way I was thinking of it).

I did play with the idea that we could cache the result outside the function given that the number of source locations is constant, but this is only true in our current case where we use mean connectivity. If/when we move to variable/dynamic connectivity data, we'd need this implementation anyway.

I did think of some other tweaks which helped a tiny bit, though:

[Benchmark screenshot omitted.]
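
To make the trade-off concrete: a fixed-size cache for the weights only works when every sink sees the same number of source locations; otherwise each iteration either allocates a fresh matrix or pays for resize bookkeeping. A sketch of the two options under that assumption (names are illustrative, not from the codebase):

```julia
# Option 1: allocate per sink. Simple, but `w` is a new matrix whenever the
# number of source locations differs between sinks.
function sink_weights(settlers, tp, sink_loc, source_locs)
    return view(settlers, :, sink_loc)' .* view(tp, source_locs, sink_loc)
end

# Option 2: write into a preallocated buffer. Only valid if the number of
# source locations is fixed, e.g. a single mean connectivity matrix.
function sink_weights!(w, settlers, tp, sink_loc, source_locs)
    w .= view(settlers, :, sink_loc)' .* view(tp, source_locs, sink_loc)
    return w
end
```

In the diff that follows, `ew` is instead allocated once per sink location and reused across functional groups, which is the smaller win the tweaks above refer to.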

w_per_group::Matrix{Float64} = w ./ sum(w; dims=1)
ew::Vector{Float64} = zeros(count(source_locs))

for grp in groups
# Get distribution mean of reproductive size classes at source locations
# recalling that source locations may include the sink location due to
# self-seeding.
reproductive_sc .= @view(fec_params_per_m²[grp, :]) .> 0.0
settler_means::SubArray{Float64} = @view(
c_mean_t_1[grp, reproductive_sc, source_locs]
)

# Determine weights based on contribution to recruitment.
# This weights the recruited corals by the size classes and source locations
# which contributed to recruitment.
if sum(w_per_group[:, grp]) > 0.0
ew::Vector{Float64} = repeat(
w_per_group[:, grp]; inner=count(reproductive_sc)
)
if sum(view(w_per_group, :, grp)) > 0.0
# Get distribution mean of reproductive size classes at source locations
# recalling that source locations may include the sink location due to
# self-seeding.
reproductive_sc .= view(fec_params_per_m², grp, :) .> 0.0

# Determine combined mean
# https://en.wikipedia.org/wiki/Mixture_distribution#Properties
recruit_μ::Float64 = sum(settler_means, Weights(ew ./ sum(ew)))
ew .= view(w_per_group, :, grp)
settler_means::SubArray = view(
c_mean_t_1, grp, reproductive_sc, source_locs
)
recruit_μ::Float64 = sum(settler_means .* ew') / n_reproductive[grp]

# Mean for generation t is determined through Breeder's equation
c_mean_t[grp, 1, sink_loc] = breeders(
@views c_mean_t[grp, 1, sink_loc] = breeders(
c_mean_t_1[grp, 1, sink_loc], recruit_μ, h²
)
end
@@ -908,16 +832,18 @@ Area covered by recruited larvae (in m²)
function settler_cover(
fec_scope::T,
conn::AbstractMatrix{Float64},
leftover_space::T,
leftover_space::V,
α::V,
β::V,
basal_area_per_settler::V,
potential_settlers::T
)::T where {T<:Matrix{Float64},V<:Vector{Float64}}
potential_settlers::T;
valid_sources::BitVector=falses(size(conn, 2)),
valid_sinks::BitVector=falses(size(conn, 1))
)::T where {T<:AbstractMatrix{Float64},V<:Vector{Float64}}

# Determine active sources and sinks
valid_sources::BitVector = vec(sum(conn; dims=2) .> 0.0)
valid_sinks::BitVector = vec(sum(conn; dims=1) .> 0.0)
valid_sources .= vec(sum(conn.data; dims=2) .> 0.0)
valid_sinks .= vec(sum(conn.data; dims=1) .> 0.0)

# Send larvae out into the world (reuse potential_settlers to reduce allocations)
# Note, conn rows need not sum to 1.0 as this missing probability accounts for larvae
Expand All @@ -926,8 +852,8 @@ function settler_cover(
# this is known as in-water mortality.
# Set to 0.0 as it is now taken care of by connectivity data.
# Mwater::Float64 = 0.0
@views potential_settlers[:, valid_sinks] .= (
fec_scope[:, valid_sources] * conn[valid_sources, valid_sinks]
@views @inbounds potential_settlers[:, valid_sinks] .= (
fec_scope[:, valid_sources] * conn.data[valid_sources, valid_sinks]
)

# Larvae have landed, work out how many are recruited
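
The signature change above lets callers pass preallocated valid_sources / valid_sinks masks as keyword arguments, so the masks can be reused across calls instead of being rebuilt each time (the defaults still allocate). A caller-side sketch under that reading; whether run_model actually threads its own buffers through is not visible in the truncated hunks below:

```julia
# Allocate the masks once, outside the timestep loop...
valid_sources = falses(size(conn, 2))
valid_sinks   = falses(size(conn, 1))

# ...and reuse them on every call; settler_cover overwrites them in place.
settled = settler_cover(
    fec_scope, conn, leftover_space_m², α, β, basal_area_per_settler,
    potential_settlers; valid_sources=valid_sources, valid_sinks=valid_sinks
)
```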
30 changes: 17 additions & 13 deletions src/scenario.jl
@@ -411,7 +411,7 @@ function run_model(domain::Domain, param_set::Union{DataFrameRow,YAXArray})::NamedTuple
n_sizes::Int64 = domain.coral_growth.n_sizes
n_groups::Int64 = domain.coral_growth.n_groups
_bin_edges::Matrix{Float64} = bin_edges()
functional_groups = [
functional_groups = Vector{FunctionalGroup}[
FunctionalGroup.(
eachrow(_bin_edges[:, 1:(end - 1)]),
eachrow(_bin_edges[:, 2:end]),
@@ -425,6 +425,7 @@ function run_model(
param_set::DataFrameRow,
functional_groups::Vector{Vector{FunctionalGroup}}
)::NamedTuple
setup()
ps = DataCube(Vector(param_set); factors=names(param_set))
return run_model(domain, ps, functional_groups)
end
@@ -531,8 +532,9 @@ function run_model(
loc_cover_cache = zeros(n_locs)

# Locations that can support corals
vec_abs_k = site_k_area(domain)
habitable_locs::BitVector = location_k(domain) .> 0.0
habitable_loc_areas = site_k_area(domain)[habitable_locs]
habitable_loc_areas = vec_abs_k[habitable_locs]
habitable_loc_areas′ = reshape(habitable_loc_areas, (1, 1, length(habitable_locs)))

# Avoid placing importance on sites that were not considered
@@ -671,7 +673,7 @@ function run_model(
p.r .= _to_group_size(domain.coral_growth, corals.growth_rate)
p.mb .= _to_group_size(domain.coral_growth, corals.mb_rate)

area_weighted_conn = conn .* site_k_area(domain)
area_weighted_conn = conn .* vec_abs_k
conn_cache = similar(area_weighted_conn.data)

# basal_area_per_settler is the area in m^2 of a size class one coral
@@ -692,7 +694,7 @@ function run_model(
# Empty the old contents of the buffers and add the new blocks
cover_view = [@view C_cover[1, :, :, loc] for loc in 1:n_locs]
functional_groups = reuse_buffers!.(
functional_groups, (cover_view .* vec(loc_k_area))
functional_groups, (cover_view .* vec_abs_k)
)

# Preallocate memory for temporaries
@@ -708,6 +710,7 @@
)

FLoops.assistant(false)
habitable_loc_idxs = findall(habitable_locs)
for tstep::Int64 in 2:tf
# Convert cover to absolute values to use within CoralBlox model
C_cover_t[:, :, habitable_locs] .=
@@ -725,7 +728,7 @@
lin_ext_scale_factors[_loc_coral_cover(C_cover_t)[habitable_locs] .< (0.7 .* habitable_loc_areas)] .=
1

@floop for i in findall(habitable_locs)
@floop for i in habitable_loc_idxs
# TODO Skip when _loc_rel_leftover_space[i] == 0

# Perform timestep
@@ -737,7 +740,7 @@
)

# Write to the cover matrix
coral_cover(functional_groups[i], @view(C_cover_t[:, :, i]))
coral_cover(functional_groups[i], view(C_cover_t, :, :, i))
end

# Check if size classes are inappropriately out-growing habitable area
@@ -746,14 +749,15 @@
) "Cover outgrowing habitable area"

# Convert C_cover_t to relative values after CoralBlox was run
C_cover_t[:, :, habitable_locs] .=
C_cover_t[:, :, habitable_locs] ./ habitable_loc_areas′
C_cover_t[:, :, habitable_locs] .= (
view(C_cover_t, :, :, habitable_locs) ./ habitable_loc_areas′
)
C_cover[tstep, :, :, habitable_locs] .= C_cover_t[:, :, habitable_locs]

# Natural adaptation (doesn't change C_cover_t)
if tstep <= tf
adjust_DHW_distribution!(
@view(C_cover[(tstep - 1), :, :, :]), c_mean_t, p.r
view(C_cover, (tstep - 1), :, :, :), c_mean_t, p.r
)

# Set values for t to t-1
@@ -772,15 +776,15 @@
fecundity_scope!(fec_scope, fec_all, fec_params_per_m², C_cover_t, loc_k_area)

loc_coral_cover = _loc_coral_cover(C_cover_t)
leftover_space_m² = relative_leftover_space(loc_coral_cover) .* loc_k_area'
leftover_space_m² = relative_leftover_space(loc_coral_cover) .* vec_abs_k

# Reset potential settlers to zero
potential_settlers .= 0.0
recruitment .= 0.0

# Recruitment represents additional cover, relative to total site area
# Recruitment/settlement occurs after the full moon in October/November
recruitment[:, habitable_locs] .=
@views recruitment[:, habitable_locs] .=
settler_cover(
fec_scope,
conn,
@@ -796,7 +800,7 @@ function run_model(
settler_DHW_tolerance!(
c_mean_t_1,
c_mean_t,
vec(loc_k_area),
vec_abs_k,
TP_data, # ! IMPORTANT: Pass in transition probability matrix, not connectivity!
recruitment,
fec_params_per_m²,
@@ -930,7 +934,7 @@ function run_model(
seed_locs = findall(log_location_ranks.locations .∈ [selected_seed_ranks])
seed_corals!(
C_cover_t,
vec(loc_k_area),
vec_abs_k,
vec(leftover_space_m²),
seed_locs, # use location indices
seeded_area,