Update for MPIStaticCondensation.jl with shared-memory MPI support
johnomotani committed Feb 24, 2025
1 parent beb2ffc commit 02d28fc
Showing 2 changed files with 35 additions and 28 deletions.
49 changes: 23 additions & 26 deletions moment_kinetics/src/electron_kinetic_equation.jl
@@ -1365,28 +1365,27 @@ global_rank[] == 0 && println("recalculating precon")
                                 ir, evolve_ppar, :all, false, false)
 
     begin_serial_region()
-    if block_rank[] == 0
-        # LU decomposition was previously created. The Jacobian always
-        # has the same sparsity pattern, so by using `lu!()` we can
-        # reuse some setup.
-        A_factorization = static_condensation.A_factorization
-        C = static_condensation.C
-        pdf_size = z.n * vperp.n * vpa.n
-        ppar_size = z.n
-        A_factorization = CondensedFactorization(precon_matrix[1:pdf_size,1:pdf_size],
-                                                 A_factorization.local_blocks;
-                                                 sparse_local_blocks=true)
-        C .= precon_matrix[pdf_size+1:pdf_size+ppar_size,1:pdf_size]
-        @views update_schur_complement!(
-            static_condensation, A_factorization,
-            precon_matrix[1:pdf_size,pdf_size+1:pdf_size+ppar_size], C,
-            precon_matrix[pdf_size+1:pdf_size+ppar_size,pdf_size+1:pdf_size+ppar_size])
-        nl_solver_params.preconditioners[ir] =
-            (static_condensation, precon_matrix, pdf_buffer, ppar_buffer)
-    else
-        nl_solver_params.preconditioners[ir] =
-            (static_condensation, precon_matrix, pdf_buffer, ppar_buffer)
-    end
+
+    # LU decomposition was previously created. The Jacobian always
+    # has the same sparsity pattern, so by using `lu!()` we can
+    # reuse some setup.
+    A_factorization = static_condensation.A_factorization
+    C = static_condensation.C
+    pdf_size = z.n * vperp.n * vpa.n
+    ppar_size = z.n
+    A_factorization = CondensedFactorization(precon_matrix[1:pdf_size,1:pdf_size],
+                                             A_factorization.local_blocks;
+                                             sparse_local_blocks=true,
+                                             shared_MPI_comm=comm_block[],
+                                             joining_elements_rhs_buffer=A_factorization.joining_elements_rhs,
+                                             joining_elements_solution_buffer=A_factorization.joining_elements_solution)
+    C .= precon_matrix[pdf_size+1:pdf_size+ppar_size,1:pdf_size]
+    @views update_schur_complement!(
+        static_condensation, A_factorization,
+        precon_matrix[1:pdf_size,pdf_size+1:pdf_size+ppar_size], C,
+        precon_matrix[pdf_size+1:pdf_size+ppar_size,pdf_size+1:pdf_size+ppar_size])
+    nl_solver_params.preconditioners[ir] =
+        (static_condensation, precon_matrix, pdf_buffer, ppar_buffer)
 end
 
 @timeit_debug global_timer static_condensation_precon!(x) = begin
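[Note: the comment in the hunk above refers to reusing an LU factorization via `lu!()` when only the matrix values change. As background, not part of this commit, a minimal sketch of that pattern with Julia's SuiteSparse wrapper:

using LinearAlgebra, SparseArrays

# Illustrative only: reuse the symbolic LU setup when the sparsity
# pattern of the matrix is unchanged between refactorizations.
A = sparse([1.0 2.0 0.0;
            0.0 3.0 4.0;
            5.0 0.0 6.0])
F = lu(A)            # symbolic analysis + numeric factorization
A.nzval .*= 2.0      # update stored values in place; pattern unchanged
lu!(F, A)            # refactorize numerically, reusing the symbolic setup
x = F \ [1.0, 2.0, 3.0]
]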
@@ -1404,10 +1403,8 @@ global_rank[] == 0 && println("recalculating precon")
         this_ppar_buffer[iz] = precon_ppar[iz]
     end
 
-    begin_serial_region()
-    @serial_region begin
-        @timeit_debug global_timer "ldiv!" ldiv!(vec(precon_f), precon_ppar, precon_static_condensation, vec(this_pdf_buffer), this_ppar_buffer)
-    end
+    _block_synchronize()
+    @timeit_debug global_timer "ldiv!" ldiv!(vec(precon_f), precon_ppar, precon_static_condensation, vec(this_pdf_buffer), this_ppar_buffer, global_timer)
 
     # Ensure values of precon_f and precon_ppar are consistent across
     # distributed-MPI block boundaries. For precon_f take the upwind
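[Note: for context on the algebra that the `ldiv!` above applies: a static-condensation preconditioner solves the block system [A B; C D][x; y] = [u; v] through the Schur complement S = D − C A⁻¹ B. A dense, serial sketch of that solve; `schur_solve` is a hypothetical name for illustration, not this package's API, and the real ldiv! distributes the work over shared-memory MPI ranks:

using LinearAlgebra

# Dense, serial illustration of a Schur-complement solve.
function schur_solve(A, B, C, D, u, v)
    FA = lu(A)                      # factorize the pdf-pdf block
    S = D - C * (FA \ B)            # Schur complement for the ppar block
    y = lu(S) \ (v - C * (FA \ u))  # solve for the ppar part
    x = FA \ (u - B * y)            # back-substitute for the pdf part
    return x, y
end
]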
14 changes: 12 additions & 2 deletions moment_kinetics/src/nonlinear_solvers.jl
@@ -335,8 +335,17 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
         error("Created a block with indices greater than the matrix size.")
     end
 
+    n_local_blocks_points = sum(length(b) for b ∈ local_blocks)
+    n_joining_elements_points = pdf_size - n_local_blocks_points
+    joining_elements_rhs_buffer = allocate_shared_float(n_joining_elements_points)
+    joining_elements_solution_buffer = allocate_shared_float(n_joining_elements_points)
+
     A_factorization = CondensedFactorization(dummy_A, local_blocks;
-                                             sparse_local_blocks=true)
+                                             sparse_local_blocks=true,
+                                             shared_MPI_comm=comm_block[],
+                                             joining_elements_rhs_buffer=joining_elements_rhs_buffer,
+                                             joining_elements_solution_buffer=joining_elements_solution_buffer)
+
     Ainv_dot_B = allocate_shared_float(pdf_size, ppar_size)
     C = allocate_shared_float(ppar_size, pdf_size)
     schur_complement = allocate_shared_float(ppar_size, ppar_size)
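[Note: the new sizing lines above count the "joining" grid points that sit between the locally factorized blocks, and allocate shared-memory buffers of that length. A toy illustration of the arithmetic, with made-up index ranges:

# Hypothetical example: the pdf grid splits into independent local
# blocks plus the points joining them.
local_blocks = [1:10, 12:21, 23:32]   # made-up index ranges
pdf_size = 32
n_local_blocks_points = sum(length(b) for b ∈ local_blocks)   # 30
n_joining_elements_points = pdf_size - n_local_blocks_points  # 2 (points 11 and 22)
]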
@@ -348,7 +357,8 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
     bottom_vec_buffer = allocate_shared_float(ppar_size)
     sc = MPISchurComplement(A_factorization, Ainv_dot_B, C, schur_complement,
                             schur_complement_lu, Ainv_dot_u, top_vec_buffer,
-                            bottom_vec_buffer)
+                            bottom_vec_buffer, comm_block[], block_rank[],
+                            block_size[])
     return sc
 end
 pdf_plus_ppar_size = total_size_coords + coords.z.n
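[Note: `comm_block[]`, `block_rank[]` and `block_size[]` are moment_kinetics' handles for its shared-memory block communicator, now passed through to MPISchurComplement. As a rough sketch of how such a communicator is typically obtained with MPI.jl (illustrative, not this package's setup code):

using MPI

MPI.Init()
world = MPI.COMM_WORLD
# Split COMM_WORLD into sub-communicators whose ranks share a node's
# memory, analogous to comm_block[].
shm_comm = MPI.Comm_split_type(world, MPI.COMM_TYPE_SHARED, MPI.Comm_rank(world))
shm_rank = MPI.Comm_rank(shm_comm)   # analogous to block_rank[]
shm_size = MPI.Comm_size(shm_comm)   # analogous to block_size[]
]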
