[WIP] Use MUMPS.jl to solve LU decomposition in parallel #311

Open · wants to merge 18 commits into master
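This PR swaps the serial `lu()` / `ldiv!()` preconditioner path for the MUMPS parallel direct solver via MUMPS.jl. The sketch below is a minimal, single-process illustration of the MUMPS.jl calls the diff relies on (`Mumps{T}` construction, `associate_matrix!`, `factorize!`, `associate_rhs!`, `solve!`); `Float64` stands in for `mk_float`, the matrix and right-hand side are toy data, and reading the result back with `get_solution` is an assumption here, since the corresponding lines are collapsed in the diff.

```julia
# Minimal sketch of the MUMPS.jl factorize/solve workflow (not moment_kinetics code).
using MPI, MUMPS, SparseArrays

MPI.Init()

A = sparse([2.0 -1.0 0.0; -1.0 2.0 -1.0; 0.0 -1.0 2.0])  # small toy matrix
b = [1.0, 0.0, 1.0]

mumps = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
MUMPS.associate_matrix!(mumps, A)    # matrix is supplied on the host rank
MUMPS.factorize!(mumps)              # analysis + numerical factorization (collective call)
MUMPS.associate_rhs!(mumps, b)       # right-hand side, again on the host rank
MUMPS.solve!(mumps)                  # forward/backward substitution (collective call)
x = MUMPS.get_solution(mumps)        # assumption: centralized solution fetched on the host
MUMPS.finalize(mumps)

MPI.Finalize()
```

In the diff itself the `Mumps` instance is built on the shared-memory block communicator (the `comm` keyword with `MPI.API.MPI_Comm_f2c(comm_block[].val)`), the matrix and right-hand side are associated only on `block_rank[] == 0`, and every rank of the block calls `factorize!` and `solve!`, since those operations are collective.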
4 changes: 3 additions & 1 deletion moment_kinetics/Project.toml
@@ -19,6 +19,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
MUMPS = "55d2b088-9f4e-11e9-26c0-150b02ea6a46"
Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
@@ -50,11 +51,12 @@ file_io_netcdf = "NCDatasets"
manufactured_solns_ext = ["Symbolics", "IfElse"]

[compat]
MUMPS = "1.5.0"
julia = "1.9.0"

[extras]
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Random", "Test", "NCDatasets"]
test = ["Random", "Test"]
98 changes: 74 additions & 24 deletions moment_kinetics/src/electron_kinetic_equation.jl
@@ -2,6 +2,7 @@ module electron_kinetic_equation

using LinearAlgebra
using MPI
using MUMPS
using SparseArrays

export get_electron_critical_velocities
@@ -1217,7 +1218,7 @@ pressure \$p_{e∥}\$.
end

if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval
global_rank[] == 0 && println("recalculating precon")
global_rank[] == 0 && println("recalculating precon :electron_lu 1")
nl_solver_params.solves_since_precon_update[] = 0
nl_solver_params.precon_dt[] = t_params.dt[]

@@ -1230,28 +1231,48 @@ global_rank[] == 0 && println("recalculating precon")
vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
external_source_settings, num_diss_params, t_params, ion_dt,
ir, evolve_ppar)

mumps = Mumps{mk_float}(mumps_unsymmetric, get_icntl(), default_cntl64;
comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
begin_serial_region()
if block_rank[] == 0
if size(orig_lu) == (1, 1)
_block_synchronize()
if block_rank[] >= 0
if orig_lu.n == 0  # was: size(orig_lu, 1) == 1
# Have not properly created the LU decomposition before, so
# cannot reuse it.
@timeit_debug global_timer "lu" nl_solver_params.preconditioners[ir] =
(lu(sparse(precon_matrix)), precon_matrix, input_buffer,
output_buffer)
@timeit_debug global_timer "lu" begin
if block_rank[] == 0
MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
end
MUMPS.factorize!(mumps)
nl_solver_params.preconditioners[ir] =
(mumps, precon_matrix, input_buffer, output_buffer)
end
else
# LU decomposition was previously created. The Jacobian always
# has the same sparsity pattern, so by using `lu!()` we can
# reuse some setup.
try
@timeit_debug global_timer "lu!" lu!(orig_lu, sparse(precon_matrix); check=false)
@timeit_debug global_timer "lu!" begin
if block_rank[] == 0
MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
end
MUMPS.factorize!(orig_lu)
end
catch e
if !isa(e, ArgumentError)
rethrow(e)
end
println("Sparsity pattern of matrix changed, rebuilding "
* " LU from scratch")
@timeit_debug global_timer "lu" orig_lu = lu(sparse(precon_matrix))
@timeit_debug global_timer "lu" begin
if block_rank[] == 0
MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
end
MUMPS.factorize!(mumps)
nl_solver_params.preconditioners[ir] =
(mumps, precon_matrix, input_buffer, output_buffer)
end

end
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
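A rough restatement of the update logic in this hunk, as a standalone sketch: keep one `Mumps` instance per preconditioner slot, treat `n == 0` as "never factorized", refresh the existing instance in place while the Jacobian keeps its sparsity pattern, and rebuild a fresh instance if an `ArgumentError` signals that the pattern changed. `make_mumps`, `is_host` and `refreshed_factorization` are illustrative names for this sketch, not identifiers from the repository.

```julia
# Sketch only: mirrors the reuse-or-rebuild branches above with placeholder names.
using MPI, MUMPS, SparseArrays

make_mumps() = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)

function refreshed_factorization(orig::Mumps{Float64}, A::SparseMatrixCSC{Float64},
                                 is_host::Bool)
    if orig.n == 0
        # Placeholder instance that has never seen a matrix: factorize from scratch.
        fresh = make_mumps()
        is_host && MUMPS.associate_matrix!(fresh, A)
        MUMPS.factorize!(fresh)
        return fresh
    end
    try
        # Same sparsity pattern expected: refresh the numerical values and refactorize.
        is_host && MUMPS.associate_matrix!(orig, A)
        MUMPS.factorize!(orig)
        return orig
    catch e
        isa(e, ArgumentError) || rethrow(e)
        # Sparsity pattern changed: rebuild from scratch.
        fresh = make_mumps()
        is_host && MUMPS.associate_matrix!(fresh, A)
        MUMPS.factorize!(fresh)
        return fresh
    end
end
```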
@@ -1280,11 +1301,16 @@ global_rank[] == 0 && println("recalculating precon")
counter += 1
end

begin_serial_region()
@serial_region begin
@timeit_debug global_timer "ldiv!" ldiv!(this_output_buffer, precon_lu, this_input_buffer)
begin_serial_region()
this_output_buffer .= this_input_buffer
if block_rank[] == 0
MUMPS.associate_rhs!(precon_lu, this_output_buffer)
end

_block_synchronize()
@timeit_debug global_timer "ldiv!" MUMPS.solve!(precon_lu)
_block_synchronize()

begin_serial_region()
counter = 1
@loop_z_vperp_vpa iz ivperp ivpa begin
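In this hunk the `ldiv!` call is replaced by a MUMPS solve; the sketch below shows that application step in isolation. `is_host` stands in for `block_rank[] == 0` and `barrier` for `_block_synchronize()`; fetching the result with `get_solution` and distributing it to the other ranks are assumptions of the sketch, not lines visible in the diff.

```julia
# Sketch only: apply an already-factorized MUMPS preconditioner to a vector.
using MPI, MUMPS

function apply_preconditioner!(out::Vector{Float64}, precon::Mumps{Float64},
                               rhs::Vector{Float64}, is_host::Bool, barrier)
    out .= rhs                              # work on a copy of the input buffer
    is_host && MUMPS.associate_rhs!(precon, out)
    barrier()
    MUMPS.solve!(precon)                    # collective over the block communicator
    barrier()
    if is_host
        out .= vec(MUMPS.get_solution(precon))  # assumption: centralized solution on host
    end
    return out                              # broadcasting to the other ranks omitted here
end
```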
@@ -1338,6 +1364,7 @@ global_rank[] == 0 && println("recalculating precon")
left_preconditioner = identity
right_preconditioner = lu_precon!
elseif nl_solver_params.preconditioner_type === Val(:electron_adi)
global_rank[] == 0 && println("recalculating precon :electron_adi")

if t_params.dt[] > 1.5 * nl_solver_params.precon_dt[] ||
t_params.dt[] < 2.0/3.0 * nl_solver_params.precon_dt[]
@@ -1347,7 +1374,6 @@ global_rank[] == 0 && println("recalculating precon")
end

if nl_solver_params.solves_since_precon_update[] ≥ nl_solver_params.preconditioner_update_interval
global_rank[] == 0 && println("recalculating precon")
nl_solver_params.solves_since_precon_update[] = 0
nl_solver_params.precon_dt[] = t_params.dt[]

@@ -1931,7 +1957,7 @@ to allow the outer r-loop to be parallelised.

function recalculate_preconditioner!()
if nl_solver_params.preconditioner_type === Val(:electron_lu)
global_rank[] == 0 && println("recalculating precon")
global_rank[] == 0 && println("recalculating precon :electron_lu 2")
nl_solver_params.solves_since_precon_update[] = 0
nl_solver_params.precon_dt[] = ion_dt

@@ -1944,27 +1970,47 @@ global_rank[] == 0 && println("recalculating precon")
z_advect, vpa_advect, scratch_dummy, external_source_settings,
num_diss_params, t_params, ion_dt, ir, true, :all, true, false)

mumps = Mumps{mk_float}(mumps_unsymmetric, get_icntl(), default_cntl64;
comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
begin_serial_region()
if block_rank[] == 0
if size(orig_lu) == (1, 1)
_block_synchronize()
if block_rank[] >= 0
if orig_lu.n == 0  # was: size(orig_lu, 1) == 1
# Have not properly created the LU decomposition before, so
# cannot reuse it.
@timeit_debug global_timer "lu" nl_solver_params.preconditioners[ir] =
(lu(sparse(precon_matrix)), precon_matrix, input_buffer,
output_buffer)
@timeit_debug global_timer "lu" begin
if block_rank[] == 0
MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
end
MUMPS.factorize!(mumps)
nl_solver_params.preconditioners[ir] =
(mumps, precon_matrix, input_buffer, output_buffer)
end
else
# LU decomposition was previously created. The Jacobian always
# has the same sparsity pattern, so by using `lu!()` we can
# reuse some setup.
try
@timeit_debug global_timer "lu!" lu!(orig_lu, sparse(precon_matrix); check=false)
@timeit_debug global_timer "lu!" begin
if block_rank[] == 0
MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
end
MUMPS.factorize!(orig_lu)
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
end
catch e
if !isa(e, ArgumentError)
rethrow(e)
end
println("Sparsity pattern of matrix changed, rebuilding "
* " LU from scratch")
@timeit_debug global_timer "lu" orig_lu = lu(sparse(precon_matrix))
if block_rank[] == 0
MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
end
@timeit_debug global_timer "lu" MUMPS.factorize!(orig_lu)
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
end
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
@@ -2008,9 +2054,13 @@ global_rank[] == 0 && println("recalculating precon")
end

begin_serial_region()
@serial_region begin
@timeit_debug global_timer "ldiv!" ldiv!(this_output_buffer, precon_lu, this_input_buffer)
end
this_output_buffer .= this_input_buffer
if block_rank[] == 0
MUMPS.associate_rhs!(precon_lu, this_output_buffer)
end
_block_synchronize()
@timeit_debug global_timer "ldiv!" MUMPS.solve!(precon_lu)
_block_synchronize()

begin_serial_region()
counter = 1
13 changes: 8 additions & 5 deletions moment_kinetics/src/nonlinear_solvers.jl
@@ -39,6 +39,7 @@ using ..type_definitions: mk_float, mk_int

using LinearAlgebra
using MPI
using MUMPS
using SparseArrays
using StatsBase: mean

@@ -167,22 +168,24 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
end
end

mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if preconditioner_type === Val(:lu)
# Create dummy LU solver objects so we can create an array for preconditioners.
# These will be calculated properly within the time loop.
preconditioners = fill(lu(sparse(1.0*I, total_size_coords, total_size_coords)),
preconditioners = fill(mumps,
reverse(outer_coord_sizes))
elseif preconditioner_type === Val(:electron_split_lu)
preconditioners = (z=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)),
preconditioners = (z=fill(mumps,
tuple(coords.vpa.n, reverse(outer_coord_sizes)...)),
vpa=fill(lu(sparse(1.0*I, coords.vpa.n, coords.vpa.n)),
vpa=fill(mumps,
tuple(coords.z.n, reverse(outer_coord_sizes)...)),
ppar=fill(lu(sparse(1.0*I, coords.z.n, coords.z.n)),
ppar=fill(mumps,
reverse(outer_coord_sizes)),
)
elseif preconditioner_type === Val(:electron_lu)
pdf_plus_ppar_size = total_size_coords + coords.z.n
preconditioners = fill((lu(sparse(1.0*I, 1, 1)),
preconditioners = fill((mumps,
allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size),
allocate_shared_float(pdf_plus_ppar_size),
allocate_shared_float(pdf_plus_ppar_size),
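For reference, a minimal sketch of the placeholder setup above: a single dummy `Mumps` instance (its `n` stays 0 until a matrix is associated) is used to fill the preconditioner containers, and the real factorizations are installed later inside the time loop. `nouter` is a stand-in for `reverse(outer_coord_sizes)`.

```julia
# Sketch only: dummy MUMPS preconditioner placeholders, later overwritten in the time loop.
using MPI, MUMPS

MPI.Init()
dummy = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
nouter = 4                                  # stand-in for reverse(outer_coord_sizes)
preconditioners = fill(dummy, nouter)       # note: every entry references the same instance
```

As with the previous `fill(lu(sparse(1.0*I, ...)), ...)` placeholders, `fill` makes every element reference the same object; the dummy entries are expected to be replaced with properly factorized instances before they are used.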