Commit 5f2a96b
runs fast - not checked if correct though
James Cook committed Jan 23, 2025
1 parent 792de1d commit 5f2a96b
Showing 3 changed files with 62 additions and 42 deletions.
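This commit replaces the MPIQR distributed-QR preconditioner with an LU factorisation from MUMPS.jl. For orientation, here is a minimal, self-contained sketch of the MUMPS.jl call sequence the diff adopts below. This is not code from the commit: the matrix and right-hand side are stand-ins, and constructor keyword arguments (such as the comm= seen in the diff) can vary between MUMPS.jl versions.

using MPI, MUMPS, SparseArrays

MPI.Init()

A = sparse([2.0 1.0; 1.0 3.0])  # stand-in for the preconditioner matrix
b = [1.0, 2.0]                  # stand-in for the right-hand side

# Unsymmetric factorisation with the library's default control parameters.
mumps = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
if MPI.Comm_rank(MPI.COMM_WORLD) == 0
    associate_matrix!(mumps, A)  # the matrix is supplied on the host rank only
end
factorize!(mumps)                # collective: every rank must call this
if MPI.Comm_rank(MPI.COMM_WORLD) == 0
    associate_rhs!(mumps, b)
end
solve!(mumps)                    # collective
x = get_solution(mumps)          # solution is gathered on the host rank
finalize(mumps)
MPI.Finalize()

The diff follows the same shape: associate_matrix! and associate_rhs! are guarded by block_rank[] == 0, while factorize! and solve! are reached by every rank in comm_block[].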
4 changes: 2 additions & 2 deletions moment_kinetics/Project.toml
@@ -19,7 +19,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
MPIQR = "4acd9526-7714-4958-a7d5-1632fb49fc8a"
MUMPS = "55d2b088-9f4e-11e9-26c0-150b02ea6a46"
Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
@@ -51,7 +51,7 @@ file_io_netcdf = "NCDatasets"
manufactured_solns_ext = ["Symbolics", "IfElse"]

[compat]
MPIQR = "0.1.0"
MUMPS = "1.5.0"
julia = "1.9.0"

[extras]
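The Project.toml change above drops MPIQR from [deps] and [compat] and adds MUMPS with a 1.5.0 compatibility bound. A hypothetical update sequence for an existing checkout (the resolver picks the concrete version within the new bound):

using Pkg
Pkg.activate("moment_kinetics")
Pkg.rm("MPIQR")                       # drop the old dependency
Pkg.add(name="MUMPS", version="1.5")  # add MUMPS within the declared compat
Pkg.resolve()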
84 changes: 52 additions & 32 deletions moment_kinetics/src/electron_kinetic_equation.jl
@@ -2,7 +2,7 @@ module electron_kinetic_equation

using LinearAlgebra
using MPI
-using MPIQR
+using MUMPS
using SparseArrays

export get_electron_critical_velocities
@@ -72,8 +72,6 @@ import ..runge_kutta

const integral_correction_sharpness = 4.0

-const mpiqr_blocksize = 8 # 1 will be slow, 4 is good for smaller matrices, 8 is good for larger matrices, 16 - who knows?!
-
"""
update_electron_pdf is a function that uses the electron kinetic equation
to solve for the updated electron pdf
@@ -1233,38 +1231,46 @@ pressure \$p_{e∥}\$.
vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
external_source_settings, num_diss_params, t_params, ion_dt,
ir, evolve_ppar)
+
+mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
+comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if block_rank[] >= 0
-localcols = MPIQR.localcolumns(block_rank[], size(precon_matrix, 2),
-mpiqr_blocksize, block_size[])
-if size(orig_lu, 1) == 1
+if orig_lu.n == 0 # size(orig_lu, 1) == 1
# Have not properly created the LU decomposition before, so
# cannot reuse it.
@timeit_debug global_timer "lu" begin
-mpiqrmatrix = MPIQR.MPIQRMatrix(precon_matrix[:, localcols],
-size(precon_matrix); blocksize=mpiqr_blocksize, comm=comm_block[])
-mpiqrstruct = qr!(mpiqrmatrix)
+if block_rank[] == 0
+MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
+end
+MUMPS.factorize!(mumps)
nl_solver_params.preconditioners[ir] =
-(mpiqrstruct, precon_matrix, input_buffer, output_buffer)
+(mumps, precon_matrix, input_buffer, output_buffer)
end
else
# LU decomposition was previously created. The Jacobian always
# has the same sparsity pattern, so by using `lu!()` we can
# reuse some setup.
try
@timeit_debug global_timer "lu!" begin
-orig_lu[:, localcols] .= precon_matrix[:, localcols]
-qr!(orig_lu)
+if block_rank[] == 0
+MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
+end
+MUMPS.factorize!(orig_lu)
end
catch e
if !isa(e, ArgumentError)
rethrow(e)
end
println("Sparsity pattern of matrix changed, rebuilding "
* " LU from scratch")
-mpiqrmatrix = MPIQR.MPIQRMatrix(precon_matrix[:, localcols],
-size(precon_matrix); blocksize=mpiqr_blocksize, comm=comm_block[])
-@timeit_debug global_timer "lu" orig_lu = qr!(mpiqrmatrix)
+@timeit_debug global_timer "lu" begin
+if block_rank[] == 0
+MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
+end
+MUMPS.factorize!(mumps)
+nl_solver_params.preconditioners[ir] =
+(mumps, precon_matrix, input_buffer, output_buffer)
+end
-
end
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
@@ -1294,7 +1300,10 @@ pressure \$p_{e∥}\$.
end

this_output_buffer .= this_input_buffer
@timeit_debug global_timer "ldiv!" MPIQR.ldiv!(precon_lu, this_output_buffer)
if block_rank[] == 0
MUMPS.associate_rhs!(precon_lu, this_output_buffer)
end
@timeit_debug global_timer "ldiv!" MUMPS.solve!(precon_lu)

begin_serial_region()
counter = 1
@@ -1955,37 +1964,45 @@ to allow the outer r-loop to be parallelised.
z_advect, vpa_advect, scratch_dummy, external_source_settings,
num_diss_params, t_params, ion_dt, ir, true, :all, true, false)

+mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
+comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if block_rank[] >= 0
-localcols = MPIQR.localcols(block_rank[], size(precon_matrix, 2),
-mpiqr_blocksize, block_size[])
-if size(orig_lu, 1) == 1
+if orig_lu.n == 0 # size(orig_lu, 1) == 1
# Have not properly created the LU decomposition before, so
# cannot reuse it.
@timeit_debug global_timer "lu" begin
-mpiqrmatrix = MPIQR.MPIQRMatrix((precon_matrix[:, localcols]), size(precon_matrix);
-blocksize=mpiqr_blocksize, comm=comm_block[])
-mpiqrstruct = qr!(mpiqrmatrix)
-nl_solver_params.preconditioners[ir] =
-(mpiqrstruct, precon_matrix, input_buffer, output_buffer)
+if block_rank[] == 0
+MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
+end
+MUMPS.factorize!(mumps)
+nl_solver_params.preconditioners[ir] =
+(mumps, precon_matrix, input_buffer, output_buffer)
end
else
# LU decomposition was previously created. The Jacobian always
# has the same sparsity pattern, so by using `lu!()` we can
# reuse some setup.
try
@timeit_debug global_timer "lu!" begin
-orig_lu[:, localcols] .= precon_matrix[:, localcols]
-qr!(orig_lu)
+if block_rank[] == 0
+MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
+end
+MUMPS.factorize!(orig_lu)
+nl_solver_params.preconditioners[ir] =
+(orig_lu, precon_matrix, input_buffer, output_buffer)
end
catch e
if !isa(e, ArgumentError)
rethrow(e)
end
println("Sparsity pattern of matrix changed, rebuilding "
* " LU from scratch")
-mpiqrmatrix = MPIQR.MPIQRMatrix(precon_matrix[:, localcols], size(precon_matrix);
-blocksize=mpiqr_blocksize, comm=comm_block[])
-@timeit_debug global_timer "lu" orig_lu = qr!(mpiqrmatrix)
+if block_rank[] == 0
+MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
+end
+@timeit_debug global_timer "lu" MUMPS.factorize!(orig_lu)
+nl_solver_params.preconditioners[ir] =
+(orig_lu, precon_matrix, input_buffer, output_buffer)
end
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
@@ -2028,8 +2045,11 @@ to allow the outer r-loop to be parallelised.
counter += 1
end

-this_output_buffer .= this_input_buffer
-@timeit_debug global_timer "ldiv!" MPIQR.ldiv!(precon_lu, this_output_buffer)
+this_output_buffer .= this_input_buffer
+if block_rank[] == 0
+MUMPS.associate_rhs!(precon_lu, this_output_buffer)
+end
+@timeit_debug global_timer "ldiv!" MUMPS.solve!(precon_lu)

begin_serial_region()
counter = 1
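Both preconditioner setups in this file share one factorise-or-refactorise shape: on first use (orig_lu.n == 0) a fresh factorisation is stored, on later calls the existing solver object is re-associated with the updated matrix and refactorised, and an ArgumentError is treated as the signal that the sparsity pattern changed and the factorisation must be rebuilt. A schematic sketch of that control flow (the function name and the exception-based fallback are illustrative rather than code from this commit; assumes MUMPS and SparseArrays are loaded):

# `precon` is an existing Mumps instance, `A` the rebuilt Jacobian, and
# `is_host` is true on the rank where block_rank[] == 0; all ranks call this.
function refactorize!(precon, A, is_host)
    try
        if is_host
            MUMPS.associate_matrix!(precon, sparse(A))
        end
        MUMPS.factorize!(precon)  # intended to reuse the earlier setup
    catch e
        isa(e, ArgumentError) || rethrow(e)
        # Sparsity pattern changed: build a fresh solver and factorise anew.
        precon = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
        if is_host
            MUMPS.associate_matrix!(precon, sparse(A))
        end
        MUMPS.factorize!(precon)
    end
    return precon
end

One caveat, consistent with the commit title: MUMPS keeps its solution inside the solver object, and get_solution is the usual MUMPS.jl way to retrieve it. If MUMPS.solve!(precon_lu) does not update this_output_buffer in place, the values copied out after the solve would be stale.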
16 changes: 8 additions & 8 deletions moment_kinetics/src/nonlinear_solvers.jl
@@ -39,7 +39,7 @@ using ..type_definitions: mk_float, mk_int

using LinearAlgebra
using MPI
-using MPIQR
+using MUMPS
using SparseArrays
using StatsBase: mean

@@ -167,25 +167,25 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
V .= 0.0
end
end
-mpiqr = MPIQR.MPIQRMatrix(zeros(Float64, 1, 1), (1, block_size[]);
-blocksize=1, comm=comm_block[])
-mpiqrstruct = MPIQR.MPIQRStruct(mpiqr)
+
+mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
+comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if preconditioner_type === Val(:lu)
# Create dummy LU solver objects so we can create an array for preconditioners.
# These will be calculated properly within the time loop.
-preconditioners = fill(mpiqrstruct,
+preconditioners = fill(mumps,
reverse(outer_coord_sizes))
elseif preconditioner_type === Val(:electron_split_lu)
-preconditioners = (z=fill(mpiqrstruct,
+preconditioners = (z=fill(mumps,
tuple(coords.vpa.n, reverse(outer_coord_sizes)...)),
vpa=fill(mpqirstruct,
tuple(coords.z.n, reverse(outer_coord_sizes)...)),
-ppar=fill(mpiqrstruct,
+ppar=fill(mumps,
reverse(outer_coord_sizes)),
)
elseif preconditioner_type === Val(:electron_lu)
pdf_plus_ppar_size = total_size_coords + coords.z.n
-preconditioners = fill((mpiqrstruct,
+preconditioners = fill((mumps,
allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size),
allocate_shared_float(pdf_plus_ppar_size),
allocate_shared_float(pdf_plus_ppar_size),
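The dummy mumps object above is only a placeholder: fill puts the same instance into every slot so that the preconditioners container has a concrete element type, and each slot is overwritten with a real factorisation inside the time loop. A minimal sketch of the idiom (the extent 4 and the helper name are hypothetical):

using MPI, MUMPS

MPI.Init()
dummy = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
preconditioners = fill(dummy, (4,))  # 4: stand-in for the outer-loop extent
# Each entry is replaced before first use, e.g.
# preconditioners[ir] = factorised_solver_for(ir)  # hypothetical helper

Note that the :electron_split_lu branch still reads vpa=fill(mpqirstruct, ...); mpqirstruct looks like a pre-existing typo for the now-removed mpiqrstruct, so that branch would raise an undefined-variable error if taken, and presumably should also become mumps.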
