Commit 5f2a96b
runs fast - not checked if correct though
James Cook committed Jan 23, 2025
1 parent 792de1d commit 5f2a96b
Showing 3 changed files with 62 additions and 42 deletions.
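This commit replaces the MPIQR distributed-QR preconditioner with an LU factorisation from MUMPS.jl. For orientation, here is a minimal, self-contained sketch of the MUMPS.jl call sequence the diff adopts below. This is not code from the commit: the matrix and right-hand side are stand-ins, and constructor keyword arguments (such as the comm= seen in the diff) can vary between MUMPS.jl versions.

using MPI, MUMPS, SparseArrays

MPI.Init()

A = sparse([2.0 1.0; 1.0 3.0])  # stand-in for the preconditioner matrix
b = [1.0, 2.0]                  # stand-in for the right-hand side

# Unsymmetric factorisation with the library's default control parameters.
mumps = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
if MPI.Comm_rank(MPI.COMM_WORLD) == 0
    associate_matrix!(mumps, A)  # the matrix is supplied on the host rank only
end
factorize!(mumps)                # collective: every rank must call this
if MPI.Comm_rank(MPI.COMM_WORLD) == 0
    associate_rhs!(mumps, b)
end
solve!(mumps)                    # collective
x = get_solution(mumps)          # solution is gathered on the host rank
finalize(mumps)
MPI.Finalize()

The diff follows the same shape: associate_matrix! and associate_rhs! are guarded by block_rank[] == 0, while factorize! and solve! are reached by every rank in comm_block[].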
4 changes: 2 additions & 2 deletions moment_kinetics/Project.toml
@@ -19,7 +19,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
LsqFit = "2fda8390-95c7-5789-9bda-21331edee243"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
MPIPreferences = "3da0fdf6-3ccc-4f1b-acd9-58baa6c99267"
MPIQR = "4acd9526-7714-4958-a7d5-1632fb49fc8a"
MUMPS = "55d2b088-9f4e-11e9-26c0-150b02ea6a46"
Measures = "442fdcdd-2543-5da2-b0f3-8c86c306513e"
OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
@@ -51,7 +51,7 @@ file_io_netcdf = "NCDatasets"
manufactured_solns_ext = ["Symbolics", "IfElse"]

[compat]
MPIQR = "0.1.0"
MUMPS = "1.5.0"
julia = "1.9.0"

[extras]
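The Project.toml change above drops MPIQR from [deps] and [compat] and adds MUMPS with a 1.5.0 compatibility bound. A hypothetical update sequence for an existing checkout (the resolver picks the concrete version within the new bound):

using Pkg
Pkg.activate("moment_kinetics")
Pkg.rm("MPIQR")                       # drop the old dependency
Pkg.add(name="MUMPS", version="1.5")  # add MUMPS within the declared compat
Pkg.resolve()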
84 changes: 52 additions & 32 deletions moment_kinetics/src/electron_kinetic_equation.jl
@@ -2,7 +2,7 @@ module electron_kinetic_equation

using LinearAlgebra
using MPI
-using MPIQR
+using MUMPS
using SparseArrays

export get_electron_critical_velocities
@@ -72,8 +72,6 @@ import ..runge_kutta

const integral_correction_sharpness = 4.0

-const mpiqr_blocksize = 8 # 1 will be slow, 4 is good for smaller matrices, 8 is good for larger matrices, 16 - who knows?!
-
"""
update_electron_pdf is a function that uses the electron kinetic equation
to solve for the updated electron pdf
@@ -1233,38 +1231,46 @@ pressure \$p_{e∥}\$.
vperp_spectral, vpa_spectral, z_advect, vpa_advect, scratch_dummy,
external_source_settings, num_diss_params, t_params, ion_dt,
ir, evolve_ppar)
+
+mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
+comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if block_rank[] >= 0
-localcols = MPIQR.localcolumns(block_rank[], size(precon_matrix, 2),
-mpiqr_blocksize, block_size[])
-if size(orig_lu, 1) == 1
+if orig_lu.n == 0 # size(orig_lu, 1) == 1
# Have not properly created the LU decomposition before, so
# cannot reuse it.
@timeit_debug global_timer "lu" begin
-mpiqrmatrix = MPIQR.MPIQRMatrix(precon_matrix[:, localcols],
-size(precon_matrix); blocksize=mpiqr_blocksize, comm=comm_block[])
-mpiqrstruct = qr!(mpiqrmatrix)
+if block_rank[] == 0
+MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
+end
+MUMPS.factorize!(mumps)
nl_solver_params.preconditioners[ir] =
-(mpiqrstruct, precon_matrix, input_buffer, output_buffer)
+(mumps, precon_matrix, input_buffer, output_buffer)
end
else
# LU decomposition was previously created. The Jacobian always
# has the same sparsity pattern, so by using `lu!()` we can
# reuse some setup.
try
@timeit_debug global_timer "lu!" begin
-orig_lu[:, localcols] .= precon_matrix[:, localcols]
-qr!(orig_lu)
+if block_rank[] == 0
+MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
+end
+MUMPS.factorize!(orig_lu)
end
catch e
if !isa(e, ArgumentError)
rethrow(e)
end
println("Sparsity pattern of matrix changed, rebuilding "
* " LU from scratch")
-mpiqrmatrix = MPIQR.MPIQRMatrix(precon_matrix[:, localcols],
-size(precon_matrix); blocksize=mpiqr_blocksize, comm=comm_block[])
-@timeit_debug global_timer "lu" orig_lu = qr!(mpiqrmatrix)
+@timeit_debug global_timer "lu" begin
+if block_rank[] == 0
+MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
+end
+MUMPS.factorize!(mumps)
+nl_solver_params.preconditioners[ir] =
+(mumps, precon_matrix, input_buffer, output_buffer)
+end
-
end
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
@@ -1294,7 +1300,10 @@ pressure \$p_{e∥}\$.
end

this_output_buffer .= this_input_buffer
@timeit_debug global_timer "ldiv!" MPIQR.ldiv!(precon_lu, this_output_buffer)
if block_rank[] == 0
MUMPS.associate_rhs!(precon_lu, this_output_buffer)
end
@timeit_debug global_timer "ldiv!" MUMPS.solve!(precon_lu)

begin_serial_region()
counter = 1
@@ -1955,37 +1964,45 @@ to allow the outer r-loop to be parallelised.
z_advect, vpa_advect, scratch_dummy, external_source_settings,
num_diss_params, t_params, ion_dt, ir, true, :all, true, false)

+mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
+comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if block_rank[] >= 0
-localcols = MPIQR.localcols(block_rank[], size(precon_matrix, 2),
-mpiqr_blocksize, block_size[])
-if size(orig_lu, 1) == 1
+if orig_lu.n == 0 # size(orig_lu, 1) == 1
# Have not properly created the LU decomposition before, so
# cannot reuse it.
@timeit_debug global_timer "lu" begin
-mpiqrmatrix = MPIQR.MPIQRMatrix((precon_matrix[:, localcols]), size(precon_matrix);
-blocksize=mpiqr_blocksize, comm=comm_block[])
-mpiqrstruct = qr!(mpiqrmatrix)
-nl_solver_params.preconditioners[ir] =
-(mpiqrstruct, precon_matrix, input_buffer, output_buffer)
+if block_rank[] == 0
+MUMPS.associate_matrix!(mumps, sparse(precon_matrix))
+end
+MUMPS.factorize!(mumps)
+nl_solver_params.preconditioners[ir] =
+(mumps, precon_matrix, input_buffer, output_buffer)
end
else
# LU decomposition was previously created. The Jacobian always
# has the same sparsity pattern, so by using `lu!()` we can
# reuse some setup.
try
@timeit_debug global_timer "lu!" begin
-orig_lu[:, localcols] .= precon_matrix[:, localcols]
-qr!(orig_lu)
+if block_rank[] == 0
+MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
+end
+MUMPS.factorize!(orig_lu)
+nl_solver_params.preconditioners[ir] =
+(orig_lu, precon_matrix, input_buffer, output_buffer)
end
catch e
if !isa(e, ArgumentError)
rethrow(e)
end
println("Sparsity pattern of matrix changed, rebuilding "
* " LU from scratch")
-mpiqrmatrix = MPIQR.MPIQRMatrix(precon_matrix[:, localcols], size(precon_matrix);
-blocksize=mpiqr_blocksize, comm=comm_block[])
-@timeit_debug global_timer "lu" orig_lu = qr!(mpiqrmatrix)
+if block_rank[] == 0
+MUMPS.associate_matrix!(orig_lu, sparse(precon_matrix))
+end
+@timeit_debug global_timer "lu" MUMPS.factorize!(orig_lu)
+nl_solver_params.preconditioners[ir] =
+(orig_lu, precon_matrix, input_buffer, output_buffer)
end
nl_solver_params.preconditioners[ir] =
(orig_lu, precon_matrix, input_buffer, output_buffer)
@@ -2028,8 +2045,11 @@ to allow the outer r-loop to be parallelised.
counter += 1
end

-this_output_buffer .= this_input_buffer
-@timeit_debug global_timer "ldiv!" MPIQR.ldiv!(precon_lu, this_output_buffer)
+this_output_buffer .= this_input_buffer
+if block_rank[] == 0
+MUMPS.associate_rhs!(precon_lu, this_output_buffer)
+end
+@timeit_debug global_timer "ldiv!" MUMPS.solve!(precon_lu)

begin_serial_region()
counter = 1
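Both preconditioner setups in this file share one factorise-or-refactorise shape: on first use (orig_lu.n == 0) a fresh factorisation is stored, on later calls the existing solver object is re-associated with the updated matrix and refactorised, and an ArgumentError is treated as the signal that the sparsity pattern changed and the factorisation must be rebuilt. A schematic sketch of that control flow (the function name and the exception-based fallback are illustrative rather than code from this commit; assumes MUMPS and SparseArrays are loaded):

# `precon` is an existing Mumps instance, `A` the rebuilt Jacobian, and
# `is_host` is true on the rank where block_rank[] == 0; all ranks call this.
function refactorize!(precon, A, is_host)
    try
        if is_host
            MUMPS.associate_matrix!(precon, sparse(A))
        end
        MUMPS.factorize!(precon)  # intended to reuse the earlier setup
    catch e
        isa(e, ArgumentError) || rethrow(e)
        # Sparsity pattern changed: build a fresh solver and factorise anew.
        precon = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
        if is_host
            MUMPS.associate_matrix!(precon, sparse(A))
        end
        MUMPS.factorize!(precon)
    end
    return precon
end

One caveat, consistent with the commit title: MUMPS keeps its solution inside the solver object, and get_solution is the usual MUMPS.jl way to retrieve it. If MUMPS.solve!(precon_lu) does not update this_output_buffer in place, the values copied out after the solve would be stale.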
16 changes: 8 additions & 8 deletions moment_kinetics/src/nonlinear_solvers.jl
@@ -39,7 +39,7 @@ using ..type_definitions: mk_float, mk_int

using LinearAlgebra
using MPI
-using MPIQR
+using MUMPS
using SparseArrays
using StatsBase: mean

@@ -167,25 +167,25 @@ function setup_nonlinear_solve(active, input_dict, coords, outer_coords=(); defa
V .= 0.0
end
end
-mpiqr = MPIQR.MPIQRMatrix(zeros(Float64, 1, 1), (1, block_size[]);
-blocksize=1, comm=comm_block[])
-mpiqrstruct = MPIQR.MPIQRStruct(mpiqr)
+
+mumps = Mumps{mk_float}(mumps_unsymmetric, default_icntl, default_cntl64;
+comm=MPI.API.MPI_Comm_f2c(comm_block[].val))
if preconditioner_type === Val(:lu)
# Create dummy LU solver objects so we can create an array for preconditioners.
# These will be calculated properly within the time loop.
-preconditioners = fill(mpiqrstruct,
+preconditioners = fill(mumps,
reverse(outer_coord_sizes))
elseif preconditioner_type === Val(:electron_split_lu)
-preconditioners = (z=fill(mpiqrstruct,
+preconditioners = (z=fill(mumps,
tuple(coords.vpa.n, reverse(outer_coord_sizes)...)),
vpa=fill(mpqirstruct,
tuple(coords.z.n, reverse(outer_coord_sizes)...)),
-ppar=fill(mpiqrstruct,
+ppar=fill(mumps,
reverse(outer_coord_sizes)),
)
elseif preconditioner_type === Val(:electron_lu)
pdf_plus_ppar_size = total_size_coords + coords.z.n
-preconditioners = fill((mpiqrstruct,
+preconditioners = fill((mumps,
allocate_shared_float(pdf_plus_ppar_size, pdf_plus_ppar_size),
allocate_shared_float(pdf_plus_ppar_size),
allocate_shared_float(pdf_plus_ppar_size),
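The dummy mumps object above is only a placeholder: fill puts the same instance into every slot so that the preconditioners container has a concrete element type, and each slot is overwritten with a real factorisation inside the time loop. A minimal sketch of the idiom (the extent 4 and the helper name are hypothetical):

using MPI, MUMPS

MPI.Init()
dummy = Mumps{Float64}(mumps_unsymmetric, default_icntl, default_cntl64)
preconditioners = fill(dummy, (4,))  # 4: stand-in for the outer-loop extent
# Each entry is replaced before first use, e.g.
# preconditioners[ir] = factorised_solver_for(ir)  # hypothetical helper

Note that the :electron_split_lu branch still reads vpa=fill(mpqirstruct, ...); mpqirstruct looks like a pre-existing typo for the now-removed mpiqrstruct, so that branch would raise an undefined-variable error if taken, and presumably should also become mumps.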
