diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp index c777db1a35a..e45046329d7 100644 --- a/benchmark/conversion/conversion.cpp +++ b/benchmark/conversion/conversion.cpp @@ -118,6 +118,8 @@ struct ConversionBenchmark : Benchmark> { { gko::matrix_data data; data = Generator::generate_matrix_data(test_case); + // no reordering here, as it doesn't impact conversions beyond + // dense-sparse conversions std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << data.nonzeros.size() << std::endl; test_case["rows"] = data.size[0]; diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 20feecf5ccf..576d6fa7d52 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -186,6 +186,7 @@ struct MatrixStatistics : Benchmark { json& test_case) const override { auto data = Generator::generate_matrix_data(test_case); + // no reordering here, as it doesn't change statistics std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << data.nonzeros.size() << std::endl; test_case["rows"] = data.size[0]; diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index 074fe202e6c..d81dfaa4d5d 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -183,6 +183,7 @@ struct PreconditionerBenchmark : Benchmark { { preconditioner_benchmark_state state; auto data = Generator::generate_matrix_data(test_case); + reorder(data, test_case); state.system_matrix = formats::matrix_factory(FLAGS_formats, exec, data); diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index 784b70eca61..46b7a231e9a 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE 
POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/generator.hpp" #include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" @@ -433,10 +434,17 @@ struct SolverBenchmark : Benchmark> { {std::numeric_limits::quiet_NaN()}, exec); state.x = generator.initialize({0.0}, exec); } else { - state.system_matrix = - generator.generate_matrix_with_optimal_format(exec, test_case); + auto data = generator.generate_matrix_data(test_case); + auto permutation = + reorder(data, test_case, generator.is_distributed()); + + state.system_matrix = generator.generate_matrix_with_format( + exec, test_case["optimal"]["spmv"].get(), data); state.b = generator.generate_rhs(exec, state.system_matrix.get(), test_case); + if (permutation) { + permute(state.b, permutation.get()); + } state.x = generator.generate_initial_guess( exec, state.system_matrix.get(), state.b.get()); } diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index 2ee766d4f83..55170f009b0 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -691,7 +691,8 @@ class SymbolicCholeskyOperation : public BenchmarkOperation { class ReorderRcmOperation : public BenchmarkOperation { - using reorder_type = gko::reorder::Rcm; + using reorder_type = gko::experimental::reorder::Rcm; + using permutation_type = gko::matrix::Permutation; public: explicit ReorderRcmOperation(const Mtx* mtx) @@ -715,8 +716,8 @@ class ReorderRcmOperation : public BenchmarkOperation { private: std::shared_ptr mtx_; - std::unique_ptr factory_; - std::unique_ptr reorder_; + std::unique_ptr factory_; + std::unique_ptr reorder_; }; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 5d479eb7fc0..5385de4264c 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ 
b/benchmark/sparse_blas/sparse_blas.cpp @@ -114,7 +114,7 @@ struct SparseBlasBenchmark : Benchmark> { json& test_case) const override { auto data = Generator::generate_matrix_data(test_case); - data.ensure_row_major_order(); + reorder(data, test_case); std::clog << "Matrix is of size (" << data.size[0] << ", " << data.size[1] << "), " << data.nonzeros.size() << std::endl; test_case["rows"] = data.size[0]; diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index c85642bb5f1..4d1ab17ccf4 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/general_matrix.hpp" #include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/runner.hpp" @@ -104,6 +105,7 @@ struct SpmvBenchmark : Benchmark> { { spmv_benchmark_state state; state.data = generator.generate_matrix_data(test_case); + reorder(state.data, test_case, generator.is_distributed()); auto nrhs = FLAGS_nrhs; state.b = generator.create_multi_vector_random( diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py index e05e5b780ac..7226964dd05 100755 --- a/benchmark/test/preconditioner.py +++ b/benchmark/test/preconditioner.py @@ -43,3 +43,11 @@ expected_stdout="preconditioner.profile.stdout", expected_stderr="preconditioner.profile.stderr", ) + +# reordering +test_framework.compare_output( + ["-reorder", "amd"], + expected_stdout="preconditioner.reordered.stdout", + expected_stderr="preconditioner.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) diff --git a/benchmark/test/reference/preconditioner.reordered.stderr b/benchmark/test/reference/preconditioner.reordered.stderr new file mode 100644 index 00000000000..a428671486f --- /dev/null +++
b/benchmark/test/reference/preconditioner.reordered.stderr @@ -0,0 +1,9 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +Running with preconditioners: none +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.reordered.stdout b/benchmark/test/reference/preconditioner.reordered.stdout new file mode 100644 index 00000000000..51adfb3b58b --- /dev/null +++ b/benchmark/test/reference/preconditioner.reordered.stdout @@ -0,0 +1,33 @@ +[ + { + "size": 100, + "stencil": "7pt", + "preconditioner": { + "none": { + "generate": { + "components": { + "generate()": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "apply": { + "components": { + "apply()": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "overhead": 1.0 + }, + "time": 1.0, + "repetitions": 10 + }, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/solver.reordered.stderr b/benchmark/test/reference/solver.reordered.stderr new file mode 100644 index 00000000000..d9c04b69cf5 --- /dev/null +++ b/benchmark/test/reference/solver.reordered.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +Running cg with 1000 iterations and residual goal of 1.000000e-06 +The number of right hand sides is 1 +Running test case stencil(100, 7pt) +Matrix is of size (125, 125) + Running solver: cg diff --git a/benchmark/test/reference/solver.reordered.stdout b/benchmark/test/reference/solver.reordered.stdout new file mode 100644 index 00000000000..c1b826ae3fc --- /dev/null +++ 
b/benchmark/test/reference/solver.reordered.stdout @@ -0,0 +1,57 @@ +[ + { + "size": 100, + "stencil": "7pt", + "optimal": { + "spmv": "csr" + }, + "solver": { + "cg": { + "recurrent_residuals": [], + "true_residuals": [], + "implicit_residuals": [], + "iteration_timestamps": [], + "rhs_norm": 1.0, + "generate": { + "components": { + "generate()": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "time": 1.0 + }, + "apply": { + "components": { + "apply()": 1.0, + "iteration": 1.0, + "allocate": 1.0, + "dense::fill": 1.0, + "cg::initialize": 1.0, + "advanced_apply()": 1.0, + "csr::advanced_spmv": 1.0, + "dense::compute_norm2_dispatch": 1.0, + "copy()": 1.0, + "dense::copy": 1.0, + "dense::compute_conj_dot_dispatch": 1.0, + "check()": 1.0, + "residual_norm::residual_norm": 1.0, + "cg::step_1": 1.0, + "csr::spmv": 1.0, + "cg::step_2": 1.0, + "free": 1.0, + "overhead": 1.0 + }, + "iterations": 7, + "time": 1.0 + }, + "preconditioner": {}, + "residual_norm": 1.0, + "repetitions": 1, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125 + } +] diff --git a/benchmark/test/reference/sparse_blas.reordered.stderr b/benchmark/test/reference/sparse_blas.reordered.stderr new file mode 100644 index 00000000000..497d5a72bbf --- /dev/null +++ b/benchmark/test/reference/sparse_blas.reordered.stderr @@ -0,0 +1,9 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The operations are symbolic_cholesky +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running sparse_blas: symbolic_cholesky diff --git a/benchmark/test/reference/sparse_blas.reordered.stdout b/benchmark/test/reference/sparse_blas.reordered.stdout new file mode 100644 index 00000000000..b5fc8998be0 --- /dev/null +++ b/benchmark/test/reference/sparse_blas.reordered.stdout @@ -0,0 +1,32 @@ +[ + { + "size": 100, + "stencil": 
"7pt", + "sparse_blas": { + "symbolic_cholesky": { + "time": 1.0, + "flops": 1.0, + "bandwidth": 1.0, + "repetitions": 10, + "components": { + "compute_elim_forest": 1.0, + "allocate": 1.0, + "free": 1.0, + "components::fill_array": 1.0, + "cholesky::symbolic_count": 1.0, + "components::prefix_sum_nonnegative": 1.0, + "copy": 1.0, + "cholesky::symbolic_factorize": 1.0, + "csr::sort_by_column_index": 1.0, + "overhead": 1.0 + }, + "factor_nonzeros": 1324, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125, + "nonzeros": 725 + } +] diff --git a/benchmark/test/reference/spmv.reordered.stderr b/benchmark/test/reference/spmv.reordered.stderr new file mode 100644 index 00000000000..97fe670aff7 --- /dev/null +++ b/benchmark/test/reference/spmv.reordered.stderr @@ -0,0 +1,10 @@ +This is Ginkgo 1.7.0 (develop) + running with core module 1.7.0 (develop) +Running on reference(0) +Running with 2 warm iterations and 10 running iterations +The random seed for right hand sides is 42 +The formats are coo +The number of right hand sides is 1 +Running test case stencil(100, 7pt) +Matrix is of size (125, 125), 725 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.reordered.stdout b/benchmark/test/reference/spmv.reordered.stdout new file mode 100644 index 00000000000..5404235cdf7 --- /dev/null +++ b/benchmark/test/reference/spmv.reordered.stdout @@ -0,0 +1,22 @@ +[ + { + "size": 100, + "stencil": "7pt", + "spmv": { + "coo": { + "storage": 11600, + "max_relative_norm2": 1.0, + "time": 1.0, + "repetitions": 10, + "completed": true + } + }, + "reordered": "amd", + "rows": 125, + "cols": 125, + "nonzeros": 725, + "optimal": { + "spmv": "coo" + } + } +] diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py index 025ee92707c..5dd1d840a4e 100755 --- a/benchmark/test/solver.py +++ b/benchmark/test/solver.py @@ -43,3 +43,11 @@ expected_stdout="solver.profile.stdout", expected_stderr="solver.profile.stderr", ) + +# reordering 
+test_framework.compare_output( + ["-reorder", "amd"], + expected_stdout="solver.reordered.stdout", + expected_stderr="solver.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]', +) diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py index 724cdb866f0..8e6cda3c9bd 100755 --- a/benchmark/test/sparse_blas.py +++ b/benchmark/test/sparse_blas.py @@ -4,7 +4,8 @@ # check that all input modes work: # parameter test_framework.compare_output( - ["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'], + ["-operations", "transpose", "-input", + '[{"size": 100, "stencil": "7pt"}]'], expected_stdout="sparse_blas.simple.stdout", expected_stderr="sparse_blas.simple.stderr", ) @@ -55,3 +56,11 @@ expected_stdout="sparse_blas.profile.stdout", expected_stderr="sparse_blas.profile.stderr", ) + +# reordering +test_framework.compare_output( + ["-operations", "symbolic_cholesky", "-reorder", "amd"], + expected_stdout="sparse_blas.reordered.stdout", + expected_stderr="sparse_blas.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py index 865f74bb6d0..f6f4a4b5c39 100755 --- a/benchmark/test/spmv.py +++ b/benchmark/test/spmv.py @@ -43,3 +43,11 @@ expected_stdout="spmv.profile.stdout", expected_stderr="spmv.profile.stderr", ) + +# reordering +test_framework.compare_output( + ["-reorder", "amd"], + expected_stdout="spmv.reordered.stdout", + expected_stderr="spmv.reordered.stderr", + stdin='[{"size": 100, "stencil": "7pt"}]', +) diff --git a/benchmark/utils/general_matrix.hpp b/benchmark/utils/general_matrix.hpp index 39d8b5a8107..01c84dd2837 100644 --- a/benchmark/utils/general_matrix.hpp +++ b/benchmark/utils/general_matrix.hpp @@ -41,12 +41,79 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "benchmark/utils/general.hpp" +#include "benchmark/utils/generator.hpp" +#include "ginkgo/core/matrix/permutation.hpp" + + +std::string reordering_algorithm_desc = + "Reordering algorithm to apply to the input matrices:\n" + " none - no reordering\n" + " amd - Approximate Minimum Degree reordering algorithm\n" +#if GKO_HAVE_METIS + " nd - Nested Dissection reordering algorithm\n" +#endif + " rcm - Reverse Cuthill-McKee reordering algorithm"; DEFINE_string(input_matrix, "", "Filename of a matrix to be used as the single input. Overwrites " "the value of the -input flag"); +DEFINE_string(reorder, "none", reordering_algorithm_desc.c_str()); + + +template +std::unique_ptr> reorder( + gko::matrix_data& data, json& test_case, + bool is_distributed = false) +{ + if (FLAGS_reorder == "none" || is_distributed) { + return nullptr; + } + using Csr = gko::matrix::Csr; + auto ref = gko::ReferenceExecutor::create(); + auto mtx = gko::share(Csr::create(ref)); + mtx->read(data); + std::unique_ptr> perm; + if (FLAGS_reorder == "amd") { + perm = gko::experimental::reorder::Amd::build() + .on(ref) + ->generate(mtx); +#if GKO_HAVE_METIS + } else if (FLAGS_reorder == "nd") { + perm = gko::experimental::reorder::NestedDissection::build() + .on(ref) + ->generate(mtx); +#endif + } else if (FLAGS_reorder == "rcm") { + perm = gko::experimental::reorder::Rcm::build() + .on(ref) + ->generate(mtx); + } else { + throw std::runtime_error{"Unknown reordering algorithm " + + FLAGS_reorder}; + } + mtx->permute(perm)->write(data); + test_case["reordered"] = FLAGS_reorder; + return perm; +} + + +template +void permute(std::unique_ptr>& vec, + const gko::matrix::Permutation* perm) +{ + vec = vec->permute(perm, gko::matrix::permute_mode::rows); +} + + +template +void permute( + std::unique_ptr>& vec, + const gko::matrix::Permutation* perm) +{} + /** * @copydoc initialize_argument_parsing diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 3f26ed3f2fc..c280cb1ac72 
100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -52,20 +52,25 @@ struct DefaultSystemGenerator { using value_type = ValueType; using Vec = vec; + static bool is_distributed() { return false; } + static gko::matrix_data generate_matrix_data( const json& config) { + gko::matrix_data data; if (config.contains("filename")) { std::ifstream in(config["filename"].get()); - return gko::read_generic_raw(in); + data = gko::read_generic_raw(in); } else if (config.contains("stencil")) { - return generate_stencil( + data = generate_stencil( config["stencil"].get(), config["size"].get()); } else { throw std::runtime_error( "No known way to generate matrix data found."); } + data.ensure_row_major_order(); + return data; } static std::string get_example_config() @@ -188,16 +193,19 @@ struct DistributedDefaultSystemGenerator { using Mtx = dist_mtx; using Vec = dist_vec; + static bool is_distributed() { return true; } + gko::matrix_data generate_matrix_data( const json& config) const { + gko::matrix_data data; if (config.contains("filename")) { std::ifstream in(config["filename"].get()); - return gko::read_generic_raw(in); + data = gko::read_generic_raw(in); } else if (config.contains("stencil")) { auto local_size = static_cast( config["size"].get() / comm.size()); - return generate_stencil( + data = generate_stencil( config["stencil"].get(), comm, local_size, config["comm_pattern"].get() == std::string("optimal")); @@ -205,6 +213,8 @@ struct DistributedDefaultSystemGenerator { throw std::runtime_error( "No known way to generate matrix data found."); } + data.ensure_row_major_order(); + return data; } static std::string get_example_config() @@ -240,15 +250,6 @@ struct DistributedDefaultSystemGenerator { } } - std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, json& config) const - { - auto data = generate_matrix_data(config); - return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].get(), - data); 
- } - std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data,