add reorderings to benchmarks

ginkgo-project · Oct 10, 2023 · 27d37ed
1 parent d263b3e
commit 27d37ed
Show file tree

Hide file tree

Showing 21 changed files with 318 additions and 20 deletions.
diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp
@@ -118,6 +118,8 @@ struct ConversionBenchmark : Benchmark<gko::device_matrix_data<etype, itype>> {
     {
         gko::matrix_data<etype, itype> data;
         data = Generator::generate_matrix_data(test_case);
+        // no reordering here, as it doesn't impact conversions beyond
+        // dense-sparse conversions
         std::clog << "Matrix is of size (" << data.size[0] << ", "
                   << data.size[1] << "), " << data.nonzeros.size() << std::endl;
         test_case["rows"] = data.size[0];

diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp
@@ -186,6 +186,7 @@ struct MatrixStatistics : Benchmark<empty_state> {
                       json& test_case) const override
     {
         auto data = Generator::generate_matrix_data(test_case);
+        // no reordering here, as it doesn't change statistics
         std::clog << "Matrix is of size (" << data.size[0] << ", "
                   << data.size[1] << "), " << data.nonzeros.size() << std::endl;
         test_case["rows"] = data.size[0];

diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp
@@ -183,6 +183,7 @@ struct PreconditionerBenchmark : Benchmark<preconditioner_benchmark_state> {
     {
         preconditioner_benchmark_state state;
         auto data = Generator::generate_matrix_data(test_case);
+        reorder(data, test_case);
 
         state.system_matrix =
             formats::matrix_factory(FLAGS_formats, exec, data);

diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "benchmark/utils/formats.hpp"
 #include "benchmark/utils/general.hpp"
+#include "benchmark/utils/general_matrix.hpp"
 #include "benchmark/utils/generator.hpp"
 #include "benchmark/utils/iteration_control.hpp"
 #include "benchmark/utils/loggers.hpp"
@@ -433,10 +434,17 @@ struct SolverBenchmark : Benchmark<solver_benchmark_state<Generator>> {
                 {std::numeric_limits<rc_etype>::quiet_NaN()}, exec);
             state.x = generator.initialize({0.0}, exec);
         } else {
-            state.system_matrix =
-                generator.generate_matrix_with_optimal_format(exec, test_case);
+            auto data = generator.generate_matrix_data(test_case);
+            auto permutation =
+                reorder(data, test_case, generator.is_distributed());
+
+            state.system_matrix = generator.generate_matrix_with_format(
+                exec, test_case["optimal"]["spmv"].get<std::string>(), data);
             state.b = generator.generate_rhs(exec, state.system_matrix.get(),
                                              test_case);
+            if (permutation) {
+                permute(state.b, permutation.get());
+            }
             state.x = generator.generate_initial_guess(
                 exec, state.system_matrix.get(), state.b.get());
         }

diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp
@@ -691,7 +691,8 @@ class SymbolicCholeskyOperation : public BenchmarkOperation {
 
 
 class ReorderRcmOperation : public BenchmarkOperation {
-    using reorder_type = gko::reorder::Rcm<etype, itype>;
+    using reorder_type = gko::experimental::reorder::Rcm<itype>;
+    using permutation_type = gko::matrix::Permutation<itype>;
 
 public:
     explicit ReorderRcmOperation(const Mtx* mtx)
@@ -715,8 +716,8 @@ class ReorderRcmOperation : public BenchmarkOperation {
 
 private:
     std::shared_ptr<Mtx> mtx_;
-    std::unique_ptr<reorder_type::Factory> factory_;
-    std::unique_ptr<reorder_type> reorder_;
+    std::unique_ptr<reorder_type> factory_;
+    std::unique_ptr<permutation_type> reorder_;
 };
 
 

diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp
@@ -114,7 +114,7 @@ struct SparseBlasBenchmark : Benchmark<std::unique_ptr<Mtx>> {
                                json& test_case) const override
     {
         auto data = Generator::generate_matrix_data(test_case);
-        data.ensure_row_major_order();
+        reorder(data, test_case);
         std::clog << "Matrix is of size (" << data.size[0] << ", "
                   << data.size[1] << "), " << data.nonzeros.size() << std::endl;
         test_case["rows"] = data.size[0];

diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "benchmark/utils/formats.hpp"
 #include "benchmark/utils/general.hpp"
+#include "benchmark/utils/general_matrix.hpp"
 #include "benchmark/utils/iteration_control.hpp"
 #include "benchmark/utils/loggers.hpp"
 #include "benchmark/utils/runner.hpp"
@@ -104,6 +105,7 @@ struct SpmvBenchmark : Benchmark<spmv_benchmark_state<Generator>> {
     {
         spmv_benchmark_state<Generator> state;
         state.data = generator.generate_matrix_data(test_case);
+        reorder(state.data, test_case, generator.is_distributed());
 
         auto nrhs = FLAGS_nrhs;
         state.b = generator.create_multi_vector_random(

diff --git a/benchmark/test/preconditioner.py b/benchmark/test/preconditioner.py
@@ -43,3 +43,11 @@
     expected_stdout="preconditioner.profile.stdout",
     expected_stderr="preconditioner.profile.stderr",
 )
+
+# reordering
+test_framework.compare_output(
+    ["-reorder", "amd"],
+    expected_stdout="preconditioner.reordered.stdout",
+    expected_stderr="preconditioner.reordered.stderr",
+    stdin='[{"size": 100, "stencil": "7pt"}]',
+)
diff --git a/benchmark/test/reference/preconditioner.reordered.stderr b/benchmark/test/reference/preconditioner.reordered.stderr
@@ -0,0 +1,9 @@
+This is Ginkgo 1.7.0 (develop)
+    running with core module 1.7.0 (develop)
+Running on reference(0)
+Running with 2 warm iterations and 10 running iterations
+The random seed for right hand sides is 42
+Running with preconditioners: none
+Running test case stencil(100, 7pt)
+Matrix is of size (125, 125), 725
+	Running preconditioner: none
diff --git a/benchmark/test/reference/preconditioner.reordered.stdout b/benchmark/test/reference/preconditioner.reordered.stdout
@@ -0,0 +1,33 @@
+[
+    {
+        "size": 100,
+        "stencil": "7pt",
+        "preconditioner": {
+            "none": {
+                "generate": {
+                    "components": {
+                        "generate(<typename>)": 1.0,
+                        "overhead": 1.0
+                    },
+                    "time": 1.0,
+                    "repetitions": 10
+                },
+                "apply": {
+                    "components": {
+                        "apply(<typename>)": 1.0,
+                        "copy(<typename>)": 1.0,
+                        "dense::copy": 1.0,
+                        "overhead": 1.0
+                    },
+                    "time": 1.0,
+                    "repetitions": 10
+                },
+                "completed": true
+            }
+        },
+        "reordered": "amd",
+        "rows": 125,
+        "cols": 125,
+        "nonzeros": 725
+    }
+]
diff --git a/benchmark/test/reference/solver.reordered.stderr b/benchmark/test/reference/solver.reordered.stderr
@@ -0,0 +1,10 @@
+This is Ginkgo 1.7.0 (develop)
+    running with core module 1.7.0 (develop)
+Running on reference(0)
+Running with 2 warm iterations and 1 running iterations
+The random seed for right hand sides is 42
+Running cg with 1000 iterations and residual goal of 1.000000e-06
+The number of right hand sides is 1
+Running test case stencil(100, 7pt)
+Matrix is of size (125, 125)
+	Running solver: cg
diff --git a/benchmark/test/reference/solver.reordered.stdout b/benchmark/test/reference/solver.reordered.stdout
@@ -0,0 +1,57 @@
+[
+    {
+        "size": 100,
+        "stencil": "7pt",
+        "optimal": {
+            "spmv": "csr"
+        },
+        "solver": {
+            "cg": {
+                "recurrent_residuals": [],
+                "true_residuals": [],
+                "implicit_residuals": [],
+                "iteration_timestamps": [],
+                "rhs_norm": 1.0,
+                "generate": {
+                    "components": {
+                        "generate(<typename>)": 1.0,
+                        "free": 1.0,
+                        "overhead": 1.0
+                    },
+                    "time": 1.0
+                },
+                "apply": {
+                    "components": {
+                        "apply(<typename>)": 1.0,
+                        "iteration": 1.0,
+                        "allocate": 1.0,
+                        "dense::fill": 1.0,
+                        "cg::initialize": 1.0,
+                        "advanced_apply(<typename>)": 1.0,
+                        "csr::advanced_spmv": 1.0,
+                        "dense::compute_norm2_dispatch": 1.0,
+                        "copy(<typename>)": 1.0,
+                        "dense::copy": 1.0,
+                        "dense::compute_conj_dot_dispatch": 1.0,
+                        "check(<typename>)": 1.0,
+                        "residual_norm::residual_norm": 1.0,
+                        "cg::step_1": 1.0,
+                        "csr::spmv": 1.0,
+                        "cg::step_2": 1.0,
+                        "free": 1.0,
+                        "overhead": 1.0
+                    },
+                    "iterations": 7,
+                    "time": 1.0
+                },
+                "preconditioner": {},
+                "residual_norm": 1.0,
+                "repetitions": 1,
+                "completed": true
+            }
+        },
+        "reordered": "amd",
+        "rows": 125,
+        "cols": 125
+    }
+]
diff --git a/benchmark/test/reference/sparse_blas.reordered.stderr b/benchmark/test/reference/sparse_blas.reordered.stderr
@@ -0,0 +1,9 @@
+This is Ginkgo 1.7.0 (develop)
+    running with core module 1.7.0 (develop)
+Running on reference(0)
+Running with 2 warm iterations and 10 running iterations
+The random seed for right hand sides is 42
+The operations are symbolic_cholesky
+Running test case stencil(100, 7pt)
+Matrix is of size (125, 125), 725
+	Running sparse_blas: symbolic_cholesky
diff --git a/benchmark/test/reference/sparse_blas.reordered.stdout b/benchmark/test/reference/sparse_blas.reordered.stdout
@@ -0,0 +1,32 @@
+[
+    {
+        "size": 100,
+        "stencil": "7pt",
+        "sparse_blas": {
+            "symbolic_cholesky": {
+                "time": 1.0,
+                "flops": 1.0,
+                "bandwidth": 1.0,
+                "repetitions": 10,
+                "components": {
+                    "compute_elim_forest": 1.0,
+                    "allocate": 1.0,
+                    "free": 1.0,
+                    "components::fill_array": 1.0,
+                    "cholesky::symbolic_count": 1.0,
+                    "components::prefix_sum_nonnegative": 1.0,
+                    "copy": 1.0,
+                    "cholesky::symbolic_factorize": 1.0,
+                    "csr::sort_by_column_index": 1.0,
+                    "overhead": 1.0
+                },
+                "factor_nonzeros": 1324,
+                "completed": true
+            }
+        },
+        "reordered": "amd",
+        "rows": 125,
+        "cols": 125,
+        "nonzeros": 725
+    }
+]
diff --git a/benchmark/test/reference/spmv.reordered.stderr b/benchmark/test/reference/spmv.reordered.stderr
@@ -0,0 +1,10 @@
+This is Ginkgo 1.7.0 (develop)
+    running with core module 1.7.0 (develop)
+Running on reference(0)
+Running with 2 warm iterations and 10 running iterations
+The random seed for right hand sides is 42
+The formats are coo
+The number of right hand sides is 1
+Running test case stencil(100, 7pt)
+Matrix is of size (125, 125), 725
+	Running spmv: coo
diff --git a/benchmark/test/reference/spmv.reordered.stdout b/benchmark/test/reference/spmv.reordered.stdout
@@ -0,0 +1,22 @@
+[
+    {
+        "size": 100,
+        "stencil": "7pt",
+        "spmv": {
+            "coo": {
+                "storage": 11600,
+                "max_relative_norm2": 1.0,
+                "time": 1.0,
+                "repetitions": 10,
+                "completed": true
+            }
+        },
+        "reordered": "amd",
+        "rows": 125,
+        "cols": 125,
+        "nonzeros": 725,
+        "optimal": {
+            "spmv": "coo"
+        }
+    }
+]
diff --git a/benchmark/test/solver.py b/benchmark/test/solver.py
@@ -43,3 +43,11 @@
     expected_stdout="solver.profile.stdout",
     expected_stderr="solver.profile.stderr",
 )
+
+# reordering
+test_framework.compare_output(
+    ["-reorder", "amd"],
+    expected_stdout="solver.reordered.stdout",
+    expected_stderr="solver.reordered.stderr",
+    stdin='[{"size": 100, "stencil": "7pt", "optimal": {"spmv": "csr"}}]',
+)
diff --git a/benchmark/test/sparse_blas.py b/benchmark/test/sparse_blas.py
@@ -4,7 +4,8 @@
 # check that all input modes work:
 # parameter
 test_framework.compare_output(
-    ["-operations", "transpose", "-input", '[{"size": 100, "stencil": "7pt"}]'],
+    ["-operations", "transpose", "-input",
+        '[{"size": 100, "stencil": "7pt"}]'],
     expected_stdout="sparse_blas.simple.stdout",
     expected_stderr="sparse_blas.simple.stderr",
 )
@@ -55,3 +56,11 @@
     expected_stdout="sparse_blas.profile.stdout",
     expected_stderr="sparse_blas.profile.stderr",
 )
+
+# reordering
+test_framework.compare_output(
+    ["-operations", "symbolic_cholesky", "-reorder", "amd"],
+    expected_stdout="sparse_blas.reordered.stdout",
+    expected_stderr="sparse_blas.reordered.stderr",
+    stdin='[{"size": 100, "stencil": "7pt"}]',
+)
diff --git a/benchmark/test/spmv.py b/benchmark/test/spmv.py
@@ -43,3 +43,11 @@
     expected_stdout="spmv.profile.stdout",
     expected_stderr="spmv.profile.stderr",
 )
+
+# reordering
+test_framework.compare_output(
+    ["-reorder", "amd"],
+    expected_stdout="spmv.reordered.stdout",
+    expected_stderr="spmv.reordered.stderr",
+    stdin='[{"size": 100, "stencil": "7pt"}]',
+)