From a2f11dbd4aa148a864f843ea93434c4fc6c40eed Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Tue, 18 Apr 2023 15:00:01 +0200 Subject: [PATCH] nlohmann_json refactor --- CMakeLists.txt | 2 +- benchmark/CMakeLists.txt | 4 +- benchmark/blas/blas.cpp | 21 +- benchmark/blas/blas_common.hpp | 256 ++++----- benchmark/blas/distributed/multi_vector.cpp | 35 +- .../CMakeLists.txt | 2 +- benchmark/conversion/conversion.cpp | 194 +++++++ benchmark/conversions/conversions.cpp | 232 -------- .../matrix_generator/matrix_generator.cpp | 36 +- .../matrix_statistics/matrix_statistics.cpp | 181 +++--- benchmark/preconditioner/preconditioner.cpp | 248 ++++---- benchmark/solver/distributed/solver.cpp | 17 +- benchmark/solver/solver.cpp | 19 +- benchmark/solver/solver_common.hpp | 417 +++++--------- benchmark/sparse_blas/operations.cpp | 13 +- benchmark/sparse_blas/operations.hpp | 8 +- benchmark/sparse_blas/sparse_blas.cpp | 202 +++---- benchmark/spmv/distributed/spmv.cpp | 55 +- benchmark/spmv/spmv.cpp | 31 +- benchmark/spmv/spmv_common.hpp | 295 ++++------ benchmark/test/reference/blas.profile.stderr | 72 +-- benchmark/test/reference/blas.simple.stderr | 72 +-- .../distributed_solver.profile.stderr | 8 +- .../distributed_solver.profile.stdout | 6 +- .../distributed_solver.simple.stdout | 6 +- .../reference/matrix_statistics.simple.stderr | 2 +- .../reference/matrix_statistics.simple.stdout | 7 +- .../multi_vector_distributed.profile.stderr | 258 +++++++++ .../multi_vector_distributed.simple.stderr | 82 +-- .../reference/preconditioner.profile.stderr | 37 +- .../reference/preconditioner.profile.stdout | 7 +- .../reference/preconditioner.simple.stderr | 33 +- .../reference/preconditioner.simple.stdout | 7 +- .../test/reference/solver.profile.stderr | 8 +- .../test/reference/solver.profile.stdout | 6 +- benchmark/test/reference/solver.simple.stdout | 6 +- .../test/reference/sparse_blas.profile.stderr | 34 +- .../test/reference/sparse_blas.simple.stderr | 30 +- 
benchmark/test/reference/spmv.profile.stderr | 25 +- benchmark/test/reference/spmv.profile.stdout | 6 +- benchmark/test/reference/spmv.simple.stderr | 21 +- benchmark/test/reference/spmv.simple.stdout | 6 +- .../reference/spmv_distributed.profile.stderr | 542 ++++++++++++++++++ .../reference/spmv_distributed.profile.stdout | 6 +- .../reference/spmv_distributed.simple.stderr | 22 +- .../reference/spmv_distributed.simple.stdout | 6 +- benchmark/utils/general.hpp | 360 +----------- benchmark/utils/generator.hpp | 118 ++-- benchmark/utils/iteration_control.hpp | 326 +++++++++++ benchmark/utils/json.hpp | 63 +- benchmark/utils/loggers.hpp | 96 ++-- benchmark/utils/runner.hpp | 209 +++++++ benchmark/utils/spmv_validation.hpp | 83 --- third_party/CMakeLists.txt | 4 +- third_party/nlohmann_json/CMakeLists.txt | 9 + third_party/rapidjson/CMakeLists.txt | 14 - 56 files changed, 2501 insertions(+), 2364 deletions(-) rename benchmark/{conversions => conversion}/CMakeLists.txt (88%) create mode 100644 benchmark/conversion/conversion.cpp delete mode 100644 benchmark/conversions/conversions.cpp create mode 100644 benchmark/utils/iteration_control.hpp create mode 100644 benchmark/utils/runner.hpp delete mode 100644 benchmark/utils/spmv_validation.hpp create mode 100644 third_party/nlohmann_json/CMakeLists.txt delete mode 100644 third_party/rapidjson/CMakeLists.txt diff --git a/CMakeLists.txt b/CMakeLists.txt index a0b1c4103d3..82e35e594e1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -272,7 +272,7 @@ if(GINKGO_BUILD_TESTS) endif() if(GINKGO_BUILD_BENCHMARKS) find_package(gflags 2.2.2 QUIET) - find_package(RapidJSON 1.1.0 QUIET) + find_package(nlohmann_json 3.9.1 QUIET) endif() if(GINKGO_BUILD_HWLOC) find_package(HWLOC 2.1) # No need for QUIET as we ship FindHWLOC diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 434474fd336..66ac0f8bb9e 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -59,7 +59,7 @@ endfunction() # All remaining 
arguments will be treated as source files function(ginkgo_add_single_benchmark_executable name use_lib_linops macro_def type) add_executable("${name}" ${ARGN}) - target_link_libraries("${name}" ginkgo gflags rapidjson) + target_link_libraries("${name}" ginkgo gflags nlohmann_json::nlohmann_json) # always include the device timer if (GINKGO_BUILD_CUDA) target_compile_definitions("${name}" PRIVATE HAS_CUDA_TIMER=1) @@ -152,7 +152,7 @@ if (GINKGO_BUILD_MPI) endif() add_subdirectory(blas) -add_subdirectory(conversions) +add_subdirectory(conversion) add_subdirectory(matrix_generator) add_subdirectory(matrix_statistics) add_subdirectory(preconditioner) diff --git a/benchmark/blas/blas.cpp b/benchmark/blas/blas.cpp index ee2dc06d01b..24876ef613c 100644 --- a/benchmark/blas/blas.cpp +++ b/benchmark/blas/blas.cpp @@ -130,25 +130,18 @@ Parameters for a benchmark case are: stride_B: stride for B matrix in gemm (optional, default m) stride_C: stride for C matrix in gemm (optional, default m) )"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing(&argc, &argv, header, format); - std::string extra_information = "The operations are " + FLAGS_operations; + std::string extra_information = + "The operations are " + FLAGS_operations + "\n"; print_general_information(extra_information); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - std::cerr - << "Input has to be a JSON array of benchmark configurations:\n" - << format; - std::exit(1); - } + auto test_cases = json::parse(get_input_stream()); - run_blas_benchmarks(exec, get_timer(exec, FLAGS_gpu_timer), operation_map, - test_cases, true); + run_test_cases(BlasBenchmark{operation_map}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << 
std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/blas/blas_common.hpp b/benchmark/blas/blas_common.hpp index c9b43d1e633..88819a043b0 100644 --- a/benchmark/blas/blas_common.hpp +++ b/benchmark/blas/blas_common.hpp @@ -43,7 +43,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" #include "core/components/prefix_sum_kernels.hpp" @@ -70,14 +72,6 @@ DEFINE_string( "C has dimensions n x m and x and y have dimensions n x r"); -std::string example_config = R"( - [ - { "n": 100 }, - { "n": 200, "m": 200, "k": 200 } - ] -)"; - - class BenchmarkOperation { public: virtual ~BenchmarkOperation() = default; @@ -404,70 +398,101 @@ struct dimensions { }; -dimensions parse_dims(rapidjson::Value& test_case) -{ - auto get_optional = [](rapidjson::Value& obj, const char* name, - gko::size_type default_value) -> gko::size_type { - if (obj.HasMember(name)) { - return obj[name].GetUint64(); - } else { - return default_value; - } - }; - - dimensions result; - result.n = test_case["n"].GetInt64(); - result.k = get_optional(test_case, "k", result.n); - result.m = get_optional(test_case, "m", result.n); - result.r = get_optional(test_case, "r", 1); - if (test_case.HasMember("stride")) { - result.stride_x = test_case["stride"].GetInt64(); - result.stride_y = result.stride_x; - } else { - result.stride_x = get_optional(test_case, "stride_x", result.r); - result.stride_y = get_optional(test_case, "stride_y", result.r); +struct BlasBenchmark : Benchmark { + using map_type = + std::map( + std::shared_ptr, dimensions)>>; + map_type operation_map; + std::vector operations; + std::string name; + bool do_print; + + BlasBenchmark(map_type operation_map, bool do_print = true) + : operation_map{std::move(operation_map)}, + 
name{"blas"}, + operations{split(FLAGS_operations)}, + do_print{do_print} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; } - result.stride_A = get_optional(test_case, "stride_A", result.k); - result.stride_B = get_optional(test_case, "stride_B", result.m); - result.stride_C = get_optional(test_case, "stride_C", result.m); - return result; -} + bool should_print() const override { return do_print; } -std::string describe(rapidjson::Value& test_case) -{ - std::stringstream ss; - auto optional_output = [&](const char* name) { - if (test_case.HasMember(name) && test_case[name].IsInt64()) { - ss << name << " = " << test_case[name].GetInt64() << " "; - } - }; - optional_output("n"); - optional_output("k"); - optional_output("m"); - optional_output("r"); - optional_output("stride"); - optional_output("stride_x"); - optional_output("stride_y"); - optional_output("stride_A"); - optional_output("stride_B"); - optional_output("stride_C"); - return ss.str(); -} + std::string get_example_config() const override + { + return json::parse(R"([{"n": 100}, {"n": 200, "m": 200, "k": 200}])") + .dump(4); + } + bool validate_config(const json& value) const override + { + return value.contains("n") && value["n"].is_number_integer(); + } -template -void apply_blas(const char* operation_name, std::shared_ptr exec, - std::shared_ptr timer, const OpMap& operation_map, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& blas_case = test_case["blas"]; - add_or_set_member(blas_case, operation_name, - rapidjson::Value(rapidjson::kObjectType), allocator); + std::string describe_config(const json& test_case) const override + { + std::stringstream ss; + auto optional_output = [&](const char* name) { + if (test_case.contains(name) && + test_case[name].is_number_integer()) { + ss << name << " = " << test_case[name].get() << " "; + } + }; + 
optional_output("n"); + optional_output("k"); + optional_output("m"); + optional_output("r"); + optional_output("stride"); + optional_output("stride_x"); + optional_output("stride_y"); + optional_output("stride_A"); + optional_output("stride_B"); + optional_output("stride_C"); + return ss.str(); + } - auto op = operation_map.at(operation_name)(exec, parse_dims(test_case)); + dimensions setup(std::shared_ptr exec, + json& test_case) const override + { + auto get_optional = [](json& obj, const char* name, + gko::size_type default_value) -> gko::size_type { + if (obj.contains(name)) { + return obj[name].get(); + } else { + return default_value; + } + }; + + dimensions result; + result.n = test_case["n"].get(); + result.k = get_optional(test_case, "k", result.n); + result.m = get_optional(test_case, "m", result.n); + result.r = get_optional(test_case, "r", 1); + if (test_case.contains("stride")) { + result.stride_x = test_case["stride"].get(); + result.stride_y = result.stride_x; + } else { + result.stride_x = get_optional(test_case, "stride_x", result.r); + result.stride_y = get_optional(test_case, "stride_y", result.r); + } + result.stride_A = get_optional(test_case, "stride_A", result.k); + result.stride_B = get_optional(test_case, "stride_B", result.m); + result.stride_C = get_optional(test_case, "stride_C", result.m); + return result; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + dimensions& dims, const std::string& operation_name, + json& operation_case) const override + { + auto op = operation_map.at(operation_name)(exec, dims); IterationControl ic(timer); @@ -488,98 +513,9 @@ void apply_blas(const char* operation_name, std::shared_ptr exec, const auto flops = static_cast(op->get_flops()); const auto mem = static_cast(op->get_memory()); const auto repetitions = ic.get_num_repetitions(); - add_or_set_member(blas_case[operation_name], "time", runtime, - allocator); - add_or_set_member(blas_case[operation_name], "flops", flops / runtime, - 
allocator); - add_or_set_member(blas_case[operation_name], "bandwidth", mem / runtime, - allocator); - add_or_set_member(blas_case[operation_name], "repetitions", repetitions, - allocator); - - // compute and write benchmark data - add_or_set_member(blas_case[operation_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["blas"][operation_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["blas"][operation_name], "error", - msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_blas_benchmarks(std::shared_ptr exec, - std::shared_ptr timer, - const OpMap& operation_map, - rapidjson::Document& test_cases, bool do_print) -{ - auto operations = split(FLAGS_operations, ','); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); + operation_case["time"] = runtime; + operation_case["flops"] = flops / runtime; + operation_case["bandwidth"] = mem / runtime; + operation_case["repetitions"] = repetitions; } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - if (!test_case.HasMember("blas")) { - test_case.AddMember("blas", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& blas_case = test_case["blas"]; - if (!FLAGS_overwrite && - all_of(begin(operations), end(operations), - [&blas_case](const std::string& s) { - return blas_case.HasMember(s.c_str()); - })) { - continue; - } - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - 
// annotate the test case - // This string needs to outlive `test_case_range` to make sure we - // don't use its const char* c_str() after it was freed. - auto test_case_str = describe(test_case); - auto test_case_range = annotate(test_case_str.c_str()); - for (const auto& operation_name : operations) { - { - auto operation_range = annotate(operation_name.c_str()); - apply_blas(operation_name.c_str(), exec, timer, - operation_map, test_case, allocator); - } - - if (do_print) { - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - - backup_results(test_cases); - } - } - } catch (const std::exception& e) { - std::cerr << "Error setting up benchmark, what(): " << e.what() - << std::endl; - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; diff --git a/benchmark/blas/distributed/multi_vector.cpp b/benchmark/blas/distributed/multi_vector.cpp index 4d3b821ed2e..aeab189d7db 100644 --- a/benchmark/blas/distributed/multi_vector.cpp +++ b/benchmark/blas/distributed/multi_vector.cpp @@ -50,6 +50,10 @@ int main(int argc, char* argv[]) { gko::experimental::mpi::environment mpi_env{argc, argv}; + const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); + const auto rank = comm.rank(); + const auto do_print = rank == 0; + std::string header = R"(" A benchmark for measuring performance of Ginkgo's BLAS-like " operations. 
@@ -60,26 +64,19 @@ Parameters for a benchmark case are: stride_x: stride for input vector x (optional, default r) stride_y: stride for in/out vector y (optional, default r) )"; - std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); - - std::string extra_information = "The operations are " + FLAGS_operations; - print_general_information(extra_information); + std::string format = Generator::get_example_config(); + initialize_argument_parsing(&argc, &argv, header, format, do_print); - const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); - const auto rank = comm.rank(); + if (do_print) { + std::string extra_information = + "The operations are " + FLAGS_operations + "\n"; + print_general_information(extra_information); + } auto exec = executor_factory_mpi.at(FLAGS_executor)(comm.get()); std::string json_input = broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - if (!test_cases.IsArray()) { - std::cerr - << "Input has to be a JSON array of benchmark configurations:\n" - << format; - std::exit(1); - } + auto test_cases = json::parse(json_input); std::map( @@ -127,10 +124,10 @@ Parameters for a benchmark case are: exec, Generator{comm, {}}, dims.n, dims.r, dims.stride_y); }}}; - run_blas_benchmarks(exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), - operation_map, test_cases, rank == 0); + run_test_cases(BlasBenchmark{operation_map, do_print}, exec, + get_mpi_timer(exec, comm, FLAGS_gpu_timer), test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/conversions/CMakeLists.txt b/benchmark/conversion/CMakeLists.txt similarity index 88% rename from benchmark/conversions/CMakeLists.txt rename to benchmark/conversion/CMakeLists.txt index 21dd363d3c0..7ecf578c055 100644 --- a/benchmark/conversions/CMakeLists.txt +++ 
b/benchmark/conversion/CMakeLists.txt @@ -1 +1 @@ -ginkgo_add_typed_benchmark_executables(conversion "NO" conversions.cpp) +ginkgo_add_typed_benchmark_executables(conversion "NO" conversion.cpp) diff --git a/benchmark/conversion/conversion.cpp b/benchmark/conversion/conversion.cpp new file mode 100644 index 00000000000..c51c8feab7d --- /dev/null +++ b/benchmark/conversion/conversion.cpp @@ -0,0 +1,194 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#include + + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "benchmark/utils/formats.hpp" +#include "benchmark/utils/general.hpp" +#include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" +#include "benchmark/utils/runner.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" + + +#ifdef GINKGO_BENCHMARK_ENABLE_TUNING +#include "benchmark/utils/tuning_variables.hpp" +#endif // GINKGO_BENCHMARK_ENABLE_TUNING + + +using Generator = DefaultSystemGenerator<>; + + +struct ConversionBenchmark : Benchmark> { + std::string name; + std::vector operations; + + ConversionBenchmark() : name{"conversion"} + { + auto ref_exec = gko::ReferenceExecutor::create(); + auto formats = split(FLAGS_formats); + for (const auto& from_format : formats) { + operations.push_back(from_format + "-read"); + auto from_mtx = + formats::matrix_type_factory.at(from_format)(ref_exec); + // all pairs of conversions that are supported by Ginkgo + for (const auto& to_format : formats) { + if (from_format == to_format) { + continue; + } + auto to_mtx = + formats::matrix_type_factory.at(to_format)(ref_exec); + try { + to_mtx->copy_from(from_mtx); + operations.push_back(from_format + "-" + to_format); + } catch (const std::exception& e) { + } + } + } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; + } + + bool should_print() const override { return true; } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + bool validate_config(const json& test_case) const override + { + return Generator::validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + gko::matrix_data 
setup(std::shared_ptr exec, + json& test_case) const override + { + gko::matrix_data data; + data = Generator::generate_matrix_data(test_case); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + return data; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + gko::matrix_data& data, + const std::string& operation_name, + json& operation_case) const override + { + auto split_it = + std::find(operation_name.begin(), operation_name.end(), '-'); + std::string from_name{operation_name.begin(), split_it}; + std::string to_name{split_it + 1, operation_name.end()}; + auto mtx_from = formats::matrix_type_factory.at(from_name)(exec); + auto readable = + gko::as>(mtx_from.get()); + IterationControl ic{timer}; + if (to_name == "read") { + // warm run + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + readable->read(data); + exec->synchronize(); + } + // timed run + for (auto _ : ic.run()) { + readable->read(data); + } + } else { + readable->read(data); + auto mtx_to = formats::matrix_type_factory.at(to_name)(exec); + + // warm run + for (auto _ : ic.warmup_run()) { + exec->synchronize(); + mtx_to->copy_from(mtx_from); + exec->synchronize(); + } + // timed run + for (auto _ : ic.run()) { + mtx_to->copy_from(mtx_from); + } + } + operation_case["time"] = ic.compute_time(FLAGS_timer_method); + operation_case["repetitions"] = ic.get_num_repetitions(); + } +}; + + +int main(int argc, char* argv[]) +{ + std::string header = + "A benchmark for measuring performance of Ginkgo's conversions.\n"; + std::string format_str = Generator::get_example_config(); + initialize_argument_parsing(&argc, &argv, header, format_str); + + std::string extra_information = + std::string() + "The formats are " + FLAGS_formats + "\n"; + print_general_information(extra_information); + + auto 
exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); + auto formats = split(FLAGS_formats, ','); + + auto test_cases = json::parse(get_input_stream()); + + run_test_cases(ConversionBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); + + std::cout << std::setw(4) << test_cases << std::endl; +} diff --git a/benchmark/conversions/conversions.cpp b/benchmark/conversions/conversions.cpp deleted file mode 100644 index 7363e3eb8df..00000000000 --- a/benchmark/conversions/conversions.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include -#include -#include -#include -#include -#include -#include -#include - - -#include "benchmark/utils/formats.hpp" -#include "benchmark/utils/general.hpp" -#include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" -#include "benchmark/utils/timer.hpp" -#include "benchmark/utils/types.hpp" - - -#ifdef GINKGO_BENCHMARK_ENABLE_TUNING -#include "benchmark/utils/tuning_variables.hpp" -#endif // GINKGO_BENCHMARK_ENABLE_TUNING - - -// This function supposes that management of `FLAGS_overwrite` is done before -// calling it -void convert_matrix(const gko::LinOp* matrix_from, const char* format_to, - const char* conversion_name, - std::shared_ptr exec, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& conversion_case = test_case["conversions"]; - add_or_set_member(conversion_case, conversion_name, - rapidjson::Value(rapidjson::kObjectType), allocator); - - gko::matrix_data data{gko::dim<2>{1, 1}, 1}; - auto matrix_to = share(formats::matrix_factory(format_to, exec, data)); - - auto timer = get_timer(exec, FLAGS_gpu_timer); - IterationControl ic{timer}; - - // warm run - for (auto _ : ic.warmup_run()) { - exec->synchronize(); - matrix_to->copy_from(matrix_from); - exec->synchronize(); - matrix_to->clear(); - } - // timed run - for (auto _ : ic.run()) { - matrix_to->copy_from(matrix_from); - } - 
add_or_set_member(conversion_case[conversion_name], "time", - ic.compute_time(FLAGS_timer_method), allocator); - add_or_set_member(conversion_case[conversion_name], "repetitions", - ic.get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(conversion_case[conversion_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["conversions"][conversion_name], - "completed", false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["conversions"][conversion_name], - "error", msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -int main(int argc, char* argv[]) -{ - std::string header = - "A benchmark for measuring performance of Ginkgo's conversions.\n"; - std::string format_str = example_config; - initialize_argument_parsing(&argc, &argv, header, format_str); - - std::string extra_information = - std::string() + "The formats are " + FLAGS_formats + "\n"; - print_general_information(extra_information); - - auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - auto formats = split(FLAGS_formats, ','); - - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - std::clog << "Benchmarking conversions. 
" << std::endl; - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("conversions")) { - test_case.AddMember("conversions", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& conversion_case = test_case["conversions"]; - - std::clog << "Running test case\n" << test_case << std::endl; - gko::matrix_data data; - try { - data = generator.generate_matrix_data(test_case); - } catch (std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } - continue; - } - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << ")" << std::endl; - add_or_set_member(test_case, "size", data.size[0], allocator); - // annotate the test case - // This string needs to outlive `test_case_range` to make sure we - // don't use its const char* c_str() after it was freed. 
- auto test_case_str = generator.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); - for (const auto& format_from : formats) { - try { - auto matrix_from = - share(formats::matrix_factory(format_from, exec, data)); - for (const auto& format_to : formats) { - if (format_from == format_to) { - continue; - } - auto conversion_name = - std::string(format_from) + "-" + format_to; - - if (!FLAGS_overwrite && - conversion_case.HasMember(conversion_name.c_str())) { - continue; - } - { - auto conversion_range = - annotate(conversion_name.c_str()); - convert_matrix(matrix_from.get(), format_to.c_str(), - conversion_name.c_str(), exec, test_case, - allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - } - backup_results(test_cases); - } catch (const gko::AllocationError& e) { - for (const auto& format : formats::matrix_type_factory) { - const auto format_to = std::get<0>(format); - auto conversion_name = - std::string(format_from) + "-" + format_to; - add_or_set_member( - test_case["conversions"][conversion_name.c_str()], - "completed", false, allocator); - } - std::cerr << "Error when allocating data for type " - << format_from << ". 
what(): " << e.what() - << std::endl; - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error when running benchmark, what(): " - << e.what() << std::endl; - } - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } - - std::cout << test_cases << std::endl; -} diff --git a/benchmark/matrix_generator/matrix_generator.cpp b/benchmark/matrix_generator/matrix_generator.cpp index 138b5a9c2ce..193d95f897f 100644 --- a/benchmark/matrix_generator/matrix_generator.cpp +++ b/benchmark/matrix_generator/matrix_generator.cpp @@ -85,31 +85,33 @@ std::string input_format = // clang-format on -void validate_option_object(const rapidjson::Value& value) +void validate_option_object(const json& value) { - if (!value.IsObject() || !value.HasMember("filename") || - !value["filename"].IsString() || !value.HasMember("problem") || - !value["problem"].IsObject() || !value["problem"].HasMember("type") || - !value["problem"]["type"].IsString()) { + if (!value.is_object() || !value.contains("filename") || + !value["filename"].is_string() || !value.contains("problem") || + !value["problem"].is_object() || !value["problem"].contains("type") || + !value["problem"]["type"].is_string()) { print_config_error_and_exit(2); } } using generator_function = std::function( - rapidjson::Value&, std::default_random_engine&)>; + json&, std::default_random_engine&)>; // matrix generators gko::matrix_data generate_block_diagonal( - rapidjson::Value& config, std::default_random_engine& engine) + json& config, std::default_random_engine& engine) { - if (!config.HasMember("num_blocks") || !config["num_blocks"].IsUint() || - !config.HasMember("block_size") || !config["block_size"].IsUint()) { + if (!config.contains("num_blocks") || + !config["num_blocks"].is_number_unsigned() || + !config.contains("block_size") || + !config["block_size"].is_number_unsigned()) { print_config_error_and_exit(2); } - auto num_blocks = config["num_blocks"].GetUint(); - auto block_size 
= config["block_size"].GetUint(); + auto num_blocks = config["num_blocks"].get(); + auto block_size = config["block_size"].get(); auto block = gko::matrix_data( gko::dim<2>(block_size), std::uniform_real_distribution(-1.0, 1.0), engine); @@ -132,20 +134,18 @@ int main(int argc, char* argv[]) std::clog << gko::version_info::get() << std::endl; auto engine = get_engine(); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document configurations; - configurations.ParseStream(jcin); + auto configurations = json::parse(get_input_stream()); - if (!configurations.IsArray()) { + if (!configurations.is_array()) { print_config_error_and_exit(1); } - for (auto& config : configurations.GetArray()) { + for (auto& config : configurations) { try { validate_option_object(config); std::clog << "Generating matrix: " << config << std::endl; - auto filename = config["filename"].GetString(); - auto type = config["problem"]["type"].GetString(); + auto filename = config["filename"].get(); + auto type = config["problem"]["type"].get(); auto mdata = generator[type](config["problem"], engine); std::ofstream ofs(filename); gko::write_raw(ofs, mdata, gko::layout_type::coordinate); diff --git a/benchmark/matrix_statistics/matrix_statistics.cpp b/benchmark/matrix_statistics/matrix_statistics.cpp index 09cae6a7554..a2a90076f2b 100644 --- a/benchmark/matrix_statistics/matrix_statistics.cpp +++ b/benchmark/matrix_statistics/matrix_statistics.cpp @@ -40,8 +40,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/types.hpp" +#include "ginkgo/core/base/executor.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -51,9 +52,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// See en.wikipedia.org/wiki/Five-number_summary // Quartile computation uses Method 3 from en.wikipedia.org/wiki/Quartile -void compute_summary(const std::vector& dist, - rapidjson::Value& out, - rapidjson::MemoryPoolAllocator<>& allocator) +void compute_summary(const std::vector& dist, json& out) { const auto q = dist.size() / 4; const auto r = dist.size() % 4; @@ -72,23 +71,14 @@ void compute_summary(const std::vector& dist, }; // clang-format on - add_or_set_member(out, "min", dist[0], allocator); - add_or_set_member( - out, "q1", - coefs[r][0] * static_cast(dist[positions[r][0]]) + - coefs[r][1] * static_cast(dist[positions[r][1]]), - allocator); - add_or_set_member( - out, "median", - coefs[r][2] * static_cast(dist[positions[r][2]]) + - coefs[r][3] * static_cast(dist[positions[r][3]]), - allocator); - add_or_set_member( - out, "q3", - coefs[r][4] * static_cast(dist[positions[r][4]]) + - coefs[r][5] * static_cast(dist[positions[r][5]]), - allocator); - add_or_set_member(out, "max", dist[dist.size() - 1], allocator); + out["min"] = dist.front(); + out["q1"] = coefs[r][0] * static_cast(dist[positions[r][0]]) + + coefs[r][1] * static_cast(dist[positions[r][1]]); + out["median"] = coefs[r][2] * static_cast(dist[positions[r][2]]) + + coefs[r][3] * static_cast(dist[positions[r][3]]); + out["q3"] = coefs[r][4] * static_cast(dist[positions[r][4]]) + + coefs[r][5] * static_cast(dist[positions[r][5]]); + out["max"] = dist.back(); } @@ -108,39 +98,30 @@ double compute_moment(int degree, const std::vector& dist, // See en.wikipedia.org/wiki/Moment_(mathematics) -void compute_moments(const std::vector& dist, - rapidjson::Value& out, - rapidjson::MemoryPoolAllocator<>& allocator) +void compute_moments(const std::vector& dist, json& out) { const auto mean = compute_moment(1, dist); - add_or_set_member(out, "mean", mean, allocator); + out["mean"] = mean; const auto variance = compute_moment(2, dist, mean); - add_or_set_member(out, "variance", variance, allocator); + 
out["variance"] = variance; const auto dev = std::sqrt(variance); - add_or_set_member(out, "skewness", compute_moment(3, dist, mean, dev), - allocator); - add_or_set_member(out, "kurtosis", compute_moment(4, dist, mean, dev), - allocator); - add_or_set_member(out, "hyperskewness", compute_moment(5, dist, mean, dev), - allocator); - add_or_set_member(out, "hyperflatness", compute_moment(6, dist, mean, dev), - allocator); + out["skewness"] = compute_moment(3, dist, mean, dev); + out["kurtosis"] = compute_moment(4, dist, mean, dev); + out["hyperskewness"] = compute_moment(5, dist, mean, dev); + out["hyperflatness"] = compute_moment(6, dist, mean, dev); } -template void compute_distribution_properties(const std::vector& dist, - rapidjson::Value& out, - Allocator& allocator) + json& out) { - compute_summary(dist, out, allocator); - compute_moments(dist, out, allocator); + compute_summary(dist, out); + compute_moments(dist, out); } -template void extract_matrix_statistics(gko::matrix_data& data, - rapidjson::Value& problem, Allocator& allocator) + json& problem) { std::vector row_dist(data.size[0]); std::vector col_dist(data.size[1]); @@ -149,72 +130,90 @@ void extract_matrix_statistics(gko::matrix_data& data, ++col_dist[v.column]; } - add_or_set_member(problem, "rows", data.size[0], allocator); - add_or_set_member(problem, "columns", data.size[1], allocator); - add_or_set_member(problem, "nonzeros", data.nonzeros.size(), allocator); + problem["rows"] = data.size[0]; + problem["columns"] = data.size[1]; + problem["nonzeros"] = data.nonzeros.size(); std::sort(begin(row_dist), end(row_dist)); - add_or_set_member(problem, "row_distribution", - rapidjson::Value(rapidjson::kObjectType), allocator); - compute_distribution_properties(row_dist, problem["row_distribution"], - allocator); + problem["row_distribution"] = json::object(); + compute_distribution_properties(row_dist, problem["row_distribution"]); std::sort(begin(col_dist), end(col_dist)); - add_or_set_member(problem, 
"col_distribution", - rapidjson::Value(rapidjson::kObjectType), allocator); - compute_distribution_properties(col_dist, problem["col_distribution"], - allocator); + problem["col_distribution"] = json::object(); + compute_distribution_properties(col_dist, problem["col_distribution"]); } -int main(int argc, char* argv[]) -{ - std::string header = - "A utility that collects additional statistical properties of the " - "matrix.\n"; - std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); +using Generator = DefaultSystemGenerator; - std::clog << gko::version_info::get() << std::endl; - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } +struct MatrixStatistics : Benchmark { + std::string name; + std::vector empty; - auto& allocator = test_cases.GetAllocator(); + MatrixStatistics() : name{"problem"} {} - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("problem")) { - test_case.AddMember("problem", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& problem = test_case["problem"]; + const std::string& get_name() const override { return name; } - std::clog << "Running test case\n" << test_case << std::endl; + const std::vector& get_operations() const override + { + return empty; + } - auto matrix = - DefaultSystemGenerator::generate_matrix_data( - test_case); + bool should_print() const override { return true; } - std::clog << "Matrix is of size (" << matrix.size[0] << ", " - << matrix.size[1] << ")" << std::endl; - add_or_set_member(test_case, "size", matrix.size[0], allocator); + std::string get_example_config() const override + { + return Generator::get_example_config(); + } - extract_matrix_statistics(matrix, test_case["problem"], allocator); + bool validate_config(const json& 
test_case) const override + { + return Generator::validate_config(test_case); + } - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error extracting statistics, what(): " << e.what() - << std::endl; - } + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); } - std::cout << test_cases << std::endl; + int setup(std::shared_ptr exec, + json& test_case) const override + { + auto data = Generator::generate_matrix_data(test_case); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + + extract_matrix_statistics(data, test_case["problem"]); + return 0; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + int& data, const std::string& operation_name, + json& operation_case) const override + {} +}; + + +int main(int argc, char* argv[]) +{ + std::string header = + "A utility that collects additional statistical properties of the " + "matrix.\n"; + std::string format = Generator::get_example_config(); + initialize_argument_parsing(&argc, &argv, header, format); + + std::clog << gko::version_info::get() << std::endl; + + auto test_cases = json::parse(get_input_stream()); + auto exec = gko::ReferenceExecutor::create(); + + run_test_cases(MatrixStatistics{}, exec, get_timer(exec, false), + test_cases); + + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/preconditioner/preconditioner.cpp b/benchmark/preconditioner/preconditioner.cpp index b731ab7ebfd..cc066d01517 100644 --- a/benchmark/preconditioner/preconditioner.cpp +++ b/benchmark/preconditioner/preconditioner.cpp @@ -43,9 +43,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" @@ -128,34 +129,85 @@ std::string encode_parameters(const char* precond_name) } -void run_preconditioner(const char* precond_name, - std::shared_ptr exec, - std::shared_ptr system_matrix, - const vec* b, const vec* x, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& precond_object = test_case["preconditioner"]; - auto encoded_name = encode_parameters(precond_name); +struct preconditioner_benchmark_state { + std::unique_ptr x; + std::unique_ptr b; + std::shared_ptr system_matrix; +}; + + +using Generator = DefaultSystemGenerator<>; + - if (!FLAGS_overwrite && - precond_object.HasMember(encoded_name.c_str())) { - return; +struct PreconditionerBenchmark : Benchmark { + std::string name; + std::vector preconditioners; + std::map precond_decoder; + + PreconditionerBenchmark() + : name{"preconditioner"}, preconditioners{split(FLAGS_preconditioners)} + { + for (auto precond : split(FLAGS_preconditioners)) { + preconditioners.push_back(encode_parameters(precond.c_str())); + precond_decoder[preconditioners.back()] = precond; } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return preconditioners; + } + + bool should_print() const override { return true; } + + bool validate_config(const json& value) const override + { + return Generator::validate_config(value); + } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + std::string describe_config(const json& test_case) const override + { 
+ return Generator::describe_config(test_case); + } + + preconditioner_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + preconditioner_benchmark_state state; + auto data = Generator::generate_matrix_data(test_case); + + state.system_matrix = + formats::matrix_factory(FLAGS_formats, exec, data); + state.b = Generator::create_multi_vector_random(exec, data.size[0]); + state.x = Generator::create_multi_vector(exec, data.size[0], + gko::zero()); + + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + return state; + } - add_or_set_member(precond_object, encoded_name.c_str(), - rapidjson::Value(rapidjson::kObjectType), allocator); - auto& this_precond_data = precond_object[encoded_name.c_str()]; - add_or_set_member(this_precond_data, "generate", - rapidjson::Value(rapidjson::kObjectType), allocator); - add_or_set_member(this_precond_data, "apply", - rapidjson::Value(rapidjson::kObjectType), allocator); + void run(std::shared_ptr exec, std::shared_ptr timer, + preconditioner_benchmark_state& state, + const std::string& encoded_precond_name, + json& precond_case) const override + { + auto decoded_precond_name = precond_decoder.at(encoded_precond_name); + precond_case["generate"] = json::object(); + precond_case["apply"] = json::object(); for (auto stage : {"generate", "apply"}) { - add_or_set_member(this_precond_data[stage], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + precond_case[stage]["components"] = json::object(); } IterationControl ic_gen{get_timer(exec, FLAGS_gpu_timer)}; @@ -163,54 +215,50 @@ void run_preconditioner(const char* precond_name, { // fast run, gets total time - auto x_clone = clone(x); - - auto precond = precond_factory.at(precond_name)(exec); + auto x_clone = clone(state.x); + auto precond = 
precond_factory.at(decoded_precond_name)(exec); for (auto _ : ic_apply.warmup_run()) { - precond->generate(system_matrix)->apply(b, x_clone); + precond->generate(state.system_matrix)->apply(state.b, x_clone); } std::unique_ptr precond_op; for (auto _ : ic_gen.run()) { - precond_op = precond->generate(system_matrix); + precond_op = precond->generate(state.system_matrix); } - add_or_set_member(this_precond_data["generate"], "time", - ic_gen.compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(this_precond_data["generate"], "repetitions", - ic_gen.get_num_repetitions(), allocator); + precond_case["generate"]["time"] = + ic_gen.compute_time(FLAGS_timer_method); + precond_case["generate"]["repetitions"] = + ic_gen.get_num_repetitions(); for (auto _ : ic_apply.run()) { - precond_op->apply(b, x_clone); + precond_op->apply(state.b, x_clone); } - add_or_set_member(this_precond_data["apply"], "time", - ic_apply.compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(this_precond_data["apply"], "repetitions", - ic_apply.get_num_repetitions(), allocator); + precond_case["apply"]["time"] = + ic_apply.compute_time(FLAGS_timer_method); + precond_case["apply"]["repetitions"] = + ic_apply.get_num_repetitions(); } if (FLAGS_detailed) { // slow run, times each component separately - auto x_clone = clone(x); - auto precond = precond_factory.at(precond_name)(exec); + auto x_clone = clone(state.x); + auto precond = precond_factory.at(decoded_precond_name)(exec); std::unique_ptr precond_op; { auto gen_logger = create_operations_logger( - FLAGS_nested_names, - this_precond_data["generate"]["components"], allocator, + FLAGS_nested_names, precond_case["generate"]["components"], ic_gen.get_num_repetitions()); exec->add_logger(gen_logger); if (exec->get_master() != exec) { exec->get_master()->add_logger(gen_logger); } for (auto i = 0u; i < ic_gen.get_num_repetitions(); ++i) { - precond_op = precond->generate(system_matrix); + precond_op = 
precond->generate(state.system_matrix); } if (exec->get_master() != exec) { exec->get_master()->remove_logger(gen_logger); @@ -219,39 +267,22 @@ void run_preconditioner(const char* precond_name, } auto apply_logger = create_operations_logger( - FLAGS_nested_names, this_precond_data["apply"]["components"], - allocator, ic_apply.get_num_repetitions()); + FLAGS_nested_names, precond_case["apply"]["components"], + ic_apply.get_num_repetitions()); exec->add_logger(apply_logger); if (exec->get_master() != exec) { exec->get_master()->add_logger(apply_logger); } for (auto i = 0u; i < ic_apply.get_num_repetitions(); ++i) { - precond_op->apply(b, x_clone); + precond_op->apply(state.b, x_clone); } if (exec->get_master() != exec) { exec->get_master()->remove_logger(apply_logger); } exec->remove_logger(apply_logger); } - - add_or_set_member(this_precond_data, "completed", true, allocator); - } catch (const std::exception& e) { - auto encoded_name = encode_parameters(precond_name); - add_or_set_member(test_case["preconditioner"], encoded_name.c_str(), - rapidjson::Value(rapidjson::kObjectType), allocator); - add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], - "completed", false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["preconditioner"][encoded_name.c_str()], - "error", msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; } -} +}; int main(int argc, char* argv[]) @@ -260,7 +291,7 @@ int main(int argc, char* argv[]) FLAGS_formats = "csr"; std::string header = "A benchmark for measuring preconditioner performance.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing(&argc, &argv, header, format); std::string extra_information = @@ -278,85 +309,10 @@ int main(int argc, char* argv[]) std::exit(1); } - 
rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("preconditioner")) { - test_case.AddMember("preconditioner", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& precond_object = test_case["preconditioner"]; - if (!FLAGS_overwrite && - all_of(begin(preconditioners), end(preconditioners), - [&precond_object](const std::string& s) { - return precond_object.HasMember(s.c_str()); - })) { - continue; - } - std::clog << "Running test case\n" << test_case << std::endl; - - // annotate the test case - // This string needs to outlive `test_case_range` to make sure we - // don't use its const char* c_str() after it was freed. 
- auto test_case_str = generator.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); - - auto data = generator.generate_matrix_data(test_case); - - auto system_matrix = - share(formats::matrix_factory(FLAGS_formats, exec, data)); - auto b = generator.create_multi_vector_random( - exec, system_matrix->get_size()[0]); - auto x = generator.create_multi_vector( - exec, system_matrix->get_size()[0], gko::zero()); - - std::clog << "Matrix is of size (" << system_matrix->get_size()[0] - << ", " << system_matrix->get_size()[1] << ")" - << std::endl; - add_or_set_member(test_case, "size", data.size[0], allocator); - for (const auto& precond_name : preconditioners) { - { - auto precond_range = annotate(precond_name.c_str()); - run_preconditioner(precond_name.c_str(), exec, - system_matrix, b.get(), x.get(), - test_case, allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - backup_results(test_cases); - } - } catch (const std::exception& e) { - std::cerr << "Error setting up preconditioner, what(): " << e.what() - << std::endl; - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } + run_test_cases(PreconditionerBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/solver/distributed/solver.cpp b/benchmark/solver/distributed/solver.cpp index 2db71c16ca3..99afc7f6532 100644 --- a/benchmark/solver/distributed/solver.cpp +++ b/benchmark/solver/distributed/solver.cpp @@ -52,7 +52,7 @@ struct Generator : public DistributedDefaultSystemGenerator { std::unique_ptr generate_rhs(std::shared_ptr exec, const gko::LinOp* system_matrix, - rapidjson::Value& config) const + json& config) const { return Vec::create( exec, comm, gko::dim<2>{system_matrix->get_size()[0], FLAGS_nrhs}, @@ -84,7 +84,7 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for 
measuring Ginkgo's distributed solvers\n"; - std::string format = example_config + R"( + std::string format = solver_example_config + R"( The matrix will either be read from an input file if the filename parameter is given, or generated as a stencil matrix. If the filename parameter is given, all processes will read the file and @@ -134,17 +134,12 @@ int main(int argc, char* argv[]) "optimal": {"spmv": "csr-csr"}] )" : broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); + auto test_cases = json::parse(json_input); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - run_solver_benchmarks(exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), - test_cases, Generator(comm), rank == 0); + run_test_cases(SolverBenchmark{Generator{comm}}, exec, + get_mpi_timer(exec, comm, FLAGS_gpu_timer), test_cases); if (rank == 0) { - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/solver/solver.cpp b/benchmark/solver/solver.cpp index 9190c99dad0..2361e6f1552 100644 --- a/benchmark/solver/solver.cpp +++ b/benchmark/solver/solver.cpp @@ -58,7 +58,7 @@ int main(int argc, char* argv[]) FLAGS_min_repetitions = 1; std::string header = "A benchmark for measuring performance of Ginkgo's solvers.\n"; - std::string format = example_config + R"( + std::string format = solver_example_config + R"( "optimal":"spmv" can be one of the recognized spmv formats )"; initialize_argument_parsing(&argc, &argv, header, format); @@ -75,24 +75,19 @@ int main(int argc, char* argv[]) auto exec = get_executor(FLAGS_gpu_timer); - rapidjson::Document test_cases; + json test_cases; if (!FLAGS_overhead) { - rapidjson::IStreamWrapper jcin(get_input_stream()); - test_cases.ParseStream(jcin); + test_cases = json::parse(get_input_stream()); } else { // Fake test case to run once auto overhead_json = std::string() + " [{\"filename\": \"overhead.mtx\", \"optimal\": " 
"{ \"spmv\": \"csr\"}}]"; - test_cases.Parse(overhead_json.c_str()); + test_cases = json::parse(overhead_json); } - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } - - run_solver_benchmarks(exec, get_timer(exec, FLAGS_gpu_timer), test_cases, - SolverGenerator{}, true); + run_test_cases(SolverBenchmark{SolverGenerator{}}, exec, + get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/solver/solver_common.hpp b/benchmark/solver/solver_common.hpp index a9167077782..e84db45d910 100644 --- a/benchmark/solver/solver_common.hpp +++ b/benchmark/solver/solver_common.hpp @@ -37,8 +37,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" #include "benchmark/utils/preconditioners.hpp" +#include "benchmark/utils/runner.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -107,7 +109,7 @@ DEFINE_bool(overhead, false, "If set, uses dummy data to benchmark Ginkgo overhead"); -std::string example_config = R"( +std::string solver_example_config = R"( [ {"filename": "my_file.mtx", "optimal": {"spmv": "ell-csr"}, "rhs": "my_file_rhs.mtx"}, @@ -119,28 +121,6 @@ std::string example_config = R"( )"; -// input validation -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of solver configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -void validate_option_object(const rapidjson::Value& value) -{ - if (!value.IsObject() || - !((value.HasMember("size") && value.HasMember("stencil") && - value["size"].IsInt64() && value["stencil"].IsString()) || - (value.HasMember("filename") && value["filename"].IsString())) || - (!value.HasMember("optimal") && 
!value["optimal"].HasMember("spmv") && - !value["optimal"]["spmv"].IsString())) { - print_config_error_and_exit(); - } -} - - std::shared_ptr create_criterion( std::shared_ptr exec, std::uint32_t max_iters) { @@ -284,21 +264,17 @@ std::unique_ptr generate_solver( } -void write_precond_info(const gko::LinOp* precond, - rapidjson::Value& precond_info, - rapidjson::MemoryPoolAllocator<>& allocator) +void write_precond_info(const gko::LinOp* precond, json& precond_info) { if (const auto jacobi = dynamic_cast*>(precond)) { // extract block sizes const auto bdata = jacobi->get_parameters().block_pointers.get_const_data(); - add_or_set_member(precond_info, "block_sizes", - rapidjson::Value(rapidjson::kArrayType), allocator); + precond_info["block_sizes"] = json::array(); const auto nblocks = jacobi->get_num_blocks(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_sizes"].PushBack(bdata[i + 1] - bdata[i], - allocator); + precond_info["block_sizes"].push_back(bdata[i + 1] - bdata[i]); } // extract block precisions @@ -306,24 +282,19 @@ void write_precond_info(const gko::LinOp* precond, jacobi->get_parameters() .storage_optimization.block_wise.get_const_data(); if (pdata) { - add_or_set_member(precond_info, "block_precisions", - rapidjson::Value(rapidjson::kArrayType), - allocator); + precond_info["block_precisions"] = json::array(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_precisions"].PushBack( - static_cast(pdata[i]), allocator); + precond_info["block_precisions"].push_back( + static_cast(pdata[i])); } } // extract condition numbers const auto cdata = jacobi->get_conditioning(); if (cdata) { - add_or_set_member(precond_info, "block_conditioning", - rapidjson::Value(rapidjson::kArrayType), - allocator); + precond_info["block_conditioning"] = json::array(); for (auto i = decltype(nblocks){0}; i < nblocks; ++i) { - precond_info["block_conditioning"].PushBack(cdata[i], - allocator); + 
precond_info["block_conditioning"].push_back(cdata[i]); } } } @@ -335,10 +306,10 @@ struct SolverGenerator : DefaultSystemGenerator<> { std::unique_ptr generate_rhs(std::shared_ptr exec, const gko::LinOp* system_matrix, - rapidjson::Value& config) const + json& config) const { - if (config.HasMember("rhs")) { - std::ifstream rhs_fd{config["rhs"].GetString()}; + if (config.contains("rhs")) { + std::ifstream rhs_fd{config["rhs"].get()}; return gko::read(rhs_fd, std::move(exec)); } else { gko::dim<2> vec_size{system_matrix->get_size()[0], FLAGS_nrhs}; @@ -399,45 +370,112 @@ struct SolverGenerator : DefaultSystemGenerator<> { }; -template -void solve_system(const std::string& solver_name, - const std::string& precond_name, - const char* precond_solver_name, - std::shared_ptr exec, - std::shared_ptr timer, - std::shared_ptr system_matrix, - const VectorType* b, const VectorType* x, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& solver_case = test_case["solver"]; - if (!FLAGS_overwrite && solver_case.HasMember(precond_solver_name)) { - return; +template +struct solver_benchmark_state { + using Vec = typename Generator::Vec; + std::shared_ptr system_matrix; + std::unique_ptr b; + std::unique_ptr x; +}; + + +template +struct SolverBenchmark : Benchmark> { + std::string name; + std::vector precond_solvers; + std::map> decoder; + Generator generator; + + SolverBenchmark(Generator generator) : name{"solver"}, generator{generator} + { + auto solvers = split(FLAGS_solvers, ','); + auto preconds = split(FLAGS_preconditioners, ','); + for (const auto& s : solvers) { + for (const auto& p : preconds) { + precond_solvers.push_back(s + (p == "none" ? 
"" : "-" + p)); + decoder[precond_solvers.back()] = {s, p}; + } + } + } + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return precond_solvers; + } + + bool should_print() const override { return true; } + + std::string get_example_config() const override + { + return solver_example_config; + } + + bool validate_config(const json& value) const override + { + return ((value.contains("size") && value.contains("stencil") && + value["size"].is_number_integer() && + value["stencil"].is_string()) || + (value.contains("filename") && + value["filename"].is_string())) && + (value.contains("optimal") && + value["optimal"].contains("spmv") && + value["optimal"]["spmv"].is_string()); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + solver_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + solver_benchmark_state state; + + if (FLAGS_overhead) { + state.system_matrix = generator.initialize({1.0}, exec); + state.b = generator.initialize( + {std::numeric_limits::quiet_NaN()}, exec); + state.x = generator.initialize({0.0}, exec); + } else { + state.system_matrix = + generator.generate_matrix_with_optimal_format(exec, test_case); + state.b = generator.generate_rhs(exec, state.system_matrix.get(), + test_case); + state.x = generator.generate_initial_guess( + exec, state.system_matrix.get(), state.b.get()); } - add_or_set_member(solver_case, precond_solver_name, - rapidjson::Value(rapidjson::kObjectType), allocator); - auto& solver_json = solver_case[precond_solver_name]; - add_or_set_member(solver_json, "recurrent_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "true_residuals", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(solver_json, "implicit_residuals", - rapidjson::Value(rapidjson::kArrayType), 
allocator); - add_or_set_member(solver_json, "iteration_timestamps", - rapidjson::Value(rapidjson::kArrayType), allocator); - if (b->get_size()[1] == 1 && !FLAGS_overhead) { - auto rhs_norm = compute_norm2(b); - add_or_set_member(solver_json, "rhs_norm", rhs_norm, allocator); + std::clog << "Matrix is of size (" << state.system_matrix->get_size()[0] + << ", " << state.system_matrix->get_size()[1] << ")" + << std::endl; + test_case["rows"] = state.system_matrix->get_size()[0]; + test_case["cols"] = state.system_matrix->get_size()[1]; + return state; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + solver_benchmark_state& state, + const std::string& encoded_solver_name, + json& solver_case) const override + { + const auto decoded_pair = decoder.at(encoded_solver_name); + auto& solver_name = decoded_pair.first; + auto& precond_name = decoded_pair.second; + solver_case["recurrent_residuals"] = json::array(); + solver_case["true_residuals"] = json::array(); + solver_case["implicit_residuals"] = json::array(); + solver_case["iteration_timestamps"] = json::array(); + if (state.b->get_size()[1] == 1 && !FLAGS_overhead) { + auto rhs_norm = compute_norm2(state.b.get()); + solver_case["rhs_norm"] = rhs_norm; } for (auto stage : {"generate", "apply"}) { - add_or_set_member(solver_json, stage, - rapidjson::Value(rapidjson::kObjectType), - allocator); - add_or_set_member(solver_json[stage], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + solver_case[stage] = json::object(); + solver_case[stage]["components"] = json::object(); } IterationControl ic{timer}; @@ -445,24 +483,24 @@ void solve_system(const std::string& solver_name, // warm run std::shared_ptr solver; for (auto _ : ic.warmup_run()) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_warmup_max_iters) - ->generate(system_matrix); - solver->apply(b, 
x_clone); + ->generate(state.system_matrix); + solver->apply(state.b, x_clone); exec->synchronize(); } // detail run if (FLAGS_detailed && !FLAGS_overhead) { // slow run, get the time of each functions - auto x_clone = clone(x); + auto x_clone = clone(state.x); { auto gen_logger = create_operations_logger( - FLAGS_nested_names, solver_json["generate"]["components"], - allocator, 1); + FLAGS_nested_names, solver_case["generate"]["components"], + 1); exec->add_logger(gen_logger); if (exec != exec->get_master()) { exec->get_master()->add_logger(gen_logger); @@ -471,7 +509,7 @@ void solve_system(const std::string& solver_name, auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) - ->generate(system_matrix); + ->generate(state.system_matrix); exec->remove_logger(gen_logger); if (exec != exec->get_master()) { @@ -481,25 +519,21 @@ void solve_system(const std::string& solver_name, if (auto prec = dynamic_cast(solver.get())) { - add_or_set_member(solver_json, "preconditioner", - rapidjson::Value(rapidjson::kObjectType), - allocator); + solver_case["preconditioner"] = json::object(); write_precond_info( clone(exec->get_master(), prec->get_preconditioner()).get(), - solver_json["preconditioner"], allocator); + solver_case["preconditioner"]); } { auto apply_logger = create_operations_logger( - FLAGS_nested_names, solver_json["apply"]["components"], - allocator, 1); + FLAGS_nested_names, solver_case["apply"]["components"], 1); exec->add_logger(apply_logger); if (exec != exec->get_master()) { exec->get_master()->add_logger(apply_logger); } - - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); exec->remove_logger(apply_logger); if (exec != exec->get_master()) { @@ -508,17 +542,18 @@ void solve_system(const std::string& solver_name, } // slow run, gets the recurrent and true residuals of each iteration - if (b->get_size()[1] == 1) { - x_clone = clone(x); + if (state.b->get_size()[1] == 1) { 
+ x_clone = clone(state.x); auto res_logger = std::make_shared>( - system_matrix, b, solver_json["recurrent_residuals"], - solver_json["true_residuals"], - solver_json["implicit_residuals"], - solver_json["iteration_timestamps"], allocator); + state.system_matrix, state.b, + solver_case["recurrent_residuals"], + solver_case["true_residuals"], + solver_case["implicit_residuals"], + solver_case["iteration_timestamps"]); solver->add_logger(res_logger); - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); if (!res_logger->has_implicit_res_norms()) { - solver_json.RemoveMember("implicit_residuals"); + solver_case.erase("implicit_residuals"); } } exec->synchronize(); @@ -528,16 +563,16 @@ void solve_system(const std::string& solver_name, auto it_logger = std::make_shared(); auto generate_timer = get_timer(exec, FLAGS_gpu_timer); auto apply_timer = ic.get_timer(); - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto status : ic.run(false)) { - x_clone = clone(x); + x_clone = clone(state.x); exec->synchronize(); generate_timer->tic(); auto precond = precond_factory.at(precond_name)(exec); solver = generate_solver(exec, give(precond), solver_name, FLAGS_max_iters) - ->generate(system_matrix); + ->generate(state.system_matrix); generate_timer->toc(); exec->synchronize(); @@ -545,173 +580,33 @@ void solve_system(const std::string& solver_name, solver->add_logger(it_logger); } apply_timer->tic(); - solver->apply(b, x_clone); + solver->apply(state.b, x_clone); apply_timer->toc(); if (ic.get_num_repetitions() == 0) { solver->remove_logger(it_logger); } } - it_logger->write_data(solver_json["apply"], allocator); + it_logger->write_data(solver_case["apply"]); - if (b->get_size()[1] == 1 && !FLAGS_overhead) { + if (state.b->get_size()[1] == 1 && !FLAGS_overhead) { // a solver is considered direct if it didn't log any iterations - if (solver_json["apply"].HasMember("iterations") && - solver_json["apply"]["iterations"].GetInt() == 0) { - auto error = - 
compute_direct_error(solver.get(), b, x_clone.get()); - add_or_set_member(solver_json, "forward_error", error, - allocator); - } - auto residual = - compute_residual_norm(system_matrix.get(), b, x_clone.get()); - add_or_set_member(solver_json, "residual_norm", residual, - allocator); - } - add_or_set_member(solver_json["generate"], "time", - generate_timer->compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(solver_json["apply"], "time", - apply_timer->compute_time(FLAGS_timer_method), - allocator); - add_or_set_member(solver_json, "repetitions", - apply_timer->get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(solver_json, "completed", true, allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["solver"][precond_solver_name], "completed", - false, allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["solver"][precond_solver_name], "error", - msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_solver_benchmarks(std::shared_ptr exec, - std::shared_ptr timer, - rapidjson::Document& test_cases, - const SystemGenerator& system_generator, - bool do_print) -{ - auto solvers = split(FLAGS_solvers, ','); - auto preconds = split(FLAGS_preconditioners, ','); - std::vector precond_solvers; - for (const auto& s : solvers) { - for (const auto& p : preconds) { - precond_solvers.push_back(s + (p == "none" ? 
"" : "-" + p)); - } - } - - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if (!test_case.HasMember("solver")) { - test_case.AddMember("solver", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& solver_case = test_case["solver"]; - if (!FLAGS_overwrite && - all_of(begin(precond_solvers), end(precond_solvers), - [&solver_case](const std::string& s) { - return solver_case.HasMember(s.c_str()); - })) { - continue; - } - // annotate the test case - // This string needs to outlive `test_case_range` to make sure we - // don't use its const char* c_str() after it was freed. 
- auto test_case_str = system_generator.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); - - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - - using Vec = typename SystemGenerator::Vec; - std::shared_ptr system_matrix; - std::unique_ptr b; - std::unique_ptr x; - if (FLAGS_overhead) { - system_matrix = system_generator.initialize({1.0}, exec); - b = system_generator.initialize( - {std::numeric_limits::quiet_NaN()}, exec); - x = system_generator.initialize({0.0}, exec); - } else { - system_matrix = - system_generator.generate_matrix_with_optimal_format( - exec, test_case); - b = system_generator.generate_rhs(exec, system_matrix.get(), - test_case); - x = system_generator.generate_initial_guess( - exec, system_matrix.get(), b.get()); - } - - if (do_print) { - std::clog << "Matrix is of size (" - << system_matrix->get_size()[0] << ", " - << system_matrix->get_size()[1] << ")" << std::endl; - } - add_or_set_member(test_case, "size", system_matrix->get_size()[0], - allocator); - auto precond_solver_name = begin(precond_solvers); - for (const auto& solver_name : solvers) { - auto solver_range = annotate(solver_name.c_str()); - for (const auto& precond_name : preconds) { - if (do_print) { - std::clog - << "\tRunning solver: " << *precond_solver_name - << std::endl; - } - { - auto precond_range = annotate(precond_name.c_str()); - solve_system(solver_name, precond_name, - precond_solver_name->c_str(), exec, timer, - system_matrix, b.get(), x.get(), test_case, - allocator); - } - if (do_print) { - backup_results(test_cases); - } - ++precond_solver_name; - } - } - } catch (const std::exception& e) { - std::cerr << "Error setting up solver, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); + if (solver_case["apply"].contains("iterations") && + 
solver_case["apply"]["iterations"].get() == 0) { + auto error = compute_direct_error(solver.get(), state.b.get(), + x_clone.get()); + solver_case["forward_error"] = error; } + auto residual = compute_residual_norm(state.system_matrix.get(), + state.b.get(), x_clone.get()); + solver_case["residual_norm"] = residual; } + solver_case["generate"]["time"] = + generate_timer->compute_time(FLAGS_timer_method); + solver_case["apply"]["time"] = + apply_timer->compute_time(FLAGS_timer_method); + solver_case["repetitions"] = apply_timer->get_num_repetitions(); } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; #endif // GINKGO_BENCHMARK_SOLVER_SOLVER_COMMON_HPP diff --git a/benchmark/sparse_blas/operations.cpp b/benchmark/sparse_blas/operations.cpp index 6a817a67c0d..8ee3977c989 100644 --- a/benchmark/sparse_blas/operations.cpp +++ b/benchmark/sparse_blas/operations.cpp @@ -38,7 +38,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/sparse_blas/operations.hpp" -#include "benchmark/utils/json.hpp" #include "core/factorization/elimination_forest.hpp" #include "core/factorization/symbolic.hpp" #include "core/matrix/csr_kernels.hpp" @@ -632,11 +631,9 @@ class SymbolicLuOperation : public BenchmarkOperation { void run() override { gko::factorization::symbolic_lu(mtx_, result_); } - void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) override + void write_stats(json& object) override { - add_or_set_member(object, "factor_nonzeros", - result_->get_num_stored_elements(), allocator); + object["factor_nonzeros"] = result_->get_num_stored_elements(); } private: @@ -680,11 +677,9 @@ class SymbolicCholeskyOperation : public BenchmarkOperation { forest_); } - void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) override + void write_stats(json& object) override { - add_or_set_member(object, "factor_nonzeros", - result_->get_num_stored_elements(), 
allocator); + object["factor_nonzeros"] = result_->get_num_stored_elements(); } private: diff --git a/benchmark/sparse_blas/operations.hpp b/benchmark/sparse_blas/operations.hpp index 99cf72b8e59..48034eb8a1f 100644 --- a/benchmark/sparse_blas/operations.hpp +++ b/benchmark/sparse_blas/operations.hpp @@ -36,9 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include - - +#include "benchmark/utils/json.hpp" #include "benchmark/utils/types.hpp" @@ -79,9 +77,7 @@ class BenchmarkOperation { /** * Allows the operation to write arbitrary information to the JSON output. */ - virtual void write_stats(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& allocator) - {} + virtual void write_stats(json& object) {} }; diff --git a/benchmark/sparse_blas/sparse_blas.cpp b/benchmark/sparse_blas/sparse_blas.cpp index 38b2d09c996..fac20edce29 100644 --- a/benchmark/sparse_blas/sparse_blas.cpp +++ b/benchmark/sparse_blas/sparse_blas.cpp @@ -47,7 +47,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include "benchmark/sparse_blas/operations.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" +#include "benchmark/utils/iteration_control.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/types.hpp" #include "core/test/utils/matrix_generator.hpp" @@ -68,18 +69,64 @@ DEFINE_bool(validate, false, "against the ReferenceExecutor solution."); -void apply_sparse_blas(const char* operation_name, - std::shared_ptr exec, const Mtx* mtx, - rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - add_or_set_member(test_case, operation_name, - rapidjson::Value(rapidjson::kObjectType), allocator); +using Generator = DefaultSystemGenerator<>; + + +struct SparseBlasBenchmark : Benchmark> { + std::string name; + std::vector operations; + + SparseBlasBenchmark() + : name{"sparse_blas"}, operations{split(FLAGS_operations)} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return operations; + } - auto op = get_operation(operation_name, mtx); + bool should_print() const override { return true; } + + bool validate_config(const json& value) const override + { + return Generator::validate_config(value); + } + + std::string get_example_config() const override + { + return Generator::get_example_config(); + } + + std::string describe_config(const json& test_case) const override + { + return Generator::describe_config(test_case); + } + + std::unique_ptr setup(std::shared_ptr exec, + json& test_case) const override + { + auto data = Generator::generate_matrix_data(test_case); + data.ensure_row_major_order(); + std::clog << "Matrix is of size (" << data.size[0] << ", " + << data.size[1] << "), " << data.nonzeros.size() << std::endl; + test_case["rows"] = data.size[0]; + test_case["cols"] = data.size[1]; + test_case["nonzeros"] = data.nonzeros.size(); + + auto mtx = 
Mtx::create(exec, data.size, data.nonzeros.size()); + mtx->read(data); + return mtx; + } + + + void run(std::shared_ptr exec, std::shared_ptr timer, + std::unique_ptr& mtx, const std::string& operation_name, + json& operation_case) const override + { + auto op = get_operation(operation_name, mtx.get()); - auto timer = get_timer(exec, FLAGS_gpu_timer); IterationControl ic(timer); // warm run @@ -99,51 +146,27 @@ void apply_sparse_blas(const char* operation_name, const auto flops = static_cast(op->get_flops()); const auto mem = static_cast(op->get_memory()); const auto repetitions = ic.get_num_repetitions(); - add_or_set_member(test_case[operation_name], "time", runtime, - allocator); - add_or_set_member(test_case[operation_name], "flops", flops / runtime, - allocator); - add_or_set_member(test_case[operation_name], "bandwidth", mem / runtime, - allocator); - add_or_set_member(test_case[operation_name], "repetitions", repetitions, - allocator); + operation_case["time"] = runtime; + operation_case["flops"] = flops / runtime; + operation_case["bandwidth"] = mem / runtime; + operation_case["repetitions"] = repetitions; if (FLAGS_validate) { auto validation_result = op->validate(); - add_or_set_member(test_case[operation_name], "correct", - validation_result.first, allocator); - add_or_set_member(test_case[operation_name], "error", - validation_result.second, allocator); + operation_case["correct"] = validation_result.first; + operation_case["error"] = validation_result.second; } if (FLAGS_detailed) { - add_or_set_member(test_case[operation_name], "components", - rapidjson::Value(rapidjson::kObjectType), - allocator); + operation_case["components"] = json::object(); auto gen_logger = create_operations_logger( - FLAGS_nested_names, test_case[operation_name]["components"], - allocator, 1); + FLAGS_nested_names, operation_case["components"], 1); exec->add_logger(gen_logger); op->run(); exec->remove_logger(gen_logger); } - op->write_stats(test_case[operation_name], 
allocator); - - add_or_set_member(test_case[operation_name], "completed", true, - allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case[operation_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case[operation_name], "error", msg_value, - allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; + op->write_stats(operation_case); } -} +}; int main(int argc, char* argv[]) @@ -151,94 +174,19 @@ int main(int argc, char* argv[]) std::string header = "A benchmark for measuring performance of Ginkgo's sparse BLAS " "operations.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing(&argc, &argv, header, format); auto exec = executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - std::string extra_information = "The operations are " + FLAGS_operations; + std::string extra_information = + "The operations are " + FLAGS_operations + "\n"; print_general_information(extra_information); - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); - } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); - } - return {}; - }; - - auto operations = split(FLAGS_operations, ','); - - DefaultSystemGenerator<> generator{}; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - validate_option_object(test_case); - if 
(!test_case.HasMember(benchmark_name)) { - test_case.AddMember(rapidjson::Value(benchmark_name, allocator), - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& sp_blas_case = test_case[benchmark_name]; - std::clog << "Running test case\n" << test_case << std::endl; - // annotate the test case - // This string needs to outlive `test_case_range` to make sure we - // don't use its const char* c_str() after it was freed. - auto test_case_str = generator.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); - auto data = generator.generate_matrix_data(test_case); - data.ensure_row_major_order(); - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << "), " << data.nonzeros.size() - << std::endl; - add_or_set_member(test_case, "rows", data.size[0], allocator); - add_or_set_member(test_case, "cols", data.size[1], allocator); - add_or_set_member(test_case, "nonzeros", data.nonzeros.size(), - allocator); - - auto mtx = Mtx::create(exec, data.size, data.nonzeros.size()); - mtx->read(data); - for (const auto& operation_name : operations) { - if (FLAGS_overwrite || - !sp_blas_case.HasMember(operation_name.c_str())) { - { - auto operation_range = annotate(operation_name.c_str()); - apply_sparse_blas(operation_name.c_str(), exec, - mtx.get(), sp_blas_case, allocator); - } - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - backup_results(test_cases); - } - } - // write the output if we have no strategies - backup_results(test_cases); - } catch (const std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } - } - } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } + run_test_cases(SparseBlasBenchmark{}, exec, + get_timer(exec, FLAGS_gpu_timer), 
test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/spmv/distributed/spmv.cpp b/benchmark/spmv/distributed/spmv.cpp index 3c2986846b3..75a5acd36e3 100644 --- a/benchmark/spmv/distributed/spmv.cpp +++ b/benchmark/spmv/distributed/spmv.cpp @@ -58,38 +58,7 @@ DEFINE_string(non_local_formats, "csr", "run. See the 'formats' option for a list of supported versions"); -std::string example_config = R"( - [ - {"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}, - {"filename": "my_file.mtx"} - ] -)"; - - -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -struct Generator : DistributedDefaultSystemGenerator> { - Generator(gko::experimental::mpi::communicator comm) - : DistributedDefaultSystemGenerator>{ - std::move(comm), {}} - {} - - void validate_options(const rapidjson::Value& options) const - { - if (!options.IsObject() || - !((options.HasMember("size") && options.HasMember("stencil") && - options.HasMember("comm_pattern")) || - options.HasMember("filename"))) { - print_config_error_and_exit(); - } - } -}; +using Generator = DistributedDefaultSystemGenerator>; int main(int argc, char* argv[]) @@ -98,13 +67,14 @@ int main(int argc, char* argv[]) const auto comm = gko::experimental::mpi::communicator(MPI_COMM_WORLD); const auto rank = comm.rank(); + const auto do_print = rank == 0; std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; - std::string format = example_config; - initialize_argument_parsing(&argc, &argv, header, format); + std::string format = Generator::get_example_config(); + initialize_argument_parsing(&argc, &argv, header, format, do_print); - if (rank == 0) { + if (do_print) { std::string extra_information = "The formats are [" + FLAGS_local_formats + "]x[" + FLAGS_non_local_formats + "]\n" + @@ -125,16 +95,13 
@@ int main(int argc, char* argv[]) } std::string json_input = broadcast_json_input(get_input_stream(), comm); - rapidjson::Document test_cases; - test_cases.Parse(json_input.c_str()); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(json_input); - run_spmv_benchmark(exec, test_cases, formats, Generator{comm}, - get_mpi_timer(exec, comm, FLAGS_gpu_timer), rank == 0); + run_test_cases(SpmvBenchmark{Generator{comm}, formats, do_print}, + exec, get_mpi_timer(exec, comm, FLAGS_gpu_timer), + test_cases); - if (rank == 0) { - std::cout << test_cases << std::endl; + if (do_print) { + std::cout << std::setw(4) << test_cases << std::endl; } } diff --git a/benchmark/spmv/spmv.cpp b/benchmark/spmv/spmv.cpp index df000cecd47..99387da6089 100644 --- a/benchmark/spmv/spmv.cpp +++ b/benchmark/spmv/spmv.cpp @@ -41,29 +41,16 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" #include "benchmark/utils/generator.hpp" -#include "benchmark/utils/spmv_validation.hpp" -struct Generator : DefaultSystemGenerator<> { - void validate_options(const rapidjson::Value& options) const - { - if (!options.IsObject() || - !((options.HasMember("size") && options.HasMember("stencil")) || - options.HasMember("filename"))) { - std::cerr - << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); - } - } -}; +using Generator = DefaultSystemGenerator<>; int main(int argc, char* argv[]) { std::string header = "A benchmark for measuring performance of Ginkgo's spmv.\n"; - std::string format = example_config; + std::string format = Generator::get_example_config(); initialize_argument_parsing(&argc, &argv, header, format); std::string extra_information = "The formats are " + FLAGS_formats + @@ -72,17 +59,11 @@ int main(int argc, char* argv[]) print_general_information(extra_information); auto exec = 
executor_factory.at(FLAGS_executor)(FLAGS_gpu_timer); - auto formats = split(FLAGS_formats, ','); - rapidjson::IStreamWrapper jcin(get_input_stream()); - rapidjson::Document test_cases; - test_cases.ParseStream(jcin); - if (!test_cases.IsArray()) { - print_config_error_and_exit(); - } + auto test_cases = json::parse(get_input_stream()); - run_spmv_benchmark(exec, test_cases, formats, Generator{}, - get_timer(exec, FLAGS_gpu_timer), true); + run_test_cases(SpmvBenchmark{Generator{}, split(FLAGS_formats)}, + exec, get_timer(exec, FLAGS_gpu_timer), test_cases); - std::cout << test_cases << std::endl; + std::cout << std::setw(4) << test_cases << std::endl; } diff --git a/benchmark/spmv/spmv_common.hpp b/benchmark/spmv/spmv_common.hpp index 01869732712..f9d0f4097db 100644 --- a/benchmark/spmv/spmv_common.hpp +++ b/benchmark/spmv/spmv_common.hpp @@ -36,7 +36,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "benchmark/utils/formats.hpp" #include "benchmark/utils/general.hpp" +#include "benchmark/utils/iteration_control.hpp" #include "benchmark/utils/loggers.hpp" +#include "benchmark/utils/runner.hpp" #include "benchmark/utils/timer.hpp" #include "benchmark/utils/types.hpp" #ifdef GINKGO_BENCHMARK_ENABLE_TUNING @@ -48,57 +50,119 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
DEFINE_uint32(nrhs, 1, "The number of right hand sides"); -// This function supposes that management of `FLAGS_overwrite` is done before -// calling it -template -void apply_spmv(const char* format_name, std::shared_ptr exec, - const Generator& generator, std::shared_ptr timer, - const gko::matrix_data& data, - const VectorType* b, const VectorType* x, - const VectorType* answer, rapidjson::Value& test_case, - rapidjson::MemoryPoolAllocator<>& allocator) -{ - try { - auto& spmv_case = test_case["spmv"]; - add_or_set_member(spmv_case, format_name, - rapidjson::Value(rapidjson::kObjectType), allocator); +template +struct spmv_benchmark_state { + gko::matrix_data data; + std::unique_ptr x; + std::unique_ptr b; + std::unique_ptr answer; +}; + + +template +struct SpmvBenchmark : Benchmark> { + using Vec = typename Generator::Vec; + std::string name; + std::vector formats; + bool do_print; + Generator generator; + + SpmvBenchmark(Generator generator, std::vector formats, + bool do_print = true) + : name{"spmv"}, + formats{std::move(formats)}, + generator{generator}, + do_print{do_print} + {} + + const std::string& get_name() const override { return name; } + + const std::vector& get_operations() const override + { + return formats; + } + + bool should_print() const override { return do_print; } + + std::string get_example_config() const override + { + return generator.get_example_config(); + } + + bool validate_config(const json& test_case) const override + { + return generator.validate_config(test_case); + } + + std::string describe_config(const json& test_case) const override + { + return generator.describe_config(test_case); + } + spmv_benchmark_state setup(std::shared_ptr exec, + json& test_case) const override + { + spmv_benchmark_state state; + state.data = generator.generate_matrix_data(test_case); + + auto nrhs = FLAGS_nrhs; + state.b = generator.create_multi_vector_random( + exec, gko::dim<2>{state.data.size[1], nrhs}); + state.x = 
generator.create_multi_vector_random( + exec, gko::dim<2>{state.data.size[0], nrhs}); + if (do_print) { + std::clog << "Matrix is of size (" << state.data.size[0] << ", " + << state.data.size[1] << "), " + << state.data.nonzeros.size() << std::endl; + } + test_case["rows"] = state.data.size[0]; + test_case["cols"] = state.data.size[1]; + test_case["nonzeros"] = state.data.nonzeros.size(); + if (FLAGS_detailed) { + state.answer = gko::clone(state.x); + auto system_matrix = + generator.generate_matrix_with_default_format(exec, state.data); + exec->synchronize(); + system_matrix->apply(state.b, state.answer); + exec->synchronize(); + } + return state; + } + + void run(std::shared_ptr exec, std::shared_ptr timer, + spmv_benchmark_state& state, + const std::string& format_name, json& format_case) const override + { auto system_matrix = generator.generate_matrix_with_format( - exec, format_name, data, &spmv_case[format_name], &allocator); + exec, format_name, state.data, &format_case); // check the residual if (FLAGS_detailed) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); exec->synchronize(); - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); exec->synchronize(); auto max_relative_norm2 = - compute_max_relative_norm2(x_clone.get(), answer); - add_or_set_member(spmv_case[format_name], "max_relative_norm2", - max_relative_norm2, allocator); + compute_max_relative_norm2(x_clone.get(), state.answer.get()); + format_case["max_relative_norm2"] = max_relative_norm2; } IterationControl ic{timer}; // warm run for (auto _ : ic.warmup_run()) { - auto x_clone = clone(x); + auto x_clone = clone(state.x); exec->synchronize(); - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); exec->synchronize(); } // tuning run #ifdef GINKGO_BENCHMARK_ENABLE_TUNING auto& format_case = spmv_case[format_name]; - if (!format_case.HasMember("tuning")) { - format_case.AddMember( - "tuning", rapidjson::Value(rapidjson::kObjectType), 
allocator); - } + format_case["tuning"] = json::object(); auto& tuning_case = format_case["tuning"]; - add_or_set_member(tuning_case, "time", - rapidjson::Value(rapidjson::kArrayType), allocator); - add_or_set_member(tuning_case, "values", - rapidjson::Value(rapidjson::kArrayType), allocator); + tuning_case["time"] = json::array(); + tuning_case["values"] = json::array(); // Enable tuning for this portion of code gko::_tuning_flag = true; @@ -112,13 +176,13 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, gko::_tuned_value = val; auto tuning_timer = get_timer(exec, FLAGS_gpu_timer); IterationControl ic_tuning{tuning_timer}; - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto _ : ic_tuning.run()) { - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); } - tuning_case["time"].PushBack( - ic_tuning.compute_time(FLAGS_timer_method), allocator); - tuning_case["values"].PushBack(val, allocator); + tuning_case["time"].push_back( + ic_tuning.compute_time(FLAGS_timer_method)); + tuning_case["values"].push_back(val); } // We put back the flag to false to use the default (non-tuned) values // for the following @@ -126,150 +190,41 @@ void apply_spmv(const char* format_name, std::shared_ptr exec, #endif // GINKGO_BENCHMARK_ENABLE_TUNING // timed run - auto x_clone = clone(x); + auto x_clone = clone(state.x); for (auto _ : ic.run()) { - system_matrix->apply(b, x_clone); + system_matrix->apply(state.b, x_clone); } - add_or_set_member(spmv_case[format_name], "time", - ic.compute_time(FLAGS_timer_method), allocator); - add_or_set_member(spmv_case[format_name], "repetitions", - ic.get_num_repetitions(), allocator); - - // compute and write benchmark data - add_or_set_member(spmv_case[format_name], "completed", true, allocator); - } catch (const std::exception& e) { - add_or_set_member(test_case["spmv"][format_name], "completed", false, - allocator); - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - 
msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case["spmv"][format_name], "error", - msg_value, allocator); - } - std::cerr << "Error when processing test case\n" - << test_case << "\n" - << "what(): " << e.what() << std::endl; - } -} - - -template -void run_spmv_benchmark(std::shared_ptr exec, - rapidjson::Document& test_cases, - const std::vector formats, - const SystemGenerator& system_generator, - std::shared_ptr timer, bool do_print) -{ - auto& allocator = test_cases.GetAllocator(); - auto profiler_hook = create_profiler_hook(exec); - if (profiler_hook) { - exec->add_logger(profiler_hook); + format_case["time"] = ic.compute_time(FLAGS_timer_method); + format_case["repetitions"] = ic.get_num_repetitions(); } - auto annotate = - [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { - if (profiler_hook) { - return profiler_hook->user_range(name); + + void postprocess(json& test_case) const override + { + if (!test_case.contains("optimal")) { + test_case["optimal"] = json::object(); } - return {}; - }; - - for (auto& test_case : test_cases.GetArray()) { - try { - // set up benchmark - system_generator.validate_options(test_case); - if (!test_case.HasMember("spmv")) { - test_case.AddMember("spmv", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - auto& spmv_case = test_case["spmv"]; - if (!FLAGS_overwrite && - all_of(begin(formats), end(formats), - [&spmv_case](const std::string& s) { - return spmv_case.HasMember(s.c_str()); - })) { + auto best_time = std::numeric_limits::max(); + std::string best_format; + // find the fastest among all formats we tested + for (const auto& format : formats) { + if (!test_case[name].contains(format)) { continue; } - if (do_print) { - std::clog << "Running test case\n" << test_case << std::endl; - } - // annotate the test case - // This string needs to outlive `test_case_range` to make sure we - // don't use its const char* c_str() after it was freed. 
- auto test_case_str = system_generator.describe_config(test_case); - auto test_case_range = annotate(test_case_str.c_str()); - - auto data = system_generator.generate_matrix_data(test_case); - - auto nrhs = FLAGS_nrhs; - auto b = system_generator.create_multi_vector_random( - exec, gko::dim<2>{data.size[1], nrhs}); - auto x = system_generator.create_multi_vector_random( - exec, gko::dim<2>{data.size[0], nrhs}); - if (do_print) { - std::clog << "Matrix is of size (" << data.size[0] << ", " - << data.size[1] << ")" << std::endl; - } - add_or_set_member(test_case, "size", data.size[0], allocator); - add_or_set_member(test_case, "nnz", data.nonzeros.size(), - allocator); - auto best_performance = std::numeric_limits::max(); - if (!test_case.HasMember("optimal")) { - test_case.AddMember("optimal", - rapidjson::Value(rapidjson::kObjectType), - allocator); - } - - // Compute the result from ginkgo::coo as the correct answer - auto answer = gko::clone(x); - if (FLAGS_detailed) { - auto system_matrix = - system_generator.generate_matrix_with_default_format(exec, - data); - exec->synchronize(); - system_matrix->apply(b, answer); - exec->synchronize(); - } - for (const auto& format_name : formats) { - { - auto format_range = annotate(format_name.c_str()); - apply_spmv(format_name.c_str(), exec, system_generator, - timer, data, b.get(), x.get(), answer.get(), - test_case, allocator); - } - if (do_print) { - std::clog << "Current state:" << std::endl - << test_cases << std::endl; - } - if (spmv_case[format_name.c_str()]["completed"].GetBool()) { - auto performance = - spmv_case[format_name.c_str()]["time"].GetDouble(); - if (performance < best_performance) { - best_performance = performance; - add_or_set_member( - test_case["optimal"], "spmv", - rapidjson::Value(format_name.c_str(), allocator) - .Move(), - allocator); - } - } - if (do_print) { - backup_results(test_cases); + auto& format_case = test_case[name][format]; + if (format_case.contains("completed") && + 
format_case["completed"].get()) { + auto time = format_case["time"]; + if (time < best_time) { + best_time = time; + best_format = format; } } - } catch (const std::exception& e) { - std::cerr << "Error setting up matrix data, what(): " << e.what() - << std::endl; - if (FLAGS_keep_errors) { - rapidjson::Value msg_value; - msg_value.SetString(e.what(), allocator); - add_or_set_member(test_case, "error", msg_value, allocator); - } + } + if (!best_format.empty()) { + test_case["optimal"][name] = best_format; } } - if (profiler_hook) { - exec->remove_logger(profiler_hook); - } -} +}; + #endif // GINKGO_BENCHMARK_SPMV_SPMV_COMMON_HPP diff --git a/benchmark/test/reference/blas.profile.stderr b/benchmark/test/reference/blas.profile.stderr index 1fb7d5b93bc..6018c7425b4 100644 --- a/benchmark/test/reference/blas.profile.stderr +++ b/benchmark/test/reference/blas.profile.stderr @@ -3,12 +3,14 @@ This is Ginkgo 1.5.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case +The operations are copy,axpy,scal +Running test case { "n": 100, "blas": {} } DEBUG: begin n = 100 + Running blas: copy DEBUG: begin copy DEBUG: begin allocate DEBUG: end allocate @@ -23,21 +25,7 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end copy -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] + Running blas: axpy DEBUG: begin axpy DEBUG: begin allocate DEBUG: end allocate @@ -60,28 +48,7 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end axpy -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] + Running blas: scal 
DEBUG: begin scal DEBUG: begin allocate DEBUG: end allocate @@ -98,33 +65,4 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end scal -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "completed": true - } - } - } -] DEBUG: end n = 100 diff --git a/benchmark/test/reference/blas.simple.stderr b/benchmark/test/reference/blas.simple.stderr index e9b186e1353..89deadd081c 100644 --- a/benchmark/test/reference/blas.simple.stderr +++ b/benchmark/test/reference/blas.simple.stderr @@ -3,74 +3,12 @@ This is Ginkgo 1.5.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case +The operations are copy,axpy,scal +Running test case { "n": 100, "blas": {} } -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] + Running blas: copy + Running blas: axpy + Running blas: scal diff --git 
a/benchmark/test/reference/distributed_solver.profile.stderr b/benchmark/test/reference/distributed_solver.profile.stderr index 64b09a754c3..2f2dcc5a86a 100644 --- a/benchmark/test/reference/distributed_solver.profile.stderr +++ b/benchmark/test/reference/distributed_solver.profile.stderr @@ -5,7 +5,6 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt,stencil) Running test case { "size": 100, @@ -213,9 +212,9 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) Matrix is of size (125, 125) -DEBUG: begin cg +DEBUG: begin stencil(100,7pt,stencil) Running solver: cg -DEBUG: begin none +DEBUG: begin cg DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_squared_norm2 @@ -1826,8 +1825,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end none DEBUG: end cg +DEBUG: end stencil(100,7pt,stencil) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1842,4 +1841,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt,stencil) diff --git a/benchmark/test/reference/distributed_solver.profile.stdout b/benchmark/test/reference/distributed_solver.profile.stdout index 16dc6741930..e47be8c0e0e 100644 --- a/benchmark/test/reference/distributed_solver.profile.stdout +++ b/benchmark/test/reference/distributed_solver.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": { @@ -59,6 +59,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/distributed_solver.simple.stdout b/benchmark/test/reference/distributed_solver.simple.stdout index 96ef102f8b8..d08c9ce99d8 100644 --- 
a/benchmark/test/reference/distributed_solver.simple.stdout +++ b/benchmark/test/reference/distributed_solver.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "optimal": { @@ -60,6 +60,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/matrix_statistics.simple.stderr b/benchmark/test/reference/matrix_statistics.simple.stderr index e77cd5d413a..ff7229aa1c2 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stderr +++ b/benchmark/test/reference/matrix_statistics.simple.stderr @@ -6,4 +6,4 @@ Running test case "stencil": "7pt", "problem": {} } -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 diff --git a/benchmark/test/reference/matrix_statistics.simple.stdout b/benchmark/test/reference/matrix_statistics.simple.stdout index 4470784e7c5..13746ce8a46 100644 --- a/benchmark/test/reference/matrix_statistics.simple.stdout +++ b/benchmark/test/reference/matrix_statistics.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "problem": { "rows": 125, @@ -33,6 +33,9 @@ "hyperskewness": -1.741577812922432, "hyperflatness": 7.762345679012379 } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/multi_vector_distributed.profile.stderr b/benchmark/test/reference/multi_vector_distributed.profile.stderr index e69de29bb2d..cb52bc67f7d 100644 --- a/benchmark/test/reference/multi_vector_distributed.profile.stderr +++ b/benchmark/test/reference/multi_vector_distributed.profile.stderr @@ -0,0 +1,258 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 (develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The operations are copy,axpy,scal +Running test case +{ + "n": 100, + "blas": {} +} +DEBUG: begin n = 100 + Running blas: copy +DEBUG: begin copy +DEBUG: 
begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy 
+DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end copy + Running blas: axpy +DEBUG: begin axpy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array 
+DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end axpy + Running blas: scal +DEBUG: begin scal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: 
begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::scale +DEBUG: end dense::scale +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end scal +DEBUG: end n = 100 diff --git a/benchmark/test/reference/multi_vector_distributed.simple.stderr b/benchmark/test/reference/multi_vector_distributed.simple.stderr index 23f3554e9c4..89deadd081c 100644 --- a/benchmark/test/reference/multi_vector_distributed.simple.stderr +++ b/benchmark/test/reference/multi_vector_distributed.simple.stderr @@ -3,84 +3,12 @@ This is Ginkgo 1.5.0 (develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are copy,axpy,scalThis is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) -Running on reference(0) -Running with 2 warm iterations and 10 running iterations -The random seed for right hand sides is 42 -The operations are copy,axpy,scalThis is Ginkgo 1.5.0 (develop) - running with core module 1.5.0 (develop) -Running on reference(0) -Running with 2 warm iterations and 10 running iterations -The random seed for right hand sides is 42 -The operations are copy,axpy,scalRunning test case +The operations are copy,axpy,scal +Running test case { "n": 100, "blas": {} } -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { - "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] -Current state: -[ - { 
- "n": 100, - "blas": { - "copy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "axpy": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - }, - "scal": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "completed": true - } - } - } -] + Running blas: copy + Running blas: axpy + Running blas: scal diff --git a/benchmark/test/reference/preconditioner.profile.stderr b/benchmark/test/reference/preconditioner.profile.stderr index 97341459e69..18f26eb00f7 100644 --- a/benchmark/test/reference/preconditioner.profile.stderr +++ b/benchmark/test/reference/preconditioner.profile.stderr @@ -10,7 +10,6 @@ Running test case "stencil": "7pt", "preconditioner": {} } -DEBUG: begin stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin components::fill_array @@ -59,7 +58,9 @@ DEBUG: begin dense::fill DEBUG: end dense::fill DEBUG: begin dense::fill_in_matrix_data DEBUG: end dense::fill_in_matrix_data -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 +DEBUG: begin stencil(100,7pt) + Running preconditioner: none DEBUG: begin none DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) DEBUG: begin allocate @@ -94,36 +95,7 @@ DEBUG: end apply(gko::matrix::Identity) DEBUG: begin free DEBUG: end free DEBUG: end none -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "preconditioner": { - "none": { - "generate": { - "components": { - "generate(gko::matrix::IdentityFactory)": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 1 - }, - "apply": { - "components": { - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 1 - }, - "completed": true - } - } - } -] +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -134,4 +106,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin 
free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/preconditioner.profile.stdout b/benchmark/test/reference/preconditioner.profile.stdout index c775fd61285..135168e2c0e 100644 --- a/benchmark/test/reference/preconditioner.profile.stdout +++ b/benchmark/test/reference/preconditioner.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "preconditioner": { "none": { @@ -25,6 +25,9 @@ }, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git a/benchmark/test/reference/preconditioner.simple.stderr b/benchmark/test/reference/preconditioner.simple.stderr index 4a7ee9498d5..b24661cb0f7 100644 --- a/benchmark/test/reference/preconditioner.simple.stderr +++ b/benchmark/test/reference/preconditioner.simple.stderr @@ -10,34 +10,5 @@ Running test case "stencil": "7pt", "preconditioner": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "preconditioner": { - "none": { - "generate": { - "components": { - "generate(gko::matrix::IdentityFactory)": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "apply": { - "components": { - "apply(gko::matrix::Identity)": 1.0, - "copy(gko::matrix::Dense,gko::matrix::Dense)": 1.0, - "dense::copy": 1.0, - "overhead": 1.0 - }, - "time": 1.0, - "repetitions": 10 - }, - "completed": true - } - } - } -] +Matrix is of size (125, 125), 725 + Running preconditioner: none diff --git a/benchmark/test/reference/preconditioner.simple.stdout b/benchmark/test/reference/preconditioner.simple.stdout index 84100628d73..7a2d400e3aa 100644 --- a/benchmark/test/reference/preconditioner.simple.stdout +++ b/benchmark/test/reference/preconditioner.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "preconditioner": { "none": { @@ -25,6 +25,9 @@ }, "completed": true } - } + }, + "rows": 125, + "cols": 125, + "nonzeros": 725 } ] diff --git 
a/benchmark/test/reference/solver.profile.stderr b/benchmark/test/reference/solver.profile.stderr index e50ab7f27b3..6faa5cec620 100644 --- a/benchmark/test/reference/solver.profile.stderr +++ b/benchmark/test/reference/solver.profile.stderr @@ -5,7 +5,6 @@ Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 Running cg with 1000 iterations and residual goal of 1.000000e-06 The number of right hand sides is 1 -DEBUG: begin stencil(100,7pt) Running test case { "size": 100, @@ -62,9 +61,9 @@ DEBUG: begin dense::copy DEBUG: end dense::copy DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) Matrix is of size (125, 125) -DEBUG: begin cg +DEBUG: begin stencil(100,7pt) Running solver: cg -DEBUG: begin none +DEBUG: begin cg DEBUG: begin allocate DEBUG: end allocate DEBUG: begin dense::compute_norm2_dispatch @@ -1321,8 +1320,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end none DEBUG: end cg +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free @@ -1333,4 +1332,3 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/solver.profile.stdout b/benchmark/test/reference/solver.profile.stdout index a61b432ca0d..b44cea7ddf4 100644 --- a/benchmark/test/reference/solver.profile.stdout +++ b/benchmark/test/reference/solver.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "optimal": { "spmv": "csr" @@ -54,6 +54,8 @@ "repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/solver.simple.stdout b/benchmark/test/reference/solver.simple.stdout index 2e44c73fdfa..505035ab9c4 100644 --- a/benchmark/test/reference/solver.simple.stdout +++ b/benchmark/test/reference/solver.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "optimal": { "spmv": "csr" @@ -55,6 +55,8 @@ 
"repetitions": 1, "completed": true } - } + }, + "rows": 125, + "cols": 125 } ] diff --git a/benchmark/test/reference/sparse_blas.profile.stderr b/benchmark/test/reference/sparse_blas.profile.stderr index 02dfdfdacfd..ce5076db7cb 100644 --- a/benchmark/test/reference/sparse_blas.profile.stderr +++ b/benchmark/test/reference/sparse_blas.profile.stderr @@ -3,13 +3,13 @@ This is Ginkgo 1.5.0 (develop) Running on reference(0) Running with 0 warm iterations and 1 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "size": 100, "stencil": "7pt", "sparse_blas": {} } -DEBUG: begin stencil(100,7pt) Matrix is of size (125, 125), 725 DEBUG: begin allocate DEBUG: end allocate @@ -35,6 +35,8 @@ DEBUG: begin components::convert_idxs_to_ptrs DEBUG: end components::convert_idxs_to_ptrs DEBUG: begin free DEBUG: end free +DEBUG: begin stencil(100,7pt) + Running sparse_blas: transpose DEBUG: begin transpose DEBUG: begin allocate DEBUG: end allocate @@ -69,36 +71,10 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end transpose -Current state: -[ - { - "size": 100, - "stencil": "7pt", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 1, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, - "completed": true - } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 - } -] +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/sparse_blas.simple.stderr b/benchmark/test/reference/sparse_blas.simple.stderr index a813994e739..b90f45cc585 100644 --- a/benchmark/test/reference/sparse_blas.simple.stderr +++ b/benchmark/test/reference/sparse_blas.simple.stderr @@ -3,36 +3,12 @@ This is Ginkgo 1.5.0 
(develop) Running on reference(0) Running with 2 warm iterations and 10 running iterations The random seed for right hand sides is 42 -The operations are transposeRunning test case +The operations are transpose +Running test case { "size": 100, "stencil": "7pt", "sparse_blas": {} } Matrix is of size (125, 125), 725 -Current state: -[ - { - "size": 100, - "stencil": "7pt", - "sparse_blas": { - "transpose": { - "time": 1.0, - "flops": 1.0, - "bandwidth": 1.0, - "repetitions": 10, - "components": { - "allocate": 1.0, - "components::fill_array": 1.0, - "csr::transpose": 1.0, - "free": 1.0, - "overhead": 1.0 - }, - "completed": true - } - }, - "rows": 125, - "cols": 125, - "nonzeros": 725 - } -] + Running sparse_blas: transpose diff --git a/benchmark/test/reference/spmv.profile.stderr b/benchmark/test/reference/spmv.profile.stderr index 3ddabd987ad..469ae69c548 100644 --- a/benchmark/test/reference/spmv.profile.stderr +++ b/benchmark/test/reference/spmv.profile.stderr @@ -11,7 +11,6 @@ Running test case "stencil": "7pt", "spmv": {} } -DEBUG: begin stencil(100,7pt) DEBUG: begin allocate DEBUG: end allocate DEBUG: begin allocate @@ -52,7 +51,7 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free -Matrix is of size (125, 125) +Matrix is of size (125, 125), 725 DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) DEBUG: begin allocate DEBUG: end allocate @@ -77,6 +76,8 @@ DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end free +DEBUG: begin stencil(100,7pt) + Running spmv: coo DEBUG: begin coo DEBUG: begin allocate DEBUG: end allocate @@ -151,28 +152,10 @@ DEBUG: end free DEBUG: begin free DEBUG: end free DEBUG: end coo -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "spmv": { - "coo": { - "storage": 11600, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 1, - "completed": true - } - }, - "nnz": 725, - "optimal": {} - } -] +DEBUG: end stencil(100,7pt) DEBUG: begin free DEBUG: end free DEBUG: begin free DEBUG: end 
free DEBUG: begin free DEBUG: end free -DEBUG: end stencil(100,7pt) diff --git a/benchmark/test/reference/spmv.profile.stdout b/benchmark/test/reference/spmv.profile.stdout index ec7309613b6..fc152bcc12a 100644 --- a/benchmark/test/reference/spmv.profile.stdout +++ b/benchmark/test/reference/spmv.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "spmv": { "coo": { @@ -12,7 +12,9 @@ "completed": true } }, - "nnz": 725, + "rows": 125, + "cols": 125, + "nonzeros": 725, "optimal": { "spmv": "coo" } diff --git a/benchmark/test/reference/spmv.simple.stderr b/benchmark/test/reference/spmv.simple.stderr index 8a2ebe9fe15..813763c0e7a 100644 --- a/benchmark/test/reference/spmv.simple.stderr +++ b/benchmark/test/reference/spmv.simple.stderr @@ -11,22 +11,5 @@ Running test case "stencil": "7pt", "spmv": {} } -Matrix is of size (125, 125) -Current state: -[ - { - "size": 125, - "stencil": "7pt", - "spmv": { - "coo": { - "storage": 11600, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "nnz": 725, - "optimal": {} - } -] +Matrix is of size (125, 125), 725 + Running spmv: coo diff --git a/benchmark/test/reference/spmv.simple.stdout b/benchmark/test/reference/spmv.simple.stdout index 90f8903a452..737938d7c96 100644 --- a/benchmark/test/reference/spmv.simple.stdout +++ b/benchmark/test/reference/spmv.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 125, + "size": 100, "stencil": "7pt", "spmv": { "coo": { @@ -12,7 +12,9 @@ "completed": true } }, - "nnz": 725, + "rows": 125, + "cols": 125, + "nonzeros": 725, "optimal": { "spmv": "coo" } diff --git a/benchmark/test/reference/spmv_distributed.profile.stderr b/benchmark/test/reference/spmv_distributed.profile.stderr index e69de29bb2d..6d7f8e82254 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stderr +++ b/benchmark/test/reference/spmv_distributed.profile.stderr @@ -0,0 +1,542 @@ +This is Ginkgo 1.5.0 (develop) + running with core module 1.5.0 
(develop) +Running on reference(0) +Running with 0 warm iterations and 1 running iterations +The random seed for right hand sides is 42 +The formats are [csr]x[csr] +The number of right hand sides is 1 +Running test case +{ + "size": 100, + "stencil": "7pt", + "comm_pattern": "stencil", + "spmv": {} +} +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size 
+DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin dense::fill_in_matrix_data +DEBUG: end dense::fill_in_matrix_data +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +Matrix is of size (81, 81), 135 +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate 
+DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::matrix::Coo,gko::matrix::Coo) +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy(gko::matrix::Coo,gko::matrix::Coo) +DEBUG: begin copy(gko::matrix::Coo,gko::matrix::Coo) +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy(gko::matrix::Coo,gko::matrix::Coo) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin 
free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Coo) +DEBUG: begin coo::spmv +DEBUG: end coo::spmv +DEBUG: end apply(gko::matrix::Coo) +DEBUG: begin advanced_apply(gko::matrix::Coo) +DEBUG: begin coo::advanced_spmv +DEBUG: end coo::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Coo) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin stencil(100,7pt,stencil) + Running spmv: csr-csr +DEBUG: begin csr-csr +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin partition::build_ranges_from_global_size +DEBUG: end partition::build_ranges_from_global_size +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin components::fill_array +DEBUG: end 
components::fill_array +DEBUG: begin partition::build_from_contiguous +DEBUG: end partition::build_from_contiguous +DEBUG: begin partition::build_starting_indices +DEBUG: end partition::build_starting_indices +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::fill_array +DEBUG: end components::fill_array +DEBUG: begin copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: end copy(gko::matrix::Csr,gko::matrix::Csr) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::fill +DEBUG: end dense::fill +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin components::aos_to_soa +DEBUG: end components::aos_to_soa +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin distributed_matrix::build_local_nonlocal +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: 
end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: end distributed_matrix::build_local_nonlocal +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin free +DEBUG: end free +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin free +DEBUG: end free +DEBUG: begin components::convert_idxs_to_ptrs +DEBUG: end components::convert_idxs_to_ptrs +DEBUG: begin copy +DEBUG: end copy +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin 
csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end apply(gko::experimental::distributed::Matrix) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::add_scaled +DEBUG: end dense::add_scaled +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::compute_squared_norm2 +DEBUG: end dense::compute_squared_norm2 +DEBUG: begin dense::compute_sqrt +DEBUG: end dense::compute_sqrt +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::matrix::Dense,gko::matrix::Dense) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin allocate +DEBUG: end allocate +DEBUG: begin dense::copy +DEBUG: end dense::copy +DEBUG: end copy(gko::experimental::distributed::Vector,gko::experimental::distributed::Vector) +DEBUG: begin apply(gko::experimental::distributed::Matrix) +DEBUG: begin dense::row_gather +DEBUG: end dense::row_gather +DEBUG: begin apply(gko::matrix::Csr) +DEBUG: begin csr::spmv +DEBUG: end csr::spmv +DEBUG: end apply(gko::matrix::Csr) +DEBUG: begin advanced_apply(gko::matrix::Csr) +DEBUG: begin csr::advanced_spmv +DEBUG: end csr::advanced_spmv +DEBUG: end advanced_apply(gko::matrix::Csr) +DEBUG: end 
apply(gko::experimental::distributed::Matrix) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: end csr-csr +DEBUG: end stencil(100,7pt,stencil) +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free +DEBUG: begin free +DEBUG: end free diff --git a/benchmark/test/reference/spmv_distributed.profile.stdout b/benchmark/test/reference/spmv_distributed.profile.stdout index 2aeeeb5b0d5..fd5d3d28882 100644 --- a/benchmark/test/reference/spmv_distributed.profile.stdout +++ b/benchmark/test/reference/spmv_distributed.profile.stdout @@ -1,7 +1,7 @@ [ { - "size": 81, + "size": 100, "stencil": "7pt", "comm_pattern": "stencil", "spmv": { @@ -13,7 +13,9 @@ "completed": true } }, - "nnz": 135, + "rows": 81, + "cols": 81, + "nonzeros": 135, "optimal": { "spmv": "csr-csr" } diff --git a/benchmark/test/reference/spmv_distributed.simple.stderr b/benchmark/test/reference/spmv_distributed.simple.stderr index 57f31d44686..a51e4954923 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stderr +++ b/benchmark/test/reference/spmv_distributed.simple.stderr @@ -12,23 +12,5 @@ Running test case "comm_pattern": "stencil", "spmv": {} } -Matrix is of size (81, 81) -Current state: -[ - { - "size": 81, - "stencil": "7pt", - "comm_pattern": "stencil", - "spmv": { - "csr-csr": { - "storage": 2316, - "max_relative_norm2": 1.0, - "time": 1.0, - "repetitions": 10, - "completed": true - } - }, - "nnz": 135, - "optimal": {} - } -] +Matrix is of size (81, 81), 135 + Running spmv: csr-csr diff --git a/benchmark/test/reference/spmv_distributed.simple.stdout b/benchmark/test/reference/spmv_distributed.simple.stdout index d8cd32ba834..48db1ac2592 100644 --- a/benchmark/test/reference/spmv_distributed.simple.stdout +++ b/benchmark/test/reference/spmv_distributed.simple.stdout @@ -1,7 +1,7 @@ [ { - "size": 81, + 
"size": 100, "stencil": "7pt", "comm_pattern": "stencil", "spmv": { @@ -13,7 +13,9 @@ "completed": true } }, - "nnz": 135, + "rows": 81, + "cols": 81, + "nonzeros": 135, "optimal": { "spmv": "csr-csr" } diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index 182ddfb4c31..ba221964031 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -41,6 +41,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -52,10 +53,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#include -#include -#include -#include #include "benchmark/utils/json.hpp" @@ -92,10 +89,6 @@ DEFINE_string( DEFINE_bool(detailed, true, "If set, performs several runs to obtain more detailed results"); -DEFINE_bool(keep_errors, true, - "If set, writes exception messages during the execution into the " - "JSON output"); - DEFINE_bool(nested_names, false, "If set, separately logs nested operations"); DEFINE_bool(profile, false, @@ -104,8 +97,8 @@ DEFINE_bool(profile, false, DEFINE_string( profiler_hook, "none", - "Which profiler annotation mode to use, if any. Options are nvtx, roctx, " - "vtune, tau, debug, auto (choose based on executor)."); + "Which profiler annotation mode to use, if any. 
Options are " + "none, nvtx, roctx, vtune, tau, debug, auto (choose based on executor)."); DEFINE_uint32(seed, 42, "Seed used for the random number generator"); @@ -145,27 +138,32 @@ DEFINE_double( * @param format the format of the benchmark input data */ void initialize_argument_parsing(int* argc, char** argv[], std::string& header, - std::string& format) + std::string& format, bool do_print = true) { - std::ostringstream doc; - doc << header << "Usage: " << (*argv)[0] << " [options]\n" - << format - << " The results are written on standard output, in the same " - "format,\n" - << " but with test cases extended to include an additional member " - "\n" - << " object for each benchmark run.\n" - << " If run with a --backup flag, an intermediate result is " - "written \n" - << " to a file in the same format. The backup file can be used as " - "\n" - << " input to this test suite, and the benchmarking will \n" - << " continue from the point where the backup file was created."; - - gflags::SetUsageMessage(doc.str()); - std::ostringstream ver; - ver << gko::version_info::get(); - gflags::SetVersionString(ver.str()); + if (do_print) { + std::ostringstream doc; + doc << header << "Usage: " << (*argv)[0] << " [options]\n" + << format + << " The results are written on standard output, in the same " + "format,\n" + << " but with test cases extended to include an additional member " + "\n" + << " object for each benchmark run.\n" + << " If run with a --backup flag, an intermediate result is " + "written \n" + << " to a file in the same format. 
The backup file can be used as " + "\n" + << " input to this test suite, and the benchmarking will \n" + << " continue from the point where the backup file was created."; + + gflags::SetUsageMessage(doc.str()); + std::ostringstream ver; + ver << gko::version_info::get(); + gflags::SetVersionString(ver.str()); + } else { + gflags::SetUsageMessage(""); + gflags::SetVersionString(""); + } gflags::ParseCommandLineFlags(argc, argv, true); if (FLAGS_profile) { FLAGS_repetitions = "1"; @@ -202,31 +200,6 @@ void print_general_information(const std::string& extra) } -std::shared_ptr create_profiler_hook( - std::shared_ptr exec) -{ - using gko::log::ProfilerHook; - std::map()>> - hook_map{ - {"none", [] { return std::shared_ptr{}; }}, - {"auto", [&] { return ProfilerHook::create_for_executor(exec); }}, - {"nvtx", [] { return ProfilerHook::create_nvtx(); }}, - {"roctx", [] { return ProfilerHook::create_roctx(); }}, - {"tau", [] { return ProfilerHook::create_tau(); }}, - {"vtune", [] { return ProfilerHook::create_vtune(); }}, - {"debug", [] { - return ProfilerHook::create_custom( - [](const char* name, gko::log::profile_event_category) { - std::clog << "DEBUG: begin " << name << '\n'; - }, - [](const char* name, gko::log::profile_event_category) { - std::clog << "DEBUG: end " << name << '\n'; - }); - }}}; - return hook_map.at(FLAGS_profiler_hook)(); -} - - // Returns a random number engine std::default_random_engine& get_engine() { @@ -269,7 +242,7 @@ std::istream& get_input_stream() // backup generation -void backup_results(rapidjson::Document& results) +void backup_results(json& results) { static int next = 0; static auto filenames = []() -> std::array { @@ -489,279 +462,4 @@ gko::remove_complex compute_max_relative_norm2( } -/** - * A class for controlling the number warmup and timed iterations. 
- * - * The behavior is determined by the following flags - * - 'repetitions' switch between fixed and adaptive number of iterations - * - 'warmup' warmup iterations, applies in fixed and adaptive case - * - 'min_repetitions' minimal number of repetitions (adaptive case) - * - 'max_repetitions' maximal number of repetitions (adaptive case) - * - 'min_runtime' minimal total runtime (adaptive case) - * - 'repetition_growth_factor' controls the increase between two successive - * timings - * - * Usage: - * `IterationControl` exposes the member functions: - * - `warmup_run()`: controls run defined by `warmup` flag - * - `run(bool)`: controls run defined by all other flags - * - `get_timer()`: access to underlying timer - * The first two methods return an object that is to be used in a range-based - * for loop: - * ``` - * IterationControl ic(get_timer(...)); - * - * // warmup run always uses fixed number of iteration and does not issue - * // timings - * for(auto status: ic.warmup_run()){ - * // execute benchmark - * } - * // run may use adaptive number of iterations (depending on cmd line flag) - * // and issues timing (unless manage_timings is false) - * for(auto status: ic.run(manage_timings [default is true])){ - * if(! manage_timings) ic.get_timer->tic(); - * // execute benchmark - * if(! manage_timings) ic.get_timer->toc(); - * } - * - * ``` - * At the beginning of both methods, the timer is reset. - * The `status` object exposes the member - * - `cur_it`, containing the current iteration number, - * and the methods - * - `is_finished`, checks if the benchmark is finished, - */ -class IterationControl { - using IndexType = unsigned int; //!< to be compatible with GFLAGS type - - class run_control; - -public: - /** - * Creates an `IterationControl` object. - * - * Uses the commandline flags to setup the stopping criteria for the - * warmup and timed run. 
- * - * @param timer the timer that is to be used for the timings - */ - explicit IterationControl(const std::shared_ptr& timer) - { - status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, - FLAGS_warmup, 0., 0}; - if (FLAGS_repetitions == "auto") { - status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, - FLAGS_max_repetitions, FLAGS_min_runtime}; - } else { - const auto reps = - static_cast(std::stoi(FLAGS_repetitions)); - status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; - } - } - - IterationControl() = default; - IterationControl(const IterationControl&) = default; - IterationControl(IterationControl&&) = default; - - /** - * Creates iterable `run_control` object for the warmup run. - * - * This run uses always a fixed number of iterations. - */ - run_control warmup_run() - { - status_warmup_.cur_it = 0; - status_warmup_.managed_timer.clear(); - return run_control{&status_warmup_}; - } - - /** - * Creates iterable `run_control` object for the timed run. - * - * This run may be adaptive, depending on the commandline flags. - * - * @param manage_timings If true, the timer calls (`tic/toc`) are handled - * by the `run_control` object, otherwise they need to be executed outside - */ - run_control run(bool manage_timings = true) - { - status_run_.cur_it = 0; - status_run_.managed_timer.clear(); - status_run_.managed_timer.manage_timings = manage_timings; - return run_control{&status_run_}; - } - - std::shared_ptr get_timer() const - { - return status_run_.managed_timer.timer; - } - - /** - * Compute the time from the given statistical method - * - * @param method the statistical method. If the timer does not have the - * same iteration as the IterationControl, it can only use - * average from the IterationControl. 
- * - * @return the statistical time - */ - double compute_time(const std::string& method = "average") const - { - if (status_run_.managed_timer.timer->get_num_repetitions() == - this->get_num_repetitions()) { - return status_run_.managed_timer.compute_time(method); - } else { - assert(method == "average"); - return status_run_.managed_timer.get_total_time() / - this->get_num_repetitions(); - } - } - - IndexType get_num_repetitions() const { return status_run_.cur_it; } - -private: - struct TimerManager { - std::shared_ptr timer; - bool manage_timings = false; - - void tic() - { - if (manage_timings) { - timer->tic(); - } - } - void toc(unsigned int num = 1) - { - if (manage_timings) { - timer->toc(num); - } - } - - void clear() { timer->clear(); } - - double get_total_time() const { return timer->get_total_time(); } - - double compute_time(const std::string& method = "average") const - { - return timer->compute_time(method); - } - }; - - /** - * Stores stopping criteria of the adaptive benchmark run as well as the - * current iteration number. - */ - struct status { - TimerManager managed_timer{}; - - IndexType min_it = 0; - IndexType max_it = 0; - double max_runtime = 0.; - - IndexType cur_it = 0; - - /** - * checks if the adaptive run is complete - * - * the adaptive run is complete if: - * - the minimum number of iteration is reached - * - and either: - * - the maximum number of repetitions is reached - * - the total runtime is above the threshold - * - * @return completeness state of the adaptive run - */ - bool is_finished() const - { - return cur_it >= min_it && - (cur_it >= max_it || - managed_timer.get_total_time() >= max_runtime); - } - }; - - /** - * Iterable class managing the benchmark iteration. - * - * Has to be used in a range-based for loop. - */ - struct run_control { - struct iterator { - /** - * Increases the current iteration count and finishes timing if - * necessary. 
- * - * As `++it` is the last step of a for-loop, the managed_timer is - * stopped, if enough iterations have passed since the last timing. - * The interval between two timings is steadily increased to - * reduce the timing overhead. - */ - iterator operator++() - { - cur_info->cur_it++; - if (cur_info->cur_it >= next_timing && !stopped) { - cur_info->managed_timer.toc( - static_cast(cur_info->cur_it - start_timing)); - stopped = true; - next_timing = static_cast(std::ceil( - next_timing * FLAGS_repetition_growth_factor)); - // If repetition_growth_factor <= 1, next_timing will be - // next iteration. - if (next_timing <= cur_info->cur_it) { - next_timing = cur_info->cur_it + 1; - } - } - return *this; - } - - status operator*() const { return *cur_info; } - - /** - * Checks if the benchmark is finished and handles timing, if - * necessary. - * - * As `begin != end` is the first step in a for-loop, the - * managed_timer is started, if it was previously stopped. - * Additionally, if the benchmark is complete and the managed_timer - * is still running it is stopped. (This may occur if the maximal - * number of repetitions is surpassed) - * - * Uses only the information from the `status` object, i.e. - * the right hand side is ignored. 
- * - * @return true if benchmark is not finished, else false - */ - bool operator!=(const iterator&) - { - const bool is_finished = cur_info->is_finished(); - if (!is_finished && stopped) { - stopped = false; - cur_info->managed_timer.tic(); - start_timing = cur_info->cur_it; - } else if (is_finished && !stopped) { - cur_info->managed_timer.toc( - static_cast(cur_info->cur_it - start_timing)); - stopped = true; - } - return !is_finished; - } - - status* cur_info; - IndexType next_timing = 1; //!< next iteration to stop timing - IndexType start_timing = 0; //!< iteration for starting timing - bool stopped = true; - }; - - iterator begin() const { return iterator{info}; } - - // not used, could potentially be used in c++17 as a sentinel - iterator end() const { return iterator{}; } - - status* info; - }; - - status status_warmup_; - status status_run_; -}; - - #endif // GKO_BENCHMARK_UTILS_GENERAL_HPP_ diff --git a/benchmark/utils/generator.hpp b/benchmark/utils/generator.hpp index 5524ad19744..62c87e7a174 100644 --- a/benchmark/utils/generator.hpp +++ b/benchmark/utils/generator.hpp @@ -53,28 +53,45 @@ struct DefaultSystemGenerator { using Vec = vec; static gko::matrix_data generate_matrix_data( - rapidjson::Value& config) + const json& config) { - if (config.HasMember("filename")) { - std::ifstream in(config["filename"].GetString()); + if (config.contains("filename")) { + std::ifstream in(config["filename"].get()); return gko::read_generic_raw(in); - } else if (config.HasMember("stencil")) { + } else if (config.contains("stencil")) { return generate_stencil( - config["stencil"].GetString(), config["size"].GetInt64()); + config["stencil"].get(), + config["size"].get()); } else { throw std::runtime_error( "No known way to generate matrix data found."); } } - static std::string describe_config(rapidjson::Value& config) + static std::string get_example_config() { - if (config.HasMember("filename")) { - return config.GetString(); - } else if 
(config.HasMember("stencil")) { + return json:: + parse(R"([{"filename": "my_file.mtx"},{"filename": "my_file2.mtx"},{"size": 100, "stencil": "7pt"}])") + .dump(4); + } + + static bool validate_config(const json& test_case) + { + return ((test_case.contains("size") && test_case.contains("stencil") && + test_case["size"].is_number_integer() && + test_case["stencil"].is_string()) || + (test_case.contains("filename") && + test_case["filename"].is_string())); + } + + static std::string describe_config(const json& config) + { + if (config.contains("filename")) { + return config.get(); + } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].GetInt64() << "," - << config["stencil"].GetString() << ")"; + ss << "stencil(" << config["size"].get() << "," + << config["stencil"].get() << ")"; return ss.str(); } else { throw std::runtime_error("No known way to describe config."); @@ -82,30 +99,30 @@ struct DefaultSystemGenerator { } static std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, rapidjson::Value& config) + std::shared_ptr exec, json& config) { auto data = generate_matrix_data(config); return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].GetString(), data); + std::move(exec), config["optimal"]["spmv"].get(), + data); } static std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data, - rapidjson::Value* spmv_case = nullptr, - rapidjson::MemoryPoolAllocator<>* allocator = nullptr) + json* spmv_case = nullptr) { auto storage_logger = std::make_shared(); - if (spmv_case && allocator) { + if (spmv_case) { exec->add_logger(storage_logger); } auto mtx = gko::share(::formats::matrix_factory(format_name, exec, data)); - if (spmv_case && allocator) { + if (spmv_case) { exec->remove_logger(storage_logger); - storage_logger->write_data(*spmv_case, *allocator); + storage_logger->write_data(*spmv_case); } return 
mtx; @@ -172,32 +189,51 @@ struct DistributedDefaultSystemGenerator { using Vec = dist_vec; gko::matrix_data generate_matrix_data( - rapidjson::Value& config) const + const json& config) const { - if (config.HasMember("filename")) { - std::ifstream in(config["filename"].GetString()); + if (config.contains("filename")) { + std::ifstream in(config["filename"].get()); return gko::read_generic_raw(in); - } else if (config.HasMember("stencil")) { + } else if (config.contains("stencil")) { auto local_size = static_cast( - config["size"].GetInt64() / comm.size()); + config["size"].get() / comm.size()); return generate_stencil( - config["stencil"].GetString(), comm, local_size, - config["comm_pattern"].GetString() == std::string("optimal")); + config["stencil"].get(), comm, local_size, + config["comm_pattern"].get() == + std::string("optimal")); } else { throw std::runtime_error( "No known way to generate matrix data found."); } } - std::string describe_config(rapidjson::Value& config) const + static std::string get_example_config() { - if (config.HasMember("filename")) { - return config.GetString(); - } else if (config.HasMember("stencil")) { + return json:: + parse(R"([{"size": 100, "stencil": "7pt", "comm_pattern": "stencil"}, {"filename": "my_file.mtx"}])") + .dump(4); + } + + static bool validate_config(const json& test_case) + { + return ((test_case.contains("size") && test_case.contains("stencil") && + test_case.contains("comm_pattern") && + test_case["size"].is_number_integer() && + test_case["stencil"].is_string() && + test_case["comm_pattern"].is_string()) || + (test_case.contains("filename") && + test_case["filename"].is_string())); + } + + static std::string describe_config(const json& config) + { + if (config.contains("filename")) { + return config.get(); + } else if (config.contains("stencil")) { std::stringstream ss; - ss << "stencil(" << config["size"].GetInt64() << "," - << config["stencil"].GetString() << "," - << config["comm_pattern"].GetString() << 
")"; + ss << "stencil(" << config["size"].get() << "," + << config["stencil"].get() << "," + << config["comm_pattern"].get() << ")"; return ss.str(); } else { throw std::runtime_error("No known way to describe config."); @@ -205,29 +241,33 @@ struct DistributedDefaultSystemGenerator { } std::shared_ptr generate_matrix_with_optimal_format( - std::shared_ptr exec, rapidjson::Value& config) const + std::shared_ptr exec, json& config) const { auto data = generate_matrix_data(config); return generate_matrix_with_format( - std::move(exec), config["optimal"]["spmv"].GetString(), data); + std::move(exec), config["optimal"]["spmv"].get(), + data); } std::shared_ptr generate_matrix_with_format( std::shared_ptr exec, const std::string& format_name, const gko::matrix_data& data, - rapidjson::Value* spmv_case = nullptr, - rapidjson::MemoryPoolAllocator<>* allocator = nullptr) const + json* spmv_case = nullptr) const { auto part = gko::experimental::distributed:: Partition::build_from_global_size_uniform( exec, comm.size(), static_cast(data.size[0])); auto formats = split(format_name, '-'); + if (formats.size() != 2) { + throw std::runtime_error{"Invalid distributed format specifier " + + format_name}; + } auto local_mat = formats::matrix_type_factory.at(formats[0])(exec); auto non_local_mat = formats::matrix_type_factory.at(formats[1])(exec); auto storage_logger = std::make_shared(); - if (spmv_case && allocator) { + if (spmv_case) { exec->add_logger(storage_logger); } @@ -235,9 +275,9 @@ struct DistributedDefaultSystemGenerator { exec, comm, local_mat, non_local_mat); dist_mat->read_distributed(data, part); - if (spmv_case && allocator) { + if (spmv_case) { exec->remove_logger(storage_logger); - storage_logger->write_data(comm, *spmv_case, *allocator); + storage_logger->write_data(comm, *spmv_case); } return dist_mat; diff --git a/benchmark/utils/iteration_control.hpp b/benchmark/utils/iteration_control.hpp new file mode 100644 index 00000000000..295ae7870d6 --- /dev/null +++ 
b/benchmark/utils/iteration_control.hpp @@ -0,0 +1,326 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ +#define GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ + + +#include + + +#include +#include +#include + + +#include "benchmark/utils/general.hpp" +#include "benchmark/utils/timer.hpp" +#include "benchmark/utils/types.hpp" +#include "core/distributed/helpers.hpp" + + +/** + * A class for controlling the number warmup and timed iterations. + * + * The behavior is determined by the following flags + * - 'repetitions' switch between fixed and adaptive number of iterations + * - 'warmup' warmup iterations, applies in fixed and adaptive case + * - 'min_repetitions' minimal number of repetitions (adaptive case) + * - 'max_repetitions' maximal number of repetitions (adaptive case) + * - 'min_runtime' minimal total runtime (adaptive case) + * - 'repetition_growth_factor' controls the increase between two successive + * timings + * + * Usage: + * `IterationControl` exposes the member functions: + * - `warmup_run()`: controls run defined by `warmup` flag + * - `run(bool)`: controls run defined by all other flags + * - `get_timer()`: access to underlying timer + * The first two methods return an object that is to be used in a range-based + * for loop: + * ``` + * IterationControl ic(get_timer(...)); + * + * // warmup run always uses fixed number of iteration and does not issue + * // timings + * for(auto status: ic.warmup_run()){ + * // execute benchmark + * } + * // run may use adaptive number of iterations (depending on cmd line flag) + * // and issues timing (unless manage_timings is false) + * for(auto status: ic.run(manage_timings [default is true])){ + * if(! manage_timings) ic.get_timer->tic(); + * // execute benchmark + * if(! manage_timings) ic.get_timer->toc(); + * } + * + * ``` + * At the beginning of both methods, the timer is reset. 
+ * The `status` object exposes the member + * - `cur_it`, containing the current iteration number, + * and the methods + * - `is_finished`, checks if the benchmark is finished, + */ +class IterationControl { + using IndexType = unsigned int; //!< to be compatible with GFLAGS type + + class run_control; + +public: + /** + * Creates an `IterationControl` object. + * + * Uses the commandline flags to setup the stopping criteria for the + * warmup and timed run. + * + * @param timer the timer that is to be used for the timings + */ + explicit IterationControl(const std::shared_ptr& timer) + { + status_warmup_ = {TimerManager{timer, false}, FLAGS_warmup, + FLAGS_warmup, 0., 0}; + if (FLAGS_repetitions == "auto") { + status_run_ = {TimerManager{timer, true}, FLAGS_min_repetitions, + FLAGS_max_repetitions, FLAGS_min_runtime}; + } else { + const auto reps = + static_cast(std::stoi(FLAGS_repetitions)); + status_run_ = {TimerManager{timer, true}, reps, reps, 0., 0}; + } + } + + IterationControl() = default; + IterationControl(const IterationControl&) = default; + IterationControl(IterationControl&&) = default; + + /** + * Creates iterable `run_control` object for the warmup run. + * + * This run uses always a fixed number of iterations. + */ + run_control warmup_run() + { + status_warmup_.cur_it = 0; + status_warmup_.managed_timer.clear(); + return run_control{&status_warmup_}; + } + + /** + * Creates iterable `run_control` object for the timed run. + * + * This run may be adaptive, depending on the commandline flags. 
+ * + * @param manage_timings If true, the timer calls (`tic/toc`) are handled + * by the `run_control` object, otherwise they need to be executed outside + */ + run_control run(bool manage_timings = true) + { + status_run_.cur_it = 0; + status_run_.managed_timer.clear(); + status_run_.managed_timer.manage_timings = manage_timings; + return run_control{&status_run_}; + } + + std::shared_ptr get_timer() const + { + return status_run_.managed_timer.timer; + } + + /** + * Compute the time from the given statistical method + * + * @param method the statistical method. If the timer's repetition count + * differs from that of the IterationControl, only "average" + * is supported (total time / IterationControl repetitions). + * + * @return the statistical time + */ + double compute_time(const std::string& method = "average") const + { + if (status_run_.managed_timer.timer->get_num_repetitions() == + this->get_num_repetitions()) { + return status_run_.managed_timer.compute_time(method); + } else { + assert(method == "average"); + return status_run_.managed_timer.get_total_time() / + this->get_num_repetitions(); + } + } + + IndexType get_num_repetitions() const { return status_run_.cur_it; } + +private: + struct TimerManager { + std::shared_ptr timer; + bool manage_timings = false; + + void tic() + { + if (manage_timings) { + timer->tic(); + } + } + void toc(unsigned int num = 1) + { + if (manage_timings) { + timer->toc(num); + } + } + + void clear() { timer->clear(); } + + double get_total_time() const { return timer->get_total_time(); } + + double compute_time(const std::string& method = "average") const + { + return timer->compute_time(method); + } + }; + + /** + * Stores stopping criteria of the adaptive benchmark run as well as the + * current iteration number. 
+ */ + struct status { + TimerManager managed_timer{}; + + IndexType min_it = 0; + IndexType max_it = 0; + double max_runtime = 0.; + + IndexType cur_it = 0; + + /** + * checks if the adaptive run is complete + * + * the adaptive run is complete if: + * - the minimum number of iterations is reached + * - and either: + * - the maximum number of repetitions is reached + * - the total runtime is above the threshold + * + * @return completeness state of the adaptive run + */ + bool is_finished() const + { + return cur_it >= min_it && + (cur_it >= max_it || + managed_timer.get_total_time() >= max_runtime); + } + }; + + /** + * Iterable class managing the benchmark iteration. + * + * Has to be used in a range-based for loop. + */ + struct run_control { + struct iterator { + /** + * Increases the current iteration count and finishes timing if + * necessary. + * + * As `++it` is the last step of a for-loop, the managed_timer is + * stopped, if enough iterations have passed since the last timing. + * The interval between two timings is steadily increased to + * reduce the timing overhead. + */ + iterator operator++() + { + cur_info->cur_it++; + if (cur_info->cur_it >= next_timing && !stopped) { + cur_info->managed_timer.toc( + static_cast(cur_info->cur_it - start_timing)); + stopped = true; + next_timing = static_cast(std::ceil( + next_timing * FLAGS_repetition_growth_factor)); + // If repetition_growth_factor <= 1, next_timing will be + // next iteration. + if (next_timing <= cur_info->cur_it) { + next_timing = cur_info->cur_it + 1; + } + } + return *this; + } + + status operator*() const { return *cur_info; } + + /** + * Checks if the benchmark is finished and handles timing, if + * necessary. + * + * As `begin != end` is the first step in a for-loop, the + * managed_timer is started, if it was previously stopped. + * Additionally, if the benchmark is complete and the managed_timer + * is still running it is stopped. 
(This may occur if the maximal + * number of repetitions is surpassed) + * + * Uses only the information from the `status` object, i.e. + * the right hand side is ignored. + * + * @return true if benchmark is not finished, else false + */ + bool operator!=(const iterator&) + { + const bool is_finished = cur_info->is_finished(); + if (!is_finished && stopped) { + stopped = false; + cur_info->managed_timer.tic(); + start_timing = cur_info->cur_it; + } else if (is_finished && !stopped) { + cur_info->managed_timer.toc( + static_cast(cur_info->cur_it - start_timing)); + stopped = true; + } + return !is_finished; + } + + status* cur_info; + IndexType next_timing = 1; //!< next iteration to stop timing + IndexType start_timing = 0; //!< iteration for starting timing + bool stopped = true; + }; + + iterator begin() const { return iterator{info}; } + + // not used, could potentially be used in c++17 as a sentinel + iterator end() const { return iterator{}; } + + status* info; + }; + + status status_warmup_; + status status_run_; +}; + + +#endif // GKO_BENCHMARK_UTILS_ITERATION_CONTROL_HPP_ diff --git a/benchmark/utils/json.hpp b/benchmark/utils/json.hpp index b0cd384cae5..684db0229aa 100644 --- a/benchmark/utils/json.hpp +++ b/benchmark/utils/json.hpp @@ -34,69 +34,10 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_BENCHMARK_UTILS_JSON_HPP_ -#include +#include -#include - - -#include -#include -#include -#include - - -// helper for setting rapidjson object members -template -std::enable_if_t< - !std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, - Allocator&& allocator) -{ - if (object.HasMember(name)) { - object[name] = std::forward(value); - } else { - auto n = rapidjson::Value(name, allocator); - object.AddMember(n, std::forward(value), allocator); - } -} - - -/** - @internal This is required to fix some MacOS problems (and possibly other - compilers). 
There is no explicit RapidJSON constructor for `std::size_t` so a - conversion to a known constructor is required to solve any ambiguity. See the - last comments of https://github.com/ginkgo-project/ginkgo/issues/270. - */ -template -std::enable_if_t< - std::is_same::type, gko::size_type>::value, void> -add_or_set_member(rapidjson::Value& object, NameType&& name, T&& value, - Allocator&& allocator) -{ - if (object.HasMember(name)) { - object[name] = - std::forward(static_cast(value)); - } else { - auto n = rapidjson::Value(name, allocator); - object.AddMember( - n, std::forward(static_cast(value)), - allocator); - } -} - - -// helper for writing out rapidjson Values -inline std::ostream& operator<<(std::ostream& os, const rapidjson::Value& value) -{ - rapidjson::OStreamWrapper jos(os); - rapidjson::PrettyWriter, - rapidjson::UTF8<>, rapidjson::CrtAllocator, - rapidjson::kWriteNanAndInfFlag> - writer(jos); - value.Accept(writer); - return os; -} +using json = nlohmann::ordered_json; #endif // GKO_BENCHMARK_UTILS_JSON_HPP_ diff --git a/benchmark/utils/loggers.hpp b/benchmark/utils/loggers.hpp index 0f39914c692..15175554fe5 100644 --- a/benchmark/utils/loggers.hpp +++ b/benchmark/utils/loggers.hpp @@ -50,10 +50,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, gko::log::ProfilerHook::NestedSummaryWriter { - JsonSummaryWriter(rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& alloc, - gko::uint32 repetitions) - : object{&object}, alloc{&alloc}, repetitions{repetitions} + JsonSummaryWriter(json& object, gko::uint32 repetitions) + : object{&object}, repetitions{repetitions} {} void write( @@ -62,13 +60,10 @@ struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, { for (const auto& entry : entries) { if (entry.name != "total") { - add_or_set_member(*object, entry.name.c_str(), - entry.exclusive_ns * 1e-9 / repetitions, - *alloc); + (*object)[entry.name] = entry.exclusive_ns * 1e-9 / repetitions; } } - add_or_set_member(*object, "overhead", overhead_ns * 1e-9 / repetitions, - *alloc); + (*object)["overhead"] = overhead_ns * 1e-9 / repetitions; } void write_nested(const gko::log::ProfilerHook::nested_summary_entry& root, @@ -84,33 +79,29 @@ struct JsonSummaryWriter : gko::log::ProfilerHook::SummaryWriter, visit(visit, child, new_prefix); exclusive_ns -= child.elapsed_ns; } - add_or_set_member(*object, (prefix + node.name).c_str(), - exclusive_ns * 1e-9 / repetitions, *alloc); + (*object)[prefix + node.name] = exclusive_ns * 1e-9 / repetitions; }; // we don't need to annotate the total for (const auto& child : root.children) { visit(visit, child, ""); } - add_or_set_member(*object, "overhead", overhead_ns * 1e-9 / repetitions, - *alloc); + (*object)["overhead"] = overhead_ns * 1e-9 / repetitions; } - rapidjson::Value* object; - rapidjson::MemoryPoolAllocator<>* alloc; + json* object; gko::uint32 repetitions; }; inline std::shared_ptr create_operations_logger( - bool nested, rapidjson::Value& object, - rapidjson::MemoryPoolAllocator<>& alloc, gko::uint32 repetitions) + bool nested, json& object, gko::uint32 repetitions) { if (nested) { return gko::log::ProfilerHook::create_nested_summary( - std::make_unique(object, alloc, repetitions)); + 
std::make_unique(object, repetitions)); } else { return gko::log::ProfilerHook::create_summary( - std::make_unique(object, alloc, repetitions)); + std::make_unique(object, repetitions)); } } @@ -131,21 +122,18 @@ struct StorageLogger : gko::log::Logger { storage[location] = 0; } - void write_data(rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) + void write_data(json& output) { const std::lock_guard lock(mutex); gko::size_type total{}; for (const auto& e : storage) { total += e.second; } - add_or_set_member(output, "storage", total, allocator); + output["storage"] = total; } #if GINKGO_BUILD_MPI - void write_data(gko::experimental::mpi::communicator comm, - rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) + void write_data(gko::experimental::mpi::communicator comm, json& output) { const std::lock_guard lock(mutex); gko::size_type total{}; @@ -157,7 +145,7 @@ struct StorageLogger : gko::log::Logger { ? static_cast(MPI_IN_PLACE) : &total, &total, 1, MPI_SUM, 0); - add_or_set_member(output, "storage", total, allocator); + output["storage"] = total; } #endif @@ -188,17 +176,16 @@ struct ResidualLogger : gko::log::Logger { const gko::LinOp* solution, const gko::LinOp* residual_norm, const gko::LinOp* implicit_sq_residual_norm) const override { - timestamps.PushBack(std::chrono::duration( - std::chrono::steady_clock::now() - start) - .count(), - alloc); + timestamps.push_back(std::chrono::duration( + std::chrono::steady_clock::now() - start) + .count()); if (residual_norm) { - rec_res_norms.PushBack( - get_norm(gko::as>(residual_norm)), alloc); + rec_res_norms.push_back( + get_norm(gko::as>(residual_norm))); } else { gko::detail::vector_dispatch( residual, [&](const auto v_residual) { - rec_res_norms.PushBack(compute_norm2(v_residual), alloc); + rec_res_norms.push_back(compute_norm2(v_residual)); }); } if (solution) { @@ -206,32 +193,25 @@ struct ResidualLogger : gko::log::Logger { rc_vtype>(solution, [&](auto v_solution) { 
using concrete_type = std::remove_pointer_t>; - true_res_norms.PushBack( - compute_residual_norm(matrix, gko::as(b), - v_solution), - alloc); + true_res_norms.push_back(compute_residual_norm( + matrix, gko::as(b), v_solution)); }); } else { - true_res_norms.PushBack(-1.0, alloc); + true_res_norms.push_back(-1.0); } if (implicit_sq_residual_norm) { - implicit_res_norms.PushBack( - std::sqrt(get_norm( - gko::as>(implicit_sq_residual_norm))), - alloc); + implicit_res_norms.push_back(std::sqrt( + get_norm(gko::as>(implicit_sq_residual_norm)))); has_implicit_res_norm = true; } else { - implicit_res_norms.PushBack(-1.0, alloc); + implicit_res_norms.push_back(-1.0); } } ResidualLogger(gko::ptr_param matrix, - gko::ptr_param b, - rapidjson::Value& rec_res_norms, - rapidjson::Value& true_res_norms, - rapidjson::Value& implicit_res_norms, - rapidjson::Value& timestamps, - rapidjson::MemoryPoolAllocator<>& alloc) + gko::ptr_param b, json& rec_res_norms, + json& true_res_norms, json& implicit_res_norms, + json& timestamps) : gko::log::Logger(gko::log::Logger::iteration_complete_mask), matrix{matrix.get()}, b{b.get()}, @@ -240,8 +220,7 @@ struct ResidualLogger : gko::log::Logger { true_res_norms{true_res_norms}, has_implicit_res_norm{}, implicit_res_norms{implicit_res_norms}, - timestamps{timestamps}, - alloc{alloc} + timestamps{timestamps} {} bool has_implicit_res_norms() const { return has_implicit_res_norm; } @@ -250,12 +229,11 @@ struct ResidualLogger : gko::log::Logger { const gko::LinOp* matrix; const gko::LinOp* b; std::chrono::steady_clock::time_point start; - rapidjson::Value& rec_res_norms; - rapidjson::Value& true_res_norms; + json& rec_res_norms; + json& true_res_norms; mutable bool has_implicit_res_norm; - rapidjson::Value& implicit_res_norms; - rapidjson::Value& timestamps; - rapidjson::MemoryPoolAllocator<>& alloc; + json& implicit_res_norms; + json& timestamps; }; @@ -273,11 +251,7 @@ struct IterationLogger : gko::log::Logger { : 
gko::log::Logger(gko::log::Logger::iteration_complete_mask) {} - void write_data(rapidjson::Value& output, - rapidjson::MemoryPoolAllocator<>& allocator) - { - add_or_set_member(output, "iterations", this->num_iters, allocator); - } + void write_data(json& output) { output["iterations"] = this->num_iters; } private: mutable gko::size_type num_iters{0}; diff --git a/benchmark/utils/runner.hpp b/benchmark/utils/runner.hpp new file mode 100644 index 00000000000..3520f7299ee --- /dev/null +++ b/benchmark/utils/runner.hpp @@ -0,0 +1,209 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_BENCHMARK_UTILS_RUNNER_HPP_ +#define GKO_BENCHMARK_UTILS_RUNNER_HPP_ + + +#include + + +#include +#include +#include + + +#include "benchmark/utils/general.hpp" + + +std::shared_ptr create_profiler_hook( + std::shared_ptr exec, bool do_print = true) +{ + using gko::log::ProfilerHook; + std::map()>> + hook_map{ + {"none", [] { return std::shared_ptr{}; }}, + {"auto", [&] { return ProfilerHook::create_for_executor(exec); }}, + {"nvtx", [] { return ProfilerHook::create_nvtx(); }}, + {"roctx", [] { return ProfilerHook::create_roctx(); }}, + {"tau", [] { return ProfilerHook::create_tau(); }}, + {"vtune", [] { return ProfilerHook::create_vtune(); }}, + {"debug", [do_print] { + return ProfilerHook::create_custom( + [do_print](const char* name, + gko::log::profile_event_category) { + if (do_print) { + std::clog << "DEBUG: begin " << name << '\n'; + } + }, + [do_print](const char* name, + gko::log::profile_event_category) { + if (do_print) { + std::clog << "DEBUG: end " << name << '\n'; + } + }); + }}}; + return hook_map.at(FLAGS_profiler_hook)(); +} + + +template +struct Benchmark { + /** The name to be used in the JSON output. */ + virtual const std::string& get_name() const = 0; + + /** The operations to loop over for each test case. */ + virtual const std::vector& get_operations() const = 0; + + /** Should we write logging output? 
*/ + virtual bool should_print() const = 0; + + /** Example JSON input */ + virtual std::string get_example_config() const = 0; + + /** Is the input test case in the correct format? */ + virtual bool validate_config(const json& value) const = 0; + + /** Textual representation of the test case for profiler annotation */ + virtual std::string describe_config(const json& test_case) const = 0; + + /** Sets up shared state and test case info */ + virtual State setup(std::shared_ptr exec, + json& test_case) const = 0; + + /** Runs a single operation of the benchmark */ + virtual void run(std::shared_ptr exec, + std::shared_ptr timer, State& state, + const std::string& operation, + json& operation_case) const = 0; + + /** Post-process test case info. */ + virtual void postprocess(json& test_case) const {} +}; + + +template +void run_test_cases(const Benchmark& benchmark, + std::shared_ptr exec, + std::shared_ptr timer, json& test_cases) +{ + if (!test_cases.is_array()) { + if (benchmark.should_print()) { + std::cerr + << "Input has to be a JSON array of benchmark configurations:\n" + << benchmark.get_example_config() << std::endl; + } + std::exit(1); + } + for (const auto& test_case : test_cases) { + if (!test_case.is_object() || !benchmark.validate_config(test_case)) { + if (benchmark.should_print()) { + std::cerr << "Invalid test case:\n" + << std::setw(4) << test_case << "\nInput format:\n" + << benchmark.get_example_config() << std::endl; + } + std::exit(2); + } + } + + auto profiler_hook = create_profiler_hook(exec, benchmark.should_print()); + if (profiler_hook) { + exec->add_logger(profiler_hook); + } + auto annotate = + [profiler_hook](const char* name) -> gko::log::profiling_scope_guard { + if (profiler_hook) { + return profiler_hook->user_range(name); + } + return {}; + }; + + for (auto& test_case : test_cases) { + try { + // set up benchmark + if (!test_case.contains(benchmark.get_name())) { + test_case[benchmark.get_name()] = json::object(); + } + if 
(benchmark.should_print()) { + std::clog << "Running test case\n" + << std::setw(4) << test_case << std::endl; + } + auto test_case_state = benchmark.setup(exec, test_case); + auto test_case_str = benchmark.describe_config(test_case); + auto test_case_range = annotate(test_case_str.c_str()); + auto& benchmark_case = test_case[benchmark.get_name()]; + for (const auto& operation_name : benchmark.get_operations()) { + if (benchmark_case.contains(operation_name) && + !FLAGS_overwrite) { + continue; + } + benchmark_case[operation_name] = json::object(); + if (benchmark.should_print()) { + std::clog << "\tRunning " << benchmark.get_name() << ": " + << operation_name << std::endl; + } + auto& operation_case = benchmark_case[operation_name]; + try { + auto operation_range = annotate(operation_name.c_str()); + benchmark.run(exec, timer, test_case_state, operation_name, + operation_case); + operation_case["completed"] = true; + } catch (const std::exception& e) { + operation_case["completed"] = false; + operation_case["error_type"] = + gko::name_demangling::get_dynamic_type(e); + operation_case["error"] = e.what(); + std::cerr << "Error when processing test case\n" + << std::setw(4) << test_case << "\n" + << "what(): " << e.what() << std::endl; + } + + if (benchmark.should_print()) { + backup_results(test_cases); + } + } + benchmark.postprocess(test_case); + } catch (const std::exception& e) { + std::cerr << "Error setting up benchmark, what(): " << e.what() + << std::endl; + test_case["error_type"] = gko::name_demangling::get_dynamic_type(e); + test_case["error"] = e.what(); + } + } + + if (profiler_hook) { + exec->remove_logger(profiler_hook); + } +} + + +#endif // GKO_BENCHMARK_UTILS_RUNNER_HPP_ diff --git a/benchmark/utils/spmv_validation.hpp b/benchmark/utils/spmv_validation.hpp deleted file mode 100644 index 83ea2085ec2..00000000000 --- a/benchmark/utils/spmv_validation.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************* 
-Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#ifndef GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ -#define GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ - - -#include - - -#include -#include - - -#include - - -std::string example_config = R"( - [ - {"filename": "my_file.mtx"}, - {"filename": "my_file2.mtx"}, - {"size": 100, "stencil": "7pt"}, - ] -)"; - - -/** - * Function which outputs the input format for benchmarks similar to the spmv. 
- */ -[[noreturn]] void print_config_error_and_exit() -{ - std::cerr << "Input has to be a JSON array of matrix configurations:\n" - << example_config << std::endl; - std::exit(1); -} - - -/** - * Validates whether the input format is correct for spmv-like benchmarks. - * - * @param value the JSON value to test. - */ -void validate_option_object(const rapidjson::Value& value) -{ - if (!value.IsObject() || - !((value.HasMember("size") && value.HasMember("stencil") && - value["size"].IsInt64() && value["stencil"].IsString()) || - (value.HasMember("filename") && value["filename"].IsString()))) { - print_config_error_and_exit(); - } -} - - -#endif // GKO_BENCHMARK_UTILS_SPMV_VALIDATION_HPP_ diff --git a/third_party/CMakeLists.txt b/third_party/CMakeLists.txt index c714a51c187..71470d10dc3 100644 --- a/third_party/CMakeLists.txt +++ b/third_party/CMakeLists.txt @@ -18,8 +18,8 @@ if(GINKGO_BUILD_BENCHMARKS) if (NOT gflags_FOUND) add_subdirectory(gflags) endif() - if (NOT RapidJSON_FOUND) - add_subdirectory(rapidjson) + if (NOT nlohmann_json_FOUND) + add_subdirectory(nlohmann_json) endif() endif() diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt new file mode 100644 index 00000000000..77064c66c40 --- /dev/null +++ b/third_party/nlohmann_json/CMakeLists.txt @@ -0,0 +1,9 @@ +message(STATUS "Fetching external nlohmann_json") +include(FetchContent) +FetchContent_Declare( + nlohmann_json + GIT_REPOSITORY https://github.com/nlohmann/json.git + GIT_TAG bc889afb4c5bf1c0d8ee29ef35eaaf4c8bef8a5d +) +set(JSON_BuildTests OFF CACHE INTERNAL "") +FetchContent_MakeAvailable(nlohmann_json) diff --git a/third_party/rapidjson/CMakeLists.txt b/third_party/rapidjson/CMakeLists.txt deleted file mode 100644 index a96b90cb882..00000000000 --- a/third_party/rapidjson/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -message(STATUS "Fetching external RapidJSON") -include(FetchContent) -FetchContent_Declare( - rapidjson - GIT_REPOSITORY 
https://github.com/Tencent/rapidjson.git - GIT_TAG 27c3a8dc0e2c9218fe94986d249a12b5ed838f1d -) -FetchContent_GetProperties(rapidjson) -if(NOT rapidjson_POPULATED) - FetchContent_Populate(rapidjson) -endif() -set(RapidJSON_INCLUDE_DIR "${rapidjson_SOURCE_DIR}/include") -add_library(rapidjson INTERFACE) -set_target_properties(rapidjson PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${RapidJSON_INCLUDE_DIR}")