From e519de1cfe39dbff8267ba40939a51197169571c Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 29 May 2024 15:28:52 +0200
Subject: [PATCH 01/13] osx: try to help cmake find libomp

---
 .github/workflows/osx.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/osx.yml b/.github/workflows/osx.yml
index 958489e09fd..441be04d842 100644
--- a/.github/workflows/osx.yml
+++ b/.github/workflows/osx.yml
@@ -55,7 +55,7 @@ jobs:
         mkdir install
         export INSTALL_PREFIX=`pwd`/install
         cd build
-        cmake .. -DCMAKE_CXX_FLAGS=-Wpedantic -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }}
+        cmake .. -DCMAKE_PREFIX_PATH=/opt/homebrew/opt/libomp/ -DCMAKE_CXX_FLAGS=-Wpedantic -DBUILD_SHARED_LIBS=${{ matrix.config.shared }} -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} -DCMAKE_BUILD_TYPE=${{ matrix.config.build_type }} -DGINKGO_MIXED_PRECISION=${{ matrix.config.mixed }}
         make -j8
         ctest -j10 --output-on-failure
 

From 4f1cd22c5349f0aab34c59cdbbfa5c4f2273dacd Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 29 May 2024 16:00:46 +0200
Subject: [PATCH 02/13] third parties: bump to new releases

---
 third_party/gtest/CMakeLists.txt         | 2 +-
 third_party/nlohmann_json/CMakeLists.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/third_party/gtest/CMakeLists.txt b/third_party/gtest/CMakeLists.txt
index 378a7cdc705..fb0407ba215 100644
--- a/third_party/gtest/CMakeLists.txt
+++ b/third_party/gtest/CMakeLists.txt
@@ -3,7 +3,7 @@ include(FetchContent)
 FetchContent_Declare(
     googletest
     GIT_REPOSITORY https://github.com/google/googletest.git
-    GIT_TAG        release-1.12.1
+    GIT_TAG        v1.14.0
 )
 # need to set the variables in CACHE due to CMP0077
 set(gtest_disable_pthreads ON CACHE INTERNAL "")
diff --git a/third_party/nlohmann_json/CMakeLists.txt b/third_party/nlohmann_json/CMakeLists.txt
index 6f413e458b9..dbc43660bbe 100644
--- a/third_party/nlohmann_json/CMakeLists.txt
+++ b/third_party/nlohmann_json/CMakeLists.txt
@@ -3,7 +3,7 @@ include(FetchContent)
 FetchContent_Declare(
     nlohmann_json
     GIT_REPOSITORY https://github.com/nlohmann/json.git
-    GIT_TAG        v3.9.1
+    GIT_TAG        v3.11.3
 )
 set(JSON_BuildTests OFF CACHE INTERNAL "")
 set(JSON_Install OFF CACHE INTERNAL "")

From b5a12e1c4b8f0632e43a998b0dd36a875df94c96 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 29 May 2024 16:20:25 +0200
Subject: [PATCH 03/13] fix:test::gtest helpers: Pass OpenMP when required

---
 core/test/gtest/CMakeLists.txt | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt
index cdfc67fafdf..824075234a5 100644
--- a/core/test/gtest/CMakeLists.txt
+++ b/core/test/gtest/CMakeLists.txt
@@ -9,6 +9,12 @@ function(add_gtest_main suffix definitions)
         target_compile_definitions(ginkgo_gtest_main_mpi${suffix} PRIVATE ${definitions})
         ginkgo_compile_features(ginkgo_gtest_main_mpi${suffix})
     endif()
+    if (GINKGO_BUILD_OMP)
+	target_link_libraries(ginkgo_gtest_main${suffix} PUBLIC OpenMP::OpenMP_CXX)
+        if (GINKGO_BUILD_MPI)
+	    target_link_libraries(ginkgo_gtest_main_mpi${suffix} PUBLIC OpenMP::OpenMP_CXX)
+        endif()
+    endif()
 endfunction()
 
 add_gtest_main("" "")

From bb006d89ed566670bcbff5f413c7a3be858b7fd9 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 29 May 2024 17:19:54 +0200
Subject: [PATCH 04/13] Fix: Cast according to Issue #1562

Related issue: https://github.com/ginkgo-project/ginkgo/issues/1562
---
 core/reorder/amd.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/core/reorder/amd.cpp b/core/reorder/amd.cpp
index 79b44abf536..1b3198b248f 100644
--- a/core/reorder/amd.cpp
+++ b/core/reorder/amd.cpp
@@ -157,7 +157,8 @@ std::unique_ptr<LinOp> Amd<IndexType>::generate_impl(
     // row workspace
     const auto col_idxs_plus_workspace_size = nnz + nnz / 5 + 2 * num_rows;
     array<IndexType> col_idxs_plus_workspace{
-        host_exec, col_idxs_plus_workspace_size + 6 * num_rows};
+        host_exec,
+        static_cast<size_type>(col_idxs_plus_workspace_size + 6 * num_rows)};
     host_exec->copy_from(exec, nnz, pattern->get_const_col_idxs(),
                          col_idxs_plus_workspace.get_data());
 

From 8ea34a66c7f6ba40ab986f641e3f5b7cc3634f99 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 29 May 2024 19:14:56 +0200
Subject: [PATCH 05/13] benchmark: clarify manual vs helper script

---
 BENCHMARKING.md | 112 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 96 insertions(+), 16 deletions(-)

diff --git a/BENCHMARKING.md b/BENCHMARKING.md
index a26d8572068..ca681d3ed99 100644
--- a/BENCHMARKING.md
+++ b/BENCHMARKING.md
@@ -11,22 +11,25 @@ repository](https://github.com/ginkgo-project/ginkgo-data/). These results can
 also be used for performance comparison in order to ensure that you get similar
 performance as what is published on this repository.
 
-To compile the benchmarks, the flag `-GINKGO_BUILD_BENCHMARKS=ON` has to be set
+To compile the benchmarks, the flag `-DGINKGO_BUILD_BENCHMARKS=ON` has to be set
 during the `cmake` step. In addition, the [`ssget` command-line
 utility](https://github.com/ginkgo-project/ssget) has to be installed on the
 system. The purpose of this file is to explain in detail the capacities of this
 benchmarking suite as well as how to properly setup everything.
 
+There are two ways to benchmark `ginkgo`. When compiling the benchmark suite, executables are generated for collecting matrix statistics, running sparse-matrix vector product, solvers (possibly distributed) benchmarks. Another way to run benchmarks is through the convenience script `run_all_benchmarks.sh`, but not all features are exposed through this tool!
+
 Here is a short description of the content of this file:
 1. Ginkgo setup and best practice guidelines
 2. Installing and using the `ssget` tool to fetch the [SuiteSparse
    matrices](https://sparse.tamu.edu/).
-3. Benchmarking overview and how to run them in a simple way.
-4. How to publish the benchmark results online and use the [Ginkgo Performance
+3. Running benchmarks manually
+4. Benchmarking with the script utility
+5. How to publish the benchmark results online and use the [Ginkgo Performance
    Explorer (GPE)](https://ginkgo-project.github.io/gpe/) for performance
    analysis (optional).
-5. Using the benchmark suite for performance debugging thanks to the loggers.
-6. All available benchmark customization options.
+6. Using the benchmark suite for performance debugging thanks to the loggers.
+7. Available benchmark customization options with the script utility.
 
 
 ### 1: Ginkgo setup and best practice guidelines
@@ -55,13 +58,16 @@ In addition, the following specific options can be considered:
    `overhead` LinOp. If your purpose is to check Ginkgo's overhead, make sure to
    try this mode.
 
+
 ### 2: Using ssget to fetch the matrices
 
-The benchmark suite tests Ginkgo's performance using the [SuiteSparse matrix
-collection](https://sparse.tamu.edu/) and artificially generated matrices. The
-suite sparse collection will be downloaded automatically when the benchmarks are
-run. This is done thanks to the [`ssget` command-line
-utility](https://github.com/ginkgo-project/ssget).
+To benchmark `ginkgo`, matrices need to be provided as input in the `Matrix
+Market` format. A convenient way is to run benchmark with the [SuiteSparse
+matrix collection](https://sparse.tamu.edu/). A helper tool, the [`ssget`
+command-line utility](https://github.com/ginkgo-project/ssget) can be used to
+facilitate downloading and extracting matrices from the suitesparse collection.
+When running the benchmarks with the helper script `run_all_benchmarks.sh` (or
+calling `make benchmark`), the `ssget` tool is required.
 
 To install `ssget`, access the repository and copy the file `ssget` into a
 directory present in your `PATH` variable as per the tool's `README.md`
@@ -107,7 +113,80 @@ for i in $(seq 0 $(ssget -n)); do
 done
 ```
 
-### 3: Benchmarking overview
+### 3: Running benchmarks manually
+When compiling `ginkgo` with the flag `-DGINKGO_BUILD_BENCHMARKS=on`, a suite of
+executables will be generated depending on the `CMake` configuration. These
+executables are the backbone of the benchmarking suite. Note that all of these
+executables describe the available options and the required input format when
+running them with the `--help` option. All executables have multiple variants
+depending on the precision, by default `double` precision is used for the type
+of values, but variants with `single` and `complex` (single and double) value
+types are also available. Here is a non exhaustive list of the available
+benchmarks:
+
++ `blas/blas` : supports benchmarking many of Ginkgo's BLAS operations: dot
+  products, axpy, copy, etc.
++ `conversion/conversion` : conversion between matrix formats.
++ `matrix_generator/matrix_generator` : mostly allows generating block diagonal
+  matrices (to benchmark the block-jacobi preconditioner).
++ `matrix_statistics/matrix_statistics` : computes size and other matrix
+  statistics (such as variance, load imbalance, ...).
++ `preconditioner/preconditioner` : benchmarks most Ginkgo preconditioner.
++ `solver/solver` : benchmark most of Ginkgo's solver.
+  spmv benchmark.
++ `sparse_blas/sparse_blas` : benchmarks Sparse BLAS operations, such as SpGEMM,
+  SpGEAM, transpose.
++ `spmv/spmv` : benchmarks Ginkgo's matrix formats (Sparse-Matrix Vector
+  product).
+
+
+Optionally when compiling with MPI support:
++ `blas/distributed/multi_vector`: measures BLAS performance on (distributed)
+  multi-vectors.
++ `solver/distributed/solver`: distributed solver benchmarks.
++ `spmv/distributed/spmv`: distributed matrix Sparse-Matrix Vector (SpMV)
+  product benchmark.
+
+
+All benchmarks require input data as in a `JSON` format. The json file has to
+consist of exactly one array, and within that array the test cases are defined.
+The exact syntax can change between executables, the `--help` option will
+explain the necessary `JSON` input format. For example for the `spmv` benchmark
+case, and many other benchmarks the following minimal input should be provided:
+
+```
+[
+  {
+    "filename": "path/to/your/matrix",
+    "rhs": "path/to/your/rhs"
+  },
+  { ... }
+]
+```
+The files have to be in matrix market format.
+
+Some benchmarks require some extra fields. For example the solver benchmarks requires the field `"optimal": {"spmv": "matrix format (such as csr)"}`. This is automatically populated when running the `spmv` benchmark which finds the optimal (fastest) format among all requested formats.
+
+After writing the necessary data in a JSON file, the benchmark can be called by
+passing in the input via stdin, i.e.
+
+```
+solver < input.json
+```
+
+The output of our benchmarks is again JSON, and it is printed to stdout, while our status messages are printed to stderr. So, the output can be stored with
+
+```
+solver < input.json > output.json
+```
+
+Note that in most cases, the JSON output by our benchmarks is compatible with
+other benchmarks, therefore it is possible to first call the `spmv` benchmark,
+use the resulting output JSON as input to the `solver` benchmark, and finally
+use the resulting solver JSON output as input to the `preconditioner` benchmark.
+
+
+### 4: Benchmarking overview
 
 The benchmark suite is invoked using the `make benchmark` command in the build
 directory. Under the hood, this command simply calls the script
@@ -169,7 +248,7 @@ benchmark options). Here are the most important options:
     thermal2
     ```
 
-### 4: Publishing the results on Github and analyze the results with the GPE (optional)
+### 5: Publishing the results on Github and analyze the results with the GPE (optional)
 
 The previous experiments generated json files for each matrices, each containing
 timing, iteration count, achieved precision, ... depending on the type of
@@ -223,7 +302,7 @@ For the generating the plots in the GPE, here are the steps to go through:
    tabs allow to access the result of the processed data after invoking the
    processing script.
 
-### 5: Detailed performance analysis and debugging
+### 6: Detailed performance analysis and debugging
 
 Detailed performance analysis can be ran by passing the environment variable
 `DETAILED=1` to the benchmarking script. This detailed run is available for
@@ -238,7 +317,7 @@ location points which can be inspected thanks to the logger. In this fashion, it
 is easy to use these loggers also for tracking memory allocation sizes and other
 important library aspects.
 
-### 6: Available benchmark options
+### 7: Available benchmark options
 
 There are a set amount of options available for benchmarking. Most important
 options can be configured through the benchmarking script itself thanks to
@@ -311,8 +390,9 @@ The supported environment variables are described in the following list:
 * `SOLVERS_INITIAL_GUESS={rhs,0,random}` - the initial guess generation of the
     solvers. `rhs` uses the right-hand side, `0` uses a zero vector and `random`
     generates a random vector as the initial guess.
-* `DETAILED={0,1}` - selects whether detailed benchmarks should be ran for the
-    solver benchmarks, can be either `0` (off) or `1` (on). The default is `0`.
+* `DETAILED={0,1}` - selects whether detailed benchmarks should be ran. This
+  generally provides extra, verbose information at the cost of one or more extra
+  benchmark run. It can be either `0` (off) or `1` (on).
 * `GPU_TIMER={true, false}` - If set to `true`, use the gpu timer, which is
     valid for cuda/hip executor, to measure the timing. Default is `false`.
 * `SOLVERS_JACOBI_MAX_BS` - sets the maximum block size for the Jacobi

From 6aab89d518768685dafb49ff01a876a3c78c51a5 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 29 May 2024 19:33:37 +0200
Subject: [PATCH 06/13] mpi tests: disable test with insufficient procs

---
 cmake/create_test.cmake | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
index 7fdbe7d4e53..0aa93a3b141 100644
--- a/cmake/create_test.cmake
+++ b/cmake/create_test.cmake
@@ -91,13 +91,21 @@ function(ginkgo_add_test test_name test_target_name)
     endif()
     set_target_properties(${test_target_name} PROPERTIES OUTPUT_NAME ${test_binary_name})
     if (add_test_MPI_SIZE)
-        add_test(NAME ${REL_BINARY_DIR}/${test_binary_name}
-                 COMMAND
-                     ${MPIEXEC_EXECUTABLE}
-                     ${MPIEXEC_NUMPROC_FLAG}
-                     ${add_test_MPI_SIZE}
-                     "$<TARGET_FILE:${test_target_name}>"
-                 WORKING_DIRECTORY "$<TARGET_FILE_DIR:ginkgo>")
+        if (add_test_MPI_SIZE LESS_EQUAL MPIEXEC_MAX_NUMPROCS)
+            add_test(NAME ${REL_BINARY_DIR}/${test_binary_name}
+                     COMMAND
+                         ${MPIEXEC_EXECUTABLE}
+                         ${MPIEXEC_NUMPROC_FLAG}
+                         ${add_test_MPI_SIZE}
+                         "$<TARGET_FILE:${test_target_name}>"
+                     WORKING_DIRECTORY "$<TARGET_FILE_DIR:ginkgo>")
+        else()
+            message(
+                    WARNING
+                        "Disabling test: ${test_target_name}. Only ${MPIEXEC_MAX_NUMPROCS} "
+                        "MPI processes available but ${add_test_MPI_SIZE} processes required."
+                   )
+        endif()
     else()
         add_test(NAME ${REL_BINARY_DIR}/${test_binary_name}
                  COMMAND ${test_target_name}

From 567bd8225cf07bd37b82af71d27cd99551009026 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Wed, 29 May 2024 14:55:27 +0200
Subject: [PATCH 07/13] move the config part to another library in msvc/shared
 due to LNK1189

---
 cmake/create_test.cmake |  3 +++
 core/CMakeLists.txt     | 28 +++++++++++++++++++++++-----
 2 files changed, 26 insertions(+), 5 deletions(-)

diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
index 0aa93a3b141..99f9812761a 100644
--- a/cmake/create_test.cmake
+++ b/cmake/create_test.cmake
@@ -41,6 +41,9 @@ function(ginkgo_set_test_target_properties test_target_name test_library_suffix)
     set_target_properties(${test_target_name} PROPERTIES CUDA_STANDARD 14)
     target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR} ${set_properties_ADDITIONAL_INCLUDES})
     target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES})
+    if(MSVC AND BUILD_SHARED_LIBS)
+        target_link_libraries(${test_target_name} PRIVATE ginkgo_config)
+    endif()
 endfunction()
 
 function(ginkgo_add_resource_requirement test_name)
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 0ae740b87cf..9e6e13bf26e 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -1,6 +1,13 @@
 add_subdirectory(device_hooks) # placeholders for disabled modules
 
 add_library(ginkgo "")
+set(config_source 
+    config/factorization_config.cpp
+    config/multigrid_config.cpp
+    config/preconditioner_config.cpp
+    config/registry.cpp
+    config/solver_config.cpp
+)
 target_sources(ginkgo
     PRIVATE
     base/array.cpp
@@ -21,12 +28,7 @@ target_sources(ginkgo
     base/version.cpp
     config/config.cpp
     config/config_helper.cpp
-    config/factorization_config.cpp
-    config/preconditioner_config.cpp
     config/property_tree.cpp
-    config/registry.cpp
-    config/multigrid_config.cpp
-    config/solver_config.cpp
     config/stop_config.cpp
     config/type_descriptor.cpp
     distributed/index_map.cpp
@@ -125,6 +127,19 @@ if(GINKGO_BUILD_MPI)
         distributed/preconditioner/schwarz.cpp)
 endif()
 
+if(MSVC AND BUILD_SHARED_LIBS)
+    add_library(ginkgo_config "")
+    target_sources(ginkgo_config PRIVATE ${config_source})
+    ginkgo_compile_features(ginkgo_config)
+    ginkgo_default_includes(ginkgo_config)
+    ginkgo_install_library(ginkgo_config)
+    if(GINKGO_CHECK_CIRCULAR_DEPS)
+        ginkgo_check_headers(ginkgo_config "")
+    endif()
+else() 
+    target_sources(ginkgo PRIVATE ${config_source})
+endif()
+
 ginkgo_compile_features(ginkgo)
 
 # add a namespace alias so Ginkgo can always be included as Ginkgo::ginkgo
@@ -132,6 +147,9 @@ ginkgo_compile_features(ginkgo)
 add_library(Ginkgo::ginkgo ALIAS ginkgo)
 target_link_libraries(ginkgo
     PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
+if(MSVC AND BUILD_SHARED_LIBS)
+    target_link_libraries(ginkgo_config PUBLIC ginkgo)
+endif()
 if(GINKGO_HAVE_PAPI_SDE)
     target_link_libraries(ginkgo PUBLIC PAPI::PAPI_SDE)
 endif()

From cb12a33327f405a1b2d1b9691b22be3b9263c4e2 Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai" <yhmtsai@gmail.com>
Date: Wed, 29 May 2024 19:44:05 +0200
Subject: [PATCH 08/13] make ginkgo be the major library

---
 cmake/create_test.cmake |  3 ---
 core/CMakeLists.txt     | 59 ++++++++++++++++++++++++-----------------
 2 files changed, 34 insertions(+), 28 deletions(-)

diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake
index 99f9812761a..0aa93a3b141 100644
--- a/cmake/create_test.cmake
+++ b/cmake/create_test.cmake
@@ -41,9 +41,6 @@ function(ginkgo_set_test_target_properties test_target_name test_library_suffix)
     set_target_properties(${test_target_name} PROPERTIES CUDA_STANDARD 14)
     target_include_directories(${test_target_name} PRIVATE ${Ginkgo_BINARY_DIR} ${set_properties_ADDITIONAL_INCLUDES})
     target_link_libraries(${test_target_name} PRIVATE ginkgo GTest::GTest ${set_properties_ADDITIONAL_LIBRARIES})
-    if(MSVC AND BUILD_SHARED_LIBS)
-        target_link_libraries(${test_target_name} PRIVATE ginkgo_config)
-    endif()
 endfunction()
 
 function(ginkgo_add_resource_requirement test_name)
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 9e6e13bf26e..14ae6ce6592 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -1,6 +1,5 @@
 add_subdirectory(device_hooks) # placeholders for disabled modules
 
-add_library(ginkgo "")
 set(config_source 
     config/factorization_config.cpp
     config/multigrid_config.cpp
@@ -8,7 +7,16 @@ set(config_source
     config/registry.cpp
     config/solver_config.cpp
 )
-target_sources(ginkgo
+# MSVC: to work around LNK1189, the library is split into two parts.
+# To keep ginkgo as the major library, the original library becomes ginkgo_core in MSVC/shared builds.
+# TODO: consider another way to solve this, such as dllexport or a def file
+set(ginkgo_core "ginkgo")
+if(MSVC AND BUILD_SHARED_LIBS)
+    set(ginkgo_core "ginkgo_core")
+endif()
+
+add_library(${ginkgo_core} "")
+target_sources(${ginkgo_core}
     PRIVATE
     base/array.cpp
     base/batch_multi_vector.cpp
@@ -110,15 +118,15 @@ target_sources(ginkgo
     )
 
 if(GINKGO_HAVE_PAPI_SDE)
-    target_sources(ginkgo PRIVATE log/papi.cpp)
+    target_sources(${ginkgo_core} PRIVATE log/papi.cpp)
 endif()
 
 if(GINKGO_HAVE_METIS)
-    target_sources(ginkgo PRIVATE reorder/nested_dissection.cpp)
+    target_sources(${ginkgo_core} PRIVATE reorder/nested_dissection.cpp)
 endif()
 
 if(GINKGO_BUILD_MPI)
-    target_sources(ginkgo
+    target_sources(${ginkgo_core}
         PRIVATE
         mpi/exception.cpp
         distributed/matrix.cpp
@@ -127,54 +135,55 @@ if(GINKGO_BUILD_MPI)
         distributed/preconditioner/schwarz.cpp)
 endif()
 
+# MSVC/shared: make ginkgo be the major library
 if(MSVC AND BUILD_SHARED_LIBS)
-    add_library(ginkgo_config "")
-    target_sources(ginkgo_config PRIVATE ${config_source})
-    ginkgo_compile_features(ginkgo_config)
-    ginkgo_default_includes(ginkgo_config)
-    ginkgo_install_library(ginkgo_config)
+    add_library(ginkgo "")
+    target_sources(ginkgo PRIVATE ${config_source})
+    ginkgo_compile_features(ginkgo)
+    ginkgo_default_includes(ginkgo)
+    ginkgo_install_library(ginkgo)
     if(GINKGO_CHECK_CIRCULAR_DEPS)
-        ginkgo_check_headers(ginkgo_config "")
+        ginkgo_check_headers(ginkgo "")
     endif()
 else() 
-    target_sources(ginkgo PRIVATE ${config_source})
+    target_sources(${ginkgo_core} PRIVATE ${config_source})
 endif()
 
-ginkgo_compile_features(ginkgo)
+ginkgo_compile_features(${ginkgo_core})
 
 # add a namespace alias so Ginkgo can always be included as Ginkgo::ginkgo
 # regardless of whether it is installed or added as a subdirectory
 add_library(Ginkgo::ginkgo ALIAS ginkgo)
-target_link_libraries(ginkgo
-    PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
 if(MSVC AND BUILD_SHARED_LIBS)
-    target_link_libraries(ginkgo_config PUBLIC ginkgo)
+    target_link_libraries(ginkgo PUBLIC ${ginkgo_core})
 endif()
+target_link_libraries(${ginkgo_core}
+    PUBLIC ginkgo_device ginkgo_omp ginkgo_cuda ginkgo_reference ginkgo_hip ginkgo_dpcpp)
 if(GINKGO_HAVE_PAPI_SDE)
-    target_link_libraries(ginkgo PUBLIC PAPI::PAPI_SDE)
+    target_link_libraries(${ginkgo_core} PUBLIC PAPI::PAPI_SDE)
 endif()
 
 if(GINKGO_HAVE_TAU)
-    target_link_libraries(ginkgo PRIVATE perfstubs)
+    target_link_libraries(${ginkgo_core} PRIVATE perfstubs)
 endif()
 
 if(GINKGO_HAVE_VTUNE)
-    target_link_libraries(ginkgo PRIVATE VTune::ITT)
+    target_link_libraries(${ginkgo_core} PRIVATE VTune::ITT)
 endif()
 
 if(GINKGO_HAVE_METIS)
-    target_link_libraries(ginkgo PRIVATE METIS::METIS)
+    target_link_libraries(${ginkgo_core} PRIVATE METIS::METIS)
 endif()
 
 if(GINKGO_BUILD_MPI)
-    target_link_libraries(ginkgo PUBLIC MPI::MPI_CXX)
+    target_link_libraries(${ginkgo_core} PUBLIC MPI::MPI_CXX)
 endif()
 
-ginkgo_default_includes(ginkgo)
-ginkgo_install_library(ginkgo)
+ginkgo_default_includes(${ginkgo_core})
+ginkgo_install_library(${ginkgo_core})
 
 if(GINKGO_CHECK_CIRCULAR_DEPS)
-    ginkgo_check_headers(ginkgo "")
+    ginkgo_check_headers(${ginkgo_core} "")
 endif()
 
 if(GINKGO_BUILD_TESTS)
@@ -183,7 +192,7 @@ endif()
 
 if(GINKGO_DEVEL_TOOLS AND CMAKE_SYSTEM_NAME STREQUAL "Linux" AND BUILD_SHARED_LIBS)
     # Copy pretty-printer next to library
-    add_custom_command(TARGET ginkgo POST_BUILD
+    add_custom_command(TARGET ${ginkgo_core} POST_BUILD
         COMMAND "${CMAKE_COMMAND}" -E copy
         "${Ginkgo_SOURCE_DIR}/dev_tools/scripts/gdb-ginkgo.py"
         "$<TARGET_FILE:ginkgo>-gdb.py"

From 4655295c554f7861f1af06a79451c1fa04f20adc Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Tue, 4 Jun 2024 11:28:47 +0200
Subject: [PATCH 09/13] ci[msvc,cuda]: try with windows 2019

---
 .github/workflows/windows-msvc-cuda.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/windows-msvc-cuda.yml b/.github/workflows/windows-msvc-cuda.yml
index 1bf6a7bee85..b1df1aaf4ed 100644
--- a/.github/workflows/windows-msvc-cuda.yml
+++ b/.github/workflows/windows-msvc-cuda.yml
@@ -29,7 +29,7 @@ jobs:
         config:
         - {version: "latest", name: "cuda-latest/release/shared", "mixed": "ON"}
     name: msvc/${{ matrix.config.name }} (only compile)
-    runs-on: [windows-latest]
+    runs-on: [windows-2019]
 
     steps:
     - name: Checkout the latest code (shallow clone)

From 06868cb6e85f8129a3126cd6f45fadb9fb698dee Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Tue, 4 Jun 2024 16:43:33 +0200
Subject: [PATCH 10/13] ci: workaround sonarqube issues on horeka

See failure:
https://gitlab.com/ginkgo-project/ginkgo-public-ci/-/jobs/6963993256#L654
and many others.

It seems that HoreKa and NodeJS do not go well together.
---
 .gitlab-ci.yml | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index a812ad44f80..7d2612635cd 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -699,6 +699,11 @@ sonarqube_cov_:
     - .quick_test_short_lived_condition
     - .before_script_template
     - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019
+  tags:
+    - private_ci
+    - controller
+    - cpu
+    - nla-gpu
   script:
     - PR_ID=$(curl -s "https://api.github.com/search/issues?q=sha:${CI_COMMIT_SHA}"
       | jq '.items[0].number')
@@ -730,6 +735,11 @@ sonarqube_cov:
     - .deploy_condition
     - .before_script_template
     - .use_gko-cuda101-openmpi-gnu8-llvm7-intel2019
+  tags:
+    - private_ci
+    - controller
+    - cpu
+    - nla-gpu
   script:
     - ctest -S cmake/CTestScript.cmake -DCTEST_BUILD_CONFIGURATION=COVERAGE
       -DGINKGO_SONARQUBE_TEST=ON

From 08818f6b5377db81500f5bf8b94b541acd5cd563 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 5 Jun 2024 12:46:47 +0200
Subject: [PATCH 11/13] ci: fix the warnings pipeline build error

---
 .gitlab-ci.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 7d2612635cd..f3cecee4b71 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -633,7 +633,7 @@ warnings:
   variables:
     BUILD_OMP: "ON"
     BUILD_CUDA: "ON"
-    CXX_FLAGS: "-Werror=pedantic;-pedantic-errors"
+    CXX_FLAGS: "-Werror=pedantic -pedantic-errors"
   allow_failure: yes
 
 # Ensure kernel modules do not depend on core

From fd5cc1a659432748d403d10b13134d3d7c87d471 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 5 Jun 2024 14:34:57 +0200
Subject: [PATCH 12/13] Review updates.

Co-authored-by: Yu-Hsiang Tsai <yhmtsai@gmail.com>
Co-authored-by: Marcel Koch <marcel.koch@kit.edu>
---
 BENCHMARKING.md                | 40 +++++++++++++++++-----------------
 core/test/gtest/CMakeLists.txt |  4 ++--
 2 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/BENCHMARKING.md b/BENCHMARKING.md
index ca681d3ed99..44c7192ca3f 100644
--- a/BENCHMARKING.md
+++ b/BENCHMARKING.md
@@ -124,28 +124,28 @@ of values, but variants with `single` and `complex` (single and double) value
 types are also available. Here is a non exhaustive list of the available
 benchmarks:
 
-+ `blas/blas` : supports benchmarking many of Ginkgo's BLAS operations: dot
-  products, axpy, copy, etc.
-+ `conversion/conversion` : conversion between matrix formats.
-+ `matrix_generator/matrix_generator` : mostly allows generating block diagonal
-  matrices (to benchmark the block-jacobi preconditioner).
-+ `matrix_statistics/matrix_statistics` : computes size and other matrix
-  statistics (such as variance, load imbalance, ...).
-+ `preconditioner/preconditioner` : benchmarks most Ginkgo preconditioner.
-+ `solver/solver` : benchmark most of Ginkgo's solver.
-  spmv benchmark.
-+ `sparse_blas/sparse_blas` : benchmarks Sparse BLAS operations, such as SpGEMM,
-  SpGEAM, transpose.
-+ `spmv/spmv` : benchmarks Ginkgo's matrix formats (Sparse-Matrix Vector
-  product).
++ `blas/blas`: supports benchmarking many of Ginkgo's BLAS operations: dot
+    products, axpy, copy, etc.
++ `conversion/conversion`: conversion between matrix formats.
++ `matrix_generator/matrix_generator`: mostly allows generating block diagonal
+    matrices (to benchmark the block-jacobi preconditioner).
++ `matrix_statistics/matrix_statistics`: computes size and other matrix
+    statistics (such as variance, load imbalance, ...).
++ `preconditioner/preconditioner`: benchmarks most Ginkgo preconditioner.
++ `solver/solver`: benchmark most of Ginkgo's solvers in a non distributed
+    setting.
++ `sparse_blas/sparse_blas`: benchmarks Sparse BLAS operations, such as SpGEMM,
+    SpGEAM, transpose.
++ `spmv/spmv`: benchmarks Ginkgo's matrix formats (Sparse-Matrix Vector
+    product).
 
 
 Optionally when compiling with MPI support:
 + `blas/distributed/multi_vector`: measures BLAS performance on (distributed)
-  multi-vectors.
+    multi-vectors.
 + `solver/distributed/solver`: distributed solver benchmarks.
 + `spmv/distributed/spmv`: distributed matrix Sparse-Matrix Vector (SpMV)
-  product benchmark.
+    product benchmark.
 
 
 All benchmarks require input data as in a `JSON` format. The json file has to
@@ -171,13 +171,13 @@ After writing the necessary data in a JSON file, the benchmark can be called by
 passing in the input via stdin, i.e.
 
 ```
-solver < input.json
+./solver < input.json
 ```
 
 The output of our benchmarks is again JSON, and it is printed to stdout, while our status messages are printed to stderr. So, the output can be stored with
 
 ```
-solver < input.json > output.json
+./solver < input.json > output.json
 ```
 
 Note that in most cases, the JSON output by our benchmarks is compatible with
@@ -391,8 +391,8 @@ The supported environment variables are described in the following list:
     solvers. `rhs` uses the right-hand side, `0` uses a zero vector and `random`
     generates a random vector as the initial guess.
 * `DETAILED={0,1}` - selects whether detailed benchmarks should be ran. This
-  generally provides extra, verbose information at the cost of one or more extra
-  benchmark run. It can be either `0` (off) or `1` (on).
+    generally provides extra, verbose information at the cost of one or more
+    extra benchmark runs. It can be either `0` (off) or `1` (on).
 * `GPU_TIMER={true, false}` - If set to `true`, use the gpu timer, which is
     valid for cuda/hip executor, to measure the timing. Default is `false`.
 * `SOLVERS_JACOBI_MAX_BS` - sets the maximum block size for the Jacobi
diff --git a/core/test/gtest/CMakeLists.txt b/core/test/gtest/CMakeLists.txt
index 824075234a5..56f83181375 100644
--- a/core/test/gtest/CMakeLists.txt
+++ b/core/test/gtest/CMakeLists.txt
@@ -10,9 +10,9 @@ function(add_gtest_main suffix definitions)
         ginkgo_compile_features(ginkgo_gtest_main_mpi${suffix})
     endif()
     if (GINKGO_BUILD_OMP)
-	target_link_libraries(ginkgo_gtest_main${suffix} PUBLIC OpenMP::OpenMP_CXX)
+        target_link_libraries(ginkgo_gtest_main${suffix} PUBLIC OpenMP::OpenMP_CXX)
         if (GINKGO_BUILD_MPI)
-	    target_link_libraries(ginkgo_gtest_main_mpi${suffix} PUBLIC OpenMP::OpenMP_CXX)
+            target_link_libraries(ginkgo_gtest_main_mpi${suffix} PUBLIC OpenMP::OpenMP_CXX)
         endif()
     endif()
 endfunction()

From 43189babcfad9f8a272c61ea54c5e3b03cd1f202 Mon Sep 17 00:00:00 2001
From: Terry Cojean <terry.cojean@kit.edu>
Date: Wed, 5 Jun 2024 17:19:10 +0200
Subject: [PATCH 13/13] Review update: review style of `Benchmarking.md`
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Thomas Grützmacher <thomas.gruetzmacher@kit.edu>
---
 BENCHMARKING.md | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/BENCHMARKING.md b/BENCHMARKING.md
index 44c7192ca3f..c93704532b5 100644
--- a/BENCHMARKING.md
+++ b/BENCHMARKING.md
@@ -17,7 +17,11 @@ utility](https://github.com/ginkgo-project/ssget) has to be installed on the
 system. The purpose of this file is to explain in detail the capacities of this
 benchmarking suite as well as how to properly setup everything.
 
-There are two ways to benchmark `ginkgo`. When compiling the benchmark suite, executables are generated for collecting matrix statistics, running sparse-matrix vector product, solvers (possibly distributed) benchmarks. Another way to run benchmarks is through the convenience script `run_all_benchmarks.sh`, but not all features are exposed through this tool!
+There are two ways to benchmark Ginkgo. When compiling the benchmark suite,
+executables are generated for collecting matrix statistics and for running
+sparse-matrix vector product and (possibly distributed) solver benchmarks.
+Another way to run benchmarks is through the convenience script
+`run_all_benchmarks.sh`, but not all features are exposed through this tool!
 
 Here is a short description of the content of this file:
 1. Ginkgo setup and best practice guidelines
@@ -114,8 +118,8 @@ done
 ```
 
 ### 3: Running benchmarks manually
-When compiling `ginkgo` with the flag `-DGINKGO_BUILD_BENCHMARKS=on`, a suite of
-executables will be generated depending on the `CMake` configuration. These
+When compiling Ginkgo with the flag `-DGINKGO_BUILD_BENCHMARKS=ON`, a suite of
+executables will be generated depending on the CMake configuration. These
 executables are the backbone of the benchmarking suite. Note that all of these
 executables describe the available options and the required input format when
 running them with the `--help` option. All executables have multiple variants
@@ -165,7 +169,10 @@ case, and many other benchmarks the following minimal input should be provided:
 ```
 The files have to be in matrix market format.
 
-Some benchmarks require some extra fields. For example the solver benchmarks requires the field `"optimal": {"spmv": "matrix format (such as csr)"}`. This is automatically populated when running the `spmv` benchmark which finds the optimal (fastest) format among all requested formats.
+Some benchmarks require some extra fields. For example, the solver benchmarks
+require the field `"optimal": {"spmv": "matrix format (such as csr)"}`. This is
+automatically populated when running the `spmv` benchmark, which finds the
+optimal (fastest) format among all requested formats.
 
 After writing the necessary data in a JSON file, the benchmark can be called by
 passing in the input via stdin, i.e.
@@ -174,7 +181,8 @@ passing in the input via stdin, i.e.
 ./solver < input.json
 ```
 
-The output of our benchmarks is again JSON, and it is printed to stdout, while our status messages are printed to stderr. So, the output can be stored with
+The output of our benchmarks is again JSON, and it is printed to stdout, while
+our status messages are printed to stderr. So, the output can be stored with
 
 ```
 ./solver < input.json > output.json
@@ -312,10 +320,10 @@ log the time taken by all operations. These features are also available in the
 to analyze Ginkgo's performance.
 
 These features are implemented thanks to the loggers located in the file
-`${ginkgo_src_dir}/benchmark/utils/loggers.hpp`. Ginkgo possesses hooks at all important code
-location points which can be inspected thanks to the logger. In this fashion, it
-is easy to use these loggers also for tracking memory allocation sizes and other
-important library aspects.
+`${ginkgo_src_dir}/benchmark/utils/loggers.hpp`. Ginkgo possesses hooks at all
+important code locations which can be inspected thanks to the logger. In
+this fashion, it is easy to use these loggers also for tracking memory
+allocation sizes and other important library aspects.
 
 ### 7: Available benchmark options