Release/1.2.0 #576

Merged: 7 commits, Jul 6, 2020
94 changes: 94 additions & 0 deletions CITING.md
@@ -0,0 +1,94 @@
# Citing Ginkgo {#citing_ginkgo}
Member:

Is there any usage of the {#citing_ginkgo} anchor?

Member Author:

Yes, see doc/DoxygenLayout.xml
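
For reference, the entry that consumes this anchor is the one added to doc/DoxygenLayout.xml later in this PR:

``` xml
<tab type="user" visible="yes" title="Citing Ginkgo" url="@ref citing_ginkgo" />
```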


The main Ginkgo paper describing Ginkgo's purpose, design and interface is
available through the following reference:

``` bibtex
@misc{anzt2020ginkgo,
title={Ginkgo: A Modern Linear Operator Algebra Framework for High Performance Computing},
author={Hartwig Anzt and Terry Cojean and Goran Flegar and Fritz Göbel and Thomas Grützmacher and Pratik Nayak and Tobias Ribizel and Yuhsiang Mike Tsai and Enrique S. Quintana-Ortí},
year={2020},
eprint={2006.16852},
archivePrefix={arXiv},
primaryClass={cs.MS}
}
```

Multiple topical papers exist on Ginkgo and its algorithms. The following papers
can be used to cite specific aspects of the Ginkgo project.

### On Portability

``` bibtex
@misc{tsai2020amdportability,
title={Preparing Ginkgo for AMD GPUs -- A Testimonial on Porting CUDA Code to HIP},
author={Yuhsiang M. Tsai and Terry Cojean and Tobias Ribizel and Hartwig Anzt},
year={2020},
eprint={2006.14290},
archivePrefix={arXiv},
primaryClass={cs.MS}
}
```

### On Software Sustainability

``` bibtex
@inproceedings{anzt2019pasccb,
author = {Anzt, Hartwig and Chen, Yen-Chen and Cojean, Terry and Dongarra, Jack and Flegar, Goran and Nayak, Pratik and Quintana-Ort\'{\i}, Enrique S. and Tsai, Yuhsiang M. and Wang, Weichung},
title = {Towards Continuous Benchmarking: An Automated Performance Evaluation Framework for High Performance Software},
year = {2019},
isbn = {9781450367707},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
url = {https://doi.org/10.1145/3324989.3325719},
doi = {10.1145/3324989.3325719},
booktitle = {Proceedings of the Platform for Advanced Scientific Computing Conference},
articleno = {9},
numpages = {11},
keywords = {interactive performance visualization, healthy software lifecycle, continuous integration, automated performance benchmarking},
location = {Zurich, Switzerland},
series = {PASC ’19}
}
```

### On SpMV performance

``` bibtex
@InProceedings{tsai2020amdspmv,
author="Tsai, Yuhsiang M.
and Cojean, Terry
and Anzt, Hartwig",
editor="Sadayappan, Ponnuswamy
and Chamberlain, Bradford L.
and Juckeland, Guido
and Ltaief, Hatem",
title="Sparse Linear Algebra on AMD and NVIDIA GPUs -- The Race Is On",
booktitle="High Performance Computing",
year="2020",
publisher="Springer International Publishing",
address="Cham",
pages="309--327",
abstract="Efficiently processing sparse matrices is a central and performance-critical part of many scientific simulation codes. Recognizing the adoption of manycore accelerators in HPC, we evaluate in this paper the performance of the currently best sparse matrix-vector product (SpMV) implementations on high-end GPUs from AMD and NVIDIA. Specifically, we optimize SpMV kernels for the CSR, COO, ELL, and HYB format taking the hardware characteristics of the latest GPU technologies into account. We compare for 2,800 test matrices the performance of our kernels against AMD's hipSPARSE library and NVIDIA's cuSPARSE library, and ultimately assess how the GPU technologies from AMD and NVIDIA compare in terms of SpMV performance.",
isbn="978-3-030-50743-5"
}


@article{anzt2020spmv,
author = {Anzt, Hartwig and Cojean, Terry and Yen-Chen, Chen and Dongarra, Jack and Flegar, Goran and Nayak, Pratik and Tomov, Stanimire and Tsai, Yuhsiang M. and Wang, Weichung},
title = {Load-Balancing Sparse Matrix Vector Product Kernels on GPUs},
year = {2020},
issue_date = {March 2020},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
volume = {7},
number = {1},
issn = {2329-4949},
url = {https://doi.org/10.1145/3380930},
doi = {10.1145/3380930},
journal = {ACM Trans. Parallel Comput.},
month = mar,
articleno = {2},
numpages = {26},
keywords = {irregular matrices, GPUs, Sparse Matrix Vector Product (SpMV)}
}
```
2 changes: 1 addition & 1 deletion CMakeLists.txt
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.9)

project(Ginkgo LANGUAGES C CXX VERSION 1.1.1 DESCRIPTION "A numerical linear algebra library targeting many-core architectures")
project(Ginkgo LANGUAGES C CXX VERSION 1.2.0 DESCRIPTION "A numerical linear algebra library targeting many-core architectures")
set(Ginkgo_VERSION_TAG "develop")
set(PROJECT_VERSION_TAG ${Ginkgo_VERSION_TAG})

38 changes: 29 additions & 9 deletions README.md
@@ -37,7 +37,7 @@ For Ginkgo core library:

* _cmake 3.9+_
* C++11 compliant compiler, one of:
* _gcc 5.3+, 6.3+, 7.3+, 8.1+_
* _gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_
* _clang 3.9+_
* _Intel compiler 2017+_
* _Apple LLVM 8.0+_ (__TODO__: verify)
@@ -50,6 +50,7 @@ The Ginkgo CUDA module has the following __additional__ requirements:
[CUDA installation guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html)
or [CUDA installation guide for Mac Os X](https://docs.nvidia.com/cuda/cuda-installation-guide-mac-os-x/index.html)


In addition, if you want to contribute code to Ginkgo, you will also need the
following:

@@ -59,6 +60,7 @@ following:

The Ginkgo HIP module has the following __additional__ requirements:

* _ROCm 2.8+_
* the HIP, hipBLAS and hipSPARSE packages compiled with either:
* _AMD_ backend
* _CUDA 9.0+_ backend. When using CUDA 10+, _cmake 3.12.2+_ is required.
@@ -68,8 +70,8 @@ The Ginkgo HIP module has the following __additional__ requirements:
The prerequisite needs to be verified
* _cmake 3.9+_
* C++11 compliant 64-bits compiler:
* _MinGW : gcc 5.3+, 6.3+, 7.3+, 8.1+_
* _Cygwin : gcc 5.3+, 6.3+, 7.3+, 8.1+_
* _MinGW : gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_
* _Cygwin : gcc 5.3+, 6.3+, 7.3+, all versions after 8.1+_
* _Microsoft Visual Studio : VS 2017 15.7+_

__NOTE:__ Need to add `--autocrlf=input` after `git clone` in _Cygwin_.
@@ -140,12 +142,10 @@ Name Surname <email@domain> Institution(s)

#### Contributing guidelines

Contributing guidelines can be accessed in our Wiki under the [Developer's
Homepage](https://github.com/ginkgo-project/ginkgo/wiki/Developers-Homepage).
This page also contains other information useful to developers, such as writing
proper commit messages, understanding Ginkgo's library design, relevant C++
information, and more. In general, always refer to this page for developer
information.
Contributing guidelines can be accessed in the [CONTRIBUTING.md
page](./CONTRIBUTING.md). This page also contains other information useful to
developers, such as writing proper commit messages, understanding Ginkgo's
library design, relevant C++ information, and more.

### Support
If you have any questions, bugs to report, or would like to propose a new feature,
@@ -164,3 +164,23 @@ Depending on the configuration options used when building Ginkgo, third party
software may be pulled as additional dependencies, which have their own
licensing conditions. Refer to [ABOUT-LICENSING.md](ABOUT-LICENSING.md) for
details.

Citing Ginkgo
-------------

The main Ginkgo paper describing Ginkgo's purpose, design and interface is
available through the following reference:

``` bibtex
@misc{anzt2020ginkgo,
title={Ginkgo: A Modern Linear Operator Algebra Framework for High Performance Computing},
author={Hartwig Anzt and Terry Cojean and Goran Flegar and Fritz Göbel and Thomas Grützmacher and Pratik Nayak and Tobias Ribizel and Yuhsiang Mike Tsai and Enrique S. Quintana-Ortí},
year={2020},
eprint={2006.16852},
archivePrefix={arXiv},
primaryClass={cs.MS}
}
```

For more information on topical subjects, please refer to the [CITING.md
page](CITING.md).
3 changes: 2 additions & 1 deletion benchmark/solver/solver.cpp
@@ -91,7 +91,8 @@ DEFINE_bool(overhead, false,


// input validation
[[noreturn]] void print_config_error_and_exit() {
[[noreturn]] void print_config_error_and_exit()
{
std::cerr << "Input has to be a JSON array of matrix configurations:\n"
<< " [\n"
<< " { \"filename\": \"my_file.mtx\", \"optimal\": { "
16 changes: 8 additions & 8 deletions cuda/factorization/par_ilut_spgeam_kernel.cu
@@ -159,14 +159,14 @@ void add_candidates(std::shared_ptr<const DefaultExecutor> exec,
auto total_nnz =
lu->get_num_stored_elements() + a->get_num_stored_elements();
auto total_nnz_per_row = total_nnz / num_rows;
select_add_candidates(compiled_kernels(),
[&](int compiled_subwarp_size) {
return total_nnz_per_row <=
compiled_subwarp_size ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec, lu,
a, l, u, l_new, u_new);
select_add_candidates(
compiled_kernels(),
[&](int compiled_subwarp_size) {
return total_nnz_per_row <= compiled_subwarp_size ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec, lu, a, l, u, l_new,
u_new);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
21 changes: 11 additions & 10 deletions cuda/matrix/csr_kernels.cu
@@ -635,16 +635,17 @@ void spgeam(std::shared_ptr<const DefaultExecutor> exec,
auto total_nnz =
a->get_num_stored_elements() + b->get_num_stored_elements();
auto nnz_per_row = total_nnz / a->get_size()[0];
select_spgeam(spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), a->get_const_row_ptrs(),
a->get_const_col_idxs(), a->get_const_values(),
beta->get_const_values(), b->get_const_row_ptrs(),
b->get_const_col_idxs(), b->get_const_values(), c);
select_spgeam(
spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), a->get_const_row_ptrs(),
a->get_const_col_idxs(), a->get_const_values(),
beta->get_const_values(), b->get_const_row_ptrs(),
b->get_const_col_idxs(), b->get_const_values(), c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);
18 changes: 9 additions & 9 deletions cuda/preconditioner/jacobi_generate_kernel.cu
@@ -124,15 +124,15 @@ void generate(std::shared_ptr<const CudaExecutor> exec,
{
components::fill_array(exec, blocks.get_data(), blocks.get_num_elems(),
zero<ValueType>());
select_generate(compiled_kernels(),
[&](int compiled_block_size) {
return max_block_size <= compiled_block_size;
},
syn::value_list<int, config::min_warps_per_block>(),
syn::type_list<>(), system_matrix, accuracy,
blocks.get_data(), storage_scheme, conditioning.get_data(),
block_precisions.get_data(),
block_pointers.get_const_data(), num_blocks);
select_generate(
compiled_kernels(),
[&](int compiled_block_size) {
return max_block_size <= compiled_block_size;
},
syn::value_list<int, config::min_warps_per_block>(), syn::type_list<>(),
system_matrix, accuracy, blocks.get_data(), storage_scheme,
conditioning.get_data(), block_precisions.get_data(),
block_pointers.get_const_data(), num_blocks);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
20 changes: 10 additions & 10 deletions cuda/preconditioner/jacobi_simple_apply_kernel.cu
@@ -115,16 +115,16 @@ void simple_apply(
{
// TODO: write a special kernel for multiple RHS
for (size_type col = 0; col < b->get_size()[1]; ++col) {
select_apply(compiled_kernels(),
[&](int compiled_block_size) {
return max_block_size <= compiled_block_size;
},
syn::value_list<int, config::min_warps_per_block>(),
syn::type_list<>(), num_blocks,
block_precisions.get_const_data(),
block_pointers.get_const_data(), blocks.get_const_data(),
storage_scheme, b->get_const_values() + col,
b->get_stride(), x->get_values() + col, x->get_stride());
select_apply(
compiled_kernels(),
[&](int compiled_block_size) {
return max_block_size <= compiled_block_size;
},
syn::value_list<int, config::min_warps_per_block>(),
syn::type_list<>(), num_blocks, block_precisions.get_const_data(),
block_pointers.get_const_data(), blocks.get_const_data(),
storage_scheme, b->get_const_values() + col, b->get_stride(),
x->get_values() + col, x->get_stride());
}
}

3 changes: 2 additions & 1 deletion doc/DoxygenLayout.xml
@@ -5,7 +5,8 @@
<tab type="mainpage" visible="yes" title=""/>
<tab type="user" visible="yes" title="Tutorial" url="https://github.com/ginkgo-project/ginkgo/wiki/Tutorial:-Building-a-Poisson-Solver" />
<tab type="user" visible="yes" title="Examples" url="@ref Examples" />
<tab type="user" visible="yes" title="Contributing" url="@ref contributing_guidelines" />
<tab type="user" visible="yes" title="Citing Ginkgo" url="@ref citing_ginkgo" />
<tab type="user" visible="yes" title="Contributing To Ginkgo" url="@ref contributing_guidelines" />
<tab type="user" visible="yes" title="Using Ginkgo" url="@ref install_ginkgo">
<tab type="user" visible="yes" title="Installing Ginkgo" url="@ref install_ginkgo" />
<tab type="user" visible="yes" title="Testing Ginkgo" url="@ref testing_ginkgo" />
2 changes: 1 addition & 1 deletion doc/helpers.cmake
@@ -101,7 +101,7 @@ function(ginkgo_doc_gen name in pdf mainpage-in)
${doxygen_base_input}
)
# pick some markdown files we want as pages
set(doxygen_markdown_files "../../INSTALL.md ../../TESTING.md ../../BENCHMARKING.md ../../CONTRIBUTING.md")
set(doxygen_markdown_files "../../INSTALL.md ../../TESTING.md ../../BENCHMARKING.md ../../CONTRIBUTING.md ../../CITING.md")
ginkgo_to_string(doxygen_base_input_str ${doxygen_base_input} )
ginkgo_to_string(doxygen_dev_input_str ${doxygen_dev_input} )
ginkgo_to_string(doxygen_image_path_str ${doxygen_image_path} )
3 changes: 2 additions & 1 deletion examples/ginkgo-overhead/ginkgo-overhead.cpp
@@ -38,7 +38,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <iostream>


[[noreturn]] void print_usage_and_exit(const char *name) {
[[noreturn]] void print_usage_and_exit(const char *name)
{
std::cerr << "Usage: " << name << " [NUM_ITERS]" << std::endl;
std::exit(-1);
}
16 changes: 8 additions & 8 deletions hip/factorization/par_ilut_spgeam_kernel.hip.cpp
@@ -165,14 +165,14 @@ void add_candidates(std::shared_ptr<const DefaultExecutor> exec,
auto total_nnz =
lu->get_num_stored_elements() + a->get_num_stored_elements();
auto total_nnz_per_row = total_nnz / num_rows;
select_add_candidates(compiled_kernels(),
[&](int compiled_subwarp_size) {
return total_nnz_per_row <=
compiled_subwarp_size ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec, lu,
a, l, u, l_new, u_new);
select_add_candidates(
compiled_kernels(),
[&](int compiled_subwarp_size) {
return total_nnz_per_row <= compiled_subwarp_size ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec, lu, a, l, u, l_new,
u_new);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
40 changes: 21 additions & 19 deletions hip/matrix/csr_kernels.hip.cpp
@@ -651,15 +651,16 @@ void advanced_spgemm(std::shared_ptr<const HipExecutor> exec,

auto total_nnz = c_nnz + d->get_num_stored_elements();
auto nnz_per_row = total_nnz / m;
select_spgeam(spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), c_tmp_row_ptrs, c_tmp_col_idxs,
c_tmp_vals, beta->get_const_values(), d_row_ptrs,
d_col_idxs, d_vals, c);
select_spgeam(
spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), c_tmp_row_ptrs, c_tmp_col_idxs,
c_tmp_vals, beta->get_const_values(), d_row_ptrs, d_col_idxs,
d_vals, c);
} else {
GKO_NOT_IMPLEMENTED;
}
@@ -680,16 +681,17 @@ void spgeam(std::shared_ptr<const DefaultExecutor> exec,
auto total_nnz =
a->get_num_stored_elements() + b->get_num_stored_elements();
auto nnz_per_row = total_nnz / a->get_size()[0];
select_spgeam(spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), a->get_const_row_ptrs(),
a->get_const_col_idxs(), a->get_const_values(),
beta->get_const_values(), b->get_const_row_ptrs(),
b->get_const_col_idxs(), b->get_const_values(), c);
select_spgeam(
spgeam_kernels(),
[&](int compiled_subwarp_size) {
return compiled_subwarp_size >= nnz_per_row ||
compiled_subwarp_size == config::warp_size;
},
syn::value_list<int>(), syn::type_list<>(), exec,
alpha->get_const_values(), a->get_const_row_ptrs(),
a->get_const_col_idxs(), a->get_const_values(),
beta->get_const_values(), b->get_const_row_ptrs(),
b->get_const_col_idxs(), b->get_const_values(), c);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL);