diff --git a/common/cuda_hip/matrix/csr_kernels.hpp.inc b/common/cuda_hip/matrix/csr_kernels.hpp.inc index 1fca1ee7215..c370075c8a8 100644 --- a/common/cuda_hip/matrix/csr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/csr_kernels.hpp.inc @@ -937,9 +937,6 @@ void convert_to_fbcsr(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); - namespace kernel { @@ -1122,8 +1119,6 @@ void build_lookup(std::shared_ptr exec, storage); } -GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); - template void fallback_transpose(std::shared_ptr exec, diff --git a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc index 27314c06a59..d71d593b0a2 100644 --- a/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc +++ b/common/cuda_hip/matrix/fbcsr_kernels.hpp.inc @@ -238,9 +238,6 @@ void fill_in_matrix_data(std::shared_ptr exec, }); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL); - namespace kernel { @@ -323,9 +320,6 @@ void fill_in_dense(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); - template void convert_to_csr(const std::shared_ptr exec, @@ -345,9 +339,6 @@ void convert_to_csr(const std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); - template void is_sorted_by_column_index( @@ -372,23 +363,14 @@ void is_sorted_by_column_index( *is_sorted = exec->copy_val_to_host(gpu_array.get_data()); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); - template void sort_by_column_index(const std::shared_ptr exec, matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); - template void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr* orig, 
matrix::Diagonal* diag) GKO_NOT_IMPLEMENTED; - -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index aecf4e1c2f2..6cfb83a59e8 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -1,4 +1,9 @@ add_library(ginkgo_cuda $ "") +include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) +add_instantiation_files(. matrix/csr_kernels.instantiate.cu CSR_INSTANTIATE) +add_instantiation_files(. matrix/fbcsr_kernels.instantiate.cu FBCSR_INSTANTIATE) +# we don't split up the dense kernels into distinct compilations +list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_cuda PRIVATE base/device.cpp @@ -31,11 +36,11 @@ target_sources(ginkgo_cuda factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu matrix/coo_kernels.cu - matrix/csr_kernels.cu + ${CSR_INSTANTIATE} matrix/dense_kernels.cu matrix/diagonal_kernels.cu matrix/ell_kernels.cu - matrix/fbcsr_kernels.cu + ${FBCSR_INSTANTIATE} matrix/fft_kernels.cu matrix/sellp_kernels.cu matrix/sparsity_csr_kernels.cu diff --git a/cuda/matrix/csr_kernels.instantiate.cu b/cuda/matrix/csr_kernels.instantiate.cu new file mode 100644 index 00000000000..75747bf074b --- /dev/null +++ b/cuda/matrix/csr_kernels.instantiate.cu @@ -0,0 +1,99 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "cuda/matrix/csr_kernels.template.cu" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Compressed sparse row matrix format namespace. 
+ * + * @ingroup csr + */ +namespace csr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + 
GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); +// end + + +} // namespace csr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/csr_kernels.cu b/cuda/matrix/csr_kernels.template.cu similarity index 96% rename from cuda/matrix/csr_kernels.cu rename to cuda/matrix/csr_kernels.template.cu index 619ead5bbbb..1b4b20a1e75 100644 --- a/cuda/matrix/csr_kernels.cu +++ b/cuda/matrix/csr_kernels.template.cu @@ -533,9 +533,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SPMV_KERNEL); - template @@ -598,9 +595,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); - template void spgemm(std::shared_ptr exec, @@ -724,8 +718,6 @@ void spgemm(std::shared_ptr exec, #endif // CUDA_VERSION >= 11000 } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); - namespace { @@ -920,9 +912,6 @@ void advanced_spgemm(std::shared_ptr exec, #endif // CUDA_VERSION >= 11000 } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); - template void spgeam(std::shared_ptr exec, @@ -948,8 +937,6 @@ void spgeam(std::shared_ptr exec, b->get_const_col_idxs(), b->get_const_values(), c); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); - template void fill_in_dense(std::shared_ptr exec, @@ -972,9 +959,6 @@ void fill_in_dense(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); - template void transpose(std::shared_ptr exec, @@ -1024,8 +1008,6 @@ void transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -1083,9 +1065,6 @@ void 
conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); - template void inv_symm_permute(std::shared_ptr exec, @@ -1116,9 +1095,6 @@ void inv_symm_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); - template void row_permute(std::shared_ptr exec, @@ -1149,9 +1125,6 @@ void row_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); - template void inverse_row_permute(std::shared_ptr exec, @@ -1182,9 +1155,6 @@ void inverse_row_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); - template void calculate_nonzeros_per_row_in_span( @@ -1204,9 +1174,6 @@ void calculate_nonzeros_per_row_in_span( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); - template void compute_submatrix(std::shared_ptr exec, @@ -1233,9 +1200,6 @@ void compute_submatrix(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); - template void calculate_nonzeros_per_row_in_index_set( @@ -1245,9 +1209,6 @@ void calculate_nonzeros_per_row_in_index_set( const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); - template void compute_submatrix_from_index_set( @@ -1257,9 +1218,6 @@ void compute_submatrix_from_index_set( const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); - template void sort_by_column_index(std::shared_ptr exec, @@ -1312,9 +1270,6 @@ void sort_by_column_index(std::shared_ptr exec, } } 
-GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); - template void is_sorted_by_column_index( @@ -1336,9 +1291,6 @@ void is_sorted_by_column_index( cpu_array = gpu_array; } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); - template void extract_diagonal(std::shared_ptr exec, @@ -1364,8 +1316,6 @@ void extract_diagonal(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); - template void check_diagonal_entries_exist( @@ -1389,9 +1339,6 @@ void check_diagonal_entries_exist( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); - template void add_scaled_identity(std::shared_ptr exec, @@ -1413,9 +1360,6 @@ void add_scaled_identity(std::shared_ptr exec, as_device_type(mtx->get_values())); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); - } // namespace csr } // namespace cuda diff --git a/cuda/matrix/fbcsr_kernels.instantiate.cu b/cuda/matrix/fbcsr_kernels.instantiate.cu new file mode 100644 index 00000000000..73c3fc136ba --- /dev/null +++ b/cuda/matrix/fbcsr_kernels.instantiate.cu @@ -0,0 +1,75 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "cuda/matrix/fbcsr_kernels.template.cu" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); +// end + + +} // namespace fbcsr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/fbcsr_kernels.cu b/cuda/matrix/fbcsr_kernels.template.cu similarity index 97% rename from cuda/matrix/fbcsr_kernels.cu rename to cuda/matrix/fbcsr_kernels.template.cu index 8160a0ac5a5..c629b292bfb 100644 --- a/cuda/matrix/fbcsr_kernels.cu +++ b/cuda/matrix/fbcsr_kernels.template.cu @@ -180,8 +180,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); - template void advanced_spmv(std::shared_ptr exec, @@ -240,9 +238,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); - namespace { @@ -305,9 +300,6 @@ void transpose(const std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -325,9 +317,6 @@ void 
conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); - } // namespace fbcsr } // namespace cuda diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index 61b06ad4058..7e0558844cf 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,3 +1,8 @@ +include(${PROJECT_SOURCE_DIR}/cmake/template_instantiation.cmake) +add_instantiation_files(. matrix/csr_kernels.instantiate.hip.cpp CSR_INSTANTIATE) +add_instantiation_files(. matrix/fbcsr_kernels.instantiate.hip.cpp FBCSR_INSTANTIATE) +# we don't split up the dense kernels into distinct compilations +list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) set(GINKGO_HIP_SOURCES base/device.hip.cpp base/device_matrix_data_kernels.hip.cpp @@ -29,11 +34,11 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp matrix/coo_kernels.hip.cpp - matrix/csr_kernels.hip.cpp + ${CSR_INSTANTIATE} matrix/dense_kernels.hip.cpp matrix/diagonal_kernels.hip.cpp matrix/ell_kernels.hip.cpp - matrix/fbcsr_kernels.hip.cpp + ${FBCSR_INSTANTIATE} matrix/sellp_kernels.hip.cpp matrix/sparsity_csr_kernels.hip.cpp multigrid/pgm_kernels.hip.cpp diff --git a/hip/matrix/csr_kernels.instantiate.hip.cpp b/hip/matrix/csr_kernels.instantiate.hip.cpp new file mode 100644 index 00000000000..498f3ec1795 --- /dev/null +++ b/hip/matrix/csr_kernels.instantiate.hip.cpp @@ -0,0 +1,99 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. 
Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "hip/matrix/csr_kernels.template.hip.cpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The Compressed sparse row matrix format namespace. 
+ * + * @ingroup csr + */ +namespace csr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONVERT_TO_FBCSR_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_CSR_BUILD_LOOKUP_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + 
GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); +// end + + +} // namespace csr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/csr_kernels.hip.cpp b/hip/matrix/csr_kernels.template.hip.cpp similarity index 96% rename from hip/matrix/csr_kernels.hip.cpp rename to hip/matrix/csr_kernels.template.hip.cpp index b18cfa0f12b..e6a4fb64041 100644 --- a/hip/matrix/csr_kernels.hip.cpp +++ b/hip/matrix/csr_kernels.template.hip.cpp @@ -493,9 +493,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SPMV_KERNEL); - template @@ -558,9 +555,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPMV_KERNEL); - template void spgemm(std::shared_ptr exec, @@ -634,8 +628,6 @@ void spgemm(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); - namespace { @@ -775,9 +767,6 @@ void advanced_spgemm(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADVANCED_SPGEMM_KERNEL); - template void spgeam(std::shared_ptr exec, @@ -803,8 +792,6 @@ void spgeam(std::shared_ptr exec, b->get_const_col_idxs(), b->get_const_values(), c); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); - template void fill_in_dense(std::shared_ptr exec, @@ -827,9 +814,6 @@ void fill_in_dense(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_FILL_IN_DENSE_KERNEL); - template void transpose(std::shared_ptr exec, @@ -854,8 +838,6 @@ void transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -888,9 +870,6 @@ void conj_transpose(std::shared_ptr exec, } } 
-GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CONJ_TRANSPOSE_KERNEL); - template void inv_symm_permute(std::shared_ptr exec, @@ -921,9 +900,6 @@ void inv_symm_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INV_SYMM_PERMUTE_KERNEL); - template void row_permute(std::shared_ptr exec, const IndexType* perm, @@ -953,9 +929,6 @@ void row_permute(std::shared_ptr exec, const IndexType* perm, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ROW_PERMUTE_KERNEL); - template void inverse_row_permute(std::shared_ptr exec, @@ -986,9 +959,6 @@ void inverse_row_permute(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_INVERSE_ROW_PERMUTE_KERNEL); - template void calculate_nonzeros_per_row_in_span( @@ -1009,9 +979,6 @@ void calculate_nonzeros_per_row_in_span( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_SPAN_KERNEL); - template void compute_submatrix(std::shared_ptr exec, @@ -1038,9 +1005,6 @@ void compute_submatrix(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_KERNEL); - template void calculate_nonzeros_per_row_in_index_set( @@ -1050,9 +1014,6 @@ void calculate_nonzeros_per_row_in_index_set( const gko::index_set& col_index_set, IndexType* row_nnz) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CALC_NNZ_PER_ROW_IN_INDEX_SET_KERNEL); - template void compute_submatrix_from_index_set( @@ -1062,9 +1023,6 @@ void compute_submatrix_from_index_set( const gko::index_set& col_index_set, matrix::Csr* result) GKO_NOT_IMPLEMENTED; -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_COMPUTE_SUB_MATRIX_FROM_INDEX_SET_KERNEL); - template void sort_by_column_index(std::shared_ptr exec, @@ -1110,9 +1068,6 @@ void sort_by_column_index(std::shared_ptr exec, } } 
-GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_SORT_BY_COLUMN_INDEX); - template void is_sorted_by_column_index( @@ -1134,9 +1089,6 @@ void is_sorted_by_column_index( cpu_array = gpu_array; } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_IS_SORTED_BY_COLUMN_INDEX); - template void extract_diagonal(std::shared_ptr exec, @@ -1161,8 +1113,6 @@ void extract_diagonal(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); - template void check_diagonal_entries_exist( @@ -1186,9 +1136,6 @@ void check_diagonal_entries_exist( } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_CHECK_DIAGONAL_ENTRIES_EXIST); - template void add_scaled_identity(std::shared_ptr exec, @@ -1210,9 +1157,6 @@ void add_scaled_identity(std::shared_ptr exec, as_device_type(mtx->get_values())); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_CSR_ADD_SCALED_IDENTITY_KERNEL); - } // namespace csr } // namespace hip diff --git a/hip/matrix/fbcsr_kernels.instantiate.hip.cpp b/hip/matrix/fbcsr_kernels.instantiate.hip.cpp new file mode 100644 index 00000000000..8cf4944e08a --- /dev/null +++ b/hip/matrix/fbcsr_kernels.instantiate.hip.cpp @@ -0,0 +1,75 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. 
Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "hip/matrix/fbcsr_kernels.template.hip.cpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The fixed-size block compressed sparse row matrix format namespace. 
+ * + * @ingroup fbcsr + */ +namespace fbcsr { + + +// begin +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_MATRIX_DATA_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_FILL_IN_DENSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONVERT_TO_CSR_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_IS_SORTED_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_SORT_BY_COLUMN_INDEX); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_EXTRACT_DIAGONAL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); +// split +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( + GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); +// end + + +} // namespace fbcsr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/fbcsr_kernels.hip.cpp b/hip/matrix/fbcsr_kernels.template.hip.cpp similarity index 96% rename from hip/matrix/fbcsr_kernels.hip.cpp rename to hip/matrix/fbcsr_kernels.template.hip.cpp index 8a4d78e7e40..88cad66753c 100644 --- a/hip/matrix/fbcsr_kernels.hip.cpp +++ b/hip/matrix/fbcsr_kernels.template.hip.cpp @@ -182,8 +182,6 @@ void spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); - template void advanced_spmv(std::shared_ptr exec, @@ -242,9 +240,6 @@ void advanced_spmv(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_ADVANCED_SPMV_KERNEL); - template void transpose(const std::shared_ptr exec, @@ -254,9 +249,6 @@ void transpose(const std::shared_ptr exec, fallback_transpose(exec, input, output); } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - 
GKO_DECLARE_FBCSR_TRANSPOSE_KERNEL); - template void conj_transpose(std::shared_ptr exec, @@ -274,9 +266,6 @@ void conj_transpose(std::shared_ptr exec, } } -GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( - GKO_DECLARE_FBCSR_CONJ_TRANSPOSE_KERNEL); - } // namespace fbcsr } // namespace hip diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 50f46cd23cd..d552cc612bf 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -1,4 +1,6 @@ add_library(ginkgo_omp $ "") +# we don't split up the dense kernels into distinct compilations +list(APPEND GKO_UNIFIED_COMMON_SOURCES ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp) target_sources(ginkgo_omp PRIVATE base/device_matrix_data_kernels.cpp @@ -39,7 +41,6 @@ target_sources(ginkgo_omp stop/criterion_kernels.cpp stop/residual_norm_kernels.cpp ${GKO_UNIFIED_COMMON_SOURCES} - ${PROJECT_SOURCE_DIR}/common/unified/matrix/dense_kernels.instantiate.cpp ) ginkgo_compile_features(ginkgo_omp)