diff --git a/core/solver/batch_bicgstab_kernels.hpp b/core/solver/batch_bicgstab_kernels.hpp index 1eed30aba5a..615ed472597 100644 --- a/core/solver/batch_bicgstab_kernels.hpp +++ b/core/solver/batch_bicgstab_kernels.hpp @@ -15,6 +15,10 @@ #include "core/base/kernel_declaration.hpp" +// TODO: update when splitting kernels +constexpr bool bicgstab_no_shared_vecs = true; + + namespace gko { namespace kernels { namespace batch_bicgstab { @@ -138,7 +142,7 @@ storage_config compute_shared_storage(const int available_shared_mem, // {prec_shared, n_shared, n_global, gmem_stride_bytes, padded_vec_len} storage_config sconf{false, 0, num_main_vecs, 0, num_rows}; // If available shared mem is zero, set all vecs to global. - if (rem_shared <= 0) { + if (rem_shared <= 0 || bicgstab_no_shared_vecs) { set_gmem_stride_bytes(sconf, vec_size, prec_storage); return sconf; } diff --git a/core/solver/batch_cg_kernels.hpp b/core/solver/batch_cg_kernels.hpp index 6fdb595862e..b21a2c07d3e 100644 --- a/core/solver/batch_cg_kernels.hpp +++ b/core/solver/batch_cg_kernels.hpp @@ -15,6 +15,10 @@ #include "core/base/kernel_declaration.hpp" +// TODO: update when splitting compilation +constexpr bool cg_no_shared_vecs = true; + + namespace gko { namespace kernels { namespace batch_cg { @@ -126,7 +130,7 @@ storage_config compute_shared_storage(const int available_shared_mem, // {prec_shared, n_shared, n_global, gmem_stride_bytes, padded_vec_len} storage_config sconf{false, 0, num_main_vecs, 0, num_rows}; // If available shared mem is zero, set all vecs to global. 
- if (rem_shared <= 0) { + if (rem_shared <= 0 || cg_no_shared_vecs) { set_gmem_stride_bytes(sconf, vec_bytes, prec_storage); return sconf; } diff --git a/cuda/solver/batch_bicgstab_kernels.cu b/cuda/solver/batch_bicgstab_kernels.cu index 6b3dca28607..3c7fe50709c 100644 --- a/cuda/solver/batch_bicgstab_kernels.cu +++ b/cuda/solver/batch_bicgstab_kernels.cu @@ -144,10 +144,11 @@ public: const int shmem_per_blk = get_max_dynamic_shared_memory(exec_); - const int block_size = - get_num_threads_per_block( - exec_, mat.num_rows); + // TODO + const int block_size = 256; + // get_num_threads_per_block( + // exec_, mat.num_rows); GKO_ASSERT(block_size >= 2 * config::warp_size); const size_t prec_size = PrecType::dynamic_work_size( @@ -167,68 +168,69 @@ public: value_type* const workspace_data = workspace.get_data(); + // TODO: split compilation // Template parameters launch_apply_kernel - if (sconf.prec_shared) { - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - block_size, shared_size); - } else { - switch (sconf.n_shared) { - case 0: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 1: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 2: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 3: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 4: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 5: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 6: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, 
shared_size); - break; - case 7: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 8: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 9: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - default: - GKO_NOT_IMPLEMENTED; - } - } + // if (sconf.prec_shared) { + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // block_size, shared_size); + // } else { + // switch (sconf.n_shared) { + // case 0: + launch_apply_kernel( + sconf, logger, prec, mat, b.values, x.values, workspace_data, + block_size, shared_size); + // break; + // case 1: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 2: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 3: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 4: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 5: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 6: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 7: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 8: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + 
// case 9: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // default: + // GKO_NOT_IMPLEMENTED; + // } + // } } private: diff --git a/cuda/solver/batch_cg_kernels.cu b/cuda/solver/batch_cg_kernels.cu index 746be0365e7..b681bd13ce3 100644 --- a/cuda/solver/batch_cg_kernels.cu +++ b/cuda/solver/batch_cg_kernels.cu @@ -165,48 +165,51 @@ public: value_type* const workspace_data = workspace.get_data(); + // TODO: split compilation + // Only instantiate when full optimizations have been enabled. Otherwise, + // just use the default one with no shared memory. // Template parameters launch_apply_kernel - if (sconf.prec_shared) { - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - block_size, shared_size); - } else { - switch (sconf.n_shared) { - case 0: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 1: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 2: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 3: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 4: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 5: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - default: - GKO_NOT_IMPLEMENTED; - } - } + // if (sconf.prec_shared) { + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // block_size, shared_size); + // } else { + // switch (sconf.n_shared) { + // case 0: + launch_apply_kernel( + sconf, logger, prec, mat, b.values, 
x.values, workspace_data, + block_size, shared_size); + // break; + // case 1: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 2: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 3: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 4: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 5: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // default: + // GKO_NOT_IMPLEMENTED; + // } + // } } private: diff --git a/dpcpp/solver/batch_bicgstab_kernels.dp.cpp b/dpcpp/solver/batch_bicgstab_kernels.dp.cpp index 344e4af56b9..bb84283b49f 100644 --- a/dpcpp/solver/batch_bicgstab_kernels.dp.cpp +++ b/dpcpp/solver/batch_bicgstab_kernels.dp.cpp @@ -159,77 +159,80 @@ class kernel_caller { ValueType* const workspace_data = workspace.get_data(); int n_shared_total = sconf.n_shared + int(sconf.prec_shared); + // TODO: split compilation + // Only instantiate when full optimizations have been enabled. Otherwise, + // just use the default one with no shared memory. 
// template // launch_apply_kernel - if (num_rows <= 32 && n_shared_total == 10) { - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - group_size, shared_size); - } else if (num_rows <= 256 && n_shared_total == 10) { - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - group_size, shared_size); - } else { - switch (n_shared_total) { - case 0: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 1: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 2: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 3: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 4: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 5: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 6: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 7: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 8: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 9: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 10: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - default: - GKO_NOT_IMPLEMENTED; - } - } + // if (num_rows <= 32 && n_shared_total == 10) { + // 
launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // group_size, shared_size); + // } else if (num_rows <= 256 && n_shared_total == 10) { + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // group_size, shared_size); + // } else { + // switch (n_shared_total) { + // case 0: + launch_apply_kernel(sconf, logger, prec, mat, b.values, + x.values, workspace_data, + group_size, shared_size); + // break; + // case 1: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 2: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 3: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 4: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 5: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 6: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 7: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 8: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 9: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 10: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // default: + // 
GKO_NOT_IMPLEMENTED; + // } + // } } private: diff --git a/dpcpp/solver/batch_cg_kernels.dp.cpp b/dpcpp/solver/batch_cg_kernels.dp.cpp index 0787afa6fd3..61591f9efb6 100644 --- a/dpcpp/solver/batch_cg_kernels.dp.cpp +++ b/dpcpp/solver/batch_cg_kernels.dp.cpp @@ -158,53 +158,55 @@ class kernel_caller { ValueType* const workspace_data = workspace.get_data(); int n_shared_total = sconf.n_shared + int(sconf.prec_shared); + // Only instantiate when full optimizations have been enabled. Otherwise, + // just use the default one with no shared memory. // template // launch_apply_kernel - if (num_rows <= 32 && n_shared_total == 6) { - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - group_size, shared_size); - } else { - switch (n_shared_total) { - case 0: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 1: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 2: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 3: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 4: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 5: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - case 6: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, group_size, shared_size); - break; - default: - GKO_NOT_IMPLEMENTED; - } - } + // if (num_rows <= 32 && n_shared_total == 6) { + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // group_size, shared_size); + // } else { + // switch (n_shared_total) { + // case 0: + 
launch_apply_kernel(sconf, logger, prec, mat, b.values, + x.values, workspace_data, + group_size, shared_size); + // break; + // case 1: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 2: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 3: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 4: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 5: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // case 6: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, group_size, shared_size); + // break; + // default: + // GKO_NOT_IMPLEMENTED; + // } + // } } private: diff --git a/hip/solver/batch_bicgstab_kernels.hip.cpp b/hip/solver/batch_bicgstab_kernels.hip.cpp index 95a49953b3e..ca49fa5eb9c 100644 --- a/hip/solver/batch_bicgstab_kernels.hip.cpp +++ b/hip/solver/batch_bicgstab_kernels.hip.cpp @@ -149,68 +149,70 @@ class kernel_caller { value_type* const workspace_data = workspace.get_data(); + // Only instantiate when full optimizations have been enabled. Otherwise, + // just use the default one with no shared memory. 
// Template parameters launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - block_size, shared_size); - } else { - switch (sconf.n_shared) { - case 0: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 1: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 2: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 3: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 4: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 5: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 6: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 7: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 8: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 9: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - default: - GKO_NOT_IMPLEMENTED; - } - } + // if (sconf.prec_shared) { + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // block_size, shared_size); + // } else { + // switch (sconf.n_shared) { + // case 0: + launch_apply_kernel( + sconf, logger, prec, mat, b.values, x.values, workspace_data, + block_size, shared_size); + // break; + // case 1: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // 
workspace_data, block_size, shared_size); + // break; + // case 2: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 3: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 4: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 5: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 6: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 7: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 8: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 9: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // default: + // GKO_NOT_IMPLEMENTED; + // } + // } } private: diff --git a/hip/solver/batch_cg_kernels.hip.cpp b/hip/solver/batch_cg_kernels.hip.cpp index 6102749b988..3a1642edfea 100644 --- a/hip/solver/batch_cg_kernels.hip.cpp +++ b/hip/solver/batch_cg_kernels.hip.cpp @@ -149,48 +149,50 @@ class kernel_caller { value_type* const workspace_data = workspace.get_data(); + // Only instantiate when full optimizations have been enabled. Otherwise, + // just use the default one with no shared memory. 
// Template parameters launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, workspace_data, - block_size, shared_size); - } else { - switch (sconf.n_shared) { - case 0: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 1: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 2: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 3: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 4: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - case 5: - launch_apply_kernel( - sconf, logger, prec, mat, b.values, x.values, - workspace_data, block_size, shared_size); - break; - default: - GKO_NOT_IMPLEMENTED; - } - } + // if (sconf.prec_shared) { + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, workspace_data, + // block_size, shared_size); + // } else { + // switch (sconf.n_shared) { + // case 0: + launch_apply_kernel( + sconf, logger, prec, mat, b.values, x.values, workspace_data, + block_size, shared_size); + // break; + // case 1: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 2: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 3: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 4: + // launch_apply_kernel( + // sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // case 5: + // launch_apply_kernel( + // 
sconf, logger, prec, mat, b.values, x.values, + // workspace_data, block_size, shared_size); + // break; + // default: + // GKO_NOT_IMPLEMENTED; + // } + // } } private: