Skip to content

Commit

Permalink
Merge(#1652): Temporarily disable optimized batched solver instantiations
Browse files Browse the repository at this point in the history

Temporarily disable optimized batched solver instantiations

Related PR: #1652
  • Loading branch information
pratikvn authored Aug 18, 2024
2 parents 2c06c8a + e5b261f commit 9f1c41b
Show file tree
Hide file tree
Showing 8 changed files with 342 additions and 320 deletions.
6 changes: 5 additions & 1 deletion core/solver/batch_bicgstab_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
#include "core/base/kernel_declaration.hpp"


// Temporary switch (TODO: revisit when the kernels are split): while this is
// true, compute_shared_storage() returns early with every vector placed in
// global memory, irrespective of how much shared memory is available.
constexpr bool bicgstab_no_shared_vecs{true};


namespace gko {
namespace kernels {
namespace batch_bicgstab {
Expand Down Expand Up @@ -138,7 +142,7 @@ storage_config compute_shared_storage(const int available_shared_mem,
// {prec_shared, n_shared, n_global, gmem_stride_bytes, padded_vec_len}
storage_config sconf{false, 0, num_main_vecs, 0, num_rows};
// If no shared memory remains, or shared vectors are disabled via
// bicgstab_no_shared_vecs, set all vecs to global.
if (rem_shared <= 0) {
if (rem_shared <= 0 || bicgstab_no_shared_vecs) {
set_gmem_stride_bytes<align_bytes>(sconf, vec_size, prec_storage);
return sconf;
}
Expand Down
6 changes: 5 additions & 1 deletion core/solver/batch_cg_kernels.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
#include "core/base/kernel_declaration.hpp"


// Temporary switch (TODO: revisit when compilation is split): while this is
// true, compute_shared_storage() returns early with every vector placed in
// global memory, irrespective of how much shared memory is available.
constexpr bool cg_no_shared_vecs{true};


namespace gko {
namespace kernels {
namespace batch_cg {
Expand Down Expand Up @@ -126,7 +130,7 @@ storage_config compute_shared_storage(const int available_shared_mem,
// {prec_shared, n_shared, n_global, gmem_stride_bytes, padded_vec_len}
storage_config sconf{false, 0, num_main_vecs, 0, num_rows};
// If no shared memory remains, or shared vectors are disabled via
// cg_no_shared_vecs, set all vecs to global.
if (rem_shared <= 0) {
if (rem_shared <= 0 || cg_no_shared_vecs) {
set_gmem_stride_bytes<align_bytes>(sconf, vec_bytes, prec_storage);
return sconf;
}
Expand Down
130 changes: 66 additions & 64 deletions cuda/solver/batch_bicgstab_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -144,10 +144,11 @@ public:
const int shmem_per_blk =
get_max_dynamic_shared_memory<StopType, PrecType, LogType,
BatchMatrixType, value_type>(exec_);
const int block_size =
get_num_threads_per_block<StopType, PrecType, LogType,
BatchMatrixType, value_type>(
exec_, mat.num_rows);
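// TODO: block size is hard-coded for now; restore the commented-out
// get_num_threads_per_block query below when splitting compilation.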
const int block_size = 256;
// get_num_threads_per_block<StopType, PrecType, LogType,
// BatchMatrixType, value_type>(
// exec_, mat.num_rows);
GKO_ASSERT(block_size >= 2 * config::warp_size);
const size_t prec_size = PrecType::dynamic_work_size(
Expand All @@ -167,68 +168,69 @@ public:
value_type* const workspace_data = workspace.get_data();
// TODO: split compilation
// Template parameters launch_apply_kernel<StopType, n_shared,
// prec_shared>
if (sconf.prec_shared) {
launch_apply_kernel<StopType, 9, true>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<StopType, 1, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<StopType, 2, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<StopType, 3, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<StopType, 4, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<StopType, 5, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 6:
launch_apply_kernel<StopType, 6, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 7:
launch_apply_kernel<StopType, 7, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 8:
launch_apply_kernel<StopType, 8, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 9:
launch_apply_kernel<StopType, 9, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
default:
GKO_NOT_IMPLEMENTED;
}
}
// if (sconf.prec_shared) {
// launch_apply_kernel<StopType, 9, true>(
// sconf, logger, prec, mat, b.values, x.values, workspace_data,
// block_size, shared_size);
// } else {
// switch (sconf.n_shared) {
// case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
// break;
// case 1:
// launch_apply_kernel<StopType, 1, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 2:
// launch_apply_kernel<StopType, 2, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 3:
// launch_apply_kernel<StopType, 3, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 4:
// launch_apply_kernel<StopType, 4, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 5:
// launch_apply_kernel<StopType, 5, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 6:
// launch_apply_kernel<StopType, 6, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 7:
// launch_apply_kernel<StopType, 7, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 8:
// launch_apply_kernel<StopType, 8, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 9:
// launch_apply_kernel<StopType, 9, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// default:
// GKO_NOT_IMPLEMENTED;
// }
// }
}
private:
Expand Down
83 changes: 43 additions & 40 deletions cuda/solver/batch_cg_kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -165,48 +165,51 @@ public:
value_type* const workspace_data = workspace.get_data();
// TODO: split compilation
// Only instantiate when full optimizations have been enabled. Otherwise,
// just use the default one with no shared memory.
// Template parameters launch_apply_kernel<StopType, n_shared,
// prec_shared>
if (sconf.prec_shared) {
launch_apply_kernel<StopType, 5, true>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
} else {
switch (sconf.n_shared) {
case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 1:
launch_apply_kernel<StopType, 1, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 2:
launch_apply_kernel<StopType, 2, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 3:
launch_apply_kernel<StopType, 3, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 4:
launch_apply_kernel<StopType, 4, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
case 5:
launch_apply_kernel<StopType, 5, false>(
sconf, logger, prec, mat, b.values, x.values,
workspace_data, block_size, shared_size);
break;
default:
GKO_NOT_IMPLEMENTED;
}
}
// if (sconf.prec_shared) {
// launch_apply_kernel<StopType, 5, true>(
// sconf, logger, prec, mat, b.values, x.values, workspace_data,
// block_size, shared_size);
// } else {
// switch (sconf.n_shared) {
// case 0:
launch_apply_kernel<StopType, 0, false>(
sconf, logger, prec, mat, b.values, x.values, workspace_data,
block_size, shared_size);
// break;
// case 1:
// launch_apply_kernel<StopType, 1, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 2:
// launch_apply_kernel<StopType, 2, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 3:
// launch_apply_kernel<StopType, 3, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 4:
// launch_apply_kernel<StopType, 4, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// case 5:
// launch_apply_kernel<StopType, 5, false>(
// sconf, logger, prec, mat, b.values, x.values,
// workspace_data, block_size, shared_size);
// break;
// default:
// GKO_NOT_IMPLEMENTED;
// }
// }
}
private:
Expand Down
Loading

0 comments on commit 9f1c41b

Please sign in to comment.