Skip to content

Commit

Permalink
Remove MatmulParams::rotate_ldmatrix_out_of_main_loop
Browse files Browse the repository at this point in the history
I can't find any commit in which this option was ever actually used.
This is the commit where the option was originally introduced:
https://github.com/csarofeen/pytorch/pull/2488/files#diff-e7a5a84a2cfeddeb15669f07105bdb3722a796600ea9e1f2eb25afb29283457eR22
We've gone this long without the ability to disable loop rotation, so
either the condition in the schedulers should be changed to respect this
option, or the option should simply be removed. This commit removes it.
  • Loading branch information
jacobhinkle committed Nov 4, 2024
1 parent 7086d52 commit f825ebc
Show file tree
Hide file tree
Showing 4 changed files with 2 additions and 16 deletions.
1 change: 0 additions & 1 deletion csrc/python_frontend/python_bindings.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -651,7 +651,6 @@ void defineHeuristicParamBindings(py::module& nvfuser) {
.PARAM(MatmulParams, circular_buffer_options)
.PARAM(MatmulParams, supported_vec_size)
.PARAM(MatmulParams, async_gmem_load_operands)
.PARAM(MatmulParams, rotate_ldmatrix_out_of_main_loop)
.PARAM(MatmulParams, grid_swizzle_factor)
.PARAM(MatmulParams, use_smem_epilogue)
.PARAM(MatmulParams, promote_prologue_smem_reuse)
Expand Down
12 changes: 2 additions & 10 deletions csrc/scheduler/matmul_heuristic.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,6 @@ class MatmulParams : public HeuristicParams {
}
} supported_vec_size;

//! Whether to rotate the ldmatrix out of the main loop
bool rotate_ldmatrix_out_of_main_loop = true;

//! (Ampere+) Use cp.async to load operands.
bool async_gmem_load_operands = false;

Expand Down Expand Up @@ -191,8 +188,6 @@ class MatmulParams : public HeuristicParams {
<< circular_buffer_options.toString() << "\n"
<< supported_vec_size.toString() << "\n"
<< nvfuser::toString(tile_sizes) << "\n"
<< "Rotate ldmatrix out of main loop: "
<< (rotate_ldmatrix_out_of_main_loop ? "true" : "false") << "\n"
<< "Async global mem load: "
<< (async_gmem_load_operands ? "true" : "false") << "\n"
<< "Indexing mode: "
Expand All @@ -216,9 +211,8 @@ class MatmulParams : public HeuristicParams {

size_t hash() const override {
// combine boolean flags for hashing
size_t attr_hash = (static_cast<size_t>(promote_prologue_smem_reuse) << 3) |
(static_cast<size_t>(use_smem_epilogue) << 2) |
(static_cast<size_t>(rotate_ldmatrix_out_of_main_loop) << 1) |
size_t attr_hash = (static_cast<size_t>(promote_prologue_smem_reuse) << 2) |
(static_cast<size_t>(use_smem_epilogue) << 1) |
(static_cast<size_t>(async_gmem_load_operands));

// combined hash
Expand All @@ -240,8 +234,6 @@ class MatmulParams : public HeuristicParams {

return other->cparams == cparams && other->mma_macro == mma_macro &&
other->async_gmem_load_operands == async_gmem_load_operands &&
other->rotate_ldmatrix_out_of_main_loop ==
rotate_ldmatrix_out_of_main_loop &&
other->tile_sizes == tile_sizes &&
other->circular_buffer_options == circular_buffer_options &&
other->supported_vec_size == supported_vec_size &&
Expand Down
4 changes: 0 additions & 4 deletions csrc/scheduler/matmul_heuristic_plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,6 @@ void copyParamsToConfig(KernelConfig* config, const MatmulParams* mparams) {
: 1;
config->circular_buffer_smem_read =
mparams->circular_buffer_options.circular_buffer_smem_read;
config->rotate_ldmatrix_out_of_main_loop =
mparams->rotate_ldmatrix_out_of_main_loop;
config->problem.supported_vec_size.a = (uint8_t)mparams->supported_vec_size.a;
config->problem.supported_vec_size.b = (uint8_t)mparams->supported_vec_size.b;
config->problem.supported_vec_size.epilogue =
Expand Down Expand Up @@ -190,8 +188,6 @@ void copyConfigToParams(MatmulParams* mparams, const KernelConfig* config) {
}
mparams->circular_buffer_options.circular_buffer_smem_read =
config->circular_buffer_smem_read;
mparams->rotate_ldmatrix_out_of_main_loop =
config->rotate_ldmatrix_out_of_main_loop;

// enable circular buffering if configured
mparams->circular_buffer_options.circular_buffer_smem_write =
Expand Down
1 change: 0 additions & 1 deletion csrc/scheduler/matmul_heuristic_plugin_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@ struct KernelConfig {
uint8_t grid_swizzle_factor = 0;
uint8_t cta_order = 0;
bool circular_buffer_smem_read = true;
bool rotate_ldmatrix_out_of_main_loop = true;
bool async_gmem_load_operands = true;

public:
Expand Down

0 comments on commit f825ebc

Please sign in to comment.