Skip to content

Commit

Permalink
fix regression
Browse files (browse the repository at this point in the history)
  • Loading branch information
jayhshah committed Jul 25, 2024
1 parent cd25ee5 commit cb8b453
Showing 1 changed file with 6 additions and 5 deletions.
11 changes: 6 additions & 5 deletions hopper/mainloop_fwd_sm90_tma_gmma_ws.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -692,10 +692,8 @@ struct CollectiveMainloopFwd {
++smem_pipe_write;
++smem_pipe_read;
}

scheduler.prefetch_next_work(scheduler_params, work_tile_info);

scheduler.broadcast_next_work(work_tile_info);
// scheduler.prefetch_next_work(scheduler_params, work_tile_info);
// scheduler.broadcast_next_work(work_tile_info);

}

Expand Down Expand Up @@ -1445,7 +1443,7 @@ struct CollectiveMainloopFwd {
consumer_wait(pipeline_k, smem_pipe_read);
warp_scheduler_barrier_sync();
flash::gemm</*zero_init=*/true, /*wg_wait=*/-1>(tiled_mma0, tSrQ, tSrK(_, _, _, smem_pipe_read.index()), tSrS);
warp_scheduler_barrier_arrive();
// warp_scheduler_barrier_arrive();
if (work_idx != 0) {
int lane_predicate = cute::elect_one_sync();
if (cutlass::canonical_warp_idx_sync() == Ktraits::kNWarps - 1 && lane_predicate) {
Expand All @@ -1457,6 +1455,7 @@ struct CollectiveMainloopFwd {
}
}
warpgroup_wait<0>();
warp_scheduler_barrier_arrive();
pipeline_k.consumer_release(smem_pipe_read);

auto col_limit_causal = [&](int row, int n_block) {
Expand Down Expand Up @@ -1529,6 +1528,7 @@ struct CollectiveMainloopFwd {
++smem_pipe_read;
}
}
#if 1
else {
CUTLASS_PRAGMA_UNROLL
for (int iter = 0; iter < extra_iterations && n_block >= 0; ++iter, --n_block) {
Expand Down Expand Up @@ -1557,6 +1557,7 @@ struct CollectiveMainloopFwd {
++smem_pipe_read;
}
}
#endif

if constexpr(Delay_V_release) {
warp_scheduler_barrier_sync();
Expand Down

0 comments on commit cb8b453

Please sign in to comment.