Skip to content

Commit 0a301f8

Browse files
Antonyvance authored and
amitchawla1 committed
Reduce runtime check
1 parent d59e866 commit 0a301f8

File tree

2 files changed

+8
-4
lines changed

2 files changed

+8
-4
lines changed

include/cutlass/epilogue/collective/xe_array_epilogue.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,10 @@ class CollectiveEpilogue<
430430
CUTLASS_PRAGMA_UNROLL
431431
for (int epi_m = 0; epi_m < FragsM; epi_m++) {
432432

433-
if (is_C_load_needed) {
434-
if constexpr (is_source_supported) {
433+
//Instead of testing is_C_load_needed first, we do a hierarchical check
434+
//so that the runtime check is not there when ElementC is void
435+
if constexpr (is_source_supported) {
436+
if (fusion_callbacks.is_C_load_needed()) {
435437
//coordinates for C and D are the same
436438
copy(params.xe_load_c.with(get<0>(load_store_tensors)), tCgD(_, epi_m, epi_n), trC);
437439
}

include/cutlass/epilogue/collective/xe_epilogue.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -411,8 +411,10 @@ class CollectiveEpilogue<
411411
for (int epi_m = 0; epi_m < FragsM; epi_m++) {
412412
cst_callbacks.begin_loop(epi_m, epi_n);
413413

414-
if (is_C_load_needed) {
415-
if constexpr (is_source_supported) {
414+
//Instead of testing is_C_load_needed first, we do a hierarchical check
415+
//so that the runtime check is not there when ElementC is void
416+
if constexpr (is_source_supported) {
417+
if (fusion_callbacks.is_C_load_needed()) {
416418
copy(params.xe_load_c, tCgC(_, epi_m, epi_n), trC);
417419
}
418420
}

0 commit comments

Comments
 (0)