Skip to content

Commit 21a84e1

Browse files
Antonyvance authored and amitchawla1 committed
Reduce runtime check
1 parent ddc5b6e commit 21a84e1

File tree

2 files changed

+9
-6
lines changed

2 files changed

+9
-6
lines changed

include/cutlass/epilogue/collective/xe_array_epilogue.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -430,8 +430,10 @@ class CollectiveEpilogue<
430430
CUTLASS_PRAGMA_UNROLL
431431
for (int epi_m = 0; epi_m < FragsM; epi_m++) {
432432

433-
if (is_C_load_needed) {
434-
if constexpr (is_source_supported) {
433+
// Instead of testing is_C_load_needed first, do a hierarchical check
434+
// so that no runtime check is emitted when ElementC is void.
435+
if constexpr (is_source_supported) {
436+
if (fusion_callbacks.is_C_load_needed()) {
435437
// coordinates for C and D are the same
436438
copy(params.xe_load_c.with(get<0>(load_store_tensors)), tCgD(_, epi_m, epi_n), trC);
437439
}

include/cutlass/epilogue/collective/xe_epilogue.hpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -410,10 +410,11 @@ class CollectiveEpilogue<
410410
CUTLASS_PRAGMA_UNROLL
411411
for (int epi_m = 0; epi_m < FragsM; epi_m++) {
412412
cst_callbacks.begin_loop(epi_m, epi_n);
413-
414-
//avoid evaluating xe_load_c when ElementC is void during compilation
415-
if (is_C_load_needed) {
416-
if constexpr (is_source_supported) {
413+
414+
// Instead of testing is_C_load_needed first, do a hierarchical check
415+
// so that no runtime check is emitted when ElementC is void.
416+
if constexpr (is_source_supported) {
417+
if (fusion_callbacks.is_C_load_needed()) {
417418
copy(params.xe_load_c, tCgC(_, epi_m, epi_n), trC);
418419
}
419420
}

0 commit comments

Comments
 (0)