Skip to content

Commit

Permalink
Fix __dp4a call
Browse files (browse the repository at this point in the history)
  • Loading branch information
aacostadiaz committed Nov 29, 2024
1 parent 03e7fad commit 6fa4521
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions include/cutlass/numeric_conversion.h
Original file line number Diff line number Diff line change
@@ -3664,7 +3664,11 @@ struct NumericArrayConverter<float, int8_t, N, Round> {

CUTLASS_PRAGMA_UNROLL
for (int ii = 0; ii < PackedResultType::kElements; ++ii) {
#if defined(__CUDA_ARCH__)
t[ii] = __dp4a(x, mask[ii], 0);
#else
t[ii] = x * mask[ii];
#endif
r[ii] = static_cast<float>(t[ii]);
}
#endif
Expand Down

0 comments on commit 6fa4521

Please sign in to comment.