From 6fa452174affe3e0c741679e6120bdce3daf9802 Mon Sep 17 00:00:00 2001
From: Alejandro Acosta
Date: Fri, 29 Nov 2024 14:08:27 +0000
Subject: [PATCH] Fix __dp4a call

---
 include/cutlass/numeric_conversion.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/include/cutlass/numeric_conversion.h b/include/cutlass/numeric_conversion.h
index 664945d35..5b9fcf57b 100644
--- a/include/cutlass/numeric_conversion.h
+++ b/include/cutlass/numeric_conversion.h
@@ -3664,7 +3664,11 @@ struct NumericArrayConverter {
 
     CUTLASS_PRAGMA_UNROLL
     for (int ii = 0; ii < PackedResultType::kElements; ++ii) {
+#if defined(__CUDA_ARCH__)
       t[ii] = __dp4a(x, mask[ii], 0);
+#else
+      t[ii] = x * mask[ii];
+#endif
       r[ii] = static_cast(t[ii]);
     }
 #endif