From ae6989a3e57e7f42200c786902b1e25bbd9585f9 Mon Sep 17 00:00:00 2001 From: Atharva Dubey Date: Tue, 16 Apr 2024 15:45:09 +0100 Subject: [PATCH] Fix typo in Macro (#28) Fix typo in Macro Co-authored-by: Mehdi Goli * Cosmetic --------- Co-authored-by: Mehdi Goli * Applying the comments --------- Co-authored-by: aacostadiaz * Revert "Updating README-sycl.md to capture the 3.5 modifications (#16)" (#17) This reverts commit a726bd30735fd50956edf9c8cfc59bb6ac398b02. * fix typo in macro --------- Co-authored-by: Mehdi Goli Co-authored-by: aacostadiaz --- include/cutlass/cutlass.h | 46 +++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/include/cutlass/cutlass.h b/include/cutlass/cutlass.h index 2bd5b6d89f..32851afdb5 100644 --- a/include/cutlass/cutlass.h +++ b/include/cutlass/cutlass.h @@ -117,7 +117,7 @@ static const int NumThreadsPerQuadPair = NumThreadsPerQuad * 2; CUTLASS_HOST_DEVICE uint ThreadIdxX() { #if defined(__CUDA_ARCH__) return threadIdx.x; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::local_id::x(); #else return 0; @@ -127,7 +127,7 @@ CUTLASS_HOST_DEVICE uint ThreadIdxX() { CUTLASS_HOST_DEVICE uint ThreadIdxY() { #if defined(__CUDA_ARCH__) return threadIdx.y; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::local_id::y(); #else return 0; @@ -137,7 +137,7 @@ CUTLASS_HOST_DEVICE uint ThreadIdxY() { CUTLASS_HOST_DEVICE uint ThreadIdxZ() { #if defined(__CUDA_ARCH__) return threadIdx.z; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::local_id::z(); #else return 0; @@ -147,7 +147,7 @@ CUTLASS_HOST_DEVICE uint ThreadIdxZ() { CUTLASS_HOST_DEVICE uint BlockIdxX() { #if defined(__CUDA_ARCH__) return blockIdx.x; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::work_group_id::x(); #else return 0; @@ -157,7 +157,7 @@ CUTLASS_HOST_DEVICE uint BlockIdxX() { CUTLASS_HOST_DEVICE uint BlockIdxY() { #if defined(__CUDA_ARCH__) return blockIdx.y; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::work_group_id::y(); #else return 0; @@ -167,7 +167,7 @@ CUTLASS_HOST_DEVICE uint BlockIdxY() { CUTLASS_HOST_DEVICE uint BlockIdxZ() { #if defined(__CUDA_ARCH__) return blockIdx.z; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::work_group_id::z(); #else return 0; @@ -177,7 +177,7 @@ CUTLASS_HOST_DEVICE uint BlockIdxZ() { CUTLASS_HOST_DEVICE uint BlockDimX() { #if defined(__CUDA_ARCH__) return blockDim.x; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::work_group_range::x(); #else return 0; @@ -187,7 +187,7 @@ CUTLASS_HOST_DEVICE uint BlockDimX() { CUTLASS_HOST_DEVICE uint BlockDimY() { #if defined(__CUDA_ARCH__) return blockDim.y; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::work_group_range::y(); #else return 0; @@ -197,7 +197,7 @@ CUTLASS_HOST_DEVICE uint BlockDimY() { CUTLASS_HOST_DEVICE uint BlockDimZ() { #if defined(__CUDA_ARCH__) return blockDim.z; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::work_group_range::z(); #else return 0; @@ -207,7 +207,7 @@ CUTLASS_HOST_DEVICE uint BlockDimZ() { CUTLASS_HOST_DEVICE uint GridDimX() { #if defined(__CUDA_ARCH__) return gridDim.x; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::global_range::x(); #else return 0; @@ -217,7 +217,7 @@ CUTLASS_HOST_DEVICE uint GridDimX() { CUTLASS_HOST_DEVICE uint GridDimY() { #if defined(__CUDA_ARCH__) return gridDim.y; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::global_range::y(); #else return 0; @@ -227,7 +227,7 @@ CUTLASS_HOST_DEVICE uint GridDimY() { CUTLASS_HOST_DEVICE uint GridDimZ() { #if defined(__CUDA_ARCH__) return gridDim.z; -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) return syclcompat::global_range::z(); #else return 0; @@ -239,7 +239,7 @@ CUTLASS_HOST_DEVICE uint GridDimZ() { CUTLASS_DEVICE void syncthreads() { #if defined(__CUDA_ARCH__) __syncthreads(); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) syclcompat::wg_barrier(); #endif } @@ -247,7 +247,7 @@ CUTLASS_DEVICE void syncthreads() { CUTLASS_DEVICE int syncthreads_and(int cond) { #if defined(__CUDA_ARCH__) return __syncthreads_and(cond); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); #else @@ -258,7 +258,7 @@ CUTLASS_DEVICE int syncthreads_and(int cond) { CUTLASS_DEVICE void syncwarp() { #if defined(__CUDA_ARCH__) __syncwarp(); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); #endif @@ -267,7 +267,7 @@ CUTLASS_DEVICE void syncwarp() { CUTLASS_DEVICE void threadfence() { #if defined(__CUDA_ARCH__) __threadfence(); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); #endif @@ -279,7 +279,7 @@ CUTLASS_DEVICE uint byte_perm(uint x, uint y, uint s) { #if defined(__CUDA_ARCH__) return __byte_perm(x, y, s); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); return 0; @@ -294,7 +294,7 @@ CUTLASS_DEVICE uint shfl_up_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) { #if defined(__CUDA_ARCH__) return __shfl_up_sync(mask, var, delta, width); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); return 0; @@ -307,7 +307,7 @@ CUTLASS_DEVICE uint shfl_down_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) { #if defined(__CUDA_ARCH__) return __shfl_down_sync(mask, var, delta, width); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); return 0; @@ -320,7 +320,7 @@ CUTLASS_DEVICE uint shfl_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) { #if defined(__CUDA_ARCH__) return __shfl_sync(mask, var, delta, width); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); return 0; @@ -335,7 +335,7 @@ template CUTLASS_DEVICE T hfma2(const T a, const T b, const T c) { #if defined(__CUDA_ARCH__) return hfma2(a, b, c); -#elif defined(__SYCL_Device_ONLY__) +#elif defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); return T(0); @@ -348,7 +348,7 @@ CUTLASS_DEVICE T hfma2(const T a, const T b, const T c) { #if defined(CUTLASS_ENABLE_SYCL) CUTLASS_DEVICE int atomicAdd(int *address, int val) { -#if defined(__SYCL_Device_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); #else @@ -357,7 +357,7 @@ CUTLASS_DEVICE int atomicAdd(int *address, int val) { } CUTLASS_DEVICE int atomicCAS(int *address, int compare, int val) { -#if defined(__SYCL_Device_ONLY__) +#if defined(__SYCL_DEVICE_ONLY__) // TODO: Add SYCL equivalent function assert(false); #else