Skip to content

Commit

Permalink
fixing device only code that get called in the host side
Browse files Browse the repository at this point in the history
  • Loading branch information
mehdi-goli committed Apr 11, 2024
1 parent aa9c364 commit 2be6a60
Showing 1 changed file with 94 additions and 45 deletions.
139 changes: 94 additions & 45 deletions include/cutlass/cutlass.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,205 +115,254 @@ static const int NumThreadsPerQuadPair = NumThreadsPerQuad * 2;
////////////////////////////////////////////////////////////////////////////////////////////////////

CUTLASS_HOST_DEVICE uint ThreadIdxX() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return threadIdx.x;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::local_id::x();
#else
return threadIdx.x;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint ThreadIdxY() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return threadIdx.y;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::local_id::y();
#else
return threadIdx.y;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint ThreadIdxZ() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return threadIdx.z;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::local_id::z();
#else
return threadIdx.z;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockIdxX() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return blockIdx.x;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_id::x();
#else
return blockIdx.x;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockIdxY() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return blockIdx.y;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_id::y();
#else
return blockIdx.y;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockIdxZ() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return blockIdx.z;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_id::z();
#else
return blockIdx.z;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockDimX() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return blockDim.x;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_range::x();
#else
return blockDim.x;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockDimY() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return blockDim.y;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_range::y();
#else
return blockDim.y;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint BlockDimZ() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return blockDim.z;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::work_group_range::z();
#else
return blockDim.z;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint GridDimX() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return gridDim.x;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::global_range::x();
#else
return gridDim.x;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint GridDimY() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return gridDim.y;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::global_range::y();
#else
return gridDim.y;
return 0;
#endif
}

CUTLASS_HOST_DEVICE uint GridDimZ() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return gridDim.z;
#elif defined(__SYCL_Device_ONLY__)
return syclcompat::global_range::z();
#else
return gridDim.z;
return 0;
#endif
}

// syncthreads

CUTLASS_DEVICE void syncthreads() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
__syncthreads();
#elif defined(__SYCL_Device_ONLY__)
syclcompat::wg_barrier();
#else
__syncthreads();
#endif
}

CUTLASS_DEVICE int syncthreads_and(int cond) {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return __syncthreads_and(cond);
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
return __syncthreads_and(cond);
return 0;
#endif
}

CUTLASS_DEVICE void syncwarp() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
__syncwarp();
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
__syncwarp();
#endif
}

CUTLASS_DEVICE void threadfence() {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
__threadfence();
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
__threadfence();
#endif
}

// byte perm

CUTLASS_DEVICE
uint byte_perm(uint x, uint y, uint s) {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return __byte_perm(x, y, s);
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
#else
return __byte_perm(x, y, s);
return 0;
#endif
}

// shfl

CUTLASS_DEVICE
uint shfl_up_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return __shfl_up_sync(mask, var, delta, width);
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
#else
return __shfl_up_sync(mask, var, delta, width);
return 0;
#endif
}

CUTLASS_DEVICE
uint shfl_down_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return __shfl_down_sync(mask, var, delta, width);
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
#else
return __shfl_down_sync(mask, var, delta, width);
return 0;
#endif
}

CUTLASS_DEVICE
uint shfl_sync(const unsigned mask, const uint var, const int delta, const int width = NumThreadsPerWarp) {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return __shfl_sync(mask, var, delta, width);
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return 0;
#else
return __shfl_sync(mask, var, delta, width);
return 0;
#endif
}

// math

template <typename T>
CUTLASS_DEVICE T hfma2(const T a, const T b, const T c) {
#if defined(CUTLASS_ENABLE_SYCL)
#if defined(__CUDA_ARCH__)
return hfma2(a, b, c);
#elif defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
return T(0);
#else
return hfma2(a, b, c);
return T(0);
#endif
}

// atomic

#if defined(CUTLASS_ENABLE_SYCL)
CUTLASS_DEVICE int atomicAdd(int *address, int val) {
#if defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
return 0;
#endif
}

CUTLASS_DEVICE int atomicCAS(int *address, int compare, int val) {
#if defined(__SYCL_Device_ONLY__)
// TODO: Add SYCL equivalent function
assert(false);
#else
return 0;
#endif
}
#endif

Expand Down

0 comments on commit 2be6a60

Please sign in to comment.