diff --git a/fbgemm_gpu/include/fbgemm_gpu/utils/tensor_utils.h b/fbgemm_gpu/include/fbgemm_gpu/utils/tensor_utils.h
index 5f15006638..e21ee9c42d 100644
--- a/fbgemm_gpu/include/fbgemm_gpu/utils/tensor_utils.h
+++ b/fbgemm_gpu/include/fbgemm_gpu/utils/tensor_utils.h
@@ -46,6 +46,20 @@ inline std::string torch_tensor_device_name(
   }
 }
 
+inline const std::string torch_tensor_shape_str(const at::Tensor& ten) {
+  std::stringstream ss;
+  const auto sizes = ten.sizes();
+  ss << "[";
+  for (auto i = 0; i < sizes.size(); ++i) {
+    ss << sizes[i];
+    if (i != sizes.size() - 1) {
+      ss << ", ";
+    }
+  }
+  ss << "]";
+  return ss.str();
+}
+
 inline bool torch_tensor_on_same_device_check(
     const at::Tensor& ten1,
     const at::Tensor& ten2) {
diff --git a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops.cu b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops.cu
index a0d92c4538..59991ccc7e 100644
--- a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops.cu
+++ b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops.cu
@@ -69,6 +69,12 @@ Tensor permute_pooled_embs_gpu_impl(
     return pooled_embs;
   }
 
+  TORCH_CHECK(
+      pooled_embs.dim() == 2,
+      "pooled_embs must be a 2-D tensor of size [B_local][Sum_T_global(D)], "
+      "current shape is: ",
+      torch_tensor_shape_str(pooled_embs));
+
   // inv_permute_list is not being used so it's not checked here.
   TENSORS_ON_SAME_CUDA_GPU_IF_NOT_OPTIONAL(
       pooled_embs, offset_dim_list, permute_list, inv_offset_dim_list);
diff --git a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_cpu.cpp b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_cpu.cpp
index 0c4d246042..37665e9514 100644
--- a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_cpu.cpp
+++ b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_cpu.cpp
@@ -10,6 +10,7 @@
 #include
 #include "fbgemm_gpu/permute_pooled_embedding_ops.h"
 #include "fbgemm_gpu/utils/dispatch_macros.h"
+#include "fbgemm_gpu/utils/tensor_utils.h"
 
 using Tensor = at::Tensor;
 
@@ -25,12 +26,19 @@ Tensor permute_pooled_embs_cpu_impl(
   if (pooled_embs.numel() == 0) {
     return pooled_embs;
   }
+
+  TORCH_CHECK(
+      pooled_embs.dim() == 2,
+      "pooled_embs must be a 2-D tensor of size [B_local][Sum_T_global(D)], "
+      "current shape is: ",
+      torch_tensor_shape_str(pooled_embs));
   TORCH_CHECK(
       offset_dim_list.scalar_type() == at::ScalarType::Long,
       "offset_dim_list needs to have long/int64 type")
   TORCH_CHECK(
       permute_list.scalar_type() == at::ScalarType::Long,
       "permute_list needs to have long/int64 type")
+
   auto permute = permute_list.data_ptr<int64_t>();
   const auto n = permute_list.numel();
   const auto dims_size = allow_duplicates ? offset_dim_list.numel() : n;
diff --git a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split.cu b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split.cu
index 10a4b59b94..f40fb2629a 100644
--- a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split.cu
+++ b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split.cu
@@ -68,6 +68,13 @@ Tensor permute_pooled_embs_split_gpu_impl(
   if (pooled_embs.numel() == 0) {
     return pooled_embs;
   }
+
+  TORCH_CHECK(
+      pooled_embs.dim() == 2,
+      "pooled_embs must be a 2-D tensor of size [B_local][Sum_T_global(D)], "
+      "current shape is: ",
+      torch_tensor_shape_str(pooled_embs));
+
   // inv_permute_list is not being used so it's not checked here.
   TENSORS_ON_SAME_CUDA_GPU_IF_NOT_OPTIONAL(
       pooled_embs, offset_dim_list, permute_list, inv_offset_dim_list);
diff --git a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_cpu.cpp b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_cpu.cpp
index ccc7066c52..45bfebb911 100644
--- a/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_cpu.cpp
+++ b/fbgemm_gpu/src/permute_pooled_embedding_ops/permute_pooled_embedding_ops_split_cpu.cpp
@@ -15,6 +15,7 @@
 #include "fbgemm_gpu/permute_pooled_embedding_ops_split.h"
 #include "fbgemm_gpu/permute_pooled_embs_function_split.h"
 #include "fbgemm_gpu/utils/ops_utils.h"
+#include "fbgemm_gpu/utils/tensor_utils.h"
 
 using Tensor = at::Tensor;
 
@@ -34,12 +35,19 @@ Tensor permute_pooled_embs_split_cpu_impl(
   if (pooled_embs.numel() == 0) {
     return pooled_embs;
   }
+
+  TORCH_CHECK(
+      pooled_embs.dim() == 2,
+      "pooled_embs must be a 2-D tensor of size [B_local][Sum_T_global(D)], "
+      "current shape is: ",
+      torch_tensor_shape_str(pooled_embs));
   TORCH_CHECK(
       offset_dim_list.scalar_type() == at::ScalarType::Long,
       "offset_dim_list needs to have long/int64 type")
   TORCH_CHECK(
       permute_list.scalar_type() == at::ScalarType::Long,
       "permute_list needs to have long/int64 type")
+
   auto permute = permute_list.data_ptr<int64_t>();
   const auto n = permute_list.numel();
   const auto dims_size = allow_duplicates ? offset_dim_list.numel() : n;
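
Note on the new helper: a minimal usage sketch (not part of the patch), assuming an ATen build and that torch_tensor_shape_str is visible at global scope as declared in tensor_utils.h above:

    #include <iostream>
    #include <ATen/ATen.h>
    #include "fbgemm_gpu/utils/tensor_utils.h"

    int main() {
      // Shapes print bracketed and comma-separated: a tensor of size
      // {8, 128} yields "[8, 128]", the string embedded in the
      // TORCH_CHECK messages added by this patch.
      const auto t = at::zeros({8, 128});
      std::cout << torch_tensor_shape_str(t) << std::endl;
      return 0;
    }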
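Caller-side implication of the new checks: a non-2-D pooled_embs now fails fast with a readable shape in the error message instead of permuting over a misinterpreted layout. A sketch (hypothetical helper, not part of the patch) of flattening a [B][T][D] tensor into the expected [B_local][Sum_T_global(D)] layout before invoking the op:

    #include <ATen/ATen.h>

    // Flatten [B][T][D] pooled embeddings to [B][T*D] so the 2-D check passes.
    at::Tensor flatten_pooled_embs(const at::Tensor& pooled_embs_3d) {
      return pooled_embs_3d.reshape({pooled_embs_3d.size(0), -1});
    }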