Skip to content

Commit

Permalink
Updates and fixes to tensor_accessor.h (2I/N) (#3571)
Browse files Browse the repository at this point in the history
Summary:

X-link: facebookresearch/FBGEMM#656

- Fix `TensorAccessorBase` constructor to work with empty tensors, which are used in FBGEMM code

- Add better logging for errors

Reviewed By: basilwong

Differential Revision: D68048640
  • Loading branch information
q10 authored and facebook-github-bot committed Jan 22, 2025
1 parent 6a684cb commit 9034240
Show file tree
Hide file tree
Showing 7 changed files with 393 additions and 94 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,16 @@ for (const auto t : c10::irange(num_tables)) {
int feature_begin = table_to_feature_offset[t];
int64_t hash_size = get_hash_size(feature_begin);

#ifdef FBGEMM_GPU_MEMCHECK
const auto func_name = "::internal::csr2csc";
#endif
using weight_t = at::acc_type<scalar_t, true>;
::internal::csr2csc(
cscs[t],
B,
offsets.accessor<int64_t, 1>(),
indices.accessor<int64_t, 1>(),
indice_weights.defined()
? indice_weights.accessor<at::acc_type<scalar_t, true>, 1>()
: at::TensorAccessor<at::acc_type<scalar_t, true>, 1>(nullptr, nullptr, nullptr),
MAKE_TA_WITH_NAME(func_name, offsets, int64_t, 1),
MAKE_TA_WITH_NAME(func_name, indices, int64_t, 1),
MAKE_TA_WITH_NAME(func_name, indice_weights, weight_t, 1),
pooling_mode,
table_to_feature_offset + t,
hash_size);
Expand Down
37 changes: 19 additions & 18 deletions fbgemm_gpu/codegen/training/forward/embedding_forward_split_cpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "fbgemm_gpu/utils/cpu_utils.h"
#include "fbgemm_gpu/utils/dispatch_macros.h"
#include "fbgemm_gpu/utils/ops_utils.h"
#include "fbgemm_gpu/utils/tensor_accessor.h"
#ifdef FBCODE_CAFFE2
#include <libdivide.h>
#else
Expand Down Expand Up @@ -384,9 +385,9 @@ template <typename index_t, typename scalar_t, bool IS_VALUE_PAIR>
void csr2csc_template_(
HyperCompressedSparseColumn& csc,
int B,
const at::TensorAccessor<index_t, 1>& csr_offsets,
const at::TensorAccessor<index_t, 1>& csr_indices,
const at::TensorAccessor<scalar_t, 1>& csr_weights,
const pta::TensorAccessor<index_t, 1>& csr_offsets,
const pta::TensorAccessor<index_t, 1>& csr_indices,
const pta::TensorAccessor<scalar_t, 1>& csr_weights,
int64_t pooling_mode,
const int* table_to_feature_offset,
int64_t num_embeddings) {
Expand Down Expand Up @@ -585,9 +586,9 @@ void csr2csc_template_(
template void csr2csc_template_<index_t, scalar_t, is_value_pair>( \
HyperCompressedSparseColumn & csc, \
int B, \
const at::TensorAccessor<index_t, 1>& csr_offsets, \
const at::TensorAccessor<index_t, 1>& csr_indices, \
const at::TensorAccessor<scalar_t, 1>& csr_weights, \
const pta::TensorAccessor<index_t, 1>& csr_offsets, \
const pta::TensorAccessor<index_t, 1>& csr_indices, \
const pta::TensorAccessor<scalar_t, 1>& csr_weights, \
int64_t pooling_mode, \
const int* table_to_feature_offset, \
int64_t num_embeddings);
Expand All @@ -613,9 +614,9 @@ template <typename index_t, typename scalar_t>
void csr2csc(
HyperCompressedSparseColumn& csc,
int B,
const at::TensorAccessor<index_t, 1>& csr_offsets,
const at::TensorAccessor<index_t, 1>& csr_indices,
const at::TensorAccessor<scalar_t, 1>& csr_weights,
const pta::TensorAccessor<index_t, 1>& csr_offsets,
const pta::TensorAccessor<index_t, 1>& csr_indices,
const pta::TensorAccessor<scalar_t, 1>& csr_weights,
int64_t pooling_mode,
const int* table_to_feature_offset,
int64_t num_embeddings) {
Expand Down Expand Up @@ -644,15 +645,15 @@ void csr2csc(
}
}

#define INSTANTIATE_CSR2CSC_0(index_t, scalar_t) \
template void csr2csc<index_t, scalar_t>( \
HyperCompressedSparseColumn & csc, \
int B, \
const at::TensorAccessor<index_t, 1>& csr_offsets, \
const at::TensorAccessor<index_t, 1>& csr_indices, \
const at::TensorAccessor<scalar_t, 1>& csr_weights, \
int64_t pooling_mode, \
const int* table_to_feature_offset, \
// Emits an explicit template instantiation of csr2csc for the given
// (index_t, scalar_t) pair. Parameters mirror the csr2csc declaration;
// accessors are pta::TensorAccessor, the memcheck-aware accessor type
// this commit migrates to (presumably an alias of at::TensorAccessor
// when FBGEMM_GPU_MEMCHECK is off — confirm in tensor_accessor.h).
// NOTE: no comments may appear inside the backslash-continued lines below.
#define INSTANTIATE_CSR2CSC_0(index_t, scalar_t) \
template void csr2csc<index_t, scalar_t>( \
HyperCompressedSparseColumn & csc, \
int B, \
const pta::TensorAccessor<index_t, 1>& csr_offsets, \
const pta::TensorAccessor<index_t, 1>& csr_indices, \
const pta::TensorAccessor<scalar_t, 1>& csr_weights, \
int64_t pooling_mode, \
const int* table_to_feature_offset, \
int64_t num_embeddings);

#define INSTANTIATE_CSR2CSC_1(index_t) \
Expand Down
7 changes: 4 additions & 3 deletions fbgemm_gpu/include/fbgemm_gpu/embedding_forward_split_cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <ATen/ATen.h>
#include <ATen/Parallel.h>
#include "fbgemm/Utils.h"
#include "fbgemm_gpu/utils/tensor_accessor.h"

at::Tensor split_embedding_codegen_forward_cpu(
at::Tensor weights,
Expand Down Expand Up @@ -120,9 +121,9 @@ template <typename index_t, typename scalar_t>
void csr2csc(
HyperCompressedSparseColumn& csc,
int B,
const at::TensorAccessor<index_t, 1>& csr_offsets,
const at::TensorAccessor<index_t, 1>& csr_indices,
const at::TensorAccessor<scalar_t, 1>& csr_weights,
const pta::TensorAccessor<index_t, 1>& csr_offsets,
const pta::TensorAccessor<index_t, 1>& csr_indices,
const pta::TensorAccessor<scalar_t, 1>& csr_weights,
int64_t pooling_mode,
const int* table_to_feature_offset,
int64_t num_embeddings);
Expand Down
Loading

0 comments on commit 9034240

Please sign in to comment.