From ab7cb3ace41d24aa6794739528eb714c31402f9a Mon Sep 17 00:00:00 2001 From: Arash Andishgar Date: Wed, 17 Sep 2025 16:25:35 +0330 Subject: [PATCH 1/7] resolve negative zero --- cpp/src/arrow/sparse_tensor_test.cc | 148 ++++++++++++++++++++++ cpp/src/arrow/tensor.cc | 12 +- cpp/src/arrow/tensor.h | 7 + cpp/src/arrow/tensor/converter.h | 53 ++++++++ cpp/src/arrow/tensor/converter_internal.h | 88 ------------- cpp/src/arrow/tensor/coo_converter.cc | 131 ++++++++++--------- cpp/src/arrow/tensor/csf_converter.cc | 85 ++++++------- cpp/src/arrow/tensor/csx_converter.cc | 83 ++++++------ 8 files changed, 353 insertions(+), 254 deletions(-) delete mode 100644 cpp/src/arrow/tensor/converter_internal.h diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index c9c28a11b1b39..406c72d0e2ec4 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -413,6 +413,71 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { ASSERT_TRUE(tensor.Equals(*dense_tensor)); } +TEST_F(TestSparseCOOTensor, CreationFromVectorTensorWithNegZero) { + std::vector data{ + -0.0, -0.0, 0.0, -0.0, 4.0, -0.0, -0.0, 0.0, -0.0, -1.0, -0.0, -0.0, + }; + std::vector shape = {12}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, + SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); + AssertCOOIndex(si->indices(), 0, {4}); + AssertCOOIndex(si->indices(), 1, {9}); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +} + +TEST_F(TestSparseCOOTensor, CreationFromContiguousDenseTensorWithNegZero) { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, + -0.0, 4.0, -0.0, + -0.0, 0.0, -0.0, + -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, + SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); + AssertCOOIndex(si->indices(), 0, {1, 1}); + AssertCOOIndex(si->indices(), 1, {3, 0}); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +} + +TEST_F(TestSparseCOOTensor, CreationFromNonContiguousDenseTensorWithNegZero) { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, 1.0, 2.0, + -0.0, 4.0, -0.0, 0.0, -0.0, + -0.0, 0.0, -0.0, 3.0, 4.0, + -1.0, -0.0, -0.0, 0.0, 0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, + Tensor::Make(float32(), buffer, shape, {20, 4})); + ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, + SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); + AssertCOOIndex(si->indices(), 0, {1, 1}); + AssertCOOIndex(si->indices(), 1, {3, 0}); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +} + template class TestSparseCOOTensorEquality : public TestSparseTensorBase { public: @@ -869,6 +934,33 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { ASSERT_TRUE(tensor.Equals(*dense_tensor)); } +TEST_F(TestSparseCSRMatrix, CreationFromTensorWithNegZero) { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, + -0.0, 4.0, -0.0, + -0.0, 0.0, -0.0, + -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_csr_tensor, + SparseCSRMatrix::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_csr_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_csr_tensor->sparse_index()); + const auto* indptr = si->indptr()->data()->data_as(); + const auto* indices = si->indices()->data()->data_as(); + ASSERT_EQ(indptr[2], 1); + ASSERT_EQ(indptr[4], 2); + ASSERT_EQ(indices[0], 1); + ASSERT_EQ(indices[1], 0); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csr_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +} + template class TestSparseCSRMatrixEquality : public TestSparseTensorBase { public: @@ -1204,6 +1296,33 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) { ASSERT_TRUE(tensor.Equals(*dense_tensor)); } +TEST_F(TestSparseCSCMatrix, CreationFromTensorWithNegZero) { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, + -0.0, 4.0, -0.0, + -0.0, 0.0, -0.0, + -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_csc_tensor, + SparseCSCMatrix::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_csc_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_csc_tensor->sparse_index()); + const auto* indptr = si->indptr()->data()->data_as(); + const auto* indices = si->indices()->data()->data_as(); + ASSERT_EQ(indptr[1], 1); + ASSERT_EQ(indptr[2], 2); + ASSERT_EQ(indices[0], 3); + ASSERT_EQ(indices[1], 1); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csc_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +} + template class TestSparseCSCMatrixEquality : public TestSparseTensorBase { public: @@ -1479,6 +1598,35 @@ TEST_F(TestSparseCSFTensor, CreationFromZeroTensor) { ASSERT_TRUE(t->Equals(*t_zero)); } +TEST_F(TestSparseCSFTensor, CreationFromTensorWithNegZero) { + // clang-format off + std::vector data{ + -0.0, -0.0, -0.0, -0.0, + 4.0, -0.0, -0.0, -0.0, + 0.0, -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {3, 4}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_csf_tensor, + SparseCSFTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_csf_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_csf_tensor->sparse_index()); + auto indptr = si->indptr()[0]->data()->data_as(); + auto row_indices = si->indices()[0]->data()->data_as(); + auto column_indices = si->indices()[1]->data()->data_as(); + ASSERT_EQ(indptr[1], 1); + ASSERT_EQ(indptr[2], 2); + EXPECT_EQ(row_indices[0], 1); + EXPECT_EQ(row_indices[1], 2); + EXPECT_EQ(column_indices[0], 0); + EXPECT_EQ(column_indices[1], 1); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csf_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +} + template class TestSparseCSFTensorForIndexValueType : public TestSparseCSFTensorBase { diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 8cdf7f82d2642..d3dd96b81095f 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -35,6 +35,7 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/int_util_overflow.h" #include "arrow/util/logging_internal.h" +#include "arrow/util/macros.h" #include "arrow/util/unreachable.h" #include "arrow/visit_type_inline.h" @@ -85,7 +86,7 @@ Status ComputeColumnMajorStrides(const FixedWidthType& type, if (!shape.empty() && shape.back() > 0) { total = byte_width; for (size_t i = 0; i < ndim - 1; ++i) { - if (internal::MultiplyWithOverflow(total, shape[i], &total)) { + if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow(total, shape[i], &total))) { return Status::Invalid( "Column-major strides computed from shape would not fit in 64-bit " "integer"); @@ -485,13 +486,14 @@ namespace { template int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& tensor) { using c_type = typename TYPE::c_type; - const c_type zero = c_type(0); int64_t nnz = 0; if (dim_index == tensor.ndim() - 1) { for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) { const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index]; - auto& elem = *reinterpret_cast(ptr); - if (elem != zero) ++nnz; + if (auto& elem = *reinterpret_cast(ptr); + internal::is_not_zero(elem)) { + ++nnz; + } } return nnz; } @@ -507,7 +509,7 @@ int64_t ContiguousTensorCountNonZero(const Tensor& tensor) { using c_type = typename TYPE::c_type; auto* data = reinterpret_cast(tensor.raw_data()); return std::count_if(data, data + tensor.size(), - [](const c_type& x) { return x != 0; }); + [](const c_type& x) { return internal::is_not_zero(x); }); } template diff --git a/cpp/src/arrow/tensor.h b/cpp/src/arrow/tensor.h index beb62a11bdce9..7af5cefbcbf18 100644 --- a/cpp/src/arrow/tensor.h +++ b/cpp/src/arrow/tensor.h @@ -55,6 +55,13 @@ constexpr bool is_tensor_supported(Type::type type_id) { namespace internal { +// TODO(GH-47578): Enable HalfFloatType +template +inline bool is_not_zero(typename ValueDataType::c_type value) { + typename ValueDataType::c_type zero = 0; + return value != zero; +} + ARROW_EXPORT Status ComputeRowMajorStrides(const FixedWidthType& type, const std::vector& shape, diff --git a/cpp/src/arrow/tensor/converter.h b/cpp/src/arrow/tensor/converter.h index 408ab22305fff..fd23f83c8e8f0 100644 --- a/cpp/src/arrow/tensor/converter.h +++ b/cpp/src/arrow/tensor/converter.h @@ -20,6 +20,9 @@ #include "arrow/sparse_tensor.h" // IWYU pragma: export #include +#include + +#include "arrow/visit_type_inline.h" namespace arrow { namespace internal { @@ -63,5 +66,55 @@ Result> MakeTensorFromSparseCSCMatrix( Result> MakeTensorFromSparseCSFTensor( MemoryPool* pool, const SparseCSFTensor* sparse_tensor); +template +struct ConverterVisitor { + explicit ConverterVisitor(Convertor& converter) : converter(converter) {} + template + Status operator()(const ValueType& value, const IndexType& index_type) { + return converter.Convert(value, index_type); + } + + Convertor& converter; +}; + +struct ValueTypeVisitor { + template + enable_if_number Visit(const ValueType& value_type, + const IndexType& index_type, + Function&& function) { + return function(value_type, index_type); + } + + template + Status Visit(const DataType& value_type, const IndexType&, Function&&) { + return Status::Invalid("Invalid value type and the type is ", value_type.name()); + } +}; + +struct IndexAndValueTypeVisitor { + template + enable_if_integer Visit(const IndexType& index_type, + const std::shared_ptr& value_type, + Function&& function) { + ValueTypeVisitor visitor; + return VisitTypeInline(*value_type, &visitor, index_type, + std::forward(function)); + } + + template + Status Visit(const DataType& type, const std::shared_ptr&, Function&&) { + return Status::Invalid("Invalid index type and the type is ", type.name()); + } +}; + +template +Status VisitValueAndIndexType(const std::shared_ptr& value_type, + const std::shared_ptr& index_type, + Function&& function) { + IndexAndValueTypeVisitor visitor; + return VisitTypeInline(*index_type, &visitor, value_type, + std::forward(function)); +} + } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/tensor/converter_internal.h b/cpp/src/arrow/tensor/converter_internal.h deleted file mode 100644 index 3a87feaf4b346..0000000000000 --- a/cpp/src/arrow/tensor/converter_internal.h +++ /dev/null @@ -1,88 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include "arrow/tensor/converter.h" - -#define DISPATCH(ACTION, index_elsize, value_elsize, ...) \ - switch (index_elsize) { \ - case 1: \ - switch (value_elsize) { \ - case 1: \ - ACTION(uint8_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(uint8_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(uint8_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(uint8_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - case 2: \ - switch (value_elsize) { \ - case 1: \ - ACTION(uint16_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(uint16_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(uint16_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(uint16_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - case 4: \ - switch (value_elsize) { \ - case 1: \ - ACTION(uint32_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(uint32_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(uint32_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(uint32_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - case 8: \ - switch (value_elsize) { \ - case 1: \ - ACTION(int64_t, uint8_t, __VA_ARGS__); \ - break; \ - case 2: \ - ACTION(int64_t, uint16_t, __VA_ARGS__); \ - break; \ - case 4: \ - ACTION(int64_t, uint32_t, __VA_ARGS__); \ - break; \ - case 8: \ - ACTION(int64_t, uint64_t, __VA_ARGS__); \ - break; \ - } \ - break; \ - } diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index 7e29b668f53ec..57e696a0b18b5 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/tensor/converter_internal.h" +#include "arrow/tensor/converter.h" #include #include @@ -25,26 +25,27 @@ #include "arrow/buffer.h" #include "arrow/status.h" +#include "arrow/tensor.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/macros.h" -#include "arrow/visit_type_inline.h" namespace arrow { class MemoryPool; namespace internal { + namespace { -template -inline void IncrementRowMajorIndex(std::vector& coord, +template +inline void IncrementRowMajorIndex(std::vector& coord, const std::vector& shape) { const int64_t ndim = shape.size(); ++coord[ndim - 1]; - if (coord[ndim - 1] == shape[ndim - 1]) { + if (static_cast(coord[ndim - 1]) == shape[ndim - 1]) { int64_t d = ndim - 1; - while (d > 0 && coord[d] == shape[d]) { + while (d > 0 && static_cast(coord[d]) == shape[d]) { coord[d] = 0; ++coord[d - 1]; --d; @@ -52,19 +53,20 @@ inline void IncrementRowMajorIndex(std::vector& coord, } } -template -void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices, - c_value_type* values, const int64_t size) { +template +void ConvertContinuousTensor(const Tensor& tensor, typename IndexType::c_type* indices, + typename ValueType::c_type* values) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + const auto ndim = tensor.ndim(); const auto& shape = tensor.shape(); - const c_value_type* tensor_data = - reinterpret_cast(tensor.raw_data()); + const auto* tensor_data = tensor.data()->data_as(); - constexpr c_value_type zero = 0; - std::vector coord(ndim, 0); + std::vector coord(ndim, 0); for (int64_t n = tensor.size(); n > 0; --n) { - const c_value_type x = *tensor_data; - if (ARROW_PREDICT_FALSE(x != zero)) { + auto x = *tensor_data; + if (is_not_zero(x)) { std::copy(coord.begin(), coord.end(), indices); *values++ = x; indices += ndim; @@ -75,13 +77,25 @@ void ConvertRowMajorTensor(const Tensor& tensor, c_index_type* indices, } } -template -void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices, - c_value_type* out_values, const int64_t size) { +template +void ConvertRowMajorTensor(const Tensor& tensor, typename IndexType::c_type* out_indices, + typename ValueType::c_type* out_values) { + ConvertContinuousTensor(tensor, out_indices, out_values); +} + +// TODO(GH-47580): Correct column-major tensor conversion +template +void ConvertColumnMajorTensor(const Tensor& tensor, + typename IndexType::c_type* out_indices, + typename ValueType::c_type* out_values, + const int64_t size) { + using ValueCtype = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + const auto ndim = tensor.ndim(); - std::vector indices(ndim * size); - std::vector values(size); - ConvertRowMajorTensor(tensor, indices.data(), values.data(), size); + std::vector indices(ndim * size); + std::vector values(size); + ConvertContinuousTensor(tensor, indices.data(), values.data()); // transpose indices for (int64_t i = 0; i < size; ++i) { @@ -116,23 +130,24 @@ void ConvertColumnMajorTensor(const Tensor& tensor, c_index_type* out_indices, } } -template -void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices, - c_value_type* values, const int64_t size) { - using ValueType = typename CTypeTraits::ArrowType; +template +void ConvertStridedTensor(const Tensor& tensor, typename IndexType::c_type* indices, + typename ValueType::c_type* values) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + const auto& shape = tensor.shape(); const auto ndim = tensor.ndim(); std::vector coord(ndim, 0); - constexpr c_value_type zero = 0; - c_value_type x; + ValueCType x; int64_t i; for (int64_t n = tensor.size(); n > 0; --n) { x = tensor.Value(coord); - if (ARROW_PREDICT_FALSE(x != zero)) { + if (is_not_zero(x)) { *values++ = x; for (i = 0; i < ndim; ++i) { - *indices++ = static_cast(coord[i]); + *indices++ = static_cast(coord[i]); } } @@ -140,35 +155,21 @@ void ConvertStridedTensor(const Tensor& tensor, c_index_type* indices, } } -#define CONVERT_TENSOR(func, index_type, value_type, indices, values, size) \ - func(tensor_, reinterpret_cast(indices), \ - reinterpret_cast(values), size) - -// Using ARROW_EXPAND is necessary to expand __VA_ARGS__ correctly on VC++. -#define CONVERT_ROW_MAJOR_TENSOR(index_type, value_type, ...) \ - ARROW_EXPAND(CONVERT_TENSOR(ConvertRowMajorTensor, index_type, value_type, __VA_ARGS__)) - -#define CONVERT_COLUMN_MAJOR_TENSOR(index_type, value_type, ...) \ - ARROW_EXPAND( \ - CONVERT_TENSOR(ConvertColumnMajorTensor, index_type, value_type, __VA_ARGS__)) - -#define CONVERT_STRIDED_TENSOR(index_type, value_type, ...) \ - ARROW_EXPAND(CONVERT_TENSOR(ConvertStridedTensor, index_type, value_type, __VA_ARGS__)) - // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCOOIndex -class SparseCOOTensorConverter : private SparseTensorConverterMixin { - using SparseTensorConverterMixin::AssignIndex; - using SparseTensorConverterMixin::IsNonZero; - +class SparseCOOTensorConverter { public: SparseCOOTensorConverter(const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool) : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {} - Status Convert() { + template + Status Convert(const ValueType&, const IndexType&) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_, tensor_.shape())); @@ -180,34 +181,29 @@ class SparseCOOTensorConverter : private SparseTensorConverterMixin { ARROW_ASSIGN_OR_RAISE(auto indices_buffer, AllocateBuffer(index_elsize * ndim * nonzero_count, pool_)); - uint8_t* indices = indices_buffer->mutable_data(); ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateBuffer(value_elsize * nonzero_count, pool_)); - uint8_t* values = values_buffer->mutable_data(); - const uint8_t* tensor_data = tensor_.raw_data(); + auto* values = values_buffer->mutable_data_as(); + const auto* tensor_data = tensor_.data()->data_as(); + auto* indices = indices_buffer->mutable_data_as(); if (ndim <= 1) { const int64_t count = ndim == 0 ? 1 : tensor_.shape()[0]; for (int64_t i = 0; i < count; ++i) { - if (std::any_of(tensor_data, tensor_data + value_elsize, IsNonZero)) { - AssignIndex(indices, i, index_elsize); - std::copy_n(tensor_data, value_elsize, values); - - indices += index_elsize; - values += value_elsize; + if (is_not_zero(*tensor_data)) { + *indices++ = static_cast(i); + *values++ = *tensor_data; } - tensor_data += value_elsize; + ++tensor_data; } } else if (tensor_.is_row_major()) { - DISPATCH(CONVERT_ROW_MAJOR_TENSOR, index_elsize, value_elsize, indices, values, - nonzero_count); + ConvertRowMajorTensor(tensor_, indices, values); } else if (tensor_.is_column_major()) { - DISPATCH(CONVERT_COLUMN_MAJOR_TENSOR, index_elsize, value_elsize, indices, values, - nonzero_count); + ConvertColumnMajorTensor(tensor_, indices, values, + nonzero_count); } else { - DISPATCH(CONVERT_STRIDED_TENSOR, index_elsize, value_elsize, indices, values, - nonzero_count); + ConvertStridedTensor(tensor_, indices, values); } // make results @@ -281,13 +277,14 @@ Status MakeSparseCOOTensorFromTensor(const Tensor& tensor, std::shared_ptr* out_sparse_index, std::shared_ptr* out_data) { SparseCOOTensorConverter converter(tensor, index_value_type, pool); - RETURN_NOT_OK(converter.Convert()); - + ConverterVisitor visitor{converter}; + ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor)); *out_sparse_index = checked_pointer_cast(converter.sparse_index); *out_data = converter.data; return Status::OK(); } +// TODO(GH-47580): Enable column-major index tensor Result> MakeTensorFromSparseCOOTensor( MemoryPool* pool, const SparseCOOTensor* sparse_tensor) { const auto& sparse_index = diff --git a/cpp/src/arrow/tensor/csf_converter.cc b/cpp/src/arrow/tensor/csf_converter.cc index f6470e16b7818..8ed5838c4543e 100644 --- a/cpp/src/arrow/tensor/csf_converter.cc +++ b/cpp/src/arrow/tensor/csf_converter.cc @@ -27,10 +27,10 @@ #include "arrow/buffer_builder.h" #include "arrow/result.h" #include "arrow/status.h" +#include "arrow/tensor.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/util/sort_internal.h" -#include "arrow/visit_type_inline.h" namespace arrow { @@ -57,24 +57,25 @@ inline void IncrementIndex(std::vector& coord, const std::vector& index_value_type, MemoryPool* pool) : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {} - Status Convert() { + template + Status Convert(const ValueType&, const IndexType&) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_, tensor_.shape())); + const int64_t ndim = tensor_.ndim(); + if (ndim <= 1) { + return Status::NotImplemented("TODO for ndim <= 1"); + } - const int index_elsize = index_value_type_->byte_width(); const int value_elsize = tensor_.type()->byte_width(); - - const int64_t ndim = tensor_.ndim(); // Axis order as ascending order of dimension size is a good heuristic but is not // necessarily optimal. std::vector axis_order = internal::ArgSort(tensor_.shape()); @@ -82,60 +83,48 @@ class SparseCSFTensorConverter : private SparseTensorConverterMixin { ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateBuffer(value_elsize * nonzero_count, pool_)); - auto* values = values_buffer->mutable_data(); std::vector counts(ndim, 0); std::vector coord(ndim, 0); std::vector previous_coord(ndim, -1); - std::vector indptr_buffer_builders(ndim - 1); - std::vector indices_buffer_builders(ndim); - const auto* tensor_data = tensor_.raw_data(); - uint8_t index_buffer[sizeof(int64_t)]; + std::vector> indptr_buffer_builders(ndim - 1); + std::vector> indices_buffer_builders(ndim); - if (ndim <= 1) { - return Status::NotImplemented("TODO for ndim <= 1"); - } else { - const auto& shape = tensor_.shape(); - for (int64_t n = tensor_.size(); n > 0; n--) { - const auto offset = tensor_.CalculateValueOffset(coord); - const auto xp = tensor_data + offset; - - if (std::any_of(xp, xp + value_elsize, IsNonZero)) { - bool tree_split = false; - - std::copy_n(xp, value_elsize, values); - values += value_elsize; - - for (int64_t i = 0; i < ndim; ++i) { - int64_t dimension = axis_order[i]; + auto* values = values_buffer->mutable_data_as(); - tree_split = tree_split || (coord[dimension] != previous_coord[dimension]); - if (tree_split) { - if (i < ndim - 1) { - AssignIndex(index_buffer, counts[i + 1], index_elsize); - RETURN_NOT_OK( - indptr_buffer_builders[i].Append(index_buffer, index_elsize)); - } + const auto& shape = tensor_.shape(); + for (int64_t n = tensor_.size(); n > 0; n--) { + const auto value = tensor_.Value(coord); - AssignIndex(index_buffer, coord[dimension], index_elsize); - RETURN_NOT_OK( - indices_buffer_builders[i].Append(index_buffer, index_elsize)); + if (is_not_zero(value)) { + bool tree_split = false; + *values++ = value; + for (int64_t i = 0; i < ndim; ++i) { + int64_t dimension = axis_order[i]; - ++counts[i]; + tree_split = tree_split || (coord[dimension] != previous_coord[dimension]); + if (tree_split) { + if (i < ndim - 1) { + RETURN_NOT_OK(indptr_buffer_builders[i].Append( + static_cast(counts[i + 1]))); } - } + RETURN_NOT_OK(indices_buffer_builders[i].Append( + static_cast(coord[dimension]))); - previous_coord = coord; + ++counts[i]; + } } - IncrementIndex(coord, shape, axis_order); + previous_coord = coord; } + + IncrementIndex(coord, shape, axis_order); } for (int64_t column = 0; column < ndim - 1; ++column) { - AssignIndex(index_buffer, counts[column + 1], index_elsize); - RETURN_NOT_OK(indptr_buffer_builders[column].Append(index_buffer, index_elsize)); + RETURN_NOT_OK(indptr_buffer_builders[column].Append( + static_cast(counts[column + 1]))); } // make results @@ -272,8 +261,8 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor, std::shared_ptr* out_sparse_index, std::shared_ptr* out_data) { SparseCSFTensorConverter converter(tensor, index_value_type, pool); - RETURN_NOT_OK(converter.Convert()); - + ConverterVisitor visitor{converter}; + ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor)); *out_sparse_index = checked_pointer_cast(converter.sparse_index); *out_data = converter.data; return Status::OK(); diff --git a/cpp/src/arrow/tensor/csx_converter.cc b/cpp/src/arrow/tensor/csx_converter.cc index 679c3a0f1acd7..877021f9c9f2d 100644 --- a/cpp/src/arrow/tensor/csx_converter.cc +++ b/cpp/src/arrow/tensor/csx_converter.cc @@ -24,6 +24,7 @@ #include "arrow/buffer.h" #include "arrow/status.h" +#include "arrow/tensor.h" #include "arrow/type.h" #include "arrow/util/checked_cast.h" #include "arrow/visit_type_inline.h" @@ -33,24 +34,25 @@ namespace arrow { class MemoryPool; namespace internal { + namespace { // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCSRIndex -class SparseCSXMatrixConverter : private SparseTensorConverterMixin { - using SparseTensorConverterMixin::AssignIndex; - using SparseTensorConverterMixin::IsNonZero; - +class SparseCSXMatrixConverter { public: SparseCSXMatrixConverter(SparseMatrixCompressedAxis axis, const Tensor& tensor, const std::shared_ptr& index_value_type, MemoryPool* pool) : axis_(axis), tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {} - Status Convert() { + template + Status Convert(const ValueType&, const IndexType&) { RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_, tensor_.shape())); + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; const int index_elsize = index_value_type_->byte_width(); const int value_elsize = tensor_.type()->byte_width(); @@ -58,6 +60,8 @@ class SparseCSXMatrixConverter : private SparseTensorConverterMixin { const int64_t ndim = tensor_.ndim(); if (ndim > 2) { return Status::Invalid("Invalid tensor dimension"); + } else if (ndim <= 1) { + return Status::NotImplemented("TODO for ndim <= 1"); } const int major_axis = static_cast(axis_); @@ -70,47 +74,34 @@ class SparseCSXMatrixConverter : private SparseTensorConverterMixin { ARROW_ASSIGN_OR_RAISE(auto values_buffer, AllocateBuffer(value_elsize * nonzero_count, pool_)); - auto* values = values_buffer->mutable_data(); - - const auto* tensor_data = tensor_.raw_data(); - - if (ndim <= 1) { - return Status::NotImplemented("TODO for ndim <= 1"); - } else { - ARROW_ASSIGN_OR_RAISE(indptr_buffer, - AllocateBuffer(index_elsize * (n_major + 1), pool_)); - auto* indptr = indptr_buffer->mutable_data(); - - ARROW_ASSIGN_OR_RAISE(indices_buffer, - AllocateBuffer(index_elsize * nonzero_count, pool_)); - auto* indices = indices_buffer->mutable_data(); - - std::vector coords(2); - int64_t k = 0; - std::fill_n(indptr, index_elsize, 0); - indptr += index_elsize; - for (int64_t i = 0; i < n_major; ++i) { - for (int64_t j = 0; j < n_minor; ++j) { - if (axis_ == SparseMatrixCompressedAxis::ROW) { - coords = {i, j}; - } else { - coords = {j, i}; - } - const int64_t offset = tensor_.CalculateValueOffset(coords); - if (std::any_of(tensor_data + offset, tensor_data + offset + value_elsize, - IsNonZero)) { - std::copy_n(tensor_data + offset, value_elsize, values); - values += value_elsize; - - AssignIndex(indices, j, index_elsize); - indices += index_elsize; - - k++; - } + ARROW_ASSIGN_OR_RAISE(indptr_buffer, + AllocateBuffer(index_elsize * (n_major + 1), pool_)); + ARROW_ASSIGN_OR_RAISE(indices_buffer, + AllocateBuffer(index_elsize * nonzero_count, pool_)); + + auto* indptr = indptr_buffer->mutable_data_as(); + auto* values = values_buffer->mutable_data_as(); + auto* indices = indices_buffer->mutable_data_as(); + + std::vector coords(2); + int64_t k = 0; + indptr[0] = 0; + ++indptr; + for (int64_t i = 0; i < n_major; ++i) { + for (int64_t j = 0; j < n_minor; ++j) { + if (axis_ == SparseMatrixCompressedAxis::ROW) { + coords = {i, j}; + } else { + coords = {j, i}; + } + auto value = tensor_.Value(coords); + if (is_not_zero(value)) { + *values++ = value; + *indices++ = static_cast(j); + k++; } - AssignIndex(indptr, k, index_elsize); - indptr += index_elsize; } + *indptr++ = static_cast(k); } std::vector indptr_shape({n_major + 1}); @@ -150,8 +141,8 @@ Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis, std::shared_ptr* out_sparse_index, std::shared_ptr* out_data) { SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool); - RETURN_NOT_OK(converter.Convert()); - + ConverterVisitor visitor(converter); + ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor)); *out_sparse_index = converter.sparse_index; *out_data = converter.data; return Status::OK(); From 83d3b2c2bb0688eaab15aed18a8e9ce78c1c0b93 Mon Sep 17 00:00:00 2001 From: Arash Andishgar Date: Tue, 23 Sep 2025 17:39:02 +0330 Subject: [PATCH 2/7] Apply rok suggestion --- cpp/src/arrow/sparse_tensor_test.cc | 354 ++++++++++++++++---------- cpp/src/arrow/tensor/converter.h | 11 +- cpp/src/arrow/tensor/coo_converter.cc | 12 +- 3 files changed, 227 insertions(+), 150 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 406c72d0e2ec4..83597219c70a2 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -413,69 +413,97 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { ASSERT_TRUE(tensor.Equals(*dense_tensor)); } -TEST_F(TestSparseCOOTensor, CreationFromVectorTensorWithNegZero) { - std::vector data{ - -0.0, -0.0, 0.0, -0.0, 4.0, -0.0, -0.0, 0.0, -0.0, -1.0, -0.0, -0.0, - }; - std::vector shape = {12}; - auto buffer = Buffer::FromVector(data); - ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); - ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, - SparseCOOTensor::Make(*dense_tensor, int64())); - ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); - auto si = - internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); - AssertCOOIndex(si->indices(), 0, {4}); - AssertCOOIndex(si->indices(), 1, {9}); - ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); - ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); -} +template +class TestSparseCOOTensorCreationFromNegativeZero + : public TestSparseTensorBase { + public: + using ValueCType = typename ValueType::c_type; -TEST_F(TestSparseCOOTensor, CreationFromContiguousDenseTensorWithNegZero) { - // clang-format off - std::vector data{ - -0.0, -0.0, 0.0, - -0.0, 4.0, -0.0, - -0.0, 0.0, -0.0, - -1.0, -0.0, -0.0, + void SetUp() override { type_ = TypeTraits::type_singleton(); } + + void FromVector() { + std::vector data{ + -0.0, -0.0, 0.0, -0.0, 4.0, -0.0, -0.0, 0.0, -0.0, -1.0, -0.0, -0.0, }; - // clang-format on - std::vector shape = {4, 3}; - auto buffer = Buffer::FromVector(data); - ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); - ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, - SparseCOOTensor::Make(*dense_tensor, int64())); - ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); - auto si = - internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); - AssertCOOIndex(si->indices(), 0, {1, 1}); - AssertCOOIndex(si->indices(), 1, {3, 0}); - ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); - ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + std::vector shape = {12}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, + SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); + AssertCOOIndex(si->indices(), 0, {4}); + AssertCOOIndex(si->indices(), 1, {9}); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + } + + void FromContiguousTensor() { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, + -0.0, 4.0, -0.0, + -0.0, 0.0, -0.0, + -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, + SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); + AssertCOOIndex(si->indices(), 0, {1, 1}); + AssertCOOIndex(si->indices(), 1, {3, 0}); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + } + + void FromNonContiguousTensor() { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, 1.0, 2.0, + -0.0, 4.0, -0.0, 0.0, -0.0, + -0.0, 0.0, -0.0, 3.0, 4.0, + -1.0, -0.0, -0.0, 0.0, 0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, + Tensor::Make(type_, buffer, shape, + {type_->byte_width() * 5, type_->byte_width()})); + ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, + SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(12, sparse_coo_tensor->size()); + ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); + AssertCOOIndex(si->indices(), 0, {1, 1}); + AssertCOOIndex(si->indices(), 1, {3, 0}); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + } + + private: + std::shared_ptr type_; +}; + +TYPED_TEST_SUITE(TestSparseCOOTensorCreationFromNegativeZero, RealArrowTypes); + +TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromVector) { + this->FromVector(); } -TEST_F(TestSparseCOOTensor, CreationFromNonContiguousDenseTensorWithNegZero) { - // clang-format off - std::vector data{ - -0.0, -0.0, 0.0, 1.0, 2.0, - -0.0, 4.0, -0.0, 0.0, -0.0, - -0.0, 0.0, -0.0, 3.0, 4.0, - -1.0, -0.0, -0.0, 0.0, 0.0, - }; - // clang-format on - std::vector shape = {4, 3}; - auto buffer = Buffer::FromVector(data); - ASSERT_OK_AND_ASSIGN(auto dense_tensor, - Tensor::Make(float32(), buffer, shape, {20, 4})); - ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, - SparseCOOTensor::Make(*dense_tensor, int64())); - ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); - auto si = - internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); - AssertCOOIndex(si->indices(), 0, {1, 1}); - AssertCOOIndex(si->indices(), 1, {3, 0}); - ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); - ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromContiguousTensor) { + this->FromContiguousTensor(); +} + +TYPED_TEST(TestSparseCOOTensorCreationFromNegativeZero, FromNonContiguousTensor) { + this->FromNonContiguousTensor(); } template @@ -934,31 +962,49 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { ASSERT_TRUE(tensor.Equals(*dense_tensor)); } -TEST_F(TestSparseCSRMatrix, CreationFromTensorWithNegZero) { - // clang-format off - std::vector data{ - -0.0, -0.0, 0.0, - -0.0, 4.0, -0.0, - -0.0, 0.0, -0.0, - -1.0, -0.0, -0.0, - }; - // clang-format on - std::vector shape = {4, 3}; - auto buffer = Buffer::FromVector(data); - ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); - ASSERT_OK_AND_ASSIGN(auto sparse_csr_tensor, - SparseCSRMatrix::Make(*dense_tensor, int64())); - ASSERT_EQ(2, sparse_csr_tensor->non_zero_length()); - auto si = - internal::checked_pointer_cast(sparse_csr_tensor->sparse_index()); - const auto* indptr = si->indptr()->data()->data_as(); - const auto* indices = si->indices()->data()->data_as(); - ASSERT_EQ(indptr[2], 1); - ASSERT_EQ(indptr[4], 2); - ASSERT_EQ(indices[0], 1); - ASSERT_EQ(indices[1], 0); - ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csr_tensor->ToTensor()); - ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +template +class TestSparseCSRTensorCreationFromNegativeZero + : public TestSparseTensorBase { + public: + using ValueCType = typename ValueType::c_type; + + void SetUp() override { type_ = TypeTraits::type_singleton(); } + + void FromTensor() { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, + -0.0, 4.0, -0.0, + -0.0, 0.0, -0.0, + -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_csr_tensor, + SparseCSRMatrix::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_csr_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_csr_tensor->sparse_index()); + const auto* indptr = si->indptr()->data()->template data_as(); + const auto* indices = si->indices()->data()->template data_as(); + ASSERT_EQ(indptr[2], 1); + ASSERT_EQ(indptr[4], 2); + ASSERT_EQ(indices[0], 1); + ASSERT_EQ(indices[1], 0); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csr_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + } + + private: + std::shared_ptr type_; +}; + +TYPED_TEST_SUITE(TestSparseCSRTensorCreationFromNegativeZero, RealArrowTypes); + +TYPED_TEST(TestSparseCSRTensorCreationFromNegativeZero, FromTensor) { + this->FromTensor(); } template @@ -1296,31 +1342,49 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) { ASSERT_TRUE(tensor.Equals(*dense_tensor)); } -TEST_F(TestSparseCSCMatrix, CreationFromTensorWithNegZero) { - // clang-format off - std::vector data{ - -0.0, -0.0, 0.0, - -0.0, 4.0, -0.0, - -0.0, 0.0, -0.0, - -1.0, -0.0, -0.0, - }; - // clang-format on - std::vector shape = {4, 3}; - auto buffer = Buffer::FromVector(data); - ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); - ASSERT_OK_AND_ASSIGN(auto sparse_csc_tensor, - SparseCSCMatrix::Make(*dense_tensor, int64())); - ASSERT_EQ(2, sparse_csc_tensor->non_zero_length()); - auto si = - internal::checked_pointer_cast(sparse_csc_tensor->sparse_index()); - const auto* indptr = si->indptr()->data()->data_as(); - const auto* indices = si->indices()->data()->data_as(); - ASSERT_EQ(indptr[1], 1); - ASSERT_EQ(indptr[2], 2); - ASSERT_EQ(indices[0], 3); - ASSERT_EQ(indices[1], 1); - ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csc_tensor->ToTensor()); - ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +template +class TestSparseCSCTensorCreationFromNegativeZero + : public TestSparseTensorBase { + public: + using ValueCType = typename ValueType::c_type; + + void SetUp() override { type_ = TypeTraits::type_singleton(); } + + void FromTensor() { + // clang-format off + std::vector data{ + -0.0, -0.0, 0.0, + -0.0, 4.0, -0.0, + -0.0, 0.0, -0.0, + -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {4, 3}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_csc_tensor, + SparseCSCMatrix::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_csc_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_csc_tensor->sparse_index()); + const auto* indptr = si->indptr()->data()->template data_as(); + const auto* indices = si->indices()->data()->template data_as(); + ASSERT_EQ(indptr[1], 1); + ASSERT_EQ(indptr[2], 2); + ASSERT_EQ(indices[0], 3); + ASSERT_EQ(indices[1], 1); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csc_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + } + + private: + std::shared_ptr type_; +}; + +TYPED_TEST_SUITE(TestSparseCSCTensorCreationFromNegativeZero, RealArrowTypes); + +TYPED_TEST(TestSparseCSCTensorCreationFromNegativeZero, FromTensor) { + this->FromTensor(); } template @@ -1598,33 +1662,51 @@ TEST_F(TestSparseCSFTensor, CreationFromZeroTensor) { ASSERT_TRUE(t->Equals(*t_zero)); } -TEST_F(TestSparseCSFTensor, CreationFromTensorWithNegZero) { - // clang-format off - std::vector data{ - -0.0, -0.0, -0.0, -0.0, - 4.0, -0.0, -0.0, -0.0, - 0.0, -1.0, -0.0, -0.0, - }; - // clang-format on - std::vector shape = {3, 4}; - auto buffer = Buffer::FromVector(data); - ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(float32(), buffer, shape)); - ASSERT_OK_AND_ASSIGN(auto sparse_csf_tensor, - SparseCSFTensor::Make(*dense_tensor, int64())); - ASSERT_EQ(2, sparse_csf_tensor->non_zero_length()); - auto si = - internal::checked_pointer_cast(sparse_csf_tensor->sparse_index()); - auto indptr = si->indptr()[0]->data()->data_as(); - auto row_indices = si->indices()[0]->data()->data_as(); - auto column_indices = si->indices()[1]->data()->data_as(); - ASSERT_EQ(indptr[1], 1); - ASSERT_EQ(indptr[2], 2); - EXPECT_EQ(row_indices[0], 1); - EXPECT_EQ(row_indices[1], 2); - EXPECT_EQ(column_indices[0], 0); - EXPECT_EQ(column_indices[1], 1); - ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csf_tensor->ToTensor()); - ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); +template +class TestSparseCSFTensorCreationFromNegativeZero + : public TestSparseTensorBase { + public: + using ValueCType = typename ValueType::c_type; + + void SetUp() override { type_ = TypeTraits::type_singleton(); } + + void FromTensor() { + // clang-format off + std::vector data{ + -0.0, -0.0, -0.0, -0.0, + 4.0, -0.0, -0.0, -0.0, + 0.0, -1.0, -0.0, -0.0, + }; + // clang-format on + std::vector shape = {3, 4}; + auto buffer = Buffer::FromVector(data); + ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); + ASSERT_OK_AND_ASSIGN(auto sparse_csf_tensor, + SparseCSFTensor::Make(*dense_tensor, int64())); + ASSERT_EQ(2, sparse_csf_tensor->non_zero_length()); + auto si = + internal::checked_pointer_cast(sparse_csf_tensor->sparse_index()); + auto indptr = si->indptr()[0]->data()->template data_as(); + auto row_indices = si->indices()[0]->data()->template data_as(); + auto column_indices = si->indices()[1]->data()->template data_as(); + ASSERT_EQ(indptr[1], 1); + ASSERT_EQ(indptr[2], 2); + EXPECT_EQ(row_indices[0], 1); + EXPECT_EQ(row_indices[1], 2); + EXPECT_EQ(column_indices[0], 0); + EXPECT_EQ(column_indices[1], 1); + ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csf_tensor->ToTensor()); + ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); + } + + private: + std::shared_ptr type_; +}; + +TYPED_TEST_SUITE(TestSparseCSFTensorCreationFromNegativeZero, RealArrowTypes); + +TYPED_TEST(TestSparseCSFTensorCreationFromNegativeZero, FromTensor) { + this->FromTensor(); } template diff --git a/cpp/src/arrow/tensor/converter.h b/cpp/src/arrow/tensor/converter.h index fd23f83c8e8f0..f51cb541cf1fc 100644 --- a/cpp/src/arrow/tensor/converter.h +++ b/cpp/src/arrow/tensor/converter.h @@ -66,15 +66,15 @@ Result> MakeTensorFromSparseCSCMatrix( Result> MakeTensorFromSparseCSFTensor( MemoryPool* pool, const SparseCSFTensor* sparse_tensor); -template +template struct ConverterVisitor { - explicit ConverterVisitor(Convertor& converter) : converter(converter) {} + explicit ConverterVisitor(Converter& converter) : converter(converter) {} template Status operator()(const ValueType& value, const IndexType& index_type) { return converter.Convert(value, index_type); } - Convertor& converter; + Converter& converter; }; struct ValueTypeVisitor { @@ -87,7 +87,8 @@ struct ValueTypeVisitor { template Status Visit(const DataType& value_type, const IndexType&, Function&&) { - return Status::Invalid("Invalid value type and the type is ", value_type.name()); + return Status::Invalid("Invalid value type: ", value_type.name(), + ". Expected a number."); } }; @@ -103,7 +104,7 @@ struct IndexAndValueTypeVisitor { template Status Visit(const DataType& type, const std::shared_ptr&, Function&&) { - return Status::Invalid("Invalid index type and the type is ", type.name()); + return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer."); } }; diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index 57e696a0b18b5..149db6518d3af 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -54,8 +54,8 @@ inline void IncrementRowMajorIndex(std::vector& coord, } template -void ConvertContinuousTensor(const Tensor& tensor, typename IndexType::c_type* indices, - typename ValueType::c_type* values) { +void ConvertRowMajorTensor(const Tensor& tensor, typename IndexType::c_type* indices, + typename ValueType::c_type* values) { using ValueCType = typename ValueType::c_type; using IndexCType = typename IndexType::c_type; @@ -77,12 +77,6 @@ void ConvertContinuousTensor(const Tensor& tensor, typename IndexType::c_type* i } } -template -void ConvertRowMajorTensor(const Tensor& tensor, typename IndexType::c_type* out_indices, - typename ValueType::c_type* out_values) { - ConvertContinuousTensor(tensor, out_indices, out_values); -} - // TODO(GH-47580): Correct column-major tensor conversion template void ConvertColumnMajorTensor(const Tensor& tensor, @@ -95,7 +89,7 @@ void ConvertColumnMajorTensor(const Tensor& tensor, const auto ndim = tensor.ndim(); std::vector indices(ndim * size); std::vector values(size); - ConvertContinuousTensor(tensor, indices.data(), values.data()); + ConvertRowMajorTensor(tensor, indices.data(), values.data()); // transpose indices for (int64_t i = 0; i < size; ++i) { From a7e614db8ea2941a0cb4e87326a1bdcb08edbb22 Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Fri, 26 Sep 2025 12:11:26 +0330 Subject: [PATCH 3/7] apply rok suggestion --- cpp/src/arrow/sparse_tensor_test.cc | 24 ++++++++++++------------ cpp/src/arrow/tensor.cc | 4 ++-- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 83597219c70a2..2d5b2561c5ded 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -423,7 +423,7 @@ class TestSparseCOOTensorCreationFromNegativeZero void FromVector() { std::vector data{ - -0.0, -0.0, 0.0, -0.0, 4.0, -0.0, -0.0, 0.0, -0.0, -1.0, -0.0, -0.0, + -0.0, -0.0, -0.0, -0.0, 4.0, -0.0, -0.0, -0.0, -0.0, -1.0, -0.0, -0.0, }; std::vector shape = {12}; auto buffer = Buffer::FromVector(data); @@ -442,9 +442,9 @@ class TestSparseCOOTensorCreationFromNegativeZero void FromContiguousTensor() { // clang-format off std::vector data{ - -0.0, -0.0, 0.0, + -0.0, -0.0, -0.0, -0.0, 4.0, -0.0, - -0.0, 0.0, -0.0, + -0.0, -0.0, -0.0, -1.0, -0.0, -0.0, }; // clang-format on @@ -465,10 +465,10 @@ class TestSparseCOOTensorCreationFromNegativeZero void FromNonContiguousTensor() { // clang-format off std::vector data{ - -0.0, -0.0, 0.0, 1.0, 2.0, - -0.0, 4.0, -0.0, 0.0, -0.0, - -0.0, 0.0, -0.0, 3.0, 4.0, - -1.0, -0.0, -0.0, 0.0, 0.0, + -0.0, -0.0, -0.0, 1.0, 2.0, + -0.0, 4.0, -0.0, -0.0, -0.0, + -0.0, -0.0, -0.0, 3.0, 4.0, + -1.0, -0.0, -0.0, -0.0, -0.0, }; // clang-format on std::vector shape = {4, 3}; @@ -973,9 +973,9 @@ class TestSparseCSRTensorCreationFromNegativeZero void FromTensor() { // clang-format off std::vector data{ - -0.0, -0.0, 0.0, + -0.0, -0.0, -0.0, -0.0, 4.0, -0.0, - -0.0, 0.0, -0.0, + -0.0, -0.0, -0.0, -1.0, -0.0, -0.0, }; // clang-format on @@ -1353,9 +1353,9 @@ class TestSparseCSCTensorCreationFromNegativeZero void FromTensor() { // clang-format off std::vector data{ - -0.0, -0.0, 0.0, + -0.0, -0.0, -0.0, -0.0, 4.0, -0.0, - -0.0, 0.0, -0.0, + -0.0, -0.0, -0.0, -1.0, -0.0, -0.0, }; // clang-format on @@ -1675,7 +1675,7 @@ class TestSparseCSFTensorCreationFromNegativeZero std::vector data{ -0.0, -0.0, -0.0, -0.0, 4.0, -0.0, -0.0, -0.0, - 0.0, -1.0, -0.0, -0.0, + -0.0, -1.0, -0.0, -0.0, }; // clang-format on std::vector shape = {3, 4}; diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index d3dd96b81095f..7483d079b0b66 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -490,8 +490,8 @@ int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& t if (dim_index == tensor.ndim() - 1) { for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) { const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index]; - if (auto& elem = *reinterpret_cast(ptr); - internal::is_not_zero(elem)) { + auto& elem = *reinterpret_cast(ptr); + if (internal::is_not_zero(elem)) { ++nnz; } } From fef77e12fcb5564999fb9f2b2fe20b4f030059b9 Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Fri, 26 Sep 2025 15:17:21 +0330 Subject: [PATCH 4/7] Apply +0.0,-0.0,0.0 to relevant test cases --- cpp/src/arrow/sparse_tensor_test.cc | 32 ++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 2d5b2561c5ded..635a858e54196 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -423,7 +423,7 @@ class TestSparseCOOTensorCreationFromNegativeZero void FromVector() { std::vector data{ - -0.0, -0.0, -0.0, -0.0, 4.0, -0.0, -0.0, -0.0, -0.0, -1.0, -0.0, -0.0, + -0.0, -0.0, 0.0, -0.0, 4.0, +0.0, -0.0, -0.0, -0.0, -1.0, 0.0, -0.0, }; std::vector shape = {12}; auto buffer = Buffer::FromVector(data); @@ -442,9 +442,9 @@ class TestSparseCOOTensorCreationFromNegativeZero void FromContiguousTensor() { // clang-format off std::vector data{ - -0.0, -0.0, -0.0, - -0.0, 4.0, -0.0, - -0.0, -0.0, -0.0, + -0.0, 0.0, -0.0, + +0.0, 4.0, -0.0, + -0.0, -0.0, 0.0, -1.0, -0.0, -0.0, }; // clang-format on @@ -465,10 +465,10 @@ class TestSparseCOOTensorCreationFromNegativeZero void FromNonContiguousTensor() { // clang-format off std::vector data{ - -0.0, -0.0, -0.0, 1.0, 2.0, - -0.0, 4.0, -0.0, -0.0, -0.0, - -0.0, -0.0, -0.0, 3.0, 4.0, - -1.0, -0.0, -0.0, -0.0, -0.0, + -0.0, -0.0, 0.0, 1.0, 2.0, + -0.0, 4.0, 0.0, 0.0, -0.0, + -0.0, +0.0, -0.0, 3.0, 4.0, + -1.0, -0.0, -0.0, -0.0, +0.0, }; // clang-format on std::vector shape = {4, 3}; @@ -973,10 +973,10 @@ class TestSparseCSRTensorCreationFromNegativeZero void FromTensor() { // clang-format off std::vector data{ - -0.0, -0.0, -0.0, + -0.0, -0.0, 0.0, -0.0, 4.0, -0.0, - -0.0, -0.0, -0.0, - -1.0, -0.0, -0.0, + +0.0, -0.0, -0.0, + -1.0, -0.0, +0.0, }; // clang-format on std::vector shape = {4, 3}; @@ -1353,9 +1353,9 @@ class TestSparseCSCTensorCreationFromNegativeZero void FromTensor() { // clang-format off std::vector data{ - -0.0, -0.0, -0.0, + -0.0, -0.0, +0.0, -0.0, 4.0, -0.0, - -0.0, -0.0, -0.0, + -0.0, 0.0, -0.0, -1.0, -0.0, -0.0, }; // clang-format on @@ -1673,9 +1673,9 @@ class TestSparseCSFTensorCreationFromNegativeZero void FromTensor() { // clang-format off std::vector data{ - -0.0, -0.0, -0.0, -0.0, - 4.0, -0.0, -0.0, -0.0, - -0.0, -1.0, -0.0, -0.0, + -0.0, -0.0, 0.0, -0.0, + 4.0, +0.0, -0.0, -0.0, + 0.0, -1.0, -0.0, -0.0, }; // clang-format on std::vector shape = {3, 4}; From deb3686dbe2f8df0c5768dc667e3a35a0068a14e Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Wed, 8 Oct 2025 15:44:31 +0330 Subject: [PATCH 5/7] apply pitrou suggestion --- cpp/src/arrow/sparse_tensor.cc | 2 +- cpp/src/arrow/sparse_tensor_test.cc | 6 ++ .../{converter.h => converter_internal.h} | 17 ++-- cpp/src/arrow/tensor/coo_converter.cc | 62 +++++++++++- cpp/src/arrow/tensor/csf_converter.cc | 99 ++++++++++++++++++- cpp/src/arrow/tensor/csx_converter.cc | 82 ++++++++++++++- 6 files changed, 249 insertions(+), 19 deletions(-) rename cpp/src/arrow/tensor/{converter.h => converter_internal.h} (90%) diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index b84070b3d288d..900dbd6820547 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -16,7 +16,7 @@ // under the License. #include "arrow/sparse_tensor.h" -#include "arrow/tensor/converter.h" +#include "arrow/tensor/converter_internal.h" #include #include diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 635a858e54196..8ba4417aa615b 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -436,6 +436,7 @@ class TestSparseCOOTensorCreationFromNegativeZero AssertCOOIndex(si->indices(), 0, {4}); AssertCOOIndex(si->indices(), 1, {9}); ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_OK(new_tensor->Validate()); ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); } @@ -459,6 +460,7 @@ class TestSparseCOOTensorCreationFromNegativeZero AssertCOOIndex(si->indices(), 0, {1, 1}); AssertCOOIndex(si->indices(), 1, {3, 0}); ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_OK(new_tensor->Validate()); ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); } @@ -485,6 +487,7 @@ class TestSparseCOOTensorCreationFromNegativeZero AssertCOOIndex(si->indices(), 0, {1, 1}); AssertCOOIndex(si->indices(), 1, {3, 0}); ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_coo_tensor->ToTensor()); + ASSERT_OK(new_tensor->Validate()); ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); } @@ -994,6 +997,7 @@ class TestSparseCSRTensorCreationFromNegativeZero ASSERT_EQ(indices[0], 1); ASSERT_EQ(indices[1], 0); ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csr_tensor->ToTensor()); + ASSERT_OK(new_tensor->Validate()); ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); } @@ -1374,6 +1378,7 @@ class TestSparseCSCTensorCreationFromNegativeZero ASSERT_EQ(indices[0], 3); ASSERT_EQ(indices[1], 1); ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csc_tensor->ToTensor()); + ASSERT_OK(new_tensor->Validate()); ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); } @@ -1696,6 +1701,7 @@ class TestSparseCSFTensorCreationFromNegativeZero EXPECT_EQ(column_indices[0], 0); EXPECT_EQ(column_indices[1], 1); ASSERT_OK_AND_ASSIGN(auto new_tensor, sparse_csf_tensor->ToTensor()); + ASSERT_OK(new_tensor->Validate()); ASSERT_TRUE(new_tensor->Equals(*dense_tensor)); } diff --git a/cpp/src/arrow/tensor/converter.h b/cpp/src/arrow/tensor/converter_internal.h similarity index 90% rename from cpp/src/arrow/tensor/converter.h rename to cpp/src/arrow/tensor/converter_internal.h index f51cb541cf1fc..74e02cc015a45 100644 --- a/cpp/src/arrow/tensor/converter.h +++ b/cpp/src/arrow/tensor/converter_internal.h @@ -22,9 +22,11 @@ #include #include -#include "arrow/visit_type_inline.h" - namespace arrow { + +template +Status VisitTypeInline(const DataType& type, VISITOR* visitor, ARGS&&... args); + namespace internal { struct SparseTensorConverterMixin { @@ -95,25 +97,24 @@ struct ValueTypeVisitor { struct IndexAndValueTypeVisitor { template enable_if_integer Visit(const IndexType& index_type, - const std::shared_ptr& value_type, + const DataType& value_type, Function&& function) { ValueTypeVisitor visitor; - return VisitTypeInline(*value_type, &visitor, index_type, + return VisitTypeInline(value_type, &visitor, index_type, std::forward(function)); } template - Status Visit(const DataType& type, const std::shared_ptr&, Function&&) { + Status Visit(const DataType& type, const DataType&, Function&&) { return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer."); } }; template -Status VisitValueAndIndexType(const std::shared_ptr& value_type, - const std::shared_ptr& index_type, +Status VisitValueAndIndexType(const DataType& value_type, const DataType& index_type, Function&& function) { IndexAndValueTypeVisitor visitor; - return VisitTypeInline(*index_type, &visitor, value_type, + return VisitTypeInline(index_type, &visitor, value_type, std::forward(function)); } diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index 149db6518d3af..d7bc64d227e08 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/tensor/converter.h" +#include "arrow/tensor/converter_internal.h" #include +#include #include #include #include @@ -27,8 +28,11 @@ #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/logging_internal.h" #include "arrow/util/macros.h" +#include "arrow/visit_type_inline.h" namespace arrow { @@ -38,6 +42,57 @@ namespace internal { namespace { +template +Status ValidateSparseCooTensorCreation(const SparseCOOIndex& sparse_coo_index, + const Buffer& sparse_coo_values_buffer, + const Tensor& tensor) { + using IndexCType = typename IndexType::c_type; + using ValueCType = typename ValueType::c_type; + + const auto& indices = sparse_coo_index.indices(); + const auto* indices_data = sparse_coo_index.indices()->data()->data_as(); + const auto* sparse_coo_values = sparse_coo_values_buffer.data_as(); + + ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); + + if (indices->shape()[0] != non_zero_count) { + return Status::Invalid("Mismatch between non-zero count in sparse tensor (", + indices->shape()[0], ") and dense tensor (", non_zero_count, + ")"); + } else if (indices->shape()[1] != static_cast(tensor.shape().size())) { + return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (", + indices->shape()[1], ") and tensor shape (", + tensor.shape().size(), ")"); + } + + auto coord_size = indices->shape()[1]; + std::vector coord(coord_size); + for (int64_t i = 0; i < indices->shape()[0]; i++) { + if (!is_not_zero(sparse_coo_values[i])) { + return Status::Invalid("Sparse tensor values must be non-zero"); + } + + for (int64_t j = 0; j < coord_size; j++) { + coord[j] = static_cast(indices_data[i * coord_size + j]); + } + + if (sparse_coo_values[i] != tensor.Value(coord)) { + if constexpr (is_floating_type::value) { + if (!std::isnan(tensor.Value(coord)) || + !std::isnan(sparse_coo_values[i])) { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } else { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } + } + + return Status::OK(); +} + template inline void IncrementRowMajorIndex(std::vector& coord, const std::vector& shape) { @@ -210,7 +265,8 @@ class SparseCOOTensorConverter { indices_shape, indices_strides); ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true)); data = std::move(values_buffer); - + DCHECK_OK((ValidateSparseCooTensorCreation(*sparse_index, *data, + tensor_))); return Status::OK(); } @@ -272,7 +328,7 @@ Status MakeSparseCOOTensorFromTensor(const Tensor& tensor, std::shared_ptr* out_data) { SparseCOOTensorConverter converter(tensor, index_value_type, pool); ConverterVisitor visitor{converter}; - ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor)); + ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor)); *out_sparse_index = checked_pointer_cast(converter.sparse_index); *out_data = converter.data; return Status::OK(); diff --git a/cpp/src/arrow/tensor/csf_converter.cc b/cpp/src/arrow/tensor/csf_converter.cc index 8ed5838c4543e..b1abff26b1885 100644 --- a/cpp/src/arrow/tensor/csf_converter.cc +++ b/cpp/src/arrow/tensor/csf_converter.cc @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/tensor/converter.h" +#include "arrow/tensor/converter_internal.h" #include +#include #include #include #include @@ -29,8 +30,11 @@ #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/logging_internal.h" #include "arrow/util/sort_internal.h" +#include "arrow/visit_type_inline.h" namespace arrow { @@ -54,6 +58,89 @@ inline void IncrementIndex(std::vector& coord, const std::vector +Status CheckValues(const SparseCSFIndex& sparse_csf_index, + const typename ValueType::c_type* values, const Tensor& tensor, + const int64_t dim, const int64_t dim_offset, const int64_t start, + const int64_t stop) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + + const auto& indices = sparse_csf_index.indices(); + const auto& indptr = sparse_csf_index.indptr(); + const auto& axis_order = sparse_csf_index.axis_order(); + auto ndim = indices.size(); + auto strides = tensor.strides(); + + const auto& cur_indices = indices[dim]; + const auto* indices_data = cur_indices->data()->data_as() + start; + + if (dim == static_cast(ndim) - 1) { + for (auto i = start; i < stop; ++i) { + auto index = static_cast(*indices_data); + const int64_t offset = dim_offset + index * strides[axis_order[dim]]; + + auto sparse_value = values[i]; + auto tensor_value = + *reinterpret_cast(tensor.raw_data() + offset); + if (!is_not_zero(sparse_value)) { + return Status::Invalid("Sparse tensor values must be non-zero"); + } else if (sparse_value != tensor_value) { + if constexpr (is_floating_type::value) { + if (!std::isnan(tensor_value) || !std::isnan(sparse_value)) { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } else { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } + ++indices_data; + } + } else { + const auto& cur_indptr = indptr[dim]; + const auto* indptr_data = cur_indptr->data()->data_as() + start; + + for (int64_t i = start; i < stop; ++i) { + const int64_t index = *indices_data; + int64_t offset = dim_offset + index * strides[axis_order[dim]]; + auto next_start = static_cast(*indptr_data); + auto next_stop = static_cast(*(indptr_data + 1)); + + ARROW_RETURN_NOT_OK((CheckValues( + sparse_csf_index, values, tensor, dim + 1, offset, next_start, next_stop))); + + ++indices_data; + ++indptr_data; + } + } + return Status::OK(); +} + +template +Status ValidateSparseTensorCSFCreation(const SparseIndex& sparse_index, + const Buffer& values_buffer, + const Tensor& tensor) { + auto sparse_csf_index = checked_cast(sparse_index); + const auto* values = values_buffer.data_as(); + const auto& indices = sparse_csf_index.indices(); + + ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); + if (indices.back()->size() != non_zero_count) { + return Status::Invalid("Mismatch between non-zero count in sparse tensor (", + indices.back()->size(), ") and dense tensor (", non_zero_count, + ")"); + } else if (indices.size() != tensor.shape().size()) { + return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (", + indices.size(), ") and tensor shape (", tensor.shape().size(), + ")"); + } else { + return CheckValues(sparse_csf_index, values, tensor, 0, 0, 0, + sparse_csf_index.indptr()[0]->size() - 1); + } +} + // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCSFIndex @@ -88,8 +175,10 @@ class SparseCSFTensorConverter { std::vector coord(ndim, 0); std::vector previous_coord(ndim, -1); - std::vector> indptr_buffer_builders(ndim - 1); - std::vector> indices_buffer_builders(ndim); + std::vector> indptr_buffer_builders( + ndim - 1, TypedBufferBuilder(pool_)); + std::vector> indices_buffer_builders( + ndim, TypedBufferBuilder(pool_)); auto* values = values_buffer->mutable_data_as(); @@ -146,6 +235,8 @@ class SparseCSFTensorConverter { ARROW_ASSIGN_OR_RAISE( sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order, indptr_buffers, indices_buffers)); + DCHECK_OK((ValidateSparseTensorCSFCreation(*sparse_index, *data, + tensor_))); return Status::OK(); } @@ -262,7 +353,7 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor, std::shared_ptr* out_data) { SparseCSFTensorConverter converter(tensor, index_value_type, pool); ConverterVisitor visitor{converter}; - ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor)); + ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor)); *out_sparse_index = checked_pointer_cast(converter.sparse_index); *out_data = converter.data; return Status::OK(); diff --git a/cpp/src/arrow/tensor/csx_converter.cc b/cpp/src/arrow/tensor/csx_converter.cc index 877021f9c9f2d..0af54cc3a3e4e 100644 --- a/cpp/src/arrow/tensor/csx_converter.cc +++ b/cpp/src/arrow/tensor/csx_converter.cc @@ -15,8 +15,9 @@ // specific language governing permissions and limitations // under the License. -#include "arrow/tensor/converter.h" +#include "arrow/tensor/converter_internal.h" +#include #include #include #include @@ -26,7 +27,9 @@ #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" +#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" +#include "arrow/util/logging_internal.h" #include "arrow/visit_type_inline.h" namespace arrow { @@ -37,6 +40,78 @@ namespace internal { namespace { +template +Status ValidateSparseCSXTensorCreation(const SparseIndexType& sparse_csx_index, + const Buffer& values_buffer, + const Tensor& tensor) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + auto axis = sparse_csx_index.kCompressedAxis; + + auto& indptr = sparse_csx_index.indptr(); + auto& indices = sparse_csx_index.indices(); + auto indptr_data = indptr->data()->template data_as(); + auto indices_data = indices->data()->template data_as(); + auto sparse_csx_values = values_buffer.data_as(); + + ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); + if (indices->shape()[0] != non_zero_count) { + return Status::Invalid("Mismatch between non-zero count in sparse tensor (", + indices->shape()[0], ") and dense tensor (", non_zero_count, + ")"); + } + + for (int64_t i = 0; i < indptr->size() - 1; ++i) { + const auto start = static_cast(indptr_data[i]); + const auto stop = static_cast(indptr_data[i + 1]); + std::vector coord(2); + for (int64_t j = start; j < stop; ++j) { + if (!is_not_zero(sparse_csx_values[j])) { + return Status::Invalid("Sparse tensor values must be non-zero"); + } + + switch (axis) { + case SparseMatrixCompressedAxis::ROW: + coord[0] = i; + coord[1] = static_cast(indices_data[j]); + break; + case SparseMatrixCompressedAxis::COLUMN: + coord[0] = static_cast(indices_data[j]); + coord[1] = i; + break; + } + if (sparse_csx_values[j] != tensor.Value(coord)) { + if constexpr (is_floating_type::value) { + if (!std::isnan(sparse_csx_values[j]) || + !std::isnan(tensor.Value(coord))) { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } else { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } + } + } + return Status::OK(); +} + +template +Status ValidateSparseCSXTensorCreation(const SparseIndex& sparse_index, + const Buffer& values_buffer, + const Tensor& tensor) { + if (sparse_index.format_id() == SparseTensorFormat::CSC) { + auto sparse_csc_index = checked_cast(sparse_index); + return ValidateSparseCSXTensorCreation( + sparse_csc_index, values_buffer, tensor); + } else { + auto sparse_csr_index = checked_cast(sparse_index); + return ValidateSparseCSXTensorCreation( + sparse_csr_index, values_buffer, tensor); + } +} + // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCSRIndex @@ -118,7 +193,8 @@ class SparseCSXMatrixConverter { sparse_index = std::make_shared(indptr_tensor, indices_tensor); } data = std::move(values_buffer); - + DCHECK_OK((ValidateSparseCSXTensorCreation(*sparse_index, *data, + tensor_))); return Status::OK(); } @@ -142,7 +218,7 @@ Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis, std::shared_ptr* out_data) { SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool); ConverterVisitor visitor(converter); - ARROW_RETURN_NOT_OK(VisitValueAndIndexType(tensor.type(), index_value_type, visitor)); + ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor)); *out_sparse_index = converter.sparse_index; *out_data = converter.data; return Status::OK(); From b6d145c94c86b7e83cb92a5e5eb13cd8f5fa3b59 Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Fri, 10 Oct 2025 21:13:59 +0330 Subject: [PATCH 6/7] Unified Validator and Visitor --- cpp/src/arrow/sparse_tensor.cc | 378 ++++++++++++++++++++-- cpp/src/arrow/sparse_tensor.h | 6 + cpp/src/arrow/sparse_tensor_test.cc | 113 ++++++- cpp/src/arrow/tensor.cc | 2 +- cpp/src/arrow/tensor/converter_internal.h | 49 +-- cpp/src/arrow/tensor/coo_converter.cc | 60 +--- cpp/src/arrow/tensor/csf_converter.cc | 98 +----- cpp/src/arrow/tensor/csx_converter.cc | 87 +---- cpp/src/arrow/util/sparse_tensor_util.h | 94 ++++++ 9 files changed, 587 insertions(+), 300 deletions(-) create mode 100644 cpp/src/arrow/util/sparse_tensor_util.h diff --git a/cpp/src/arrow/sparse_tensor.cc b/cpp/src/arrow/sparse_tensor.cc index 900dbd6820547..f10ab4275ba1b 100644 --- a/cpp/src/arrow/sparse_tensor.cc +++ b/cpp/src/arrow/sparse_tensor.cc @@ -19,15 +19,18 @@ #include "arrow/tensor/converter_internal.h" #include +#include #include #include #include +#include #include "arrow/compare.h" #include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging_internal.h" #include "arrow/visit_type_inline.h" +#include "util/sparse_tensor_util.h" namespace arrow { @@ -337,25 +340,68 @@ void CheckSparseCSXIndexValidity(const std::shared_ptr& indptr_type, namespace { -inline Status CheckSparseCSFIndexValidity(const std::shared_ptr& indptr_type, - const std::shared_ptr& indices_type, - const int64_t num_indptrs, - const int64_t num_indices, - const int64_t axis_order_size) { +inline Status CheckSparseCSFIndexValidity( + const std::vector>& indptr, + const std::vector>& indices, + const std::vector& axis_order) { + auto indptr_type = indptr.front()->type(); + auto indices_type = indices.front()->type(); + if (!is_integer(indptr_type->id())) { return Status::TypeError("Type of SparseCSFIndex indptr must be integer"); } if (!is_integer(indices_type->id())) { return Status::TypeError("Type of SparseCSFIndex indices must be integer"); } - if (num_indptrs + 1 != num_indices) { + if (indptr.size() + 1 != indices.size()) { return Status::Invalid( "Length of indices must be equal to length of indptrs + 1 for SparseCSFIndex."); } - if (axis_order_size != num_indices) { + if (axis_order.size() != indices.size()) { return Status::Invalid( "Length of indices must be equal to number of dimensions for SparseCSFIndex."); } + + for (int64_t i = 1; i < static_cast(indptr.size()); i++) { + if (!indptr_type->Equals(indptr[i]->type())) { + return Status::Invalid("All index pointers must have the same data type"); + } + } + + for (int64_t i = 1; i < static_cast(indices.size()); i++) { + if (!indices_type->Equals(indices[i]->type())) { + return Status::Invalid("All indices must have the same data type"); + } + } + + for (const auto& tensor : indptr) { + RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indptr_type, tensor->shape())); + } + + for (const auto& tensor : indices) { + RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indices_type, tensor->shape())); + } + + for (const auto& tensor : indptr) { + if (tensor->shape().size() != 1) { + return Status::Invalid("Each index pointer tensor must be 1-dimensional"); + } + } + + for (const auto& tensor : indices) { + if (tensor->shape().size() != 1) { + return Status::Invalid("Each index tensor must be 1-dimensional"); + } + } + + for (int64_t i = 1; i < static_cast(indptr.size()); i++) { + if (indptr[i]->size() != indices[i]->size() + 1) { + return Status::Invalid( + "Index pointer at dimension ", i, " must have size equal to indices[", i, + "] size plus one (got ", indptr[i]->size(), " and ", indices[i]->size(), ")"); + } + } + return Status::OK(); } @@ -368,6 +414,20 @@ Result> SparseCSFIndex::Make( const std::vector>& indptr_data, const std::vector>& indices_data) { int64_t ndim = axis_order.size(); + + if (axis_order.size() != indices_shapes.size()) { + return Status::Invalid("Mismatched axis_order size and indices_shapes size"); + } + + if (indices_shapes.size() != indices_data.size()) { + return Status::Invalid("Mismatched indices_shapes size and indices_data size"); + } + + if (indices_shapes.size() != indptr_data.size() + 1) { + return Status::Invalid( + "indices_shapes size must be one greater than indptr_data size"); + } + std::vector> indptr(ndim - 1); std::vector> indices(ndim); @@ -378,17 +438,7 @@ Result> SparseCSFIndex::Make( indices[i] = std::make_shared(indices_type, indices_data[i], std::vector({indices_shapes[i]})); - RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr_type, indices_type, indptr.size(), - indices.size(), axis_order.size())); - - for (auto tensor : indptr) { - RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indptr_type, tensor->shape())); - } - - for (auto tensor : indices) { - RETURN_NOT_OK(internal::CheckSparseIndexMaximumValue(indices_type, tensor->shape())); - } - + RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order)); return std::make_shared(indptr, indices, axis_order); } @@ -397,9 +447,7 @@ SparseCSFIndex::SparseCSFIndex(const std::vector>& indpt const std::vector>& indices, const std::vector& axis_order) : SparseIndexBase(), indptr_(indptr), indices_(indices), axis_order_(axis_order) { - ARROW_CHECK_OK(CheckSparseCSFIndexValidity(indptr_.front()->type(), - indices_.front()->type(), indptr_.size(), - indices_.size(), axis_order_.size())); + ARROW_CHECK_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order)); } std::string SparseCSFIndex::ToString() const { return std::string("SparseCSFIndex"); } @@ -475,4 +523,292 @@ Result> SparseTensor::ToTensor(MemoryPool* pool) const { } } +namespace { + +struct SparseTensorValidatorBase { + SparseTensorValidatorBase(const Tensor& tensor, const SparseTensor& sparse_tensor) + : tensor(tensor), sparse_tensor(sparse_tensor) {} + + template + Status ValidateValue(typename ValueType::c_type sparse_tensor_value, + typename ValueType::c_type tensor_value) { + if (!internal::is_not_zero(sparse_tensor_value)) { + return Status::Invalid("Sparse tensor values must be non-zero"); + } else if (sparse_tensor_value != tensor_value) { + if constexpr (is_floating_type::value) { + if (!std::isnan(tensor_value) || !std::isnan(sparse_tensor_value)) { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } else { + return Status::Invalid( + "Inconsistent values between sparse tensor and dense tensor"); + } + } + return Status::OK(); + } + + const Tensor& tensor; + const SparseTensor& sparse_tensor; +}; + +struct SparseCOOValidator : public SparseTensorValidatorBase { + using SparseTensorValidatorBase::SparseTensorValidatorBase; + + Status Validate() { + auto sparse_coo_index = + internal::checked_pointer_cast(sparse_tensor.sparse_index()); + auto indices = sparse_coo_index->indices(); + RETURN_NOT_OK(CheckSparseCOOIndexValidity(indices->type(), indices->shape(), + indices->strides())); + // Validate Values + return util::VisitCOOTensorType(*sparse_tensor.type(), *indices->type(), *this); + } + + template + Status operator()(const ValueType& value_type, const IndexType& index_type) { + return ValidateSparseCooTensorValues(value_type, index_type); + } + + template + Status ValidateSparseCooTensorValues(const ValueType&, const IndexType&) { + using IndexCType = typename IndexType::c_type; + using ValueCType = typename ValueType::c_type; + + auto sparse_coo_index = + internal::checked_pointer_cast(sparse_tensor.sparse_index()); + auto sparse_coo_values_buffer = sparse_tensor.data(); + + const auto& indices = sparse_coo_index->indices(); + const auto* indices_data = sparse_coo_index->indices()->data()->data_as(); + const auto* sparse_coo_values = sparse_coo_values_buffer->data_as(); + + ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); + + if (indices->shape()[0] != non_zero_count) { + return Status::Invalid("Mismatch between non-zero count in sparse tensor (", + indices->shape()[0], ") and dense tensor (", non_zero_count, + ")"); + } else if (indices->shape()[1] != static_cast(tensor.shape().size())) { + return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (", + indices->shape()[1], ") and tensor shape (", + tensor.shape().size(), ")"); + } + + auto coord_size = indices->shape()[1]; + std::vector coord(coord_size); + for (int64_t i = 0; i < indices->shape()[0]; i++) { + for (int64_t j = 0; j < coord_size; j++) { + coord[j] = static_cast(indices_data[i * coord_size + j]); + } + ARROW_RETURN_NOT_OK( + ValidateValue(sparse_coo_values[i], tensor.Value(coord))); + } + + return Status::OK(); + } +}; + +template +struct SparseCSXValidator : public SparseTensorValidatorBase { + SparseCSXValidator(const Tensor& tensor, const SparseTensor& sparse_tensor) + : SparseTensorValidatorBase(tensor, sparse_tensor) { + sparse_csx_index = + internal::checked_pointer_cast(sparse_tensor.sparse_index()); + } + + Status Validate() { + auto indptr = sparse_csx_index->indptr(); + auto indices = sparse_csx_index->indices(); + ARROW_RETURN_NOT_OK( + internal::ValidateSparseCSXIndex(indptr->type(), indices->type(), indptr->shape(), + indices->shape(), sparse_csx_index->kTypeName)); + return util::VisitCSXType(*sparse_tensor.type(), *indices->type(), *indptr->type(), + *this); + } + + template + Status operator()(const ValueType& value_type, const IndexType& index_type, + const IndexPointerType& index_pointer_type) { + return ValidateSparseCSXTensorValues(value_type, index_type, index_pointer_type); + } + + template + Status ValidateSparseCSXTensorValues(const ValueType&, const IndexType&, + const IndexPointerType&) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + using IndexPointerCType = typename IndexPointerType::c_type; + auto axis = sparse_csx_index->kCompressedAxis; + + auto& indptr = sparse_csx_index->indptr(); + auto& indices = sparse_csx_index->indices(); + auto indptr_data = indptr->data()->template data_as(); + auto indices_data = indices->data()->template data_as(); + auto sparse_csx_values = sparse_tensor.data()->template data_as(); + + ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); + if (indices->shape()[0] != non_zero_count) { + return Status::Invalid("Mismatch between non-zero count in sparse tensor (", + indices->shape()[0], ") and dense tensor (", non_zero_count, + ")"); + } + + for (int64_t i = 0; i < indptr->size() - 1; ++i) { + const auto start = static_cast(indptr_data[i]); + const auto stop = static_cast(indptr_data[i + 1]); + std::vector coord(2); + for (int64_t j = start; j < stop; ++j) { + switch (axis) { + case internal::SparseMatrixCompressedAxis::ROW: + coord[0] = i; + coord[1] = static_cast(indices_data[j]); + break; + case internal::SparseMatrixCompressedAxis::COLUMN: + coord[0] = static_cast(indices_data[j]); + coord[1] = i; + break; + } + ARROW_RETURN_NOT_OK(ValidateValue(sparse_csx_values[j], + tensor.Value(coord))); + } + } + return Status::OK(); + } + + std::shared_ptr sparse_csx_index; +}; + +struct SparseCSFValidator : public SparseTensorValidatorBase { + SparseCSFValidator(const Tensor& tensor, const SparseTensor& sparse_tensor) + : SparseTensorValidatorBase(tensor, sparse_tensor) { + sparse_csf_index = + internal::checked_pointer_cast(sparse_tensor.sparse_index()); + } + + Status Validate() { + const auto& indptr = sparse_csf_index->indptr(); + const auto& indices = sparse_csf_index->indices(); + const auto& axis_order = sparse_csf_index->axis_order(); + + RETURN_NOT_OK(CheckSparseCSFIndexValidity(indptr, indices, axis_order)); + return util::VisitCSXType(*sparse_tensor.type(), *indices.front()->type(), + *indptr.front()->type(), *this); + } + + template + Status operator()(const ValueType& value_type, const IndexType& index_type, + const IndexPointerType& index_pointer_type) { + return ValidateSparseTensorCSFValues(value_type, index_type, index_pointer_type); + } + + template + Status ValidateSparseTensorCSFValues(const ValueType&, const IndexType&, + const IndexPointerType&) { + const auto& indices = sparse_csf_index->indices(); + + ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); + if (indices.back()->size() != non_zero_count) { + return Status::Invalid("Mismatch between non-zero count in sparse tensor (", + indices.back()->size(), ") and dense tensor (", + non_zero_count, ")"); + } else if (indices.size() != tensor.shape().size()) { + return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (", + indices.size(), ") and tensor shape (", + tensor.shape().size(), ")"); + } else { + return CheckValues( + 0, 0, 0, sparse_csf_index->indptr()[0]->size() - 1); + } + } + + template + Status CheckValues(const int64_t dim, const int64_t dim_offset, const int64_t start, + const int64_t stop) { + using ValueCType = typename ValueType::c_type; + using IndexCType = typename IndexType::c_type; + using IndexPointerCType = typename IndexPointerType::c_type; + + const auto& indices = sparse_csf_index->indices(); + const auto& indptr = sparse_csf_index->indptr(); + const auto& axis_order = sparse_csf_index->axis_order(); + const auto* values = sparse_tensor.data()->data_as(); + auto ndim = indices.size(); + auto strides = tensor.strides(); + + const auto& cur_indices = indices[dim]; + const auto* indices_data = cur_indices->data()->data_as() + start; + + if (dim == static_cast(ndim) - 1) { + for (auto i = start; i < stop; ++i) { + auto index = static_cast(*indices_data); + const int64_t offset = dim_offset + index * strides[axis_order[dim]]; + + auto sparse_value = values[i]; + auto tensor_value = + *reinterpret_cast(tensor.raw_data() + offset); + ARROW_RETURN_NOT_OK(ValidateValue(sparse_value, tensor_value)); + ++indices_data; + } + } else { + const auto& cur_indptr = indptr[dim]; + const auto* indptr_data = cur_indptr->data()->data_as() + start; + + for (int64_t i = start; i < stop; ++i) { + const int64_t index = *indices_data; + int64_t offset = dim_offset + index * strides[axis_order[dim]]; + auto next_start = static_cast(*indptr_data); + auto next_stop = static_cast(*(indptr_data + 1)); + + ARROW_RETURN_NOT_OK((CheckValues( + dim + 1, offset, next_start, next_stop))); + + ++indices_data; + ++indptr_data; + } + } + return Status::OK(); + } + + std::shared_ptr sparse_csf_index; +}; + +} // namespace + +Status SparseTensor::Validate(const Tensor& tensor) const { + if (!is_tensor_supported(type_->id())) { + return Status::NotImplemented("SparseTensor values only support numeric types"); + } else if (!tensor.type()->Equals(type_)) { + return Status::Invalid("SparseTensor value types do not match"); + } else if (tensor.shape() != shape_) { + return Status::Invalid("SparseTensor shape do not match"); + } else if (tensor.dim_names() != dim_names_) { + return Status::Invalid("SparseTensor dim_names do not match"); + } + + switch (format_id()) { + case SparseTensorFormat::COO: { + SparseCOOValidator validator(tensor, *this); + return validator.Validate(); + } + + case SparseTensorFormat::CSR: { + SparseCSXValidator validator(tensor, *this); + return validator.Validate(); + } + + case SparseTensorFormat::CSC: { + SparseCSXValidator validator(tensor, *this); + return validator.Validate(); + } + + case SparseTensorFormat::CSF: { + SparseCSFValidator validator(tensor, *this); + return validator.Validate(); + } + default: + return Status::Invalid("Invalid sparse tensor format"); + } +} + } // namespace arrow diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 5faae16bb25cc..57594868c6140 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -508,6 +508,10 @@ class ARROW_EXPORT SparseTensor { return ToTensor(default_memory_pool()); } + /// \brief Check whether the sparse tensor is valid and is the + /// correct compressed form of the given tensor. + Status Validate(const Tensor& tensor) const; + protected: // Constructor with all attributes SparseTensor(const std::shared_ptr& type, const std::shared_ptr& data, @@ -588,6 +592,8 @@ class SparseTensorImpl : public SparseTensor { ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor( tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index, &data)); + // TODO CHECK SparseTensorCreation. + return std::make_shared>( internal::checked_pointer_cast(sparse_index), tensor.type(), data, tensor.shape(), tensor.dim_names_); diff --git a/cpp/src/arrow/sparse_tensor_test.cc b/cpp/src/arrow/sparse_tensor_test.cc index 8ba4417aa615b..77adcca3b7459 100644 --- a/cpp/src/arrow/sparse_tensor_test.cc +++ b/cpp/src/arrow/sparse_tensor_test.cc @@ -216,6 +216,58 @@ TEST(TestSparseCSCIndex, Make) { indices_data)); } +TEST(TestSparseCSFIndex, Make) { + std::vector axis_order = {0, 1, 2}; + + auto indpr_0_buffer = Buffer::FromVector(std::vector{0, 2, 4}); + auto indptr_1_buffer = Buffer::FromVector(std::vector{0, 2, 4, 6, 8}); + + auto indices_0_buffer = Buffer::FromVector(std::vector{1, 2}); + auto indices_1_buffer = Buffer::FromVector(std::vector{1, 2, 3, 4}); + auto indices_2_buffer = + Buffer::FromVector(std::vector{1, 2, 3, 4, 5, 6, 7, 8}); + ASSERT_OK(SparseCSFIndex::Make(int32(), int32(), {2, 4, 8}, {0, 1, 2}, + {indpr_0_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, indices_2_buffer})); + + // Non-integer indptr type + ASSERT_RAISES(TypeError, SparseCSFIndex::Make( + float32(), int32(), {2, 4, 8}, {0, 1, 2}, + {indpr_0_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, indices_2_buffer})); + + // Non-integer indices type + ASSERT_RAISES(TypeError, SparseCSFIndex::Make( + int32(), float32(), {2, 4, 8}, {0, 1, 2}, + {indpr_0_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, indices_2_buffer})); + + // axis_order length mismatch + ASSERT_RAISES( + Invalid, SparseCSFIndex::Make( + int32(), int32(), {2, 4, 8}, {0, 2}, {indpr_0_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, indices_2_buffer})); + + // indptr_data length mismatch + ASSERT_RAISES(Invalid, SparseCSFIndex::Make( + int32(), int32(), {2, 4}, {0, 1, 2}, + {indpr_0_buffer, indptr_1_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, indices_2_buffer})); + + // indices_data length mismatch + ASSERT_RAISES(Invalid, SparseCSFIndex::Make(int32(), int32(), {2, 4, 8}, {0, 1, 2}, + {indpr_0_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, + indices_2_buffer, indices_2_buffer})); + // The shape value exceeds the limit of the type + ASSERT_RAISES( + Invalid, + SparseCSFIndex::Make( + int32(), int32(), {2, 4, std::numeric_limits::max()}, {0, 1, 2}, + {indpr_0_buffer, indptr_1_buffer}, + {indices_0_buffer, indices_1_buffer, indices_2_buffer, indices_2_buffer})); +} + template class TestSparseTensorBase : public ::testing::Test { protected: @@ -255,6 +307,7 @@ class TestSparseCOOTensorBase : public TestSparseTensorBase { ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_, SparseCOOTensor::Make( dense_tensor, TypeTraits::type_singleton())); + ASSERT_OK(sparse_tensor_from_dense_->Validate(dense_tensor)); } protected: @@ -296,6 +349,7 @@ TEST_F(TestSparseCOOTensor, CreationFromZeroTensor) { Tensor::Make(int64(), Buffer::Wrap(dense_values), this->shape_)); ASSERT_OK_AND_ASSIGN(std::shared_ptr st_zero, SparseCOOTensor::Make(*t_zero, int64())); + ASSERT_OK(st_zero->Validate(*t_zero)); ASSERT_EQ(0, st_zero->non_zero_length()); ASSERT_EQ(dense_size, st_zero->size()); @@ -336,6 +390,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNumericTensor1D) { std::shared_ptr st; ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(dense_vector)); + ASSERT_OK(st->Validate(dense_vector)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -362,6 +417,7 @@ TEST_F(TestSparseCOOTensor, CreationFromTensor) { std::shared_ptr st; ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(tensor)); + ASSERT_OK(st->Validate(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -387,6 +443,7 @@ TEST_F(TestSparseCOOTensor, CreationFromNonContiguousTensor) { std::shared_ptr st; ASSERT_OK_AND_ASSIGN(st, SparseCOOTensor::Make(tensor)); + ASSERT_OK(st->Validate(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -406,6 +463,7 @@ TEST_F(TestSparseCOOTensor, TestToTensor) { std::shared_ptr sparse_tensor; ASSERT_OK_AND_ASSIGN(sparse_tensor, SparseCOOTensor::Make(tensor)); + ASSERT_OK(sparse_tensor->Validate(tensor)); ASSERT_EQ(5, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -430,6 +488,7 @@ class TestSparseCOOTensorCreationFromNegativeZero ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_OK(sparse_coo_tensor->Validate(*dense_tensor)); ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); auto si = internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); @@ -454,6 +513,7 @@ class TestSparseCOOTensorCreationFromNegativeZero ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_OK(sparse_coo_tensor->Validate(*dense_tensor)); ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); auto si = internal::checked_pointer_cast(sparse_coo_tensor->sparse_index()); @@ -480,6 +540,7 @@ class TestSparseCOOTensorCreationFromNegativeZero {type_->byte_width() * 5, type_->byte_width()})); ASSERT_OK_AND_ASSIGN(auto sparse_coo_tensor, SparseCOOTensor::Make(*dense_tensor, int64())); + ASSERT_OK(sparse_coo_tensor->Validate(*dense_tensor)); ASSERT_EQ(12, sparse_coo_tensor->size()); ASSERT_EQ(2, sparse_coo_tensor->non_zero_length()); auto si = @@ -546,7 +607,9 @@ TYPED_TEST_P(TestIntegerSparseCOOTensorEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st1, SparseCOOTensor::Make(*this->tensor1_)); ASSERT_OK_AND_ASSIGN(st2, SparseCOOTensor::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCOOTensor::Make(*this->tensor1_)); - + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); ASSERT_TRUE(st1->Equals(*st3)); @@ -581,6 +644,9 @@ TYPED_TEST_P(TestFloatingSparseCOOTensorEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st1, SparseCOOTensor::Make(*this->tensor1_)); ASSERT_OK_AND_ASSIGN(st2, SparseCOOTensor::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCOOTensor::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); @@ -834,6 +900,7 @@ class TestSparseCSRMatrixBase : public TestSparseTensorBase { ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_, SparseCSRMatrix::Make( dense_tensor, TypeTraits::type_singleton())); + ASSERT_OK(sparse_tensor_from_dense_->Validate(dense_tensor)); } protected: @@ -852,6 +919,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromZeroTensor) { Tensor::Make(int64(), Buffer::Wrap(dense_values), this->shape_)); ASSERT_OK_AND_ASSIGN(std::shared_ptr st_zero, SparseCSRMatrix::Make(*t_zero, int64())); + ASSERT_OK(st_zero->Validate(*t_zero)); ASSERT_EQ(0, st_zero->non_zero_length()); ASSERT_EQ(dense_size, st_zero->size()); @@ -866,6 +934,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNumericTensor2D) { std::shared_ptr st1; ASSERT_OK_AND_ASSIGN(st1, SparseCSRMatrix::Make(tensor)); + ASSERT_OK(st1->Validate(tensor)); auto st2 = this->sparse_tensor_from_dense_; @@ -918,6 +987,7 @@ TEST_F(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { std::shared_ptr st; ASSERT_OK_AND_ASSIGN(st, SparseCSRMatrix::Make(tensor)); + ASSERT_OK(st->Validate(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -957,6 +1027,7 @@ TEST_F(TestSparseCSRMatrix, TestToTensor) { std::shared_ptr sparse_tensor; ASSERT_OK_AND_ASSIGN(sparse_tensor, SparseCSRMatrix::Make(tensor)); + ASSERT_OK(sparse_tensor->Validate(tensor)); ASSERT_EQ(7, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -987,6 +1058,7 @@ class TestSparseCSRTensorCreationFromNegativeZero ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); ASSERT_OK_AND_ASSIGN(auto sparse_csr_tensor, SparseCSRMatrix::Make(*dense_tensor, int64())); + ASSERT_OK(sparse_csr_tensor->Validate(*dense_tensor)); ASSERT_EQ(2, sparse_csr_tensor->non_zero_length()); auto si = internal::checked_pointer_cast(sparse_csr_tensor->sparse_index()); @@ -1048,6 +1120,9 @@ TYPED_TEST_P(TestIntegerSparseCSRMatrixEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st1, SparseCSRMatrix::Make(*this->tensor1_)); ASSERT_OK_AND_ASSIGN(st2, SparseCSRMatrix::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCSRMatrix::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); @@ -1083,6 +1158,9 @@ TYPED_TEST_P(TestFloatingSparseCSRMatrixEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st1, SparseCSRMatrix::Make(*this->tensor1_)); ASSERT_OK_AND_ASSIGN(st2, SparseCSRMatrix::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCSRMatrix::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); @@ -1215,6 +1293,7 @@ class TestSparseCSCMatrixBase : public TestSparseTensorBase { ASSERT_OK_AND_ASSIGN(sparse_tensor_from_dense_, SparseCSCMatrix::Make( dense_tensor, TypeTraits::type_singleton())); + ASSERT_OK(sparse_tensor_from_dense_->Validate(dense_tensor)); } protected: @@ -1233,7 +1312,7 @@ TEST_F(TestSparseCSCMatrix, CreationFromZeroTensor) { Tensor::Make(int64(), Buffer::Wrap(dense_values), this->shape_)); ASSERT_OK_AND_ASSIGN(std::shared_ptr st_zero, SparseCSCMatrix::Make(*t_zero, int64())); - + ASSERT_OK(st_zero->Validate(*t_zero)); ASSERT_EQ(0, st_zero->non_zero_length()); ASSERT_EQ(dense_size, st_zero->size()); @@ -1247,7 +1326,7 @@ TEST_F(TestSparseCSCMatrix, CreationFromNumericTensor2D) { std::shared_ptr st1; ASSERT_OK_AND_ASSIGN(st1, SparseCSCMatrix::Make(tensor)); - + ASSERT_OK(st1->Validate(tensor)); auto st2 = this->sparse_tensor_from_dense_; CheckSparseIndexFormatType(SparseTensorFormat::CSC, *st1); @@ -1299,7 +1378,7 @@ TEST_F(TestSparseCSCMatrix, CreationFromNonContiguousTensor) { std::shared_ptr st; ASSERT_OK_AND_ASSIGN(st, SparseCSCMatrix::Make(tensor)); - + ASSERT_OK(st->Validate(tensor)); ASSERT_EQ(12, st->non_zero_length()); ASSERT_TRUE(st->is_mutable()); @@ -1339,6 +1418,7 @@ TEST_F(TestSparseCSCMatrix, TestToTensor) { std::shared_ptr sparse_tensor; ASSERT_OK_AND_ASSIGN(sparse_tensor, SparseCSCMatrix::Make(tensor)); + ASSERT_OK(sparse_tensor->Validate(tensor)); ASSERT_EQ(7, sparse_tensor->non_zero_length()); ASSERT_TRUE(sparse_tensor->is_mutable()); @@ -1368,6 +1448,7 @@ class TestSparseCSCTensorCreationFromNegativeZero ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); ASSERT_OK_AND_ASSIGN(auto sparse_csc_tensor, SparseCSCMatrix::Make(*dense_tensor, int64())); + ASSERT_OK(sparse_csc_tensor->Validate(*dense_tensor)); ASSERT_EQ(2, sparse_csc_tensor->non_zero_length()); auto si = internal::checked_pointer_cast(sparse_csc_tensor->sparse_index()); @@ -1430,6 +1511,10 @@ TYPED_TEST_P(TestIntegerSparseCSCMatrixEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st2, SparseCSCMatrix::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCSCMatrix::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); + ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); ASSERT_TRUE(st1->Equals(*st3)); @@ -1465,6 +1550,10 @@ TYPED_TEST_P(TestFloatingSparseCSCMatrixEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st2, SparseCSCMatrix::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCSCMatrix::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); + ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); ASSERT_TRUE(st1->Equals(*st3)); @@ -1551,6 +1640,10 @@ TYPED_TEST_P(TestIntegerSparseCSFTensorEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st2, SparseCSFTensor::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCSFTensor::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); + ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); ASSERT_TRUE(st1->Equals(*st3)); @@ -1586,6 +1679,10 @@ TYPED_TEST_P(TestFloatingSparseCSFTensorEquality, TestEquality) { ASSERT_OK_AND_ASSIGN(st2, SparseCSFTensor::Make(*this->tensor2_)); ASSERT_OK_AND_ASSIGN(st3, SparseCSFTensor::Make(*this->tensor1_)); + ASSERT_OK(st1->Validate(*this->tensor1_)); + ASSERT_OK(st2->Validate(*this->tensor2_)); + ASSERT_OK(st3->Validate(*this->tensor1_)); + ASSERT_TRUE(st1->Equals(*st1)); ASSERT_FALSE(st1->Equals(*st2)); ASSERT_TRUE(st1->Equals(*st3)); @@ -1639,6 +1736,7 @@ class TestSparseCSFTensorBase : public TestSparseTensorBase { sparse_tensor_from_dense_, SparseCSFTensor::Make(dense_tensor_, TypeTraits::type_singleton())); + ASSERT_OK(sparse_tensor_from_dense_->Validate(dense_tensor_)); } protected: @@ -1660,6 +1758,7 @@ TEST_F(TestSparseCSFTensor, CreationFromZeroTensor) { ASSERT_OK_AND_ASSIGN(std::shared_ptr st_zero, SparseCSFTensor::Make(*t_zero, int64())); + ASSERT_OK(st_zero->Validate(*t_zero)); ASSERT_EQ(0, st_zero->non_zero_length()); ASSERT_EQ(dense_size, st_zero->size()); @@ -1688,6 +1787,7 @@ class TestSparseCSFTensorCreationFromNegativeZero ASSERT_OK_AND_ASSIGN(auto dense_tensor, Tensor::Make(type_, buffer, shape)); ASSERT_OK_AND_ASSIGN(auto sparse_csf_tensor, SparseCSFTensor::Make(*dense_tensor, int64())); + ASSERT_OK(sparse_csf_tensor->Validate(*dense_tensor)); ASSERT_EQ(2, sparse_csf_tensor->non_zero_length()); auto si = internal::checked_pointer_cast(sparse_csf_tensor->sparse_index()); @@ -1799,7 +1899,7 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestRoundTrip) { std::shared_ptr st; ASSERT_OK_AND_ASSIGN( st, SparseCSFTensor::Make(*dt, TypeTraits::type_singleton())); - + ASSERT_OK(st->Validate(*dt)); ASSERT_TRUE(st->Equals(*this->sparse_tensor_from_dense_)); } @@ -1822,6 +1922,7 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestAlternativeAxisOrder) { {0, 3, 1, 3, 5}}; auto si_1 = this->MakeSparseCSFIndex(axis_order_1, indptr_values_1, indices_values_1); auto st_1 = this->MakeSparseTensor(si_1, sparse_values_1, shape, dim_names); + ASSERT_OK(st_1->Validate(tensor)); // Axis order 2 std::vector axis_order_2 = {1, 0}; @@ -1831,6 +1932,7 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestAlternativeAxisOrder) { {0, 1, 0, 3, 3}}; auto si_2 = this->MakeSparseCSFIndex(axis_order_2, indptr_values_2, indices_values_2); auto st_2 = this->MakeSparseTensor(si_2, sparse_values_2, shape, dim_names); + ASSERT_OK(st_2->Validate(tensor)); std::shared_ptr dt_1, dt_2; ASSERT_OK_AND_ASSIGN(dt_1, st_1->ToTensor()); @@ -1862,6 +1964,7 @@ TYPED_TEST_P(TestSparseCSFTensorForIndexValueType, TestNonAscendingShape) { ASSERT_OK_AND_ASSIGN( sparse_tensor, SparseCSFTensor::Make(dense_tensor, TypeTraits::type_singleton())); + ASSERT_OK(sparse_tensor->Validate(dense_tensor)); std::vector> indptr_values = { {0, 1, 3}, {0, 2, 4, 7}, {0, 1, 2, 3, 4, 6, 7, 8}}; diff --git a/cpp/src/arrow/tensor.cc b/cpp/src/arrow/tensor.cc index 7483d079b0b66..2f6bcda037f8f 100644 --- a/cpp/src/arrow/tensor.cc +++ b/cpp/src/arrow/tensor.cc @@ -490,7 +490,7 @@ int64_t StridedTensorCountNonZero(int dim_index, int64_t offset, const Tensor& t if (dim_index == tensor.ndim() - 1) { for (int64_t i = 0; i < tensor.shape()[dim_index]; ++i) { const auto* ptr = tensor.raw_data() + offset + i * tensor.strides()[dim_index]; - auto& elem = *reinterpret_cast(ptr); + auto elem = *reinterpret_cast(ptr); if (internal::is_not_zero(elem)) { ++nnz; } diff --git a/cpp/src/arrow/tensor/converter_internal.h b/cpp/src/arrow/tensor/converter_internal.h index 74e02cc015a45..12eeb33ceb1b3 100644 --- a/cpp/src/arrow/tensor/converter_internal.h +++ b/cpp/src/arrow/tensor/converter_internal.h @@ -24,9 +24,6 @@ namespace arrow { -template -Status VisitTypeInline(const DataType& type, VISITOR* visitor, ARGS&&... args); - namespace internal { struct SparseTensorConverterMixin { @@ -71,52 +68,14 @@ Result> MakeTensorFromSparseCSFTensor( template struct ConverterVisitor { explicit ConverterVisitor(Converter& converter) : converter(converter) {} - template - Status operator()(const ValueType& value, const IndexType& index_type) { - return converter.Convert(value, index_type); - } - Converter& converter; -}; - -struct ValueTypeVisitor { - template - enable_if_number Visit(const ValueType& value_type, - const IndexType& index_type, - Function&& function) { - return function(value_type, index_type); + template + Status operator()(Args&&... args) { + return converter.Convert(std::forward(args)...); } - template - Status Visit(const DataType& value_type, const IndexType&, Function&&) { - return Status::Invalid("Invalid value type: ", value_type.name(), - ". Expected a number."); - } -}; - -struct IndexAndValueTypeVisitor { - template - enable_if_integer Visit(const IndexType& index_type, - const DataType& value_type, - Function&& function) { - ValueTypeVisitor visitor; - return VisitTypeInline(value_type, &visitor, index_type, - std::forward(function)); - } - - template - Status Visit(const DataType& type, const DataType&, Function&&) { - return Status::Invalid("Invalid index type: ", type.name(), ". Expected integer."); - } + Converter& converter; }; -template -Status VisitValueAndIndexType(const DataType& value_type, const DataType& index_type, - Function&& function) { - IndexAndValueTypeVisitor visitor; - return VisitTypeInline(index_type, &visitor, value_type, - std::forward(function)); -} - } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/tensor/coo_converter.cc b/cpp/src/arrow/tensor/coo_converter.cc index d7bc64d227e08..d8f908138f837 100644 --- a/cpp/src/arrow/tensor/coo_converter.cc +++ b/cpp/src/arrow/tensor/coo_converter.cc @@ -18,7 +18,6 @@ #include "arrow/tensor/converter_internal.h" #include -#include #include #include #include @@ -28,11 +27,10 @@ #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" -#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging_internal.h" #include "arrow/util/macros.h" -#include "arrow/visit_type_inline.h" +#include "arrow/util/sparse_tensor_util.h" namespace arrow { @@ -42,57 +40,6 @@ namespace internal { namespace { -template -Status ValidateSparseCooTensorCreation(const SparseCOOIndex& sparse_coo_index, - const Buffer& sparse_coo_values_buffer, - const Tensor& tensor) { - using IndexCType = typename IndexType::c_type; - using ValueCType = typename ValueType::c_type; - - const auto& indices = sparse_coo_index.indices(); - const auto* indices_data = sparse_coo_index.indices()->data()->data_as(); - const auto* sparse_coo_values = sparse_coo_values_buffer.data_as(); - - ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); - - if (indices->shape()[0] != non_zero_count) { - return Status::Invalid("Mismatch between non-zero count in sparse tensor (", - indices->shape()[0], ") and dense tensor (", non_zero_count, - ")"); - } else if (indices->shape()[1] != static_cast(tensor.shape().size())) { - return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (", - indices->shape()[1], ") and tensor shape (", - tensor.shape().size(), ")"); - } - - auto coord_size = indices->shape()[1]; - std::vector coord(coord_size); - for (int64_t i = 0; i < indices->shape()[0]; i++) { - if (!is_not_zero(sparse_coo_values[i])) { - return Status::Invalid("Sparse tensor values must be non-zero"); - } - - for (int64_t j = 0; j < coord_size; j++) { - coord[j] = static_cast(indices_data[i * coord_size + j]); - } - - if (sparse_coo_values[i] != tensor.Value(coord)) { - if constexpr (is_floating_type::value) { - if (!std::isnan(tensor.Value(coord)) || - !std::isnan(sparse_coo_values[i])) { - return Status::Invalid( - "Inconsistent values between sparse tensor and dense tensor"); - } - } else { - return Status::Invalid( - "Inconsistent values between sparse tensor and dense tensor"); - } - } - } - - return Status::OK(); -} - template inline void IncrementRowMajorIndex(std::vector& coord, const std::vector& shape) { @@ -265,8 +212,6 @@ class SparseCOOTensorConverter { indices_shape, indices_strides); ARROW_ASSIGN_OR_RAISE(sparse_index, SparseCOOIndex::Make(coords, true)); data = std::move(values_buffer); - DCHECK_OK((ValidateSparseCooTensorCreation(*sparse_index, *data, - tensor_))); return Status::OK(); } @@ -328,7 +273,8 @@ Status MakeSparseCOOTensorFromTensor(const Tensor& tensor, std::shared_ptr* out_data) { SparseCOOTensorConverter converter(tensor, index_value_type, pool); ConverterVisitor visitor{converter}; - ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor)); + ARROW_RETURN_NOT_OK( + util::VisitCOOTensorType(*tensor.type(), *index_value_type, visitor)); *out_sparse_index = checked_pointer_cast(converter.sparse_index); *out_data = converter.data; return Status::OK(); diff --git a/cpp/src/arrow/tensor/csf_converter.cc b/cpp/src/arrow/tensor/csf_converter.cc index b1abff26b1885..e4c77a44fe50a 100644 --- a/cpp/src/arrow/tensor/csf_converter.cc +++ b/cpp/src/arrow/tensor/csf_converter.cc @@ -18,7 +18,6 @@ #include "arrow/tensor/converter_internal.h" #include -#include #include #include #include @@ -30,11 +29,10 @@ #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" -#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging_internal.h" #include "arrow/util/sort_internal.h" -#include "arrow/visit_type_inline.h" +#include "arrow/util/sparse_tensor_util.h" namespace arrow { @@ -58,89 +56,6 @@ inline void IncrementIndex(std::vector& coord, const std::vector -Status CheckValues(const SparseCSFIndex& sparse_csf_index, - const typename ValueType::c_type* values, const Tensor& tensor, - const int64_t dim, const int64_t dim_offset, const int64_t start, - const int64_t stop) { - using ValueCType = typename ValueType::c_type; - using IndexCType = typename IndexType::c_type; - - const auto& indices = sparse_csf_index.indices(); - const auto& indptr = sparse_csf_index.indptr(); - const auto& axis_order = sparse_csf_index.axis_order(); - auto ndim = indices.size(); - auto strides = tensor.strides(); - - const auto& cur_indices = indices[dim]; - const auto* indices_data = cur_indices->data()->data_as() + start; - - if (dim == static_cast(ndim) - 1) { - for (auto i = start; i < stop; ++i) { - auto index = static_cast(*indices_data); - const int64_t offset = dim_offset + index * strides[axis_order[dim]]; - - auto sparse_value = values[i]; - auto tensor_value = - *reinterpret_cast(tensor.raw_data() + offset); - if (!is_not_zero(sparse_value)) { - return Status::Invalid("Sparse tensor values must be non-zero"); - } else if (sparse_value != tensor_value) { - if constexpr (is_floating_type::value) { - if (!std::isnan(tensor_value) || !std::isnan(sparse_value)) { - return Status::Invalid( - "Inconsistent values between sparse tensor and dense tensor"); - } - } else { - return Status::Invalid( - "Inconsistent values between sparse tensor and dense tensor"); - } - } - ++indices_data; - } - } else { - const auto& cur_indptr = indptr[dim]; - const auto* indptr_data = cur_indptr->data()->data_as() + start; - - for (int64_t i = start; i < stop; ++i) { - const int64_t index = *indices_data; - int64_t offset = dim_offset + index * strides[axis_order[dim]]; - auto next_start = static_cast(*indptr_data); - auto next_stop = static_cast(*(indptr_data + 1)); - - ARROW_RETURN_NOT_OK((CheckValues( - sparse_csf_index, values, tensor, dim + 1, offset, next_start, next_stop))); - - ++indices_data; - ++indptr_data; - } - } - return Status::OK(); -} - -template -Status ValidateSparseTensorCSFCreation(const SparseIndex& sparse_index, - const Buffer& values_buffer, - const Tensor& tensor) { - auto sparse_csf_index = checked_cast(sparse_index); - const auto* values = values_buffer.data_as(); - const auto& indices = sparse_csf_index.indices(); - - ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); - if (indices.back()->size() != non_zero_count) { - return Status::Invalid("Mismatch between non-zero count in sparse tensor (", - indices.back()->size(), ") and dense tensor (", non_zero_count, - ")"); - } else if (indices.size() != tensor.shape().size()) { - return Status::Invalid("Mismatch between coordinate dimension in sparse tensor (", - indices.size(), ") and tensor shape (", tensor.shape().size(), - ")"); - } else { - return CheckValues(sparse_csf_index, values, tensor, 0, 0, 0, - sparse_csf_index.indptr()[0]->size() - 1); - } -} - // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCSFIndex @@ -151,8 +66,10 @@ class SparseCSFTensorConverter { MemoryPool* pool) : tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {} - template - Status Convert(const ValueType&, const IndexType&) { + // Note: The same type is considered for both indices and indptr during + // tensor-to-CSF-tensor conversion. + template + Status Convert(const ValueType&, const IndexType&, const IndexPointerType&) { using ValueCType = typename ValueType::c_type; using IndexCType = typename IndexType::c_type; RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_, @@ -235,8 +152,6 @@ class SparseCSFTensorConverter { ARROW_ASSIGN_OR_RAISE( sparse_index, SparseCSFIndex::Make(index_value_type_, indices_shapes, axis_order, indptr_buffers, indices_buffers)); - DCHECK_OK((ValidateSparseTensorCSFCreation(*sparse_index, *data, - tensor_))); return Status::OK(); } @@ -353,7 +268,8 @@ Status MakeSparseCSFTensorFromTensor(const Tensor& tensor, std::shared_ptr* out_data) { SparseCSFTensorConverter converter(tensor, index_value_type, pool); ConverterVisitor visitor{converter}; - ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor)); + ARROW_RETURN_NOT_OK( + util::VisitCSXType(*tensor.type(), *index_value_type, *index_value_type, visitor)); *out_sparse_index = checked_pointer_cast(converter.sparse_index); *out_data = converter.data; return Status::OK(); diff --git a/cpp/src/arrow/tensor/csx_converter.cc b/cpp/src/arrow/tensor/csx_converter.cc index 0af54cc3a3e4e..de1f3c31eb145 100644 --- a/cpp/src/arrow/tensor/csx_converter.cc +++ b/cpp/src/arrow/tensor/csx_converter.cc @@ -17,7 +17,6 @@ #include "arrow/tensor/converter_internal.h" -#include #include #include #include @@ -27,10 +26,9 @@ #include "arrow/status.h" #include "arrow/tensor.h" #include "arrow/type.h" -#include "arrow/type_traits.h" #include "arrow/util/checked_cast.h" #include "arrow/util/logging_internal.h" -#include "arrow/visit_type_inline.h" +#include "arrow/util/sparse_tensor_util.h" namespace arrow { @@ -40,78 +38,6 @@ namespace internal { namespace { -template -Status ValidateSparseCSXTensorCreation(const SparseIndexType& sparse_csx_index, - const Buffer& values_buffer, - const Tensor& tensor) { - using ValueCType = typename ValueType::c_type; - using IndexCType = typename IndexType::c_type; - auto axis = sparse_csx_index.kCompressedAxis; - - auto& indptr = sparse_csx_index.indptr(); - auto& indices = sparse_csx_index.indices(); - auto indptr_data = indptr->data()->template data_as(); - auto indices_data = indices->data()->template data_as(); - auto sparse_csx_values = values_buffer.data_as(); - - ARROW_ASSIGN_OR_RAISE(auto non_zero_count, tensor.CountNonZero()); - if (indices->shape()[0] != non_zero_count) { - return Status::Invalid("Mismatch between non-zero count in sparse tensor (", - indices->shape()[0], ") and dense tensor (", non_zero_count, - ")"); - } - - for (int64_t i = 0; i < indptr->size() - 1; ++i) { - const auto start = static_cast(indptr_data[i]); - const auto stop = static_cast(indptr_data[i + 1]); - std::vector coord(2); - for (int64_t j = start; j < stop; ++j) { - if (!is_not_zero(sparse_csx_values[j])) { - return Status::Invalid("Sparse tensor values must be non-zero"); - } - - switch (axis) { - case SparseMatrixCompressedAxis::ROW: - coord[0] = i; - coord[1] = static_cast(indices_data[j]); - break; - case SparseMatrixCompressedAxis::COLUMN: - coord[0] = static_cast(indices_data[j]); - coord[1] = i; - break; - } - if (sparse_csx_values[j] != tensor.Value(coord)) { - if constexpr (is_floating_type::value) { - if (!std::isnan(sparse_csx_values[j]) || - !std::isnan(tensor.Value(coord))) { - return Status::Invalid( - "Inconsistent values between sparse tensor and dense tensor"); - } - } else { - return Status::Invalid( - "Inconsistent values between sparse tensor and dense tensor"); - } - } - } - } - return Status::OK(); -} - -template -Status ValidateSparseCSXTensorCreation(const SparseIndex& sparse_index, - const Buffer& values_buffer, - const Tensor& tensor) { - if (sparse_index.format_id() == SparseTensorFormat::CSC) { - auto sparse_csc_index = checked_cast(sparse_index); - return ValidateSparseCSXTensorCreation( - sparse_csc_index, values_buffer, tensor); - } else { - auto sparse_csr_index = checked_cast(sparse_index); - return ValidateSparseCSXTensorCreation( - sparse_csr_index, values_buffer, tensor); - } -} - // ---------------------------------------------------------------------- // SparseTensorConverter for SparseCSRIndex @@ -122,8 +48,10 @@ class SparseCSXMatrixConverter { MemoryPool* pool) : axis_(axis), tensor_(tensor), index_value_type_(index_value_type), pool_(pool) {} - template - Status Convert(const ValueType&, const IndexType&) { + // Note: The same type is considered for both indices and indptr during + // tensor-to-CSX-tensor conversion. + template + Status Convert(const ValueType&, const IndexType&, const IndexPointerType&) { RETURN_NOT_OK(::arrow::internal::CheckSparseIndexMaximumValue(index_value_type_, tensor_.shape())); using ValueCType = typename ValueType::c_type; @@ -193,8 +121,6 @@ class SparseCSXMatrixConverter { sparse_index = std::make_shared(indptr_tensor, indices_tensor); } data = std::move(values_buffer); - DCHECK_OK((ValidateSparseCSXTensorCreation(*sparse_index, *data, - tensor_))); return Status::OK(); } @@ -218,7 +144,8 @@ Status MakeSparseCSXMatrixFromTensor(SparseMatrixCompressedAxis axis, std::shared_ptr* out_data) { SparseCSXMatrixConverter converter(axis, tensor, index_value_type, pool); ConverterVisitor visitor(converter); - ARROW_RETURN_NOT_OK(VisitValueAndIndexType(*tensor.type(), *index_value_type, visitor)); + ARROW_RETURN_NOT_OK( + util::VisitCSXType(*tensor.type(), *index_value_type, *index_value_type, visitor)); *out_sparse_index = converter.sparse_index; *out_data = converter.data; return Status::OK(); diff --git a/cpp/src/arrow/util/sparse_tensor_util.h b/cpp/src/arrow/util/sparse_tensor_util.h new file mode 100644 index 0000000000000..4665c94a2a3e0 --- /dev/null +++ b/cpp/src/arrow/util/sparse_tensor_util.h @@ -0,0 +1,94 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/sparse_tensor.h" +#include "arrow/status.h" +#include "arrow/type.h" +#include "arrow/visit_type_inline.h" + +namespace arrow::util { + +namespace detail { + +struct ValueVisitor { + template + enable_if_number Visit(const ValueType& value_type, + Function&& function, Args&&... args) { + return function(value_type, std::forward(args)...); + } + + template + Status Visit(const DataType& value_type, Args&&... args) { + return Status::TypeError("Invalid value type: ", value_type.name(), + ". Expected a number."); + } +}; + +struct IndexVisitor { + template + enable_if_integer Visit(const IndexType& index_type, + Function&& function, + const DataType& value_type, Args&&... args) { + ValueVisitor visitor; + return VisitTypeInline(value_type, &visitor, std::forward(function), + index_type, std::forward(args)...); + } + + template + Status Visit(const DataType& index_type, Args&&...) { + return Status::TypeError("Invalid index pointer type: ", index_type.name(), + ". Expected integer."); + } +}; + +struct IndexPointerVisitor { + template + enable_if_integer Visit( + const IndexPointerType& index_pointer_type, Function&& function, + const DataType& index_type, const DataType& value_type) { + IndexVisitor visitor; + return VisitTypeInline(index_type, &visitor, std::forward(function), + value_type, index_pointer_type); + } + + template + Status Visit(const DataType& index_pointer_type, Args&&...) { + return Status::TypeError("Invalid index pointer type: ", index_pointer_type.name(), + ". Expected integer."); + } +}; +} // namespace detail + +template +inline Status VisitCSXType(const DataType& value_type, const DataType& index_type, + const DataType& indptr_type, Function&& function) { + detail::IndexPointerVisitor visitor; + return VisitTypeInline(indptr_type, &visitor, std::forward(function), + index_type, value_type); +} + +template +inline Status VisitCOOTensorType(const DataType& value_type, const DataType& index_type, + Function&& function) { + detail::IndexVisitor visitor; + return VisitTypeInline(index_type, &visitor, std::forward(function), + value_type); +} + +} // namespace arrow::util From deb9a895bbc77d22159161dea5a751899fa21c7c Mon Sep 17 00:00:00 2001 From: arash andishgar Date: Fri, 10 Oct 2025 22:19:25 +0330 Subject: [PATCH 7/7] fix typo --- cpp/src/arrow/sparse_tensor.h | 2 -- cpp/src/arrow/util/sparse_tensor_util.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/sparse_tensor.h b/cpp/src/arrow/sparse_tensor.h index 57594868c6140..5e80ca46cfcee 100644 --- a/cpp/src/arrow/sparse_tensor.h +++ b/cpp/src/arrow/sparse_tensor.h @@ -592,8 +592,6 @@ class SparseTensorImpl : public SparseTensor { ARROW_RETURN_NOT_OK(internal::MakeSparseTensorFromTensor( tensor, SparseIndexType::format_id, index_value_type, pool, &sparse_index, &data)); - // TODO CHECK SparseTensorCreation. - return std::make_shared>( internal::checked_pointer_cast(sparse_index), tensor.type(), data, tensor.shape(), tensor.dim_names_); diff --git a/cpp/src/arrow/util/sparse_tensor_util.h b/cpp/src/arrow/util/sparse_tensor_util.h index 4665c94a2a3e0..085f43efa46fc 100644 --- a/cpp/src/arrow/util/sparse_tensor_util.h +++ b/cpp/src/arrow/util/sparse_tensor_util.h @@ -73,6 +73,7 @@ struct IndexPointerVisitor { ". Expected integer."); } }; + } // namespace detail template