From a78760f995b11d3f14c035696fc567e019321243 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Tue, 23 Apr 2024 15:03:49 -0300 Subject: [PATCH] GH-41114: [C++] Add is_validity_defined_by_bitmap() predicate (#41115) ### Rationale for this change To make it easier to find bugs that are very likely to be silent in the codebas because users rarely use unions and REE types. ### What changes are included in this PR? Adding the type predicate and two usages in `builder_nested.h`. ### Are these changes tested? By the compilation process, since they are both `static_asserts`. * GitHub Issue: #41114 Authored-by: Felipe Oliveira Carvalho Signed-off-by: Felipe Oliveira Carvalho --- cpp/src/arrow/array/array_test.cc | 7 ++++--- cpp/src/arrow/array/builder_nested.h | 18 +++++++----------- cpp/src/arrow/array/concatenate.cc | 2 +- cpp/src/arrow/array/data.cc | 6 +++--- cpp/src/arrow/array/data.h | 1 + cpp/src/arrow/array/util.cc | 2 +- cpp/src/arrow/array/validate.cc | 2 +- cpp/src/arrow/c/bridge.cc | 2 +- cpp/src/arrow/c/bridge_test.cc | 2 +- cpp/src/arrow/compute/exec.cc | 2 +- cpp/src/arrow/integration/json_internal.cc | 2 +- cpp/src/arrow/ipc/metadata_internal.cc | 5 +++-- cpp/src/arrow/type.h | 5 ++++- 13 files changed, 29 insertions(+), 27 deletions(-) diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index b0d7fe740a0a1..af64908b59582 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -604,11 +604,11 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr& scalar) ASSERT_EQ(out->length(), 9); auto out_type_id = out->type()->id(); - const bool has_validity = internal::HasValidityBitmap(out_type_id); + const bool can_check_nulls = internal::may_have_validity_bitmap(out_type_id); // For a dictionary builder, the output dictionary won't necessarily be the same const bool can_check_values = !is_dictionary(out_type_id); - if (has_validity) { + if (can_check_nulls) { ASSERT_EQ(out->null_count(), 4); } else { ASSERT_EQ(out->null_count(), 0); @@ -891,7 +891,8 @@ TEST_F(TestArray, TestAppendArraySlice) { span.SetMembers(*nulls->data()); ASSERT_OK(builder->AppendArraySlice(span, 0, 4)); ASSERT_EQ(12, builder->length()); - const bool has_validity_bitmap = internal::HasValidityBitmap(scalar->type->id()); + const bool has_validity_bitmap = + internal::may_have_validity_bitmap(scalar->type->id()); if (has_validity_bitmap) { ASSERT_EQ(4, builder->null_count()); } diff --git a/cpp/src/arrow/array/builder_nested.h b/cpp/src/arrow/array/builder_nested.h index 2c8c41c365f6a..9f7b0fcdbce07 100644 --- a/cpp/src/arrow/array/builder_nested.h +++ b/cpp/src/arrow/array/builder_nested.h @@ -181,13 +181,11 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public ArrayBuilder { if constexpr (is_list_view(TYPE::type_id)) { sizes = array.GetValues(2); } - const bool all_valid = !array.MayHaveLogicalNulls(); - const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; + static_assert(internal::may_have_validity_bitmap(TYPE::type_id)); + const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; ARROW_RETURN_NOT_OK(Reserve(length)); for (int64_t row = offset; row < offset + length; row++) { - const bool is_valid = - all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) || - array.IsValid(row); + const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row); int64_t size = 0; if (is_valid) { if constexpr (is_list_view(TYPE::type_id)) { @@ -569,13 +567,11 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder { Status AppendArraySlice(const ArraySpan& array, int64_t offset, int64_t length) override { - const int32_t* offsets = array.GetValues(1); - const bool all_valid = !array.MayHaveLogicalNulls(); - const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR; + const auto* offsets = array.GetValues(1); + static_assert(internal::may_have_validity_bitmap(MapType::type_id)); + const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR; for (int64_t row = offset; row < offset + length; row++) { - const bool is_valid = - all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) || - array.IsValid(row); + const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row); if (is_valid) { ARROW_RETURN_NOT_OK(Append()); const int64_t slot_length = offsets[row + 1] - offsets[row]; diff --git a/cpp/src/arrow/array/concatenate.cc b/cpp/src/arrow/array/concatenate.cc index ff9ed66d1149f..44d58cc0bdebc 100644 --- a/cpp/src/arrow/array/concatenate.cc +++ b/cpp/src/arrow/array/concatenate.cc @@ -317,7 +317,7 @@ class ConcatenateImpl { } Status Concatenate(std::shared_ptr* out) && { - if (out_->null_count != 0 && internal::HasValidityBitmap(out_->type->id())) { + if (out_->null_count != 0 && internal::may_have_validity_bitmap(out_->type->id())) { RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0])); } RETURN_NOT_OK(VisitTypeInline(*out_->type, this)); diff --git a/cpp/src/arrow/array/data.cc b/cpp/src/arrow/array/data.cc index ff3112ec1fcc2..ac828a9c35c67 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -53,7 +53,7 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length, if (type_id == Type::NA) { *null_count = length; (*buffers)[0] = nullptr; - } else if (internal::HasValidityBitmap(type_id)) { + } else if (internal::may_have_validity_bitmap(type_id)) { if (*null_count == 0) { // In case there are no nulls, don't keep an allocated null bitmap around (*buffers)[0] = nullptr; @@ -335,7 +335,7 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* span) { span->buffers[i].size = 0; } - if (!HasValidityBitmap(type->id())) { + if (!may_have_validity_bitmap(type->id())) { span->buffers[0] = {}; } @@ -370,7 +370,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) { if (type_id == Type::NA) { this->null_count = 1; - } else if (!internal::HasValidityBitmap(type_id)) { + } else if (!internal::may_have_validity_bitmap(type_id)) { this->null_count = 0; } else { // Populate null count and validity bitmap diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index d8a6663cec580..beec29789ad1e 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -46,6 +46,7 @@ ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data, int64_t i); ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data); ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data); ARROW_EXPORT bool DictionaryMayHaveLogicalNulls(const ArrayData& data); + } // namespace internal // When slicing, we do not know the null count of the sliced range without diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index 86e2ffcae4de7..bdba92c9a11fb 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -95,7 +95,7 @@ class ArrayDataEndianSwapper { Status SwapType(const DataType& type) { RETURN_NOT_OK(VisitTypeInline(type, this)); RETURN_NOT_OK(SwapChildren(type.fields())); - if (internal::HasValidityBitmap(type.id())) { + if (internal::may_have_validity_bitmap(type.id())) { // Copy null bitmap out_->buffers[0] = data_->buffers[0]; } diff --git a/cpp/src/arrow/array/validate.cc b/cpp/src/arrow/array/validate.cc index 8dd3eb3f90c15..0d940d3bc869e 100644 --- a/cpp/src/arrow/array/validate.cc +++ b/cpp/src/arrow/array/validate.cc @@ -550,7 +550,7 @@ struct ValidateArrayImpl { if (full_validation) { if (data.null_count != kUnknownNullCount) { int64_t actual_null_count; - if (HasValidityBitmap(data.type->id()) && data.buffers[0]) { + if (may_have_validity_bitmap(data.type->id()) && data.buffers[0]) { // Do not call GetNullCount() as it would also set the `null_count` member actual_null_count = data.length - CountSetBits(data.buffers[0]->data(), data.offset, data.length); diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index d004de7a2ea9f..8a530b3798d41 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -576,7 +576,7 @@ struct ArrayExporter { // Store buffer pointers size_t n_buffers = data->buffers.size(); auto buffers_begin = data->buffers.begin(); - if (n_buffers > 0 && !internal::HasValidityBitmap(data->type->id())) { + if (n_buffers > 0 && !internal::may_have_validity_bitmap(data->type->id())) { --n_buffers; ++buffers_begin; } diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index dba6e4736b673..d64fe67accde0 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -565,7 +565,7 @@ struct ArrayExportChecker { auto expected_n_buffers = static_cast(expected_data.buffers.size()); auto expected_buffers = expected_data.buffers.data(); - if (!internal::HasValidityBitmap(expected_data.type->id())) { + if (!internal::may_have_validity_bitmap(expected_data.type->id())) { --expected_n_buffers; ++expected_buffers; } diff --git a/cpp/src/arrow/compute/exec.cc b/cpp/src/arrow/compute/exec.cc index 28dcf493fa294..f2e4578383122 100644 --- a/cpp/src/arrow/compute/exec.cc +++ b/cpp/src/arrow/compute/exec.cc @@ -480,7 +480,7 @@ struct NullGeneralization { if (dtype_id == Type::NA) { return ALL_NULL; } - if (!arrow::internal::HasValidityBitmap(dtype_id)) { + if (!arrow::internal::may_have_validity_bitmap(dtype_id)) { return ALL_VALID; } if (value.is_scalar()) { diff --git a/cpp/src/arrow/integration/json_internal.cc b/cpp/src/arrow/integration/json_internal.cc index 64eb342d5bd47..4b75e84bfccb6 100644 --- a/cpp/src/arrow/integration/json_internal.cc +++ b/cpp/src/arrow/integration/json_internal.cc @@ -1849,7 +1849,7 @@ class ArrayReader { Result> Parse() { ARROW_ASSIGN_OR_RAISE(length_, GetMemberInt(obj_, "count")); - if (::arrow::internal::HasValidityBitmap(type_->id())) { + if (::arrow::internal::may_have_validity_bitmap(type_->id())) { // Null and union types don't have a validity bitmap RETURN_NOT_OK(ParseValidityBitmap()); } diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index 4154b594d9507..e20b352d18d95 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -109,8 +109,9 @@ flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version) { bool HasValidityBitmap(Type::type type_id, MetadataVersion version) { // In V4, null types have no validity bitmap // In V5 and later, null and union types have no validity bitmap - return (version < MetadataVersion::V5) ? (type_id != Type::NA) - : ::arrow::internal::HasValidityBitmap(type_id); + return (version < MetadataVersion::V5) + ? (type_id != Type::NA) + : ::arrow::internal::may_have_validity_bitmap(type_id); } namespace { diff --git a/cpp/src/arrow/type.h b/cpp/src/arrow/type.h index 5629cade42335..58c9df04ec5c3 100644 --- a/cpp/src/arrow/type.h +++ b/cpp/src/arrow/type.h @@ -2488,7 +2488,7 @@ Result> UnifySchemas( namespace internal { -constexpr bool HasValidityBitmap(Type::type id) { +constexpr bool may_have_validity_bitmap(Type::type id) { switch (id) { case Type::NA: case Type::DENSE_UNION: @@ -2500,6 +2500,9 @@ constexpr bool HasValidityBitmap(Type::type id) { } } +ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap() instead.") +constexpr bool HasValidityBitmap(Type::type id) { return may_have_validity_bitmap(id); } + ARROW_EXPORT std::string ToString(Type::type id);