Skip to content

Commit

Permalink
apacheGH-41114: [C++] Add is_validity_defined_by_bitmap() predicate (a…
Browse files Browse the repository at this point in the history
…pache#41115)

### Rationale for this change

To make it easier to find bugs that are very likely to be silent in the codebas because users rarely use unions and REE types.

### What changes are included in this PR?

Adding the type predicate and two usages in `builder_nested.h`.

### Are these changes tested?

By the compilation process, since they are both `static_asserts`.
* GitHub Issue: apache#41114

Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
  • Loading branch information
felipecrv authored Apr 23, 2024
1 parent f1bc82f commit a78760f
Show file tree
Hide file tree
Showing 13 changed files with 29 additions and 27 deletions.
7 changes: 4 additions & 3 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -604,11 +604,11 @@ void AssertAppendScalar(MemoryPool* pool, const std::shared_ptr<Scalar>& scalar)
ASSERT_EQ(out->length(), 9);

auto out_type_id = out->type()->id();
const bool has_validity = internal::HasValidityBitmap(out_type_id);
const bool can_check_nulls = internal::may_have_validity_bitmap(out_type_id);
// For a dictionary builder, the output dictionary won't necessarily be the same
const bool can_check_values = !is_dictionary(out_type_id);

if (has_validity) {
if (can_check_nulls) {
ASSERT_EQ(out->null_count(), 4);
} else {
ASSERT_EQ(out->null_count(), 0);
Expand Down Expand Up @@ -891,7 +891,8 @@ TEST_F(TestArray, TestAppendArraySlice) {
span.SetMembers(*nulls->data());
ASSERT_OK(builder->AppendArraySlice(span, 0, 4));
ASSERT_EQ(12, builder->length());
const bool has_validity_bitmap = internal::HasValidityBitmap(scalar->type->id());
const bool has_validity_bitmap =
internal::may_have_validity_bitmap(scalar->type->id());
if (has_validity_bitmap) {
ASSERT_EQ(4, builder->null_count());
}
Expand Down
18 changes: 7 additions & 11 deletions cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,13 +181,11 @@ class ARROW_EXPORT VarLengthListLikeBuilder : public ArrayBuilder {
if constexpr (is_list_view(TYPE::type_id)) {
sizes = array.GetValues<offset_type>(2);
}
const bool all_valid = !array.MayHaveLogicalNulls();
const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR;
static_assert(internal::may_have_validity_bitmap(TYPE::type_id));
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
ARROW_RETURN_NOT_OK(Reserve(length));
for (int64_t row = offset; row < offset + length; row++) {
const bool is_valid =
all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) ||
array.IsValid(row);
const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row);
int64_t size = 0;
if (is_valid) {
if constexpr (is_list_view(TYPE::type_id)) {
Expand Down Expand Up @@ -569,13 +567,11 @@ class ARROW_EXPORT MapBuilder : public ArrayBuilder {

Status AppendArraySlice(const ArraySpan& array, int64_t offset,
int64_t length) override {
const int32_t* offsets = array.GetValues<int32_t>(1);
const bool all_valid = !array.MayHaveLogicalNulls();
const uint8_t* validity = array.HasValidityBitmap() ? array.buffers[0].data : NULLPTR;
const auto* offsets = array.GetValues<int32_t>(1);
static_assert(internal::may_have_validity_bitmap(MapType::type_id));
const uint8_t* validity = array.MayHaveNulls() ? array.buffers[0].data : NULLPTR;
for (int64_t row = offset; row < offset + length; row++) {
const bool is_valid =
all_valid || (validity && bit_util::GetBit(validity, array.offset + row)) ||
array.IsValid(row);
const bool is_valid = !validity || bit_util::GetBit(validity, array.offset + row);
if (is_valid) {
ARROW_RETURN_NOT_OK(Append());
const int64_t slot_length = offsets[row + 1] - offsets[row];
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/concatenate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ class ConcatenateImpl {
}

Status Concatenate(std::shared_ptr<ArrayData>* out) && {
if (out_->null_count != 0 && internal::HasValidityBitmap(out_->type->id())) {
if (out_->null_count != 0 && internal::may_have_validity_bitmap(out_->type->id())) {
RETURN_NOT_OK(ConcatenateBitmaps(Bitmaps(0), pool_, &out_->buffers[0]));
}
RETURN_NOT_OK(VisitTypeInline(*out_->type, this));
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/arrow/array/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ static inline void AdjustNonNullable(Type::type type_id, int64_t length,
if (type_id == Type::NA) {
*null_count = length;
(*buffers)[0] = nullptr;
} else if (internal::HasValidityBitmap(type_id)) {
} else if (internal::may_have_validity_bitmap(type_id)) {
if (*null_count == 0) {
// In case there are no nulls, don't keep an allocated null bitmap around
(*buffers)[0] = nullptr;
Expand Down Expand Up @@ -335,7 +335,7 @@ void FillZeroLengthArray(const DataType* type, ArraySpan* span) {
span->buffers[i].size = 0;
}

if (!HasValidityBitmap(type->id())) {
if (!may_have_validity_bitmap(type->id())) {
span->buffers[0] = {};
}

Expand Down Expand Up @@ -370,7 +370,7 @@ void ArraySpan::FillFromScalar(const Scalar& value) {

if (type_id == Type::NA) {
this->null_count = 1;
} else if (!internal::HasValidityBitmap(type_id)) {
} else if (!internal::may_have_validity_bitmap(type_id)) {
this->null_count = 0;
} else {
// Populate null count and validity bitmap
Expand Down
1 change: 1 addition & 0 deletions cpp/src/arrow/array/data.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ ARROW_EXPORT bool IsNullRunEndEncoded(const ArrayData& data, int64_t i);
ARROW_EXPORT bool UnionMayHaveLogicalNulls(const ArrayData& data);
ARROW_EXPORT bool RunEndEncodedMayHaveLogicalNulls(const ArrayData& data);
ARROW_EXPORT bool DictionaryMayHaveLogicalNulls(const ArrayData& data);

} // namespace internal

// When slicing, we do not know the null count of the sliced range without
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ class ArrayDataEndianSwapper {
Status SwapType(const DataType& type) {
RETURN_NOT_OK(VisitTypeInline(type, this));
RETURN_NOT_OK(SwapChildren(type.fields()));
if (internal::HasValidityBitmap(type.id())) {
if (internal::may_have_validity_bitmap(type.id())) {
// Copy null bitmap
out_->buffers[0] = data_->buffers[0];
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ struct ValidateArrayImpl {
if (full_validation) {
if (data.null_count != kUnknownNullCount) {
int64_t actual_null_count;
if (HasValidityBitmap(data.type->id()) && data.buffers[0]) {
if (may_have_validity_bitmap(data.type->id()) && data.buffers[0]) {
// Do not call GetNullCount() as it would also set the `null_count` member
actual_null_count = data.length - CountSetBits(data.buffers[0]->data(),
data.offset, data.length);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/c/bridge.cc
Original file line number Diff line number Diff line change
Expand Up @@ -576,7 +576,7 @@ struct ArrayExporter {
// Store buffer pointers
size_t n_buffers = data->buffers.size();
auto buffers_begin = data->buffers.begin();
if (n_buffers > 0 && !internal::HasValidityBitmap(data->type->id())) {
if (n_buffers > 0 && !internal::may_have_validity_bitmap(data->type->id())) {
--n_buffers;
++buffers_begin;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/c/bridge_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ struct ArrayExportChecker {

auto expected_n_buffers = static_cast<int64_t>(expected_data.buffers.size());
auto expected_buffers = expected_data.buffers.data();
if (!internal::HasValidityBitmap(expected_data.type->id())) {
if (!internal::may_have_validity_bitmap(expected_data.type->id())) {
--expected_n_buffers;
++expected_buffers;
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/compute/exec.cc
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ struct NullGeneralization {
if (dtype_id == Type::NA) {
return ALL_NULL;
}
if (!arrow::internal::HasValidityBitmap(dtype_id)) {
if (!arrow::internal::may_have_validity_bitmap(dtype_id)) {
return ALL_VALID;
}
if (value.is_scalar()) {
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/integration/json_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1849,7 +1849,7 @@ class ArrayReader {
Result<std::shared_ptr<ArrayData>> Parse() {
ARROW_ASSIGN_OR_RAISE(length_, GetMemberInt<int32_t>(obj_, "count"));

if (::arrow::internal::HasValidityBitmap(type_->id())) {
if (::arrow::internal::may_have_validity_bitmap(type_->id())) {
// Null and union types don't have a validity bitmap
RETURN_NOT_OK(ParseValidityBitmap());
}
Expand Down
5 changes: 3 additions & 2 deletions cpp/src/arrow/ipc/metadata_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,9 @@ flatbuf::MetadataVersion MetadataVersionToFlatbuffer(MetadataVersion version) {
bool HasValidityBitmap(Type::type type_id, MetadataVersion version) {
// In V4, null types have no validity bitmap
// In V5 and later, null and union types have no validity bitmap
return (version < MetadataVersion::V5) ? (type_id != Type::NA)
: ::arrow::internal::HasValidityBitmap(type_id);
return (version < MetadataVersion::V5)
? (type_id != Type::NA)
: ::arrow::internal::may_have_validity_bitmap(type_id);
}

namespace {
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/arrow/type.h
Original file line number Diff line number Diff line change
Expand Up @@ -2488,7 +2488,7 @@ Result<std::shared_ptr<Schema>> UnifySchemas(

namespace internal {

constexpr bool HasValidityBitmap(Type::type id) {
constexpr bool may_have_validity_bitmap(Type::type id) {
switch (id) {
case Type::NA:
case Type::DENSE_UNION:
Expand All @@ -2500,6 +2500,9 @@ constexpr bool HasValidityBitmap(Type::type id) {
}
}

ARROW_DEPRECATED("Deprecated in 17.0.0. Use may_have_validity_bitmap() instead.")
constexpr bool HasValidityBitmap(Type::type id) { return may_have_validity_bitmap(id); }

ARROW_EXPORT
std::string ToString(Type::type id);

Expand Down

0 comments on commit a78760f

Please sign in to comment.