Skip to content

Commit

Permalink
trying to add unsafe append
Browse files Browse the repository at this point in the history
  • Loading branch information
mapleFU committed Apr 4, 2024
1 parent 72d20ad commit 3521e32
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 12 deletions.
7 changes: 6 additions & 1 deletion cpp/src/arrow/array/builder_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,14 @@ Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
// Checked bulk append of boolean values taken from a bitmap.
//
// Reserves room for `length` more elements and then delegates the actual
// copy of values and validity to UnsafeAppendValues, which performs no
// capacity check of its own.
Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
const uint8_t* validity, int64_t offset) {
// RETURN_NOT_OK is expected to hand a failed Reserve back to the caller
// before anything is appended.
RETURN_NOT_OK(Reserve(length));
UnsafeAppendValues(values, length, validity, offset);
return Status::OK();
}

// Unchecked bulk append of boolean values taken from a bitmap.
//
// Forwards `length` entries of `values` (at `offset` into the bitmap) to the
// data builder and the matching entries of `validity` to the validity
// bitmap. No capacity check is made here: the checked
// BooleanBuilder::AppendValues overload calls Reserve(length) before
// delegating to this function, and direct callers must do the equivalent.
//
// The function returns void, so it must not end with `return Status::OK();`.
void BooleanBuilder::UnsafeAppendValues(const uint8_t* values, int64_t length,
                                        const uint8_t* validity, int64_t offset) {
  data_builder_.UnsafeAppend(values, offset, length);
  ArrayBuilder::UnsafeAppendToBitmap(validity, offset, length);
}

Status BooleanBuilder::AppendValues(const uint8_t* values, int64_t length,
Expand Down
32 changes: 28 additions & 4 deletions cpp/src/arrow/array/builder_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,6 +373,12 @@ class ARROW_EXPORT BooleanBuilder
return Status::OK();
}

/// Write nulls as uint8_t* (0 value indicates null) into pre-allocated memory
void UnsafeAppendNulls(int64_t length) {
data_builder_.UnsafeAppend(length, false);
UnsafeSetNotNull(length);
}

Status AppendNull() final {
ARROW_RETURN_NOT_OK(Reserve(1));
UnsafeAppendNull();
Expand Down Expand Up @@ -433,6 +439,14 @@ class ARROW_EXPORT BooleanBuilder
Status AppendValues(const uint8_t* values, int64_t length, const uint8_t* validity,
int64_t offset);

/// \brief Append a sequence of elements in one shot, without reserving capacity
///
/// Unlike AppendValues, this does not call Reserve; the caller must already
/// have reserved room for `length` more elements.
/// \param[in] values a bitmap of values
/// \param[in] length the number of values to append
/// \param[in] validity a validity bitmap to copy (may be null)
/// \param[in] offset an offset into the values and validity bitmaps
void UnsafeAppendValues(const uint8_t* values, int64_t length, const uint8_t* validity,
int64_t offset);

/// \brief Append a sequence of elements in one shot
/// \param[in] values a contiguous C array of values
/// \param[in] length the number of values to append
Expand Down Expand Up @@ -467,19 +481,29 @@ class ARROW_EXPORT BooleanBuilder
/// \return Status
Status AppendValues(const std::vector<bool>& values);

/// \brief Append a sequence of elements in one shot
/// \brief Append a sequence of elements in one shot with pre-allocated builder.
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
/// or null(0) values
/// \return Status
template <typename ValuesIter>
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
void UnsafeAppendValues(ValuesIter values_begin, ValuesIter values_end) {
int64_t length = static_cast<int64_t>(std::distance(values_begin, values_end));
ARROW_RETURN_NOT_OK(Reserve(length));
data_builder_.UnsafeAppend<false>(
length, [&values_begin]() -> bool { return *values_begin++; });
// this updates length_
UnsafeSetNotNull(length);
}

/// \brief Append every element of an iterator range, growing the builder
/// as needed
///
/// Capacity for the whole range is reserved up front; the copy itself is
/// delegated to UnsafeAppendValues.
/// \param[in] values_begin InputIterator to the beginning of the values
/// \param[in] values_end InputIterator pointing to the end of the values
/// \return Status
template <typename ValuesIter>
Status AppendValues(ValuesIter values_begin, ValuesIter values_end) {
  const auto num_values =
      static_cast<int64_t>(std::distance(values_begin, values_end));
  ARROW_RETURN_NOT_OK(Reserve(num_values));
  UnsafeAppendValues(values_begin, values_end);
  return Status::OK();
}

Expand Down
9 changes: 2 additions & 7 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1212,15 +1212,10 @@ int PlainBooleanDecoder::DecodeArrow(
while (value_position < num_values) {
auto block = bit_counter.NextWord();
if (block.AllSet()) {
// GH-40978: We don't have UnsafeAppendValues for booleans currently,
// so using `AppendValues` here.
PARQUET_THROW_NOT_OK(
builder->AppendValues(data_, block.length, NULLPTR, previous_value_offset));
builder->UnsafeAppendValues(data_, block.length, NULLPTR, previous_value_offset);
previous_value_offset += block.length;
} else if (block.NoneSet()) {
// GH-40978: We don't have UnsafeAppendNulls for booleans currently,
// so using `AppendNulls` here.
PARQUET_THROW_NOT_OK(builder->AppendNulls(block.length));
builder->UnsafeAppendNulls(block.length);
} else {
for (int64_t i = 0; i < block.length; ++i) {
if (bit_util::GetBit(valid_bits, valid_bits_offset_position + i)) {
Expand Down

0 comments on commit 3521e32

Please sign in to comment.