Skip to content

Commit

Permalink
GH-38432: [C++][Parquet] Encoding: Dict Arrow Decoder tiny regression…
Browse files Browse the repository at this point in the history
… fix
  • Loading branch information
mapleFU committed Nov 19, 2023
1 parent 46c226e commit a50f3fa
Showing 1 changed file with 14 additions and 2 deletions.
16 changes: 14 additions & 2 deletions cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -904,7 +904,12 @@ class DecoderImpl : virtual public Decoder {

protected:
explicit DecoderImpl(const ColumnDescriptor* descr, Encoding::type encoding)
: descr_(descr), encoding_(encoding), num_values_(0), data_(NULLPTR), len_(0) {}
: descr_(descr),
encoding_(encoding),
num_values_(0),
data_(NULLPTR),
len_(0),
type_length_(0) {}

// For accessing type-specific metadata, like FIXED_LEN_BYTE_ARRAY
const ColumnDescriptor* descr_;
Expand Down Expand Up @@ -1196,6 +1201,10 @@ struct ArrowBinaryHelper<ByteArrayType> {
chunk_space_remaining_(::arrow::kBinaryMemoryLimit -
acc_->builder->value_data_length()) {}

// Prepare reserves space for the number of entries remaining in the current chunk.
// If estimated_data_length is provided, it also reserves the estimated data length;
// in that case the caller must call `UnsafeAppend` instead of `Append` to avoid
// double counting the data length.
Status Prepare(std::optional<int64_t> estimated_data_length = {}) {
RETURN_NOT_OK(acc_->builder->Reserve(entries_remaining_));
if (estimated_data_length.has_value()) {
Expand All @@ -1205,6 +1214,9 @@ struct ArrowBinaryHelper<ByteArrayType> {
return Status::OK();
}

// If estimated_remaining_data_length is provided, PrepareNextInput also reserves the
// estimated data length; in that case the caller must call `UnsafeAppend` instead of
// `Append` to avoid double counting the data length.
Status PrepareNextInput(int64_t next_value_length,
std::optional<int64_t> estimated_remaining_data_length = {}) {
if (ARROW_PREDICT_FALSE(!CanFit(next_value_length))) {
Expand Down Expand Up @@ -1983,7 +1995,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
int values_decoded = 0;

ArrowBinaryHelper<ByteArrayType> helper(out, num_values);
RETURN_NOT_OK(helper.Prepare(len_));
RETURN_NOT_OK(helper.Prepare());

auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());

Expand Down

0 comments on commit a50f3fa

Please sign in to comment.