Skip to content

Commit

Permalink
Offset buffer can be pre-grown in Parquet ByteArray reader (apache#68)
Browse files Browse the repository at this point in the history
* Offset buffer can be pre-grown in Parquet ByteArray reader

* nit
  • Loading branch information
zhztheplayer authored Jun 11, 2020
1 parent 6f1d8e1 commit 4d166c4
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion cpp/src/parquet/encoding.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1885,7 +1885,12 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
int32_t indices[kBufferSize];

ArrowBinaryHelper helper(out);


RETURN_NOT_OK(helper.builder->Reserve(num_values));
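// Pre-grow the value data buffer: the size passed to ReserveData is the number
// of non-null values (num_values - null_count), capped at the remaining chunk
// space. No per-element multiplier (e.g. 4x) is applied here.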
RETURN_NOT_OK(helper.builder->ReserveData(
std::min<int64_t>(num_values - null_count, helper.chunk_space_remaining)));

arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);

auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
Expand Down Expand Up @@ -1948,6 +1953,12 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
int values_decoded = 0;

ArrowBinaryHelper helper(out);

RETURN_NOT_OK(helper.builder->Reserve(num_values));
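// Pre-grow the value data buffer: the size passed to ReserveData is num_values,
// capped at the remaining chunk space. No per-element multiplier (e.g. 4x) is
// applied here.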
RETURN_NOT_OK(helper.builder->ReserveData(
std::min<int64_t>(num_values, helper.chunk_space_remaining)));

auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());

while (values_decoded < num_values) {
Expand Down

0 comments on commit 4d166c4

Please sign in to comment.