Skip to content

Commit

Permalink
ORC-1551: Use orc-format 1.0.0-beta
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?

This PR upgrades the `orc-format` dependency to `1.0.0-beta` and updates the code that uses the generated protobuf API accordingly.

### Why are the changes needed?

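`1.0.0-beta` includes the following `orc-format` changes. Their visible effects in this commit are that the generated C++ protobuf accessors switch to snake_case names (e.g. `numhashfunctions()` becomes `num_hash_functions()`) and that `orc_proto.proto` is now located under `src/main/proto/orc/proto/`.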
- apache/orc-format#5
- apache/orc-format#4
- apache/orc-format#7
- apache/orc-format#9

### How was this patch tested?

Verified by passing the existing CI checks.

Closes #1688 from dongjoon-hyun/ORC-1551.

Authored-by: Dongjoon Hyun <dongjoon@apache.org>
Signed-off-by: Dongjoon Hyun <dongjoon@apache.org>
  • Loading branch information
dongjoon-hyun committed Dec 9, 2023
1 parent fede95d commit 40abb9a
Show file tree
Hide file tree
Showing 25 changed files with 455 additions and 449 deletions.
8 changes: 4 additions & 4 deletions c++/src/BloomFilter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ namespace orc {
// caller should make sure input proto::BloomFilter is valid since
// no check will be performed in the following constructor
BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) {
mNumHashFunctions = static_cast<int32_t>(bloomFilter.numhashfunctions());
mNumHashFunctions = static_cast<int32_t>(bloomFilter.num_hash_functions());

const std::string& bitsetStr = bloomFilter.utf8bitset();
mNumBits = bitsetStr.size() << SHIFT_3_BITS;
Expand Down Expand Up @@ -263,7 +263,7 @@ namespace orc {
}

void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const {
bloomFilter.set_numhashfunctions(static_cast<uint32_t>(mNumHashFunctions));
bloomFilter.set_num_hash_functions(static_cast<uint32_t>(mNumHashFunctions));

// According to ORC standard, the encoding is a sequence of bytes with
// a little endian encoding in the utf8bitset field.
Expand Down Expand Up @@ -304,12 +304,12 @@ namespace orc {
}

// make sure we don't use unknown encodings or original timestamp encodings
if (!encoding.has_bloomencoding() || encoding.bloomencoding() != 1) {
if (!encoding.has_bloom_encoding() || encoding.bloom_encoding() != 1) {
return nullptr;
}

// make sure all required fields exist
if (!bloomFilter.has_numhashfunctions() || !bloomFilter.has_utf8bitset()) {
if (!bloomFilter.has_num_hash_functions() || !bloomFilter.has_utf8bitset()) {
return nullptr;
}

Expand Down
4 changes: 2 additions & 2 deletions c++/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,9 @@ include_directories (

add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc
COMMAND ${PROTOBUF_EXECUTABLE}
-I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto
-I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto
--cpp_out="${CMAKE_CURRENT_BINARY_DIR}"
../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc_proto.proto
../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto/orc_proto.proto
)

set(SOURCE_FILES
Expand Down
2 changes: 1 addition & 1 deletion c++/src/ColumnReader.cc
Original file line number Diff line number Diff line change
Expand Up @@ -567,7 +567,7 @@ namespace orc {
StripeStreams& stripe)
: ColumnReader(type, stripe), dictionary(new StringDictionary(stripe.getMemoryPool())) {
RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId).kind());
uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize();
uint32_t dictSize = stripe.getEncoding(columnId).dictionary_size();
std::unique_ptr<SeekableInputStream> stream =
stripe.getStream(columnId, proto::Stream_Kind_DATA, true);
if (stream == nullptr) {
Expand Down
50 changes: 25 additions & 25 deletions c++/src/ColumnWriter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ namespace orc {

void ColumnWriter::addBloomFilterEntry() {
if (enableBloomFilter) {
BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloomfilter());
BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloom_filter());
bloomFilter->reset();
}
}
Expand Down Expand Up @@ -244,7 +244,7 @@ namespace orc {

if (enableBloomFilter) {
bloomFilter->reset();
bloomFilterIndex->clear_bloomfilter();
bloomFilterIndex->clear_bloom_filter();
}
}

Expand Down Expand Up @@ -353,7 +353,7 @@ namespace orc {
void StructColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
encodings.push_back(encoding);
for (uint32_t i = 0; i < children.size(); ++i) {
children[i]->getColumnEncoding(encodings);
Expand Down Expand Up @@ -513,9 +513,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -622,9 +622,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -735,9 +735,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -863,9 +863,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -1201,9 +1201,9 @@ namespace orc {
encoding.set_kind(rleVersion == RleVersion_1 ? proto::ColumnEncoding_Kind_DICTIONARY
: proto::ColumnEncoding_Kind_DICTIONARY_V2);
}
encoding.set_dictionarysize(static_cast<uint32_t>(dictionary.size()));
encoding.set_dictionary_size(static_cast<uint32_t>(dictionary.size()));
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -1765,9 +1765,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -1952,9 +1952,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -2059,9 +2059,9 @@ namespace orc {
std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(RleVersion_2));
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
}
Expand Down Expand Up @@ -2299,9 +2299,9 @@ namespace orc {
void ListColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
if (child.get()) {
Expand Down Expand Up @@ -2525,9 +2525,9 @@ namespace orc {
void MapColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(RleVersionMapper(rleVersion));
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
if (keyWriter.get()) {
Expand Down Expand Up @@ -2752,9 +2752,9 @@ namespace orc {
void UnionColumnWriter::getColumnEncoding(std::vector<proto::ColumnEncoding>& encodings) const {
proto::ColumnEncoding encoding;
encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT);
encoding.set_dictionarysize(0);
encoding.set_dictionary_size(0);
if (enableBloomFilter) {
encoding.set_bloomencoding(BloomFilterVersion::UTF8);
encoding.set_bloom_encoding(BloomFilterVersion::UTF8);
}
encodings.push_back(encoding);
for (uint32_t i = 0; i < children.size(); ++i) {
Expand Down
Loading

0 comments on commit 40abb9a

Please sign in to comment.