From f191fa8501b3ac9ebf39a42c130e70bda2ddb361 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 8 Dec 2023 16:43:15 -0800 Subject: [PATCH] ORC-1551: [C++] Use orc-format 1.0.0-beta --- c++/src/BloomFilter.cc | 8 +- c++/src/CMakeLists.txt | 4 +- c++/src/ColumnReader.cc | 2 +- c++/src/ColumnWriter.cc | 50 +++--- c++/src/Reader.cc | 168 +++++++++--------- c++/src/Statistics.cc | 142 +++++++-------- c++/src/Statistics.hh | 86 ++++----- c++/src/StripeStream.cc | 6 +- c++/src/StripeStream.hh | 4 +- c++/src/TypeImpl.cc | 8 +- c++/src/Writer.cc | 50 +++--- c++/src/sargs/PredicateLeaf.cc | 84 ++++----- c++/src/sargs/SargsApplier.cc | 4 +- c++/test/CreateTestFiles.cc | 20 +-- c++/test/TestBloomFilter.cc | 2 +- c++/test/TestBufferedOutputStream.cc | 12 +- c++/test/TestColumnReader.cc | 10 +- c++/test/TestColumnStatistics.cc | 20 +-- c++/test/TestCompression.cc | 12 +- c++/test/TestPredicateLeaf.cc | 62 +++---- c++/test/TestSargsApplier.cc | 26 +-- c++/test/TestType.cc | 28 +-- cmake_modules/ThirdpartyToolchain.cmake | 4 +- .../org/apache/orc/TestVectorOrcFile.java | 6 +- tools/test/TestFileMetadata.cc | 86 ++++----- 25 files changed, 455 insertions(+), 449 deletions(-) diff --git a/c++/src/BloomFilter.cc b/c++/src/BloomFilter.cc index e7ef6575ea..882c6f4252 100644 --- a/c++/src/BloomFilter.cc +++ b/c++/src/BloomFilter.cc @@ -175,7 +175,7 @@ namespace orc { // caller should make sure input proto::BloomFilter is valid since // no check will be performed in the following constructor BloomFilterImpl::BloomFilterImpl(const proto::BloomFilter& bloomFilter) { - mNumHashFunctions = static_cast(bloomFilter.numhashfunctions()); + mNumHashFunctions = static_cast(bloomFilter.num_hash_functions()); const std::string& bitsetStr = bloomFilter.utf8bitset(); mNumBits = bitsetStr.size() << SHIFT_3_BITS; @@ -263,7 +263,7 @@ namespace orc { } void BloomFilterImpl::serialize(proto::BloomFilter& bloomFilter) const { - bloomFilter.set_numhashfunctions(static_cast(mNumHashFunctions)); + bloomFilter.set_num_hash_functions(static_cast(mNumHashFunctions)); // According to ORC standard, the encoding is a sequence of bytes with // a little endian encoding in the utf8bitset field. @@ -304,12 +304,12 @@ namespace orc { } // make sure we don't use unknown encodings or original timestamp encodings - if (!encoding.has_bloomencoding() || encoding.bloomencoding() != 1) { + if (!encoding.has_bloom_encoding() || encoding.bloom_encoding() != 1) { return nullptr; } // make sure all required fields exist - if (!bloomFilter.has_numhashfunctions() || !bloomFilter.has_utf8bitset()) { + if (!bloomFilter.has_num_hash_functions() || !bloomFilter.has_utf8bitset()) { return nullptr; } diff --git a/c++/src/CMakeLists.txt b/c++/src/CMakeLists.txt index 63c2043af2..90f3ed87a4 100644 --- a/c++/src/CMakeLists.txt +++ b/c++/src/CMakeLists.txt @@ -146,9 +146,9 @@ include_directories ( add_custom_command(OUTPUT orc_proto.pb.h orc_proto.pb.cc COMMAND ${PROTOBUF_EXECUTABLE} - -I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto + -I ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto --cpp_out="${CMAKE_CURRENT_BINARY_DIR}" - ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc_proto.proto + ../../orc-format_ep-prefix/src/orc-format_ep/src/main/proto/orc/proto/orc_proto.proto ) set(SOURCE_FILES diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index 7fdcd530f6..dc0ecb147a 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc @@ -567,7 +567,7 @@ namespace orc { StripeStreams& stripe) : ColumnReader(type, stripe), dictionary(new StringDictionary(stripe.getMemoryPool())) { RleVersion rleVersion = convertRleVersion(stripe.getEncoding(columnId).kind()); - uint32_t dictSize = stripe.getEncoding(columnId).dictionarysize(); + uint32_t dictSize = stripe.getEncoding(columnId).dictionary_size(); std::unique_ptr stream = stripe.getStream(columnId, proto::Stream_Kind_DATA, true); if (stream == nullptr) { diff --git a/c++/src/ColumnWriter.cc b/c++/src/ColumnWriter.cc index a7412c0e46..5c7ad21799 100644 --- a/c++/src/ColumnWriter.cc +++ b/c++/src/ColumnWriter.cc @@ -184,7 +184,7 @@ namespace orc { void ColumnWriter::addBloomFilterEntry() { if (enableBloomFilter) { - BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloomfilter()); + BloomFilterUTF8Utils::serialize(*bloomFilter, *bloomFilterIndex->add_bloom_filter()); bloomFilter->reset(); } } @@ -244,7 +244,7 @@ namespace orc { if (enableBloomFilter) { bloomFilter->reset(); - bloomFilterIndex->clear_bloomfilter(); + bloomFilterIndex->clear_bloom_filter(); } } @@ -353,7 +353,7 @@ namespace orc { void StructColumnWriter::getColumnEncoding(std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); encodings.push_back(encoding); for (uint32_t i = 0; i < children.size(); ++i) { children[i]->getColumnEncoding(encodings); @@ -513,9 +513,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -622,9 +622,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -735,9 +735,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -863,9 +863,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -1201,9 +1201,9 @@ namespace orc { encoding.set_kind(rleVersion == RleVersion_1 ? proto::ColumnEncoding_Kind_DICTIONARY : proto::ColumnEncoding_Kind_DICTIONARY_V2); } - encoding.set_dictionarysize(static_cast(dictionary.size())); + encoding.set_dictionary_size(static_cast(dictionary.size())); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -1765,9 +1765,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -1952,9 +1952,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -2059,9 +2059,9 @@ namespace orc { std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(RleVersionMapper(RleVersion_2)); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); } @@ -2299,9 +2299,9 @@ namespace orc { void ListColumnWriter::getColumnEncoding(std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); if (child.get()) { @@ -2525,9 +2525,9 @@ namespace orc { void MapColumnWriter::getColumnEncoding(std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(RleVersionMapper(rleVersion)); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); if (keyWriter.get()) { @@ -2752,9 +2752,9 @@ namespace orc { void UnionColumnWriter::getColumnEncoding(std::vector& encodings) const { proto::ColumnEncoding encoding; encoding.set_kind(proto::ColumnEncoding_Kind_DIRECT); - encoding.set_dictionarysize(0); + encoding.set_dictionary_size(0); if (enableBloomFilter) { - encoding.set_bloomencoding(BloomFilterVersion::UTF8); + encoding.set_bloom_encoding(BloomFilterVersion::UTF8); } encodings.push_back(encoding); for (uint32_t i = 0; i < children.size(); ++i) { diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc index 386793f0b2..162f69e107 100644 --- a/c++/src/Reader.cc +++ b/c++/src/Reader.cc @@ -57,8 +57,8 @@ namespace orc { } uint64_t getCompressionBlockSize(const proto::PostScript& ps) { - if (ps.has_compressionblocksize()) { - return ps.compressionblocksize(); + if (ps.has_compression_block_size()) { + return ps.compression_block_size(); } else { return 256 * 1024; } @@ -84,10 +84,10 @@ namespace orc { } WriterVersion getWriterVersionImpl(const FileContents* contents) { - if (!contents->postscript->has_writerversion()) { + if (!contents->postscript->has_writer_version()) { return WriterVersion_ORIGINAL; } - return static_cast(contents->postscript->writerversion()); + return static_cast(contents->postscript->writer_version()); } void ColumnSelector::selectChildren(std::vector& selectedColumns, const Type& type) { @@ -272,7 +272,7 @@ namespace orc { for (size_t i = 0; i < numberOfStripes; ++i) { firstRowOfStripe[i] = rowTotal; proto::StripeInformation stripeInfo = footer->stripes(static_cast(i)); - rowTotal += stripeInfo.numberofrows(); + rowTotal += stripeInfo.number_of_rows(); bool isStripeInRange = stripeInfo.offset() >= opts.getOffset() && stripeInfo.offset() < opts.getOffset() + opts.getLength(); if (isStripeInRange) { @@ -282,9 +282,10 @@ namespace orc { if (i >= lastStripe) { lastStripe = i + 1; } - if (footer->rowindexstride() > 0) { + if (footer->row_index_stride() > 0) { numRowGroupsInStripeRange += - (stripeInfo.numberofrows() + footer->rowindexstride() - 1) / footer->rowindexstride(); + (stripeInfo.number_of_rows() + footer->row_index_stride() - 1) / + footer->row_index_stride(); } } } @@ -294,7 +295,7 @@ namespace orc { if (currentStripe == 0) { previousRow = (std::numeric_limits::max)(); } else if (currentStripe == numberOfStripes) { - previousRow = footer->numberofrows(); + previousRow = footer->number_of_rows(); } else { previousRow = firstRowOfStripe[firstStripe] - 1; } @@ -303,11 +304,11 @@ namespace orc { column_selector.updateSelected(selectedColumns, opts); // prepare SargsApplier if SearchArgument is available - if (opts.getSearchArgument() && footer->rowindexstride() > 0) { + if (opts.getSearchArgument() && footer->row_index_stride() > 0) { sargs = opts.getSearchArgument(); - sargsApplier.reset(new SargsApplier(*contents->schema, sargs.get(), footer->rowindexstride(), - getWriterVersionImpl(_contents.get()), - contents->readerMetrics)); + sargsApplier.reset( + new SargsApplier(*contents->schema, sargs.get(), footer->row_index_stride(), + getWriterVersionImpl(_contents.get()), contents->readerMetrics)); } skipBloomFilters = hasBadBloomFilters(); @@ -321,9 +322,9 @@ namespace orc { // 1.6.x releases before 1.6.11 won't have it. On the other side, the C++ writer // supports writing bloom filters since 1.6.0. So files written by the C++ writer // and with 'softwareVersion' unset would have bad bloom filters. - if (!footer->has_softwareversion()) return true; + if (!footer->has_software_version()) return true; - const std::string& fullVersion = footer->softwareversion(); + const std::string& fullVersion = footer->software_version(); std::string version; // Deal with snapshot versions, e.g. 1.6.12-SNAPSHOT. if (fullVersion.find('-') != std::string::npos) { @@ -375,10 +376,10 @@ namespace orc { // seeking past lastStripe uint64_t num_stripes = static_cast(footer->stripes_size()); - if ((lastStripe == num_stripes && rowNumber >= footer->numberofrows()) || + if ((lastStripe == num_stripes && rowNumber >= footer->number_of_rows()) || (lastStripe < num_stripes && rowNumber >= firstRowOfStripe[lastStripe])) { currentStripe = num_stripes; - previousRow = footer->numberofrows(); + previousRow = footer->number_of_rows(); return; } @@ -390,14 +391,14 @@ namespace orc { // seeking before the first stripe if (seekToStripe < firstStripe) { currentStripe = num_stripes; - previousRow = footer->numberofrows(); + previousRow = footer->number_of_rows(); return; } previousRow = rowNumber; - auto rowIndexStride = footer->rowindexstride(); + auto rowIndexStride = footer->row_index_stride(); if (!isCurrentStripeInited() || currentStripe != seekToStripe || rowIndexStride == 0 || - currentStripeInfo.indexlength() == 0) { + currentStripeInfo.index_length() == 0) { // current stripe is not initialized or // target stripe is not current stripe or // current stripe doesn't have row indexes @@ -412,14 +413,14 @@ namespace orc { if (sargsApplier) { // advance to selected row group if predicate pushdown is enabled currentRowInStripe = - advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, footer->rowindexstride(), - sargsApplier->getNextSkippedRows()); + advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, + footer->row_index_stride(), sargsApplier->getNextSkippedRows()); } } uint64_t rowsToSkip = currentRowInStripe; // seek to the target row group if row indexes exists - if (rowIndexStride > 0 && currentStripeInfo.indexlength() > 0) { + if (rowIndexStride > 0 && currentStripeInfo.index_length() > 0) { if (rowIndexes.empty()) { loadStripeIndex(); } @@ -466,10 +467,10 @@ namespace orc { throw ParseError("Failed to parse bloom filter index"); } BloomFilterIndex bfIndex; - for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) { + for (int j = 0; j < pbBFIndex.bloom_filter_size(); j++) { bfIndex.entries.push_back(BloomFilterUTF8Utils::deserialize( pbStream.kind(), currentStripeFooter.columns(static_cast(pbStream.column())), - pbBFIndex.bloomfilter(j))); + pbBFIndex.bloom_filter(j))); } // add bloom filters to result for one column bloomFilterIndex[pbStream.column()] = bfIndex; @@ -520,8 +521,8 @@ namespace orc { proto::StripeFooter getStripeFooter(const proto::StripeInformation& info, const FileContents& contents) { - uint64_t stripeFooterStart = info.offset() + info.indexlength() + info.datalength(); - uint64_t stripeFooterLength = info.footerlength(); + uint64_t stripeFooterStart = info.offset() + info.index_length() + info.data_length(); + uint64_t stripeFooterLength = info.footer_length(); std::unique_ptr pbStream = createDecompressor( contents.compression, std::make_unique(contents.stream.get(), stripeFooterStart, @@ -562,8 +563,8 @@ namespace orc { mutable_ps->CopyFrom(*contents->postscript); proto::Footer* mutableFooter = tail.mutable_footer(); mutableFooter->CopyFrom(*footer); - tail.set_filelength(fileLength); - tail.set_postscriptlength(postscriptLength); + tail.set_file_length(fileLength); + tail.set_postscript_length(postscriptLength); std::string result; if (!tail.SerializeToString(&result)) { throw ParseError("Failed to serialize file tail"); @@ -593,7 +594,7 @@ namespace orc { } return contents->metadata == nullptr ? 0 - : static_cast(contents->metadata->stripestats_size()); + : static_cast(contents->metadata->stripe_stats_size()); } std::unique_ptr ReaderImpl::getStripe(uint64_t stripeIndex) const { @@ -603,8 +604,8 @@ namespace orc { proto::StripeInformation stripeInfo = footer->stripes(static_cast(stripeIndex)); return std::unique_ptr(new StripeInformationImpl( - stripeInfo.offset(), stripeInfo.indexlength(), stripeInfo.datalength(), - stripeInfo.footerlength(), stripeInfo.numberofrows(), contents->stream.get(), + stripeInfo.offset(), stripeInfo.index_length(), stripeInfo.data_length(), + stripeInfo.footer_length(), stripeInfo.number_of_rows(), contents->stream.get(), *contents->pool, contents->compression, contents->blockSize, contents->readerMetrics)); } @@ -616,7 +617,7 @@ namespace orc { } uint64_t ReaderImpl::getNumberOfRows() const { - return footer->numberofrows(); + return footer->number_of_rows(); } WriterId ReaderImpl::getWriterId() const { @@ -642,8 +643,8 @@ namespace orc { std::string ReaderImpl::getSoftwareVersion() const { std::ostringstream buffer; buffer << writerIdToString(getWriterIdValue()); - if (footer->has_softwareversion()) { - buffer << " " << footer->softwareversion(); + if (footer->has_software_version()) { + buffer << " " << footer->software_version(); } return buffer.str(); } @@ -653,15 +654,15 @@ namespace orc { } uint64_t ReaderImpl::getContentLength() const { - return footer->contentlength(); + return footer->content_length(); } uint64_t ReaderImpl::getStripeStatisticsLength() const { - return contents->postscript->metadatalength(); + return contents->postscript->metadata_length(); } uint64_t ReaderImpl::getFileFooterLength() const { - return contents->postscript->footerlength(); + return contents->postscript->footer_length(); } uint64_t ReaderImpl::getFilePostscriptLength() const { @@ -673,7 +674,7 @@ namespace orc { } uint64_t ReaderImpl::getRowIndexStride() const { - return footer->rowindexstride(); + return footer->row_index_stride(); } const std::string& ReaderImpl::getStreamName() const { @@ -703,7 +704,7 @@ namespace orc { std::vector>* indexStats) const { int num_streams = currentStripeFooter.streams_size(); uint64_t offset = stripeInfo.offset(); - uint64_t indexEnd = stripeInfo.offset() + stripeInfo.indexlength(); + uint64_t indexEnd = stripeInfo.offset() + stripeInfo.index_length(); for (int i = 0; i < num_streams; i++) { const proto::Stream& stream = currentStripeFooter.streams(i); StreamKind streamKind = static_cast(stream.kind()); @@ -714,7 +715,7 @@ namespace orc { msg << "Malformed RowIndex stream meta in stripe " << stripeIndex << ": streamOffset=" << offset << ", streamLength=" << length << ", stripeOffset=" << stripeInfo.offset() - << ", stripeIndexLength=" << stripeInfo.indexlength(); + << ", stripeIndexLength=" << stripeInfo.index_length(); throw ParseError(msg.str()); } std::unique_ptr pbStream = @@ -759,7 +760,7 @@ namespace orc { throw std::logic_error("No stripe statistics in file"); } size_t num_cols = static_cast( - contents->metadata->stripestats(static_cast(stripeIndex)).colstats_size()); + contents->metadata->stripe_stats(static_cast(stripeIndex)).col_stats_size()); std::vector> indexStats(num_cols); proto::StripeInformation currentStripeInfo = footer->stripes(static_cast(stripeIndex)); @@ -767,12 +768,12 @@ namespace orc { getRowIndexStatistics(currentStripeInfo, stripeIndex, currentStripeFooter, &indexStats); - const Timezone& writerTZ = currentStripeFooter.has_writertimezone() - ? getTimezoneByName(currentStripeFooter.writertimezone()) + const Timezone& writerTZ = currentStripeFooter.has_writer_timezone() + ? getTimezoneByName(currentStripeFooter.writer_timezone()) : getLocalTimezone(); StatContext statContext(hasCorrectStatistics(), &writerTZ); return std::make_unique( - contents->metadata->stripestats(static_cast(stripeIndex)), indexStats, statContext); + contents->metadata->stripe_stats(static_cast(stripeIndex)), indexStats, statContext); } std::unique_ptr ReaderImpl::getStatistics() const { @@ -791,8 +792,8 @@ namespace orc { } void ReaderImpl::readMetadata() const { - uint64_t metadataSize = contents->postscript->metadatalength(); - uint64_t footerLength = contents->postscript->footerlength(); + uint64_t metadataSize = contents->postscript->metadata_length(); + uint64_t footerLength = contents->postscript->footer_length(); if (fileLength < metadataSize + footerLength + postscriptLength + 1) { std::stringstream msg; msg << "Invalid Metadata length: fileLength=" << fileLength @@ -935,13 +936,13 @@ namespace orc { uint64_t maxDataLength = 0; if (stripeIx >= 0 && stripeIx < footer->stripes_size()) { - uint64_t stripe = footer->stripes(stripeIx).datalength(); + uint64_t stripe = footer->stripes(stripeIx).data_length(); if (maxDataLength < stripe) { maxDataLength = stripe; } } else { for (int i = 0; i < footer->stripes_size(); i++) { - uint64_t stripe = footer->stripes(i).datalength(); + uint64_t stripe = footer->stripes(i).data_length(); if (maxDataLength < stripe) { maxDataLength = stripe; } @@ -969,7 +970,7 @@ namespace orc { } } - /* If a string column is read, use stripe datalength as a memory estimate + /* If a string column is read, use stripe data_length as a memory estimate * because we don't know the dictionary size. Multiply by 2 because * a string column requires two buffers: * in the input stream and in the seekable input stream. @@ -981,11 +982,11 @@ namespace orc { nSelectedStreams * contents->stream->getNaturalReadSize()); // Do we need even more memory to read the footer or the metadata? - if (memory < contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS) { - memory = contents->postscript->footerlength() + DIRECTORY_SIZE_GUESS; + if (memory < contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS) { + memory = contents->postscript->footer_length() + DIRECTORY_SIZE_GUESS; } - if (memory < contents->postscript->metadatalength()) { - memory = contents->postscript->metadatalength(); + if (memory < contents->postscript->metadata_length()) { + memory = contents->postscript->metadata_length(); } // Account for firstRowOfStripe. @@ -1018,7 +1019,7 @@ namespace orc { previousRow = 0; } else { previousRow = firstRowOfStripe[lastStripe - 1] + - footer->stripes(static_cast(lastStripe - 1)).numberofrows(); + footer->stripes(static_cast(lastStripe - 1)).number_of_rows(); } } @@ -1037,30 +1038,30 @@ namespace orc { do { currentStripeInfo = footer->stripes(static_cast(currentStripe)); uint64_t fileLength = contents->stream->getLength(); - if (currentStripeInfo.offset() + currentStripeInfo.indexlength() + - currentStripeInfo.datalength() + currentStripeInfo.footerlength() >= + if (currentStripeInfo.offset() + currentStripeInfo.index_length() + + currentStripeInfo.data_length() + currentStripeInfo.footer_length() >= fileLength) { std::stringstream msg; msg << "Malformed StripeInformation at stripe index " << currentStripe << ": fileLength=" << fileLength << ", StripeInfo=(offset=" << currentStripeInfo.offset() - << ", indexLength=" << currentStripeInfo.indexlength() - << ", dataLength=" << currentStripeInfo.datalength() - << ", footerLength=" << currentStripeInfo.footerlength() << ")"; + << ", indexLength=" << currentStripeInfo.index_length() + << ", dataLength=" << currentStripeInfo.data_length() + << ", footerLength=" << currentStripeInfo.footer_length() << ")"; throw ParseError(msg.str()); } currentStripeFooter = getStripeFooter(currentStripeInfo, *contents.get()); - rowsInCurrentStripe = currentStripeInfo.numberofrows(); + rowsInCurrentStripe = currentStripeInfo.number_of_rows(); processingStripe = currentStripe; if (sargsApplier) { bool isStripeNeeded = true; if (contents->metadata) { const auto& currentStripeStats = - contents->metadata->stripestats(static_cast(currentStripe)); + contents->metadata->stripe_stats(static_cast(currentStripe)); // skip this stripe after stats fail to satisfy sargs uint64_t stripeRowGroupCount = - (rowsInCurrentStripe + footer->rowindexstride() - 1) / footer->rowindexstride(); + (rowsInCurrentStripe + footer->row_index_stride() - 1) / footer->row_index_stride(); isStripeNeeded = sargsApplier->evaluateStripeStatistics(currentStripeStats, stripeRowGroupCount); } @@ -1087,9 +1088,10 @@ namespace orc { if (currentStripe < lastStripe) { // get writer timezone info from stripe footer to help understand timestamp values. - const Timezone& writerTimezone = currentStripeFooter.has_writertimezone() - ? getTimezoneByName(currentStripeFooter.writertimezone()) - : localTimezone; + const Timezone& writerTimezone = + currentStripeFooter.has_writer_timezone() + ? getTimezoneByName(currentStripeFooter.writer_timezone()) + : localTimezone; StripeStreamsImpl stripeStreams(*this, currentStripe, currentStripeInfo, currentStripeFooter, currentStripeInfo.offset(), *contents->stream, writerTimezone, readerTimezone); @@ -1099,11 +1101,11 @@ namespace orc { if (sargsApplier) { // move to the 1st selected row group when PPD is enabled. currentRowInStripe = - advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, footer->rowindexstride(), - sargsApplier->getNextSkippedRows()); + advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, + footer->row_index_stride(), sargsApplier->getNextSkippedRows()); previousRow = firstRowOfStripe[currentStripe] + currentRowInStripe - 1; if (currentRowInStripe > 0) { - seekToRowGroup(static_cast(currentRowInStripe / footer->rowindexstride())); + seekToRowGroup(static_cast(currentRowInStripe / footer->row_index_stride())); } } } else { @@ -1126,7 +1128,7 @@ namespace orc { std::min(static_cast(data.capacity), rowsInCurrentStripe - currentRowInStripe); if (sargsApplier && rowsToRead > 0) { rowsToRead = computeBatchSize(rowsToRead, currentRowInStripe, rowsInCurrentStripe, - footer->rowindexstride(), sargsApplier->getNextSkippedRows()); + footer->row_index_stride(), sargsApplier->getNextSkippedRows()); } data.numElements = rowsToRead; if (rowsToRead == 0) { @@ -1145,13 +1147,13 @@ namespace orc { // check if we need to advance to next selected row group if (sargsApplier) { uint64_t nextRowToRead = - advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, footer->rowindexstride(), + advanceToNextRowGroup(currentRowInStripe, rowsInCurrentStripe, footer->row_index_stride(), sargsApplier->getNextSkippedRows()); if (currentRowInStripe != nextRowToRead) { // it is guaranteed to be at start of a row group currentRowInStripe = nextRowToRead; if (currentRowInStripe < rowsInCurrentStripe) { - seekToRowGroup(static_cast(currentRowInStripe / footer->rowindexstride())); + seekToRowGroup(static_cast(currentRowInStripe / footer->row_index_stride())); } } } @@ -1302,9 +1304,9 @@ namespace orc { for (int i = 0; i < maxId; ++i) { const proto::Type& type = footer.types(i); if (type.kind() == proto::Type_Kind_STRUCT && - type.subtypes_size() != type.fieldnames_size()) { + type.subtypes_size() != type.field_names_size()) { msg << "Footer is corrupt: STRUCT type " << i << " has " << type.subtypes_size() - << " subTypes, but has " << type.fieldnames_size() << " fieldNames"; + << " subTypes, but has " << type.field_names_size() << " fieldNames"; throw ParseError(msg.str()); } for (int j = 0; j < type.subtypes_size(); ++j) { @@ -1339,10 +1341,10 @@ namespace orc { MemoryPool& memoryPool, ReaderMetrics* readerMetrics) { const char* footerPtr = buffer->data() + footerOffset; - std::unique_ptr pbStream = - createDecompressor(convertCompressionKind(ps), - std::make_unique(footerPtr, ps.footerlength()), - getCompressionBlockSize(ps), memoryPool, readerMetrics); + std::unique_ptr pbStream = createDecompressor( + convertCompressionKind(ps), + std::make_unique(footerPtr, ps.footer_length()), + getCompressionBlockSize(ps), memoryPool, readerMetrics); auto footer = std::make_unique(); if (!footer->ParseFromZeroCopyStream(pbStream.get())) { @@ -1370,8 +1372,8 @@ namespace orc { } contents->postscript = std::make_unique(tail.postscript()); contents->footer = std::make_unique(tail.footer()); - fileLength = tail.filelength(); - postscriptLength = tail.postscriptlength(); + fileLength = tail.file_length(); + postscriptLength = tail.postscript_length(); } else { // figure out the size of the file using the option or filesystem fileLength = std::min(options.getTailLocation(), static_cast(stream->getLength())); @@ -1386,7 +1388,7 @@ namespace orc { postscriptLength = buffer->data()[readSize - 1] & 0xff; contents->postscript = readPostscript(stream.get(), buffer.get(), postscriptLength); - uint64_t footerSize = contents->postscript->footerlength(); + uint64_t footerSize = contents->postscript->footer_length(); uint64_t tailSize = 1 + postscriptLength + footerSize; if (tailSize >= fileLength) { std::stringstream msg; @@ -1430,7 +1432,7 @@ namespace orc { footer->stripes(static_cast(stripeIndex)); const proto::StripeFooter currentStripeFooter = getStripeFooter(currentStripeInfo, *contents); - // iterate stripe footer to get stream of bloomfilter + // iterate stripe footer to get stream of bloom_filter uint64_t offset = static_cast(currentStripeInfo.offset()); for (int i = 0; i < currentStripeFooter.streams_size(); i++) { const proto::Stream& stream = currentStripeFooter.streams(i); @@ -1452,10 +1454,10 @@ namespace orc { } BloomFilterIndex bfIndex; - for (int j = 0; j < pbBFIndex.bloomfilter_size(); j++) { + for (int j = 0; j < pbBFIndex.bloom_filter_size(); j++) { std::unique_ptr entry = BloomFilterUTF8Utils::deserialize( stream.kind(), currentStripeFooter.columns(static_cast(stream.column())), - pbBFIndex.bloomfilter(j)); + pbBFIndex.bloom_filter(j)); bfIndex.entries.push_back(std::shared_ptr(std::move(entry))); } diff --git a/c++/src/Statistics.cc b/c++/src/Statistics.cc index 7b64873961..8ed29d0e7c 100644 --- a/c++/src/Statistics.cc +++ b/c++/src/Statistics.cc @@ -26,23 +26,23 @@ namespace orc { ColumnStatistics* convertColumnStatistics(const proto::ColumnStatistics& s, const StatContext& statContext) { - if (s.has_intstatistics()) { + if (s.has_int_statistics()) { return new IntegerColumnStatisticsImpl(s); - } else if (s.has_doublestatistics()) { + } else if (s.has_double_statistics()) { return new DoubleColumnStatisticsImpl(s); - } else if (s.has_collectionstatistics()) { + } else if (s.has_collection_statistics()) { return new CollectionColumnStatisticsImpl(s); - } else if (s.has_stringstatistics()) { + } else if (s.has_string_statistics()) { return new StringColumnStatisticsImpl(s, statContext); - } else if (s.has_bucketstatistics()) { + } else if (s.has_bucket_statistics()) { return new BooleanColumnStatisticsImpl(s, statContext); - } else if (s.has_decimalstatistics()) { + } else if (s.has_decimal_statistics()) { return new DecimalColumnStatisticsImpl(s, statContext); - } else if (s.has_timestampstatistics()) { + } else if (s.has_timestamp_statistics()) { return new TimestampColumnStatisticsImpl(s, statContext); - } else if (s.has_datestatistics()) { + } else if (s.has_date_statistics()) { return new DateColumnStatisticsImpl(s, statContext); - } else if (s.has_binarystatistics()) { + } else if (s.has_binary_statistics()) { return new BinaryColumnStatisticsImpl(s, statContext); } else { return new ColumnStatisticsImpl(s); @@ -51,8 +51,8 @@ namespace orc { StatisticsImpl::StatisticsImpl(const proto::StripeStatistics& stripeStats, const StatContext& statContext) { - for (int i = 0; i < stripeStats.colstats_size(); i++) { - colStats.push_back(convertColumnStatistics(stripeStats.colstats(i), statContext)); + for (int i = 0; i < stripeStats.col_stats_size(); i++) { + colStats.push_back(convertColumnStatistics(stripeStats.col_stats(i), statContext)); } } @@ -180,27 +180,27 @@ namespace orc { } ColumnStatisticsImpl::ColumnStatisticsImpl(const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); } BinaryColumnStatisticsImpl::BinaryColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (pb.has_binarystatistics() && statContext.correctStats) { - _stats.setHasTotalLength(pb.binarystatistics().has_sum()); - _stats.setTotalLength(static_cast(pb.binarystatistics().sum())); + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (pb.has_binary_statistics() && statContext.correctStats) { + _stats.setHasTotalLength(pb.binary_statistics().has_sum()); + _stats.setTotalLength(static_cast(pb.binary_statistics().sum())); } } BooleanColumnStatisticsImpl::BooleanColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (pb.has_bucketstatistics() && statContext.correctStats) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (pb.has_bucket_statistics() && statContext.correctStats) { _hasCount = true; - _trueCount = pb.bucketstatistics().count(0); + _trueCount = pb.bucket_statistics().count(0); } else { _hasCount = false; _trueCount = 0; @@ -209,27 +209,27 @@ namespace orc { DateColumnStatisticsImpl::DateColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_datestatistics() || !statContext.correctStats) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (!pb.has_date_statistics() || !statContext.correctStats) { // hasMinimum_ is false by default; // hasMaximum_ is false by default; _stats.setMinimum(0); _stats.setMaximum(0); } else { - _stats.setHasMinimum(pb.datestatistics().has_minimum()); - _stats.setHasMaximum(pb.datestatistics().has_maximum()); - _stats.setMinimum(pb.datestatistics().minimum()); - _stats.setMaximum(pb.datestatistics().maximum()); + _stats.setHasMinimum(pb.date_statistics().has_minimum()); + _stats.setHasMaximum(pb.date_statistics().has_maximum()); + _stats.setMinimum(pb.date_statistics().minimum()); + _stats.setMaximum(pb.date_statistics().maximum()); } } DecimalColumnStatisticsImpl::DecimalColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (pb.has_decimalstatistics() && statContext.correctStats) { - const proto::DecimalStatistics& stats = pb.decimalstatistics(); + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (pb.has_decimal_statistics() && statContext.correctStats) { + const proto::DecimalStatistics& stats = pb.decimal_statistics(); _stats.setHasMinimum(stats.has_minimum()); _stats.setHasMaximum(stats.has_maximum()); _stats.setHasSum(stats.has_sum()); @@ -241,14 +241,14 @@ namespace orc { } DoubleColumnStatisticsImpl::DoubleColumnStatisticsImpl(const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_doublestatistics()) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (!pb.has_double_statistics()) { _stats.setMinimum(0); _stats.setMaximum(0); _stats.setSum(0); } else { - const proto::DoubleStatistics& stats = pb.doublestatistics(); + const proto::DoubleStatistics& stats = pb.double_statistics(); _stats.setHasMinimum(stats.has_minimum()); _stats.setHasMaximum(stats.has_maximum()); _stats.setHasSum(stats.has_sum()); @@ -260,14 +260,14 @@ namespace orc { } IntegerColumnStatisticsImpl::IntegerColumnStatisticsImpl(const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_intstatistics()) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (!pb.has_int_statistics()) { _stats.setMinimum(0); _stats.setMaximum(0); _stats.setSum(0); } else { - const proto::IntegerStatistics& stats = pb.intstatistics(); + const proto::IntegerStatistics& stats = pb.int_statistics(); _stats.setHasMinimum(stats.has_minimum()); _stats.setHasMaximum(stats.has_maximum()); _stats.setHasSum(stats.has_sum()); @@ -280,12 +280,12 @@ namespace orc { StringColumnStatisticsImpl::StringColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_stringstatistics() || !statContext.correctStats) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (!pb.has_string_statistics() || !statContext.correctStats) { _stats.setTotalLength(0); } else { - const proto::StringStatistics& stats = pb.stringstatistics(); + const proto::StringStatistics& stats = pb.string_statistics(); _stats.setHasMinimum(stats.has_minimum()); _stats.setHasMaximum(stats.has_maximum()); _stats.setHasTotalLength(stats.has_sum()); @@ -298,9 +298,9 @@ namespace orc { TimestampColumnStatisticsImpl::TimestampColumnStatisticsImpl(const proto::ColumnStatistics& pb, const StatContext& statContext) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_timestampstatistics() || !statContext.correctStats) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (!pb.has_timestamp_statistics() || !statContext.correctStats) { _stats.setMinimum(0); _stats.setMaximum(0); _lowerBound = 0; @@ -308,21 +308,21 @@ namespace orc { _minimumNanos = DEFAULT_MIN_NANOS; _maximumNanos = DEFAULT_MAX_NANOS; } else { - const proto::TimestampStatistics& stats = pb.timestampstatistics(); - _stats.setHasMinimum(stats.has_minimumutc() || + const proto::TimestampStatistics& stats = pb.timestamp_statistics(); + _stats.setHasMinimum(stats.has_minimum_utc() || (stats.has_minimum() && (statContext.writerTimezone != nullptr))); - _stats.setHasMaximum(stats.has_maximumutc() || + _stats.setHasMaximum(stats.has_maximum_utc() || (stats.has_maximum() && (statContext.writerTimezone != nullptr))); - _hasLowerBound = stats.has_minimumutc() || stats.has_minimum(); - _hasUpperBound = stats.has_maximumutc() || stats.has_maximum(); - // to be consistent with java side, non-default minimumnanos and maximumnanos + _hasLowerBound = stats.has_minimum_utc() || stats.has_minimum(); + _hasUpperBound = stats.has_maximum_utc() || stats.has_maximum(); + // to be consistent with java side, non-default minimum_nanos and maximum_nanos // are added by one in their serialized form. - _minimumNanos = stats.has_minimumnanos() ? stats.minimumnanos() - 1 : DEFAULT_MIN_NANOS; - _maximumNanos = stats.has_maximumnanos() ? stats.maximumnanos() - 1 : DEFAULT_MAX_NANOS; + _minimumNanos = stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 : DEFAULT_MIN_NANOS; + _maximumNanos = stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 : DEFAULT_MAX_NANOS; // Timestamp stats are stored in milliseconds - if (stats.has_minimumutc()) { - int64_t minimum = stats.minimumutc(); + if (stats.has_minimum_utc()) { + int64_t minimum = stats.minimum_utc(); _stats.setMinimum(minimum); _lowerBound = minimum; } else if (statContext.writerTimezone) { @@ -340,8 +340,8 @@ namespace orc { } // Timestamp stats are stored in milliseconds - if (stats.has_maximumutc()) { - int64_t maximum = stats.maximumutc(); + if (stats.has_maximum_utc()) { + int64_t maximum = stats.maximum_utc(); _stats.setMaximum(maximum); _upperBound = maximum; } else if (statContext.writerTimezone) { @@ -364,21 +364,21 @@ namespace orc { CollectionColumnStatisticsImpl::CollectionColumnStatisticsImpl( const proto::ColumnStatistics& pb) { - _stats.setNumberOfValues(pb.numberofvalues()); - _stats.setHasNull(pb.hasnull()); - if (!pb.has_collectionstatistics()) { + _stats.setNumberOfValues(pb.number_of_values()); + _stats.setHasNull(pb.has_null()); + if (!pb.has_collection_statistics()) { _stats.setMinimum(0); _stats.setMaximum(0); _stats.setSum(0); } else { - const proto::CollectionStatistics& stats = pb.collectionstatistics(); - _stats.setHasMinimum(stats.has_minchildren()); - _stats.setHasMaximum(stats.has_maxchildren()); - _stats.setHasSum(stats.has_totalchildren()); - - _stats.setMinimum(stats.minchildren()); - _stats.setMaximum(stats.maxchildren()); - _stats.setSum(stats.totalchildren()); + const proto::CollectionStatistics& stats = pb.collection_statistics(); + _stats.setHasMinimum(stats.has_min_children()); + _stats.setHasMaximum(stats.has_max_children()); + _stats.setHasSum(stats.has_total_children()); + + _stats.setMinimum(stats.min_children()); + _stats.setMaximum(stats.max_children()); + _stats.setSum(stats.total_children()); } } diff --git a/c++/src/Statistics.hh b/c++/src/Statistics.hh index a1aafa7dbe..e585bf971c 100644 --- a/c++/src/Statistics.hh +++ b/c++/src/Statistics.hh @@ -278,8 +278,8 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); } std::string toString() const override { @@ -355,10 +355,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::BinaryStatistics* binStats = pbStats.mutable_binarystatistics(); + proto::BinaryStatistics* binStats = pbStats.mutable_binary_statistics(); binStats->set_sum(static_cast(_stats.getTotalLength())); } @@ -457,10 +457,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::BucketStatistics* bucketStats = pbStats.mutable_bucketstatistics(); + proto::BucketStatistics* bucketStats = pbStats.mutable_bucket_statistics(); if (_hasCount) { bucketStats->add_count(_trueCount); } else { @@ -563,10 +563,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::DateStatistics* dateStatistics = pbStats.mutable_datestatistics(); + proto::DateStatistics* dateStatistics = pbStats.mutable_date_statistics(); if (_stats.hasMinimum()) { dateStatistics->set_maximum(_stats.getMaximum()); dateStatistics->set_minimum(_stats.getMinimum()); @@ -706,10 +706,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::DecimalStatistics* decStats = pbStats.mutable_decimalstatistics(); + proto::DecimalStatistics* decStats = pbStats.mutable_decimal_statistics(); if (_stats.hasMinimum()) { decStats->set_minimum(_stats.getMinimum().toString(true)); decStats->set_maximum(_stats.getMaximum().toString(true)); @@ -883,10 +883,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::DoubleStatistics* doubleStats = pbStats.mutable_doublestatistics(); + proto::DoubleStatistics* doubleStats = pbStats.mutable_double_statistics(); if (_stats.hasMinimum()) { doubleStats->set_minimum(_stats.getMinimum()); doubleStats->set_maximum(_stats.getMaximum()); @@ -1051,10 +1051,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::IntegerStatistics* intStats = pbStats.mutable_intstatistics(); + proto::IntegerStatistics* intStats = pbStats.mutable_int_statistics(); if (_stats.hasMinimum()) { intStats->set_minimum(_stats.getMinimum()); intStats->set_maximum(_stats.getMaximum()); @@ -1220,10 +1220,10 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::StringStatistics* strStats = pbStats.mutable_stringstatistics(); + proto::StringStatistics* strStats = pbStats.mutable_string_statistics(); if (_stats.hasMinimum()) { strStats->set_minimum(_stats.getMinimum()); strStats->set_maximum(_stats.getMaximum()); @@ -1408,24 +1408,24 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::TimestampStatistics* tsStats = pbStats.mutable_timestampstatistics(); + proto::TimestampStatistics* tsStats = pbStats.mutable_timestamp_statistics(); if (_stats.hasMinimum()) { - tsStats->set_minimumutc(_stats.getMinimum()); - tsStats->set_maximumutc(_stats.getMaximum()); + tsStats->set_minimum_utc(_stats.getMinimum()); + tsStats->set_maximum_utc(_stats.getMaximum()); if (_minimumNanos != DEFAULT_MIN_NANOS) { - tsStats->set_minimumnanos(_minimumNanos + 1); + tsStats->set_minimum_nanos(_minimumNanos + 1); } if (_maximumNanos != DEFAULT_MAX_NANOS) { - tsStats->set_maximumnanos(_maximumNanos + 1); + tsStats->set_maximum_nanos(_maximumNanos + 1); } } else { - tsStats->clear_minimumutc(); - tsStats->clear_maximumutc(); - tsStats->clear_minimumnanos(); - tsStats->clear_maximumnanos(); + tsStats->clear_minimum_utc(); + tsStats->clear_maximum_utc(); + tsStats->clear_minimum_nanos(); + tsStats->clear_maximum_nanos(); } } @@ -1639,21 +1639,21 @@ namespace orc { } void toProtoBuf(proto::ColumnStatistics& pbStats) const override { - pbStats.set_hasnull(_stats.hasNull()); - pbStats.set_numberofvalues(_stats.getNumberOfValues()); + pbStats.set_has_null(_stats.hasNull()); + pbStats.set_number_of_values(_stats.getNumberOfValues()); - proto::CollectionStatistics* collectionStats = pbStats.mutable_collectionstatistics(); + proto::CollectionStatistics* collectionStats = pbStats.mutable_collection_statistics(); if (_stats.hasMinimum()) { - collectionStats->set_minchildren(_stats.getMinimum()); - collectionStats->set_maxchildren(_stats.getMaximum()); + collectionStats->set_min_children(_stats.getMinimum()); + collectionStats->set_max_children(_stats.getMaximum()); } else { - collectionStats->clear_minchildren(); - collectionStats->clear_maxchildren(); + collectionStats->clear_min_children(); + collectionStats->clear_max_children(); } if (_stats.hasSum()) { - collectionStats->set_totalchildren(_stats.getSum()); + collectionStats->set_total_children(_stats.getSum()); } else { - collectionStats->clear_totalchildren(); + collectionStats->clear_total_children(); } } diff --git a/c++/src/StripeStream.cc b/c++/src/StripeStream.cc index 6b95a4dc4d..8507e95767 100644 --- a/c++/src/StripeStream.cc +++ b/c++/src/StripeStream.cc @@ -81,7 +81,7 @@ namespace orc { proto::Stream_Kind kind, bool shouldStream) const { uint64_t offset = stripeStart; - uint64_t dataEnd = stripeInfo.offset() + stripeInfo.indexlength() + stripeInfo.datalength(); + uint64_t dataEnd = stripeInfo.offset() + stripeInfo.index_length() + stripeInfo.data_length(); MemoryPool* pool = reader.getFileContents().pool; for (int i = 0; i < footer.streams_size(); ++i) { const proto::Stream& stream = footer.streams(i); @@ -94,8 +94,8 @@ namespace orc { msg << "Malformed stream meta at stream index " << i << " in stripe " << stripeIndex << ": streamOffset=" << offset << ", streamLength=" << streamLength << ", stripeOffset=" << stripeInfo.offset() - << ", stripeIndexLength=" << stripeInfo.indexlength() - << ", stripeDataLength=" << stripeInfo.datalength(); + << ", stripeIndexLength=" << stripeInfo.index_length() + << ", stripeDataLength=" << stripeInfo.data_length(); throw ParseError(msg.str()); } return createDecompressor(reader.getCompression(), diff --git a/c++/src/StripeStream.hh b/c++/src/StripeStream.hh index a3b748c6e6..eae6ce0c31 100644 --- a/c++/src/StripeStream.hh +++ b/c++/src/StripeStream.hh @@ -198,12 +198,12 @@ namespace orc { uint64_t getDictionarySize(uint64_t colId) const override { ensureStripeFooterLoaded(); return static_cast( - stripeFooter->columns(static_cast(colId)).dictionarysize()); + stripeFooter->columns(static_cast(colId)).dictionary_size()); } const std::string& getWriterTimezone() const override { ensureStripeFooterLoaded(); - return stripeFooter->writertimezone(); + return stripeFooter->writer_timezone(); } }; diff --git a/c++/src/TypeImpl.cc b/c++/src/TypeImpl.cc index c914d84f4d..cf8aa0ad72 100644 --- a/c++/src/TypeImpl.cc +++ b/c++/src/TypeImpl.cc @@ -440,7 +440,7 @@ namespace orc { case proto::Type_Kind_CHAR: case proto::Type_Kind_VARCHAR: - ret = std::make_unique(static_cast(type.kind()), type.maximumlength()); + ret = std::make_unique(static_cast(type.kind()), type.maximum_length()); break; case proto::Type_Kind_DECIMAL: @@ -465,11 +465,11 @@ namespace orc { case proto::Type_Kind_STRUCT: { ret = std::make_unique(STRUCT); - if (type.subtypes_size() > type.fieldnames_size()) - throw ParseError("Illegal STRUCT type that contains less fieldnames than subtypes"); + if (type.subtypes_size() > type.field_names_size()) + throw ParseError("Illegal STRUCT type that contains less field_names than subtypes"); for (int i = 0; i < type.subtypes_size(); ++i) { ret->addStructField( - type.fieldnames(i), + type.field_names(i), convertType(footer.types(static_cast(type.subtypes(i))), footer)); } break; diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc index 84c8a502ed..e478fc7ac8 100644 --- a/c++/src/Writer.cc +++ b/c++/src/Writer.cc @@ -440,25 +440,25 @@ namespace orc { currentOffset += magicIdLength; // Initialize file footer - fileFooter.set_headerlength(currentOffset); - fileFooter.set_contentlength(0); - fileFooter.set_numberofrows(0); - fileFooter.set_rowindexstride(static_cast(options.getRowIndexStride())); + fileFooter.set_header_length(currentOffset); + fileFooter.set_content_length(0); + fileFooter.set_number_of_rows(0); + fileFooter.set_row_index_stride(static_cast(options.getRowIndexStride())); fileFooter.set_writer(writerId); - fileFooter.set_softwareversion(ORC_VERSION); + fileFooter.set_software_version(ORC_VERSION); uint32_t index = 0; buildFooterType(type, fileFooter, index); // Initialize post script - postScript.set_footerlength(0); + postScript.set_footer_length(0); postScript.set_compression(WriterImpl::convertCompressionKind(options.getCompression())); - postScript.set_compressionblocksize(options.getCompressionBlockSize()); + postScript.set_compression_block_size(options.getCompressionBlockSize()); postScript.add_version(options.getFileVersion().getMajor()); postScript.add_version(options.getFileVersion().getMinor()); - postScript.set_writerversion(WriterVersion_ORC_135); + postScript.set_writer_version(WriterVersion_ORC_135); postScript.set_magic("ORC"); // Initialize first stripe @@ -467,10 +467,10 @@ namespace orc { void WriterImpl::initStripe() { stripeInfo.set_offset(currentOffset); - stripeInfo.set_indexlength(0); - stripeInfo.set_datalength(0); - stripeInfo.set_footerlength(0); - stripeInfo.set_numberofrows(0); + stripeInfo.set_index_length(0); + stripeInfo.set_data_length(0); + stripeInfo.set_footer_length(0); + stripeInfo.set_number_of_rows(0); stripeRows = indexRows = 0; } @@ -507,14 +507,14 @@ namespace orc { *stripeFooter.add_columns() = encodings[i]; } - stripeFooter.set_writertimezone(options.getTimezoneName()); + stripeFooter.set_writer_timezone(options.getTimezoneName()); // add stripe statistics to metadata - proto::StripeStatistics* stripeStats = metadata.add_stripestats(); + proto::StripeStatistics* stripeStats = metadata.add_stripe_stats(); std::vector colStats; columnWriter->getStripeStatistics(colStats); for (uint32_t i = 0; i != colStats.size(); ++i) { - *stripeStats->add_colstats() = colStats[i]; + *stripeStats->add_col_stats() = colStats[i]; } // merge stripe stats into file stats and clear stripe stats columnWriter->mergeStripeStatsIntoFileStats(); @@ -537,10 +537,10 @@ namespace orc { } // update stripe info - stripeInfo.set_indexlength(indexLength); - stripeInfo.set_datalength(dataLength); - stripeInfo.set_footerlength(footerLength); - stripeInfo.set_numberofrows(stripeRows); + stripeInfo.set_index_length(indexLength); + stripeInfo.set_data_length(dataLength); + stripeInfo.set_footer_length(footerLength); + stripeInfo.set_number_of_rows(stripeRows); *fileFooter.add_stripes() = stripeInfo; @@ -556,12 +556,12 @@ namespace orc { if (!metadata.SerializeToZeroCopyStream(compressionStream.get())) { throw std::logic_error("Failed to write metadata."); } - postScript.set_metadatalength(compressionStream.get()->flush()); + postScript.set_metadata_length(compressionStream.get()->flush()); } void WriterImpl::writeFileFooter() { - fileFooter.set_contentlength(currentOffset - fileFooter.headerlength()); - fileFooter.set_numberofrows(totalRows); + fileFooter.set_content_length(currentOffset - fileFooter.header_length()); + fileFooter.set_number_of_rows(totalRows); // update file statistics std::vector colStats; @@ -574,7 +574,7 @@ namespace orc { if (!fileFooter.SerializeToZeroCopyStream(compressionStream.get())) { throw std::logic_error("Failed to write file footer."); } - postScript.set_footerlength(compressionStream->flush()); + postScript.set_footer_length(compressionStream->flush()); } void WriterImpl::writePostscript() { @@ -588,7 +588,7 @@ namespace orc { void WriterImpl::buildFooterType(const Type& t, proto::Footer& footer, uint32_t& index) { proto::Type protoType; - protoType.set_maximumlength(static_cast(t.getMaximumLength())); + protoType.set_maximum_length(static_cast(t.getMaximumLength())); protoType.set_precision(static_cast(t.getPrecision())); protoType.set_scale(static_cast(t.getScale())); @@ -686,7 +686,7 @@ namespace orc { for (uint64_t i = 0; i < t.getSubtypeCount(); ++i) { // only add subtypes' field names if this type is STRUCT if (t.getKind() == STRUCT) { - footer.mutable_types(pos)->add_fieldnames(t.getFieldName(i)); + footer.mutable_types(pos)->add_field_names(t.getFieldName(i)); } footer.mutable_types(pos)->add_subtypes(++index); buildFooterType(*t.getSubtype(i), footer, index); diff --git a/c++/src/sargs/PredicateLeaf.cc b/c++/src/sargs/PredicateLeaf.cc index 9e9f41c338..525901b1f7 100644 --- a/c++/src/sargs/PredicateLeaf.cc +++ b/c++/src/sargs/PredicateLeaf.cc @@ -393,14 +393,14 @@ namespace orc { static TruthValue evaluateBoolPredicate(const PredicateLeaf::Operator op, const std::vector& literals, const proto::ColumnStatistics& stats) { - bool hasNull = stats.hasnull(); - if (!stats.has_bucketstatistics() || stats.bucketstatistics().count_size() == 0) { + bool hasNull = stats.has_null(); + if (!stats.has_bucket_statistics() || stats.bucket_statistics().count_size() == 0) { // does not have bool stats return hasNull ? TruthValue::YES_NO_NULL : TruthValue::YES_NO; } - auto trueCount = stats.bucketstatistics().count(0); - auto falseCount = stats.numberofvalues() - trueCount; + auto trueCount = stats.bucket_statistics().count(0); + auto falseCount = stats.number_of_values() - trueCount; switch (op) { case PredicateLeaf::Operator::IS_NULL: return hasNull ? TruthValue::YES_NO : TruthValue::NO; @@ -509,77 +509,80 @@ namespace orc { TruthValue result = TruthValue::YES_NO_NULL; switch (mType) { case PredicateDataType::LONG: { - if (colStats.has_intstatistics() && colStats.intstatistics().has_minimum() && - colStats.intstatistics().has_maximum()) { - const auto& stats = colStats.intstatistics(); + if (colStats.has_int_statistics() && colStats.int_statistics().has_minimum() && + colStats.int_statistics().has_maximum()) { + const auto& stats = colStats.int_statistics(); result = evaluatePredicateRange(mOperator, literal2Long(mLiterals), stats.minimum(), - stats.maximum(), colStats.hasnull()); + stats.maximum(), colStats.has_null()); } break; } case PredicateDataType::FLOAT: { - if (colStats.has_doublestatistics() && colStats.doublestatistics().has_minimum() && - colStats.doublestatistics().has_maximum()) { - const auto& stats = colStats.doublestatistics(); + if (colStats.has_double_statistics() && colStats.double_statistics().has_minimum() && + colStats.double_statistics().has_maximum()) { + const auto& stats = colStats.double_statistics(); if (!std::isfinite(stats.sum())) { - result = colStats.hasnull() ? TruthValue::YES_NO_NULL : TruthValue::YES_NO; + result = colStats.has_null() ? TruthValue::YES_NO_NULL : TruthValue::YES_NO; } else { result = evaluatePredicateRange(mOperator, literal2Double(mLiterals), stats.minimum(), - stats.maximum(), colStats.hasnull()); + stats.maximum(), colStats.has_null()); } } break; } case PredicateDataType::STRING: { /// TODO: check lowerBound and upperBound as well - if (colStats.has_stringstatistics() && colStats.stringstatistics().has_minimum() && - colStats.stringstatistics().has_maximum()) { - const auto& stats = colStats.stringstatistics(); + if (colStats.has_string_statistics() && colStats.string_statistics().has_minimum() && + colStats.string_statistics().has_maximum()) { + const auto& stats = colStats.string_statistics(); result = evaluatePredicateRange(mOperator, literal2String(mLiterals), stats.minimum(), - stats.maximum(), colStats.hasnull()); + stats.maximum(), colStats.has_null()); } break; } case PredicateDataType::DATE: { - if (colStats.has_datestatistics() && colStats.datestatistics().has_minimum() && - colStats.datestatistics().has_maximum()) { - const auto& stats = colStats.datestatistics(); + if (colStats.has_date_statistics() && colStats.date_statistics().has_minimum() && + colStats.date_statistics().has_maximum()) { + const auto& stats = colStats.date_statistics(); result = evaluatePredicateRange(mOperator, literal2Date(mLiterals), stats.minimum(), - stats.maximum(), colStats.hasnull()); + stats.maximum(), colStats.has_null()); } break; } case PredicateDataType::TIMESTAMP: { - if (colStats.has_timestampstatistics() && colStats.timestampstatistics().has_minimumutc() && - colStats.timestampstatistics().has_maximumutc()) { - const auto& stats = colStats.timestampstatistics(); + if (colStats.has_timestamp_statistics() && + colStats.timestamp_statistics().has_minimum_utc() && + colStats.timestamp_statistics().has_maximum_utc()) { + const auto& stats = colStats.timestamp_statistics(); constexpr int32_t DEFAULT_MIN_NANOS = 0; constexpr int32_t DEFAULT_MAX_NANOS = 999999; - int32_t minNano = stats.has_minimumnanos() ? stats.minimumnanos() - 1 : DEFAULT_MIN_NANOS; - int32_t maxNano = stats.has_maximumnanos() ? stats.maximumnanos() - 1 : DEFAULT_MAX_NANOS; + int32_t minNano = + stats.has_minimum_nanos() ? stats.minimum_nanos() - 1 : DEFAULT_MIN_NANOS; + int32_t maxNano = + stats.has_maximum_nanos() ? stats.maximum_nanos() - 1 : DEFAULT_MAX_NANOS; Literal::Timestamp minTimestamp( - stats.minimumutc() / 1000, - static_cast((stats.minimumutc() % 1000) * 1000000) + minNano); + stats.minimum_utc() / 1000, + static_cast((stats.minimum_utc() % 1000) * 1000000) + minNano); Literal::Timestamp maxTimestamp( - stats.maximumutc() / 1000, - static_cast((stats.maximumutc() % 1000) * 1000000) + maxNano); + stats.maximum_utc() / 1000, + static_cast((stats.maximum_utc() % 1000) * 1000000) + maxNano); result = evaluatePredicateRange(mOperator, literal2Timestamp(mLiterals), minTimestamp, - maxTimestamp, colStats.hasnull()); + maxTimestamp, colStats.has_null()); } break; } case PredicateDataType::DECIMAL: { - if (colStats.has_decimalstatistics() && colStats.decimalstatistics().has_minimum() && - colStats.decimalstatistics().has_maximum()) { - const auto& stats = colStats.decimalstatistics(); + if (colStats.has_decimal_statistics() && colStats.decimal_statistics().has_minimum() && + colStats.decimal_statistics().has_maximum()) { + const auto& stats = colStats.decimal_statistics(); result = evaluatePredicateRange(mOperator, literal2Decimal(mLiterals), Decimal(stats.minimum()), Decimal(stats.maximum()), - colStats.hasnull()); + colStats.has_null()); } break; } case PredicateDataType::BOOLEAN: { - if (colStats.has_bucketstatistics()) { + if (colStats.has_bucket_statistics()) { result = evaluateBoolPredicate(mOperator, mLiterals, colStats); } break; @@ -589,7 +592,7 @@ namespace orc { } // make sure null literal is respected for IN operator - if (mOperator == Operator::IN && colStats.hasnull()) { + if (mOperator == Operator::IN && colStats.has_null()) { for (const auto& literal : mLiterals) { if (literal.isNull()) { result = TruthValue::YES_NO_NULL; @@ -698,12 +701,13 @@ namespace orc { } } - bool allNull = colStats.hasnull() && colStats.numberofvalues() == 0; + bool allNull = colStats.has_null() && colStats.number_of_values() == 0; if (mOperator == Operator::IS_NULL || ((mOperator == Operator::EQUALS || mOperator == Operator::NULL_SAFE_EQUALS) && mLiterals.at(0).isNull())) { // IS_NULL operator does not need to check min/max stats and bloom filter - return allNull ? TruthValue::YES : (colStats.hasnull() ? TruthValue::YES_NO : TruthValue::NO); + return allNull ? TruthValue::YES + : (colStats.has_null() ? TruthValue::YES_NO : TruthValue::NO); } else if (allNull) { // if we don't have any value, everything must have been null return TruthValue::IS_NULL; @@ -711,7 +715,7 @@ namespace orc { TruthValue result = evaluatePredicateMinMax(colStats); if (shouldEvaluateBloomFilter(mOperator, result, bloomFilter)) { - return evaluatePredicateBloomFiter(bloomFilter, colStats.hasnull()); + return evaluatePredicateBloomFiter(bloomFilter, colStats.has_null()); } else { return result; } diff --git a/c++/src/sargs/SargsApplier.cc b/c++/src/sargs/SargsApplier.cc index 2cc3a7cf44..7032a88126 100644 --- a/c++/src/sargs/SargsApplier.cc +++ b/c++/src/sargs/SargsApplier.cc @@ -155,11 +155,11 @@ namespace orc { bool SargsApplier::evaluateStripeStatistics(const proto::StripeStatistics& stripeStats, uint64_t stripeRowGroupCount) { - if (stripeStats.colstats_size() == 0) { + if (stripeStats.col_stats_size() == 0) { return true; } - bool ret = evaluateColumnStatistics(stripeStats.colstats()); + bool ret = evaluateColumnStatistics(stripeStats.col_stats()); if (!ret) { // reset mNextSkippedRows when the current stripe does not satisfy the PPD mNextSkippedRows.clear(); diff --git a/c++/test/CreateTestFiles.cc b/c++/test/CreateTestFiles.cc index 56f86fd774..19956afbe5 100644 --- a/c++/test/CreateTestFiles.cc +++ b/c++/test/CreateTestFiles.cc @@ -42,14 +42,14 @@ void writeCustomOrcFile(const std::string& filename, const orc::proto::Metadata& exit(1); } orc::proto::PostScript ps; - ps.set_footerlength(static_cast(footer.ByteSizeLong())); + ps.set_footer_length(static_cast(footer.ByteSizeLong())); ps.set_compression(orc::proto::NONE); - ps.set_compressionblocksize(64 * 1024); + ps.set_compression_block_size(64 * 1024); for (size_t i = 0; i < version.size(); ++i) { ps.add_version(version[i]); } - ps.set_metadatalength(static_cast(metadata.ByteSizeLong())); - ps.set_writerversion(writerVersion); + ps.set_metadata_length(static_cast(metadata.ByteSizeLong())); + ps.set_writer_version(writerVersion); ps.set_magic("ORC"); if (!ps.SerializeToOstream(&output)) { std::cerr << "Failed to write postscript for " << filename << "\n"; @@ -64,15 +64,15 @@ void writeCustomOrcFile(const std::string& filename, const orc::proto::Metadata& void writeVersion1999() { orc::proto::Metadata meta; orc::proto::Footer footer; - footer.set_headerlength(3); - footer.set_contentlength(3); + footer.set_header_length(3); + footer.set_content_length(3); orc::proto::Type* type = footer.add_types(); type->set_kind(orc::proto::Type_Kind_STRUCT); - footer.set_numberofrows(0); - footer.set_rowindexstride(10000); + footer.set_number_of_rows(0); + footer.set_row_index_stride(10000); orc::proto::ColumnStatistics* stats = footer.add_statistics(); - stats->set_numberofvalues(0); - stats->set_hasnull(false); + stats->set_number_of_values(0); + stats->set_has_null(false); std::vector version; version.push_back(19); version.push_back(99); diff --git a/c++/test/TestBloomFilter.cc b/c++/test/TestBloomFilter.cc index 47ab717de9..0b6cc9ebeb 100644 --- a/c++/test/TestBloomFilter.cc +++ b/c++/test/TestBloomFilter.cc @@ -242,7 +242,7 @@ namespace orc { proto::BloomFilter pbBloomFilter; proto::ColumnEncoding encoding; - encoding.set_bloomencoding(1); + encoding.set_bloom_encoding(1); // serialize BloomFilterUTF8Utils::serialize(srcBloomFilter, pbBloomFilter); diff --git a/c++/test/TestBufferedOutputStream.cc b/c++/test/TestBufferedOutputStream.cc index 6735ac43d3..98c492867b 100644 --- a/c++/test/TestBufferedOutputStream.cc +++ b/c++/test/TestBufferedOutputStream.cc @@ -109,12 +109,12 @@ namespace orc { BufferedOutputStream bufStream(*pool, &memStream, capacity, block, &metrics); proto::PostScript ps; - ps.set_footerlength(197934); + ps.set_footer_length(197934); ps.set_compression(proto::ZLIB); ps.add_version(6); ps.add_version(20); - ps.set_metadatalength(100); - ps.set_writerversion(789); + ps.set_metadata_length(100); + ps.set_writer_version(789); ps.set_magic("protobuff_serialization"); EXPECT_TRUE(ps.SerializeToZeroCopyStream(&bufStream)); @@ -124,12 +124,12 @@ namespace orc { proto::PostScript ps2; ps2.ParseFromArray(memStream.getData(), static_cast(memStream.getLength())); - EXPECT_EQ(ps.footerlength(), ps2.footerlength()); + EXPECT_EQ(ps.footer_length(), ps2.footer_length()); EXPECT_EQ(ps.compression(), ps2.compression()); EXPECT_EQ(ps.version(0), ps2.version(0)); EXPECT_EQ(ps.version(1), ps2.version(1)); - EXPECT_EQ(ps.metadatalength(), ps2.metadatalength()); - EXPECT_EQ(ps.writerversion(), ps2.writerversion()); + EXPECT_EQ(ps.metadata_length(), ps2.metadata_length()); + EXPECT_EQ(ps.writer_version(), ps2.writer_version()); EXPECT_EQ(ps.magic(), ps2.magic()); } } // namespace orc diff --git a/c++/test/TestColumnReader.cc b/c++/test/TestColumnReader.cc index 98f2d86bde..fcbf007630 100644 --- a/c++/test/TestColumnReader.cc +++ b/c++/test/TestColumnReader.cc @@ -339,7 +339,7 @@ namespace orc { EXPECT_CALL(streams, getEncoding(0)).WillRepeatedly(testing::Return(directEncoding)); proto::ColumnEncoding dictionaryEncoding; dictionaryEncoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY); - dictionaryEncoding.set_dictionarysize(2); + dictionaryEncoding.set_dictionary_size(2); EXPECT_CALL(streams, getEncoding(1)).WillRepeatedly(testing::Return(dictionaryEncoding)); // set getStream @@ -438,12 +438,12 @@ namespace orc { proto::ColumnEncoding dictionary2Encoding; dictionary2Encoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY); - dictionary2Encoding.set_dictionarysize(2); + dictionary2Encoding.set_dictionary_size(2); EXPECT_CALL(streams, getEncoding(1)).WillRepeatedly(testing::Return(dictionary2Encoding)); proto::ColumnEncoding dictionary0Encoding; dictionary0Encoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY); - dictionary0Encoding.set_dictionarysize(0); + dictionary0Encoding.set_dictionary_size(0); EXPECT_CALL(streams, getEncoding(testing::Ge(2))) .WillRepeatedly(testing::Return(dictionary0Encoding)); @@ -645,7 +645,7 @@ namespace orc { EXPECT_CALL(streams, getEncoding(testing::_)).WillRepeatedly(testing::Return(directEncoding)); proto::ColumnEncoding dictionaryEncoding; dictionaryEncoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY); - dictionaryEncoding.set_dictionarysize(100); + dictionaryEncoding.set_dictionary_size(100); EXPECT_CALL(streams, getEncoding(2)).WillRepeatedly(testing::Return(dictionaryEncoding)); // set getStream @@ -4091,7 +4091,7 @@ namespace orc { EXPECT_CALL(streams, getEncoding(0)).WillRepeatedly(testing::Return(directEncoding)); proto::ColumnEncoding dictionaryEncoding; dictionaryEncoding.set_kind(proto::ColumnEncoding_Kind_DICTIONARY); - dictionaryEncoding.set_dictionarysize(2); + dictionaryEncoding.set_dictionary_size(2); EXPECT_CALL(streams, getEncoding(1)).WillRepeatedly(testing::Return(dictionaryEncoding)); // set getStream diff --git a/c++/test/TestColumnStatistics.cc b/c++/test/TestColumnStatistics.cc index 8a4c59b149..5cf2d9e41b 100644 --- a/c++/test/TestColumnStatistics.cc +++ b/c++/test/TestColumnStatistics.cc @@ -449,10 +449,10 @@ namespace orc { proto::ColumnStatistics pbStats; tsStats->toProtoBuf(pbStats); - EXPECT_EQ(100, pbStats.timestampstatistics().minimumutc()); - EXPECT_EQ(200, pbStats.timestampstatistics().maximumutc()); - EXPECT_FALSE(pbStats.timestampstatistics().has_minimumnanos()); - EXPECT_FALSE(pbStats.timestampstatistics().has_maximumnanos()); + EXPECT_EQ(100, pbStats.timestamp_statistics().minimum_utc()); + EXPECT_EQ(200, pbStats.timestamp_statistics().maximum_utc()); + EXPECT_FALSE(pbStats.timestamp_statistics().has_minimum_nanos()); + EXPECT_FALSE(pbStats.timestamp_statistics().has_maximum_nanos()); StatContext ctx(true, nullptr); auto tsStatsFromPb = std::make_unique(pbStats, ctx); @@ -465,12 +465,12 @@ namespace orc { tsStats->update(500, 9999); pbStats.Clear(); tsStats->toProtoBuf(pbStats); - EXPECT_EQ(50, pbStats.timestampstatistics().minimumutc()); - EXPECT_EQ(500, pbStats.timestampstatistics().maximumutc()); - EXPECT_TRUE(pbStats.timestampstatistics().has_minimumnanos()); - EXPECT_TRUE(pbStats.timestampstatistics().has_maximumnanos()); - EXPECT_EQ(5555 + 1, pbStats.timestampstatistics().minimumnanos()); - EXPECT_EQ(9999 + 1, pbStats.timestampstatistics().maximumnanos()); + EXPECT_EQ(50, pbStats.timestamp_statistics().minimum_utc()); + EXPECT_EQ(500, pbStats.timestamp_statistics().maximum_utc()); + EXPECT_TRUE(pbStats.timestamp_statistics().has_minimum_nanos()); + EXPECT_TRUE(pbStats.timestamp_statistics().has_maximum_nanos()); + EXPECT_EQ(5555 + 1, pbStats.timestamp_statistics().minimum_nanos()); + EXPECT_EQ(9999 + 1, pbStats.timestamp_statistics().maximum_nanos()); tsStatsFromPb.reset(new TimestampColumnStatisticsImpl(pbStats, ctx)); EXPECT_EQ(50, tsStatsFromPb->getMinimum()); diff --git a/c++/test/TestCompression.cc b/c++/test/TestCompression.cc index 2dba7b9a35..a77800a3dd 100644 --- a/c++/test/TestCompression.cc +++ b/c++/test/TestCompression.cc @@ -195,10 +195,10 @@ namespace orc { uint64_t block = 256; proto::PostScript ps; - ps.set_footerlength(197934); + ps.set_footer_length(197934); ps.set_compression(protoKind); - ps.set_metadatalength(100); - ps.set_writerversion(789); + ps.set_metadata_length(100); + ps.set_writer_version(789); ps.set_magic("protobuff_serialization"); for (uint32_t i = 0; i < 1024; ++i) { ps.add_version(static_cast(std::rand())); @@ -219,10 +219,10 @@ namespace orc { proto::PostScript ps2; ps2.ParseFromZeroCopyStream(decompressStream.get()); - EXPECT_EQ(ps.footerlength(), ps2.footerlength()); + EXPECT_EQ(ps.footer_length(), ps2.footer_length()); EXPECT_EQ(ps.compression(), ps2.compression()); - EXPECT_EQ(ps.metadatalength(), ps2.metadatalength()); - EXPECT_EQ(ps.writerversion(), ps2.writerversion()); + EXPECT_EQ(ps.metadata_length(), ps2.metadata_length()); + EXPECT_EQ(ps.writer_version(), ps2.writer_version()); EXPECT_EQ(ps.magic(), ps2.magic()); for (int i = 0; i < 1024; ++i) { EXPECT_EQ(ps.version(i), ps2.version(i)); diff --git a/c++/test/TestPredicateLeaf.cc b/c++/test/TestPredicateLeaf.cc index e0ab293d31..2703776e39 100644 --- a/c++/test/TestPredicateLeaf.cc +++ b/c++/test/TestPredicateLeaf.cc @@ -74,20 +74,20 @@ namespace orc { static proto::ColumnStatistics createBooleanStats(uint64_t n, uint64_t trueCount, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(n); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(n); - proto::BucketStatistics* boolStats = colStats.mutable_bucketstatistics(); + proto::BucketStatistics* boolStats = colStats.mutable_bucket_statistics(); boolStats->add_count(trueCount); return colStats; } static proto::ColumnStatistics createIntStats(int64_t min, int64_t max, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); - proto::IntegerStatistics* intStats = colStats.mutable_intstatistics(); + proto::IntegerStatistics* intStats = colStats.mutable_int_statistics(); intStats->set_minimum(min); intStats->set_maximum(max); return colStats; @@ -95,10 +95,10 @@ namespace orc { static proto::ColumnStatistics createDoubleStats(double min, double max, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); - proto::DoubleStatistics* doubleStats = colStats.mutable_doublestatistics(); + proto::DoubleStatistics* doubleStats = colStats.mutable_double_statistics(); const auto& curr_sum = min + max; doubleStats->set_minimum(min); doubleStats->set_maximum(max); @@ -109,10 +109,10 @@ namespace orc { static proto::ColumnStatistics createDecimalStats(Decimal min, Decimal max, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); - proto::DecimalStatistics* decimalStats = colStats.mutable_decimalstatistics(); + proto::DecimalStatistics* decimalStats = colStats.mutable_decimal_statistics(); decimalStats->set_minimum(min.toString(true)); decimalStats->set_maximum(max.toString(true)); return colStats; @@ -120,10 +120,10 @@ namespace orc { static proto::ColumnStatistics createDateStats(int32_t min, int32_t max, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); - proto::DateStatistics* dateStats = colStats.mutable_datestatistics(); + proto::DateStatistics* dateStats = colStats.mutable_date_statistics(); dateStats->set_minimum(min); dateStats->set_maximum(max); return colStats; @@ -132,12 +132,12 @@ namespace orc { static proto::ColumnStatistics createTimestampStats(int64_t min, int64_t max, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); - proto::TimestampStatistics* tsStats = colStats.mutable_timestampstatistics(); - tsStats->set_minimumutc(min); - tsStats->set_maximumutc(max); + proto::TimestampStatistics* tsStats = colStats.mutable_timestamp_statistics(); + tsStats->set_minimum_utc(min); + tsStats->set_maximum_utc(max); return colStats; } @@ -145,24 +145,24 @@ namespace orc { int64_t maxSecond, int32_t maxNano, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); - - proto::TimestampStatistics* tsStats = colStats.mutable_timestampstatistics(); - tsStats->set_minimumutc(minSecond * 1000 + minNano / 1000000); - tsStats->set_maximumutc(maxSecond * 1000 + maxNano / 1000000); - tsStats->set_minimumnanos((minNano % 1000000) + 1); - tsStats->set_maximumnanos((maxNano % 1000000) + 1); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); + + proto::TimestampStatistics* tsStats = colStats.mutable_timestamp_statistics(); + tsStats->set_minimum_utc(minSecond * 1000 + minNano / 1000000); + tsStats->set_maximum_utc(maxSecond * 1000 + maxNano / 1000000); + tsStats->set_minimum_nanos((minNano % 1000000) + 1); + tsStats->set_maximum_nanos((maxNano % 1000000) + 1); return colStats; } static proto::ColumnStatistics createStringStats(std::string min, std::string max, bool hasNull = false) { proto::ColumnStatistics colStats; - colStats.set_hasnull(hasNull); - colStats.set_numberofvalues(10); + colStats.set_has_null(hasNull); + colStats.set_number_of_values(10); - proto::StringStatistics* strStats = colStats.mutable_stringstatistics(); + proto::StringStatistics* strStats = colStats.mutable_string_statistics(); strStats->set_minimum(min); strStats->set_maximum(max); return colStats; diff --git a/c++/test/TestSargsApplier.cc b/c++/test/TestSargsApplier.cc index ebf8f0a646..2ba927a5fd 100644 --- a/c++/test/TestSargsApplier.cc +++ b/c++/test/TestSargsApplier.cc @@ -56,8 +56,8 @@ namespace orc { static proto::ColumnStatistics createIntStats(int64_t min, int64_t max, bool hasNull = false) { proto::ColumnStatistics statistics; - statistics.set_hasnull(hasNull); - auto intStats = statistics.mutable_intstatistics(); + statistics.set_has_null(hasNull); + auto intStats = statistics.mutable_int_statistics(); intStats->set_minimum(min); intStats->set_maximum(max); return statistics; @@ -117,10 +117,10 @@ namespace orc { { orc::proto::StripeStatistics stripeStats; proto::ColumnStatistics structStatistics; - structStatistics.set_hasnull(false); - *stripeStats.add_colstats() = structStatistics; - *stripeStats.add_colstats() = createIntStats(0L, 10L); - *stripeStats.add_colstats() = createIntStats(0L, 50L); + structStatistics.set_has_null(false); + *stripeStats.add_col_stats() = structStatistics; + *stripeStats.add_col_stats() = createIntStats(0L, 10L); + *stripeStats.add_col_stats() = createIntStats(0L, 50L); ReaderMetrics metrics; SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135, &metrics); EXPECT_FALSE(applier.evaluateStripeStatistics(stripeStats, 1)); @@ -131,10 +131,10 @@ namespace orc { { orc::proto::StripeStatistics stripeStats; proto::ColumnStatistics structStatistics; - structStatistics.set_hasnull(false); - *stripeStats.add_colstats() = structStatistics; - *stripeStats.add_colstats() = createIntStats(0L, 50L); - *stripeStats.add_colstats() = createIntStats(0L, 50L); + structStatistics.set_has_null(false); + *stripeStats.add_col_stats() = structStatistics; + *stripeStats.add_col_stats() = createIntStats(0L, 50L); + *stripeStats.add_col_stats() = createIntStats(0L, 50L); ReaderMetrics metrics; SargsApplier applier(*type, sarg.get(), 1000, WriterVersion_ORC_135, &metrics); EXPECT_TRUE(applier.evaluateStripeStatistics(stripeStats, 1)); @@ -145,7 +145,7 @@ namespace orc { { orc::proto::Footer footer; proto::ColumnStatistics structStatistics; - structStatistics.set_hasnull(false); + structStatistics.set_has_null(false); *footer.add_statistics() = structStatistics; *footer.add_statistics() = createIntStats(0L, 10L); *footer.add_statistics() = createIntStats(0L, 50L); @@ -159,7 +159,7 @@ namespace orc { { orc::proto::Footer footer; proto::ColumnStatistics structStatistics; - structStatistics.set_hasnull(false); + structStatistics.set_has_null(false); *footer.add_statistics() = structStatistics; *footer.add_statistics() = createIntStats(0L, 50L); *footer.add_statistics() = createIntStats(0L, 30L); @@ -173,7 +173,7 @@ namespace orc { { orc::proto::Footer footer; proto::ColumnStatistics structStatistics; - structStatistics.set_hasnull(false); + structStatistics.set_has_null(false); *footer.add_statistics() = structStatistics; *footer.add_statistics() = createIntStats(0L, 50L); *footer.add_statistics() = createIntStats(0L, 50L); diff --git a/c++/test/TestType.cc b/c++/test/TestType.cc index c1d7e360f5..c9ac2f2850 100644 --- a/c++/test/TestType.cc +++ b/c++/test/TestType.cc @@ -393,14 +393,14 @@ namespace orc { illStructType.set_kind(proto::Type_Kind_STRUCT); structType.set_kind(proto::Type_Kind_STRUCT); structType.add_subtypes(0); // construct a loop back to root - structType.add_fieldnames("root"); + structType.add_field_names("root"); illStructType.add_subtypes(1); - illStructType.add_fieldnames("f1"); + illStructType.add_field_names("f1"); illStructType.add_subtypes(2); *(footer.add_types()) = illStructType; *(footer.add_types()) = structType; testCorruptHelper(illStructType, footer, - "Illegal STRUCT type that contains less fieldnames than subtypes"); + "Illegal STRUCT type that contains less field_names than subtypes"); } void expectParseError(const proto::Footer& footer, const char* errMsg) { @@ -421,33 +421,33 @@ namespace orc { rootType.set_kind(proto::Type_Kind_STRUCT); rootType.add_subtypes(1); // add a non existent type id - rootType.add_fieldnames("f1"); + rootType.add_field_names("f1"); *(footer.add_types()) = rootType; expectParseError(footer, "Footer is corrupt: types(1) not exists"); footer.clear_types(); rootType.clear_subtypes(); - rootType.clear_fieldnames(); + rootType.clear_field_names(); proto::Type structType; structType.set_kind(proto::Type_Kind_STRUCT); structType.add_subtypes(0); // construct a loop back to root - structType.add_fieldnames("root"); + structType.add_field_names("root"); rootType.add_subtypes(1); - rootType.add_fieldnames("f1"); + rootType.add_field_names("f1"); *(footer.add_types()) = rootType; *(footer.add_types()) = structType; expectParseError(footer, "Footer is corrupt: malformed link from type 1 to 0"); footer.clear_types(); rootType.clear_subtypes(); - rootType.clear_fieldnames(); + rootType.clear_field_names(); proto::Type listType; listType.set_kind(proto::Type_Kind_LIST); proto::Type mapType; mapType.set_kind(proto::Type_Kind_MAP); proto::Type unionType; unionType.set_kind(proto::Type_Kind_UNION); - rootType.add_fieldnames("f1"); + rootType.add_field_names("f1"); rootType.add_subtypes(1); // 0 -> 1 listType.add_subtypes(2); // 1 -> 2 mapType.add_subtypes(3); // 2 -> 3 @@ -460,15 +460,15 @@ namespace orc { footer.clear_types(); rootType.clear_subtypes(); - rootType.clear_fieldnames(); + rootType.clear_field_names(); proto::Type intType; intType.set_kind(proto::Type_Kind_INT); proto::Type strType; strType.set_kind(proto::Type_Kind_STRING); rootType.add_subtypes(2); - rootType.add_fieldnames("f2"); + rootType.add_field_names("f2"); rootType.add_subtypes(1); - rootType.add_fieldnames("f1"); + rootType.add_field_names("f1"); *(footer.add_types()) = rootType; *(footer.add_types()) = intType; *(footer.add_types()) = strType; @@ -476,7 +476,7 @@ namespace orc { footer.clear_types(); rootType.clear_subtypes(); - rootType.clear_fieldnames(); + rootType.clear_field_names(); rootType.set_kind(proto::Type_Kind_STRUCT); rootType.add_subtypes(1); *(footer.add_types()) = rootType; @@ -485,7 +485,7 @@ namespace orc { "Footer is corrupt: STRUCT type 0 has 1 subTypes, but has 0 fieldNames"); // Should pass the check after adding the field name footer.clear_types(); - rootType.add_fieldnames("f1"); + rootType.add_field_names("f1"); *(footer.add_types()) = rootType; *(footer.add_types()) = intType; checkProtoTypes(footer); diff --git a/cmake_modules/ThirdpartyToolchain.cmake b/cmake_modules/ThirdpartyToolchain.cmake index d92c926ebd..58ec26221d 100644 --- a/cmake_modules/ThirdpartyToolchain.cmake +++ b/cmake_modules/ThirdpartyToolchain.cmake @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. -set(ORC_FORMAT_VERSION "1.0.0-alpha") +set(ORC_FORMAT_VERSION "1.0.0-beta") set(LZ4_VERSION "1.9.3") set(SNAPPY_VERSION "1.1.7") set(ZLIB_VERSION "1.2.11") @@ -73,7 +73,7 @@ endif () # ORC Format ExternalProject_Add (orc-format_ep URL "https://github.com/apache/orc-format/archive/refs/tags/v${ORC_FORMAT_VERSION}.tar.gz" - URL_HASH SHA256=d04e878feec01dd9a3ce20553c0bfc70e856a319fe8693725a699bb077d0d286 + URL_HASH SHA256=28184fac3f182be5ead6e31972222b57754532ae94a89c13e9b9733378b60149 CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java index 8eae7a7cde..bb6bc9e79f 100644 --- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java +++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java @@ -4157,9 +4157,9 @@ public void testFutureOrcFile(Version fileFormat) throws Exception { String m = e.getMessage(); assertTrue(m.contains("version1999.orc was written by a future ORC version 19.99.")); assertTrue(m.contains("This file is not readable by this version of ORC.")); - assertTrue(m.contains("Postscript: footerLength: 19 compression: NONE " + - "compressionBlockSize: 65536 version: 19 version: 99 metadataLength: 0 " + - "writerVersion: 1")); + assertTrue(m.contains("Postscript: footer_length: 19 compression: NONE " + + "compression_block_size: 65536 version: 19 version: 99 metadata_length: 0 " + + "writer_version: 1")); } } diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc index 809d6f3b64..bfc444489c 100644 --- a/tools/test/TestFileMetadata.cc +++ b/tools/test/TestFileMetadata.cc @@ -30,50 +30,50 @@ TEST(TestFileMetadata, testRaw) { const std::string expected = "Raw file tail: " + file + "\n" "postscript {\n" - " footerLength: 288\n" + " footer_length: 288\n" " compression: NONE\n" " version: 0\n" " version: 12\n" - " metadataLength: 526\n" + " metadata_length: 526\n" " magic: \"ORC\"\n" "}\n" "footer {\n" - " headerLength: 3\n" - " contentLength: 245568\n" + " header_length: 3\n" + " content_length: 245568\n" " stripes {\n" " offset: 3\n" - " indexLength: 137\n" - " dataLength: 45282\n" - " footerLength: 149\n" - " numberOfRows: 5000\n" + " index_length: 137\n" + " data_length: 45282\n" + " footer_length: 149\n" + " number_of_rows: 5000\n" " }\n" " stripes {\n" " offset: 45571\n" - " indexLength: 137\n" - " dataLength: 45282\n" - " footerLength: 149\n" - " numberOfRows: 5000\n" + " index_length: 137\n" + " data_length: 45282\n" + " footer_length: 149\n" + " number_of_rows: 5000\n" " }\n" " stripes {\n" " offset: 91139\n" - " indexLength: 137\n" - " dataLength: 45282\n" - " footerLength: 149\n" - " numberOfRows: 5000\n" + " index_length: 137\n" + " data_length: 45282\n" + " footer_length: 149\n" + " number_of_rows: 5000\n" " }\n" " stripes {\n" " offset: 136707\n" - " indexLength: 138\n" - " dataLength: 45283\n" - " footerLength: 149\n" - " numberOfRows: 5000\n" + " index_length: 138\n" + " data_length: 45283\n" + " footer_length: 149\n" + " number_of_rows: 5000\n" " }\n" " stripes {\n" " offset: 200000\n" - " indexLength: 137\n" - " dataLength: 45282\n" - " footerLength: 149\n" - " numberOfRows: 5000\n" + " index_length: 137\n" + " data_length: 45282\n" + " footer_length: 149\n" + " number_of_rows: 5000\n" " }\n" " types {\n" " kind: STRUCT\n" @@ -82,11 +82,11 @@ TEST(TestFileMetadata, testRaw) { " subtypes: 3\n" " subtypes: 4\n" " subtypes: 5\n" - " fieldNames: \"userid\"\n" - " fieldNames: \"string1\"\n" - " fieldNames: \"subtype\"\n" - " fieldNames: \"decimal1\"\n" - " fieldNames: \"ts\"\n" + " field_names: \"userid\"\n" + " field_names: \"string1\"\n" + " field_names: \"subtype\"\n" + " field_names: \"decimal1\"\n" + " field_names: \"ts\"\n" " }\n" " types {\n" " kind: LONG\n" @@ -103,49 +103,49 @@ TEST(TestFileMetadata, testRaw) { " types {\n" " kind: TIMESTAMP\n" " }\n" - " numberOfRows: 25000\n" + " number_of_rows: 25000\n" " statistics {\n" - " numberOfValues: 25000\n" + " number_of_values: 25000\n" " }\n" " statistics {\n" - " numberOfValues: 25000\n" - " intStatistics {\n" + " number_of_values: 25000\n" + " int_statistics {\n" " minimum: 2\n" " maximum: 100\n" " sum: 2499619\n" " }\n" " }\n" " statistics {\n" - " numberOfValues: 25000\n" - " stringStatistics {\n" + " number_of_values: 25000\n" + " string_statistics {\n" " minimum: \"bar\"\n" " maximum: \"zebra\"\n" " sum: 124990\n" " }\n" " }\n" " statistics {\n" - " numberOfValues: 25000\n" - " doubleStatistics {\n" + " number_of_values: 25000\n" + " double_statistics {\n" " minimum: 0.8\n" " maximum: 80\n" " sum: 200051.40000000002\n" " }\n" " }\n" " statistics {\n" - " numberOfValues: 25000\n" - " decimalStatistics {\n" + " number_of_values: 25000\n" + " decimal_statistics {\n" " minimum: \"0\"\n" " maximum: \"5.5\"\n" " sum: \"16.6\"\n" " }\n" " }\n" " statistics {\n" - " numberOfValues: 25000\n" + " number_of_values: 25000\n" " }\n" - " rowIndexStride: 10000\n" + " row_index_stride: 10000\n" "}\n" - "fileLength: 246402\n" - "postscriptLength: 19\n"; + "file_length: 246402\n" + "postscript_length: 19\n"; std::string output; std::string error;