From 1cfb11f8da785d232d5b0a79a97ffed98d799292 Mon Sep 17 00:00:00 2001 From: rdmello Date: Tue, 29 Jan 2019 16:08:14 -0500 Subject: [PATCH] PARQUET-1482: [C++] Adding basic unit test for DataPageV2 serialization and deserialization. --- cpp/src/parquet/file-deserialize-test.cc | 46 ++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/cpp/src/parquet/file-deserialize-test.cc b/cpp/src/parquet/file-deserialize-test.cc index e62968e5d5dc9..99aeb14820831 100644 --- a/cpp/src/parquet/file-deserialize-test.cc +++ b/cpp/src/parquet/file-deserialize-test.cc @@ -90,6 +90,21 @@ class TestPageSerde : public ::testing::Test { ASSERT_NO_THROW(serializer.Serialize(&page_header_, out_stream_.get())); } + void WriteDataPageHeaderV2(int max_serialized_len = 1024, int32_t uncompressed_size = 0, + int32_t compressed_size = 0) { + // Simplifying writing serialized data page V2 headers which may or may not + // have meaningful data associated with them + + // Serialize the Page header + page_header_.__set_data_page_header_v2(data_page_header_v2_); + page_header_.uncompressed_page_size = uncompressed_size; + page_header_.compressed_page_size = compressed_size; + page_header_.type = format::PageType::DATA_PAGE_V2; + + ThriftSerializer serializer; + ASSERT_NO_THROW(serializer.Serialize(&page_header_, out_stream_.get())); + } + void ResetStream() { out_stream_.reset(new InMemoryOutputStream); } void EndStream() { out_buffer_ = out_stream_->GetBuffer(); } @@ -101,6 +116,7 @@ class TestPageSerde : public ::testing::Test { std::unique_ptr page_reader_; format::PageHeader page_header_; format::DataPageHeader data_page_header_; + format::DataPageHeaderV2 data_page_header_v2_; }; void CheckDataPageHeader(const format::DataPageHeader expected, const Page* page) { @@ -120,6 +136,24 @@ void CheckDataPageHeader(const format::DataPageHeader expected, const Page* page } } +// Overload for DataPageV2 tests. +void CheckDataPageHeader(const format::DataPageHeaderV2 expected, const Page* page) { + ASSERT_EQ(PageType::DATA_PAGE_V2, page->type()); + + const DataPageV2* data_page = static_cast(page); + ASSERT_EQ(expected.num_values, data_page->num_values()); + ASSERT_EQ(expected.num_nulls, data_page->num_nulls()); + ASSERT_EQ(expected.num_rows, data_page->num_rows()); + ASSERT_EQ(expected.encoding, data_page->encoding()); + ASSERT_EQ(expected.definition_levels_byte_length, + data_page->definition_levels_byte_length()); + ASSERT_EQ(expected.repetition_levels_byte_length, + data_page->repetition_levels_byte_length()); + ASSERT_EQ(expected.is_compressed, data_page->is_compressed()); + + // TODO: Tests for DataPageHeaderV2 statistics. +} + TEST_F(TestPageSerde, DataPage) { format::PageHeader out_page_header; @@ -134,6 +168,18 @@ TEST_F(TestPageSerde, DataPage) { ASSERT_NO_FATAL_FAILURE(CheckDataPageHeader(data_page_header_, current_page.get())); } +TEST_F(TestPageSerde, DataPageV2) { + format::PageHeader out_page_header; + + const int32_t num_rows = 4444; + data_page_header_.num_values = num_rows; + + ASSERT_NO_FATAL_FAILURE(WriteDataPageHeaderV2()); + InitSerializedPageReader(num_rows); + std::shared_ptr current_page = page_reader_->NextPage(); + ASSERT_NO_FATAL_FAILURE(CheckDataPageHeader(data_page_header_v2_, current_page.get())); +} + TEST_F(TestPageSerde, TestLargePageHeaders) { int stats_size = 256 * 1024; // 256 KB AddDummyStats(stats_size, data_page_header_);