From 7790183cb6280cf5b2b0c2e7f8fc2948072e8e35 Mon Sep 17 00:00:00 2001 From: Wes McKinney Date: Wed, 21 Dec 2016 14:54:53 +0100 Subject: [PATCH] PARQUET-812: Read BYTE_ARRAY with no logical type as arrow::BinaryArray Depends on ARROW-374. Need to merge that and update the thirdparty git hash Author: Wes McKinney Closes #206 from wesm/PARQUET-812 and squashes the following commits: 73fb8d0 [Wes McKinney] Update thirdparty arrow version 5db7908 [Wes McKinney] typo fc0d559 [Wes McKinney] Read unadorned BYTE_ARRAY into arrow::BinaryArray Change-Id: I9466e3eb082fee464e832b3106223ac48b0afb4a --- .../parquet/arrow/arrow-reader-writer-test.cc | 15 +++-- cpp/src/parquet/arrow/arrow-schema-test.cc | 29 +++++----- cpp/src/parquet/arrow/reader.cc | 26 ++++++++- cpp/src/parquet/arrow/schema.cc | 58 +++++++------------ cpp/src/parquet/arrow/test-util.h | 21 ++++--- cpp/src/parquet/arrow/utils.h | 10 ++-- cpp/src/parquet/arrow/writer.cc | 15 ++--- cpp/src/parquet/column/column-writer-test.cc | 3 +- cpp/src/parquet/compression/brotli-codec.cc | 4 +- cpp/src/parquet/compression/codec.h | 4 +- cpp/src/parquet/schema/printer.cc | 11 ++-- cpp/src/parquet/schema/schema-printer-test.cc | 4 +- 12 files changed, 107 insertions(+), 93 deletions(-) diff --git a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc index a8a5db06995e8..6d2b0d5f85502 100644 --- a/cpp/src/parquet/arrow/arrow-reader-writer-test.cc +++ b/cpp/src/parquet/arrow/arrow-reader-writer-test.cc @@ -167,7 +167,15 @@ struct test_traits<::arrow::StringType> { static std::string const value; }; +template <> +struct test_traits<::arrow::BinaryType> { + static constexpr ParquetType::type parquet_enum = ParquetType::BYTE_ARRAY; + static constexpr LogicalType::type logical_enum = LogicalType::NONE; + static std::string const value; +}; + const std::string test_traits<::arrow::StringType>::value("Test"); +const std::string test_traits<::arrow::BinaryType>::value("\x00\x01\x02\x03"); template using ParquetDataType = DataType::parquet_enum>; @@ -247,7 +255,7 @@ class TestParquetIO : public ::testing::Test { std::shared_ptr sink_; }; -// We habe separate tests for UInt32Type as this is currently the only type +// We have separate tests for UInt32Type as this is currently the only type // where a roundtrip does not yield the identical Array structure. // There we write an UInt32 Array but receive an Int64 Array as result for // Parquet version 1.0. @@ -255,7 +263,7 @@ class TestParquetIO : public ::testing::Test { typedef ::testing::Types<::arrow::BooleanType, ::arrow::UInt8Type, ::arrow::Int8Type, ::arrow::UInt16Type, ::arrow::Int16Type, ::arrow::Int32Type, ::arrow::UInt64Type, ::arrow::Int64Type, ::arrow::TimestampType, ::arrow::FloatType, ::arrow::DoubleType, - ::arrow::StringType> + ::arrow::StringType, ::arrow::BinaryType> TestTypes; TYPED_TEST_CASE(TestParquetIO, TestTypes); @@ -504,8 +512,7 @@ using TestStringParquetIO = TestParquetIO<::arrow::StringType>; TEST_F(TestStringParquetIO, EmptyStringColumnRequiredWrite) { std::shared_ptr values; - ::arrow::StringBuilder builder( - ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>()); + ::arrow::StringBuilder builder(::arrow::default_memory_pool(), ::arrow::utf8()); for (size_t i = 0; i < SMALL_SIZE; i++) { builder.Append(""); } diff --git a/cpp/src/parquet/arrow/arrow-schema-test.cc b/cpp/src/parquet/arrow/arrow-schema-test.cc index 3d075617de259..43e57d8e87e1e 100644 --- a/cpp/src/parquet/arrow/arrow-schema-test.cc +++ b/cpp/src/parquet/arrow/arrow-schema-test.cc @@ -38,21 +38,19 @@ namespace parquet { namespace arrow { -const auto BOOL = std::make_shared<::arrow::BooleanType>(); -const auto UINT8 = std::make_shared<::arrow::UInt8Type>(); -const auto INT32 = std::make_shared<::arrow::Int32Type>(); -const auto INT64 = std::make_shared<::arrow::Int64Type>(); -const auto FLOAT = std::make_shared<::arrow::FloatType>(); -const auto DOUBLE = std::make_shared<::arrow::DoubleType>(); -const auto UTF8 = std::make_shared<::arrow::StringType>(); -const auto TIMESTAMP_MS = - std::make_shared<::arrow::TimestampType>(::arrow::TimestampType::Unit::MILLI); -const auto TIMESTAMP_NS = - std::make_shared<::arrow::TimestampType>(::arrow::TimestampType::Unit::NANO); +const auto BOOL = ::arrow::boolean(); +const auto UINT8 = ::arrow::uint8(); +const auto INT32 = ::arrow::int32(); +const auto INT64 = ::arrow::int64(); +const auto FLOAT = ::arrow::float32(); +const auto DOUBLE = ::arrow::float64(); +const auto UTF8 = ::arrow::utf8(); +const auto TIMESTAMP_MS = ::arrow::timestamp(::arrow::TimeUnit::MILLI); +const auto TIMESTAMP_NS = ::arrow::timestamp(::arrow::TimeUnit::NANO); + // TODO: This requires parquet-cpp implementing the MICROS enum value // const auto TIMESTAMP_US = std::make_shared(TimestampType::Unit::MICRO); -const auto BINARY = - std::make_shared<::arrow::ListType>(std::make_shared("", UINT8)); +const auto BINARY = ::arrow::binary(); const auto DECIMAL_8_4 = std::make_shared<::arrow::DecimalType>(8, 4); class TestConvertParquetSchema : public ::testing::Test { @@ -412,11 +410,14 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) { PrimitiveNode::Make("double", Repetition::OPTIONAL, ParquetType::DOUBLE)); arrow_fields.push_back(std::make_shared("double", DOUBLE)); - // TODO: String types need to be clarified a bit more in the Arrow spec parquet_fields.push_back(PrimitiveNode::Make( "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8)); arrow_fields.push_back(std::make_shared("string", UTF8)); + parquet_fields.push_back(PrimitiveNode::Make( + "binary", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::NONE)); + arrow_fields.push_back(std::make_shared("binary", BINARY)); + ASSERT_OK(ConvertSchema(arrow_fields)); CheckFlatSchema(parquet_fields); diff --git a/cpp/src/parquet/arrow/reader.cc b/cpp/src/parquet/arrow/reader.cc index 2efa8066f34ba..135867ccb0207 100644 --- a/cpp/src/parquet/arrow/reader.cc +++ b/cpp/src/parquet/arrow/reader.cc @@ -28,6 +28,7 @@ #include "parquet/arrow/utils.h" #include "arrow/api.h" +#include "arrow/type_traits.h" #include "arrow/util/bit-util.h" using arrow::Array; @@ -90,9 +91,13 @@ class FlatColumnReader::Impl { virtual ~Impl() {} Status NextBatch(int batch_size, std::shared_ptr* out); + template Status TypedReadBatch(int batch_size, std::shared_ptr* out); + template + Status ReadByteArrayBatch(int batch_size, std::shared_ptr* out); + template Status InitDataBuffer(int batch_size); template @@ -486,11 +491,13 @@ Status FlatColumnReader::Impl::TypedReadBatch<::arrow::BooleanType, BooleanType> } } -template <> -Status FlatColumnReader::Impl::TypedReadBatch<::arrow::StringType, ByteArrayType>( +template +Status FlatColumnReader::Impl::ReadByteArrayBatch( int batch_size, std::shared_ptr* out) { + using BuilderType = typename ::arrow::TypeTraits::BuilderType; + int values_to_read = batch_size; - ::arrow::StringBuilder builder(pool_, field_->type); + BuilderType builder(pool_, field_->type); while ((values_to_read > 0) && column_reader_) { values_buffer_.Resize(values_to_read * sizeof(ByteArray)); if (descr_->max_definition_level() > 0) { @@ -528,6 +535,18 @@ Status FlatColumnReader::Impl::TypedReadBatch<::arrow::StringType, ByteArrayType return builder.Finish(out); } +template <> +Status FlatColumnReader::Impl::TypedReadBatch<::arrow::BinaryType, ByteArrayType>( + int batch_size, std::shared_ptr* out) { + return ReadByteArrayBatch<::arrow::BinaryType>(batch_size, out); +} + +template <> +Status FlatColumnReader::Impl::TypedReadBatch<::arrow::StringType, ByteArrayType>( + int batch_size, std::shared_ptr* out) { + return ReadByteArrayBatch<::arrow::StringType>(batch_size, out); +} + #define TYPED_BATCH_CASE(ENUM, ArrowType, ParquetType) \ case ::arrow::Type::ENUM: \ return TypedReadBatch(batch_size, out); \ @@ -553,6 +572,7 @@ Status FlatColumnReader::Impl::NextBatch(int batch_size, std::shared_ptr* TYPED_BATCH_CASE(FLOAT, ::arrow::FloatType, FloatType) TYPED_BATCH_CASE(DOUBLE, ::arrow::DoubleType, DoubleType) TYPED_BATCH_CASE(STRING, ::arrow::StringType, ByteArrayType) + TYPED_BATCH_CASE(BINARY, ::arrow::BinaryType, ByteArrayType) case ::arrow::Type::TIMESTAMP: { ::arrow::TimestampType* timestamp_type = static_cast<::arrow::TimestampType*>(field_->type.get()); diff --git a/cpp/src/parquet/arrow/schema.cc b/cpp/src/parquet/arrow/schema.cc index e578ec2626609..3e5e7d960891a 100644 --- a/cpp/src/parquet/arrow/schema.cc +++ b/cpp/src/parquet/arrow/schema.cc @@ -44,24 +44,8 @@ namespace parquet { namespace arrow { -const auto BOOL = std::make_shared<::arrow::BooleanType>(); -const auto UINT8 = std::make_shared<::arrow::UInt8Type>(); -const auto INT8 = std::make_shared<::arrow::Int8Type>(); -const auto UINT16 = std::make_shared<::arrow::UInt16Type>(); -const auto INT16 = std::make_shared<::arrow::Int16Type>(); -const auto UINT32 = std::make_shared<::arrow::UInt32Type>(); -const auto INT32 = std::make_shared<::arrow::Int32Type>(); -const auto UINT64 = std::make_shared<::arrow::UInt64Type>(); -const auto INT64 = std::make_shared<::arrow::Int64Type>(); -const auto FLOAT = std::make_shared<::arrow::FloatType>(); -const auto DOUBLE = std::make_shared<::arrow::DoubleType>(); -const auto UTF8 = std::make_shared<::arrow::StringType>(); -const auto TIMESTAMP_MS = - std::make_shared<::arrow::TimestampType>(::arrow::TimestampType::Unit::MILLI); -const auto TIMESTAMP_NS = - std::make_shared<::arrow::TimestampType>(::arrow::TimestampType::Unit::NANO); -const auto BINARY = - std::make_shared<::arrow::ListType>(std::make_shared<::arrow::Field>("", UINT8)); +const auto TIMESTAMP_MS = ::arrow::timestamp(::arrow::TimeUnit::MILLI); +const auto TIMESTAMP_NS = ::arrow::timestamp(::arrow::TimeUnit::NANO); TypePtr MakeDecimalType(const PrimitiveNode* node) { int precision = node->decimal_metadata().precision; @@ -72,14 +56,14 @@ TypePtr MakeDecimalType(const PrimitiveNode* node) { static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) { switch (node->logical_type()) { case LogicalType::UTF8: - *out = UTF8; + *out = ::arrow::utf8(); break; case LogicalType::DECIMAL: *out = MakeDecimalType(node); break; default: // BINARY - *out = BINARY; + *out = ::arrow::binary(); break; } return Status::OK(); @@ -88,7 +72,7 @@ static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) { static Status FromFLBA(const PrimitiveNode* node, TypePtr* out) { switch (node->logical_type()) { case LogicalType::NONE: - *out = BINARY; + *out = ::arrow::binary(); break; case LogicalType::DECIMAL: *out = MakeDecimalType(node); @@ -104,22 +88,22 @@ static Status FromFLBA(const PrimitiveNode* node, TypePtr* out) { static Status FromInt32(const PrimitiveNode* node, TypePtr* out) { switch (node->logical_type()) { case LogicalType::NONE: - *out = INT32; + *out = ::arrow::int32(); break; case LogicalType::UINT_8: - *out = UINT8; + *out = ::arrow::uint8(); break; case LogicalType::INT_8: - *out = INT8; + *out = ::arrow::int8(); break; case LogicalType::UINT_16: - *out = UINT16; + *out = ::arrow::uint16(); break; case LogicalType::INT_16: - *out = INT16; + *out = ::arrow::int16(); break; case LogicalType::UINT_32: - *out = UINT32; + *out = ::arrow::uint32(); break; case LogicalType::DECIMAL: *out = MakeDecimalType(node); @@ -134,10 +118,10 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) { static Status FromInt64(const PrimitiveNode* node, TypePtr* out) { switch (node->logical_type()) { case LogicalType::NONE: - *out = INT64; + *out = ::arrow::int64(); break; case LogicalType::UINT_64: - *out = UINT64; + *out = ::arrow::uint64(); break; case LogicalType::DECIMAL: *out = MakeDecimalType(node); @@ -155,7 +139,7 @@ static Status FromInt64(const PrimitiveNode* node, TypePtr* out) { Status FromPrimitive(const PrimitiveNode* primitive, TypePtr* out) { switch (primitive->physical_type()) { case ParquetType::BOOLEAN: - *out = BOOL; + *out = ::arrow::boolean(); break; case ParquetType::INT32: RETURN_NOT_OK(FromInt32(primitive, out)); @@ -167,13 +151,12 @@ Status FromPrimitive(const PrimitiveNode* primitive, TypePtr* out) { *out = TIMESTAMP_NS; break; case ParquetType::FLOAT: - *out = FLOAT; + *out = ::arrow::float32(); break; case ParquetType::DOUBLE: - *out = DOUBLE; + *out = ::arrow::float64(); break; case ParquetType::BYTE_ARRAY: - // TODO: Do we have that type in Arrow? RETURN_NOT_OK(FromByteArray(primitive, out)); break; case ParquetType::FIXED_LEN_BYTE_ARRAY: @@ -211,13 +194,13 @@ Status NodeToList(const GroupNode* group, TypePtr* out) { // List of primitive type std::shared_ptr item_field; RETURN_NOT_OK(NodeToField(list_group->field(0), &item_field)); - *out = std::make_shared<::arrow::ListType>(item_field); + *out = ::arrow::list(item_field); } else { // List of struct std::shared_ptr<::arrow::DataType> inner_type; RETURN_NOT_OK(StructFromGroup(list_group, &inner_type)); auto item_field = std::make_shared(list_node->name(), inner_type, false); - *out = std::make_shared<::arrow::ListType>(item_field); + *out = ::arrow::list(item_field); } } else if (list_node->is_repeated()) { // repeated primitive node @@ -225,7 +208,7 @@ Status NodeToList(const GroupNode* group, TypePtr* out) { const PrimitiveNode* primitive = static_cast(list_node.get()); RETURN_NOT_OK(FromPrimitive(primitive, &inner_type)); auto item_field = std::make_shared(list_node->name(), inner_type, false); - *out = std::make_shared<::arrow::ListType>(item_field); + *out = ::arrow::list(item_field); } else { return Status::NotImplemented( "Non-repeated groups in a LIST-annotated group are not supported."); @@ -247,7 +230,7 @@ Status NodeToField(const NodePtr& node, std::shared_ptr* out) { const PrimitiveNode* primitive = static_cast(node.get()); RETURN_NOT_OK(FromPrimitive(primitive, &inner_type)); auto item_field = std::make_shared(node->name(), inner_type, false); - type = std::make_shared<::arrow::ListType>(item_field); + type = ::arrow::list(item_field); nullable = false; } else if (node->is_group()) { const GroupNode* group = static_cast(node.get()); @@ -423,5 +406,4 @@ Status ToParquetSchema(const ::arrow::Schema* arrow_schema, } } // namespace arrow - } // namespace parquet diff --git a/cpp/src/parquet/arrow/test-util.h b/cpp/src/parquet/arrow/test-util.h index bdb2bab0a61a4..f996c2c11779c 100644 --- a/cpp/src/parquet/arrow/test-util.h +++ b/cpp/src/parquet/arrow/test-util.h @@ -36,6 +36,9 @@ using is_arrow_int = std::is_integral; template using is_arrow_string = std::is_same; +template +using is_arrow_binary = std::is_same; + template using is_arrow_bool = std::is_same; @@ -62,10 +65,11 @@ typename std::enable_if::value, Status>::type NonNullArr } template -typename std::enable_if::value, Status>::type NonNullArray( - size_t size, std::shared_ptr* out) { - ::arrow::StringBuilder builder( - ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>()); +typename std::enable_if< + is_arrow_string::value || is_arrow_binary::value, Status>::type +NonNullArray(size_t size, std::shared_ptr* out) { + using BuilderType = typename ::arrow::TypeTraits::BuilderType; + BuilderType builder(::arrow::default_memory_pool(), std::make_shared()); for (size_t i = 0; i < size; i++) { builder.Append("test-string"); } @@ -121,16 +125,17 @@ typename std::enable_if::value, Status>::type NullableAr // This helper function only supports (size/2) nulls yet. template -typename std::enable_if::value, Status>::type NullableArray( - size_t size, size_t num_nulls, std::shared_ptr<::arrow::Array>* out) { +typename std::enable_if< + is_arrow_string::value || is_arrow_binary::value, Status>::type +NullableArray(size_t size, size_t num_nulls, std::shared_ptr<::arrow::Array>* out) { std::vector valid_bytes(size, 1); for (size_t i = 0; i < num_nulls; i++) { valid_bytes[i * 2] = 0; } - ::arrow::StringBuilder builder( - ::arrow::default_memory_pool(), std::make_shared<::arrow::StringType>()); + using BuilderType = typename ::arrow::TypeTraits::BuilderType; + BuilderType builder(::arrow::default_memory_pool(), std::make_shared()); for (size_t i = 0; i < size; i++) { builder.Append("test-string"); } diff --git a/cpp/src/parquet/arrow/utils.h b/cpp/src/parquet/arrow/utils.h index 015421bb915f9..9c2abfae879a1 100644 --- a/cpp/src/parquet/arrow/utils.h +++ b/cpp/src/parquet/arrow/utils.h @@ -26,11 +26,11 @@ namespace parquet { namespace arrow { -#define PARQUET_CATCH_NOT_OK(s) \ - try { \ - (s); \ - } catch (const ::parquet::ParquetException& e) { \ - return ::arrow::Status::IOError(e.what()); \ +#define PARQUET_CATCH_NOT_OK(s) \ + try { \ + (s); \ + } catch (const ::parquet::ParquetException& e) { \ + return ::arrow::Status::IOError(e.what()); \ } #define PARQUET_IGNORE_NOT_OK(s) \ diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index e588c844bb3c7..b7663a3a716c8 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -28,11 +28,11 @@ #include "arrow/api.h" +using arrow::BinaryArray; using arrow::MemoryPool; using arrow::PoolBuffer; using arrow::PrimitiveArray; using arrow::Status; -using arrow::StringArray; using arrow::Table; using parquet::ParquetFileWriter; @@ -82,7 +82,7 @@ class FileWriter::Impl { } Status WriteFlatColumnChunk(const PrimitiveArray* data, int64_t offset, int64_t length); - Status WriteFlatColumnChunk(const StringArray* data, int64_t offset, int64_t length); + Status WriteFlatColumnChunk(const BinaryArray* data, int64_t offset, int64_t length); Status Close(); virtual ~Impl() {} @@ -253,7 +253,7 @@ Status FileWriter::Impl::WriteFlatColumnChunk( } Status FileWriter::Impl::WriteFlatColumnChunk( - const StringArray* data, int64_t offset, int64_t length) { + const BinaryArray* data, int64_t offset, int64_t length) { ColumnWriter* column_writer; PARQUET_CATCH_NOT_OK(column_writer = row_group_writer_->NextColumn()); DCHECK((offset + length) <= data->length()); @@ -312,10 +312,11 @@ Status FileWriter::WriteFlatColumnChunk( const ::arrow::Array* array, int64_t offset, int64_t length) { int64_t real_length = length; if (length == -1) { real_length = array->length(); } - if (array->type_enum() == ::arrow::Type::STRING) { - auto string_array = dynamic_cast(array); - DCHECK(string_array); - return impl_->WriteFlatColumnChunk(string_array, offset, real_length); + if (array->type_enum() == ::arrow::Type::STRING || + array->type_enum() == ::arrow::Type::BINARY) { + auto binary_array = static_cast(array); + DCHECK(binary_array); + return impl_->WriteFlatColumnChunk(binary_array, offset, real_length); } else { auto primitive_array = dynamic_cast(array); if (!primitive_array) { diff --git a/cpp/src/parquet/column/column-writer-test.cc b/cpp/src/parquet/column/column-writer-test.cc index 5a65175c70a98..68d79d16d3a23 100644 --- a/cpp/src/parquet/column/column-writer-test.cc +++ b/cpp/src/parquet/column/column-writer-test.cc @@ -214,7 +214,8 @@ void TestPrimitiveWriter::ReadColumnFully(Compression::type compressio } typedef ::testing::Types TestTypes; + BooleanType, ByteArrayType, FLBAType> + TestTypes; TYPED_TEST_CASE(TestPrimitiveWriter, TestTypes); diff --git a/cpp/src/parquet/compression/brotli-codec.cc b/cpp/src/parquet/compression/brotli-codec.cc index 24ff230fa9e8a..81182068d92dc 100644 --- a/cpp/src/parquet/compression/brotli-codec.cc +++ b/cpp/src/parquet/compression/brotli-codec.cc @@ -15,10 +15,10 @@ // specific language governing permissions and limitations // under the License. -#include -#include #include #include +#include +#include #include "parquet/compression/codec.h" #include "parquet/exception.h" diff --git a/cpp/src/parquet/compression/codec.h b/cpp/src/parquet/compression/codec.h index e803a8c6d2b5c..abd4899654ae4 100644 --- a/cpp/src/parquet/compression/codec.h +++ b/cpp/src/parquet/compression/codec.h @@ -65,8 +65,8 @@ class BrotliCodec : public Codec { void Decompress(int64_t input_len, const uint8_t* input, int64_t output_len, uint8_t* output_buffer) override; - int64_t Compress(int64_t input_len, const uint8_t* input, - int64_t output_buffer_len, uint8_t* output_buffer) override; + int64_t Compress(int64_t input_len, const uint8_t* input, int64_t output_buffer_len, + uint8_t* output_buffer) override; int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) override; diff --git a/cpp/src/parquet/schema/printer.cc b/cpp/src/parquet/schema/printer.cc index c4ab3e77ab850..ca11244350013 100644 --- a/cpp/src/parquet/schema/printer.cc +++ b/cpp/src/parquet/schema/printer.cc @@ -96,9 +96,8 @@ static void PrintType(const PrimitiveNode* node, std::ostream& stream) { static void PrintLogicalType(const PrimitiveNode* node, std::ostream& stream) { auto lt = node->logical_type(); if (lt == LogicalType::DECIMAL) { - stream << " (" << LogicalTypeToString(lt) << "(" << - node->decimal_metadata().precision << "," << - node->decimal_metadata().scale << "))"; + stream << " (" << LogicalTypeToString(lt) << "(" << node->decimal_metadata().precision + << "," << node->decimal_metadata().scale << "))"; } else if (lt != LogicalType::NONE) { stream << " (" << LogicalTypeToString(lt) << ")"; } @@ -120,10 +119,8 @@ void SchemaPrinter::Visit(const GroupNode* node) { PrintRepLevel(node->repetition(), stream_); stream_ << " group " << node->name(); auto lt = node->logical_type(); - if (lt != LogicalType::NONE) { - stream_ << " (" << LogicalTypeToString(lt) << ")"; - } - stream_ << " {" << std::endl; + if (lt != LogicalType::NONE) { stream_ << " (" << LogicalTypeToString(lt) << ")"; } + stream_ << " {" << std::endl; } indent_ += indent_width_; diff --git a/cpp/src/parquet/schema/schema-printer-test.cc b/cpp/src/parquet/schema/schema-printer-test.cc index e594f6feee6d4..29140f0d784a6 100644 --- a/cpp/src/parquet/schema/schema-printer-test.cc +++ b/cpp/src/parquet/schema/schema-printer-test.cc @@ -51,8 +51,8 @@ TEST(TestSchemaPrinter, Examples) { NodePtr bag(GroupNode::Make("bag", Repetition::OPTIONAL, {list})); fields.push_back(bag); - fields.push_back(PrimitiveNode::Make("c", Repetition::REQUIRED, Type::INT32, - LogicalType::DECIMAL, -1, 3, 2)); + fields.push_back(PrimitiveNode::Make( + "c", Repetition::REQUIRED, Type::INT32, LogicalType::DECIMAL, -1, 3, 2)); NodePtr schema = GroupNode::Make("schema", Repetition::REPEATED, fields);