From 2d8627cd81f83783b0ceb01d137a46b581ecba26 Mon Sep 17 00:00:00 2001
From: "Uwe L. Korn"
Date: Mon, 28 Mar 2016 10:49:08 -0700
Subject: [PATCH] ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion

See also: https://github.com/Parquet/parquet-format/blob/master/LogicalTypes.md#decimal

Author: Uwe L. Korn

Closes #48 from xhochy/arrow-87 and squashes the following commits:

05ca3be [Uwe L. Korn] Use parquet:: namespace instead of parquet_cpp
6bafc5f [Uwe L. Korn] ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion
---
 cpp/src/arrow/parquet/parquet-schema-test.cc | 36 ++++++++++++++++++++
 cpp/src/arrow/parquet/schema.cc              |  9 +++++
 2 files changed, 45 insertions(+)

diff --git a/cpp/src/arrow/parquet/parquet-schema-test.cc b/cpp/src/arrow/parquet/parquet-schema-test.cc
index 02a8caf03c9bd..a289ddbfde6eb 100644
--- a/cpp/src/arrow/parquet/parquet-schema-test.cc
+++ b/cpp/src/arrow/parquet/parquet-schema-test.cc
@@ -22,6 +22,7 @@
 
 #include "arrow/test-util.h"
 #include "arrow/type.h"
+#include "arrow/types/decimal.h"
 #include "arrow/util/status.h"
 
 #include "arrow/parquet/schema.h"
@@ -46,6 +47,7 @@ const auto DOUBLE = std::make_shared<DoubleType>();
 const auto UTF8 = std::make_shared<StringType>();
 const auto BINARY = std::make_shared<ListType>(
     std::make_shared<Field>("", UINT8));
+const auto DECIMAL_8_4 = std::make_shared<DecimalType>(8, 4);
 
 class TestConvertParquetSchema : public ::testing::Test {
  public:
@@ -119,6 +121,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
   CheckFlatSchema(arrow_schema);
 }
 
+TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
+  std::vector<NodePtr> parquet_fields;
+  std::vector<std::shared_ptr<Field>> arrow_fields;
+
+  parquet_fields.push_back(
+      PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
+          ParquetType::FIXED_LEN_BYTE_ARRAY,
+          LogicalType::DECIMAL, 4, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
+
+  parquet_fields.push_back(
+      PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
+          ParquetType::BYTE_ARRAY,
+          LogicalType::DECIMAL, -1, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
+
+  parquet_fields.push_back(
+      PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
+          ParquetType::INT32,
+          LogicalType::DECIMAL, -1, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
+
+  parquet_fields.push_back(
+      PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
+          ParquetType::INT64,
+          LogicalType::DECIMAL, -1, 8, 4));
+  arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
+
+  auto arrow_schema = std::make_shared<Schema>(arrow_fields);
+  ASSERT_OK(ConvertSchema(parquet_fields));
+
+  CheckFlatSchema(arrow_schema);
+}
+
 TEST_F(TestConvertParquetSchema, UnsupportedThings) {
   std::vector<NodePtr> unsupported_nodes;
 
diff --git a/cpp/src/arrow/parquet/schema.cc b/cpp/src/arrow/parquet/schema.cc
index d8eb2addb0ada..14f4f5be53ce9 100644
--- a/cpp/src/arrow/parquet/schema.cc
+++ b/cpp/src/arrow/parquet/schema.cc
@@ -57,6 +57,9 @@ static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) {
     case LogicalType::UTF8:
       *out = UTF8;
       break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
     default:
       // BINARY
       *out = BINARY;
@@ -86,6 +89,9 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
     case LogicalType::NONE:
       *out = INT32;
       break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
     default:
       return Status::NotImplemented("Unhandled logical type for int32");
       break;
@@ -98,6 +104,9 @@ static Status FromInt64(const PrimitiveNode* node, TypePtr* out) {
     case LogicalType::NONE:
       *out = INT64;
       break;
+    case LogicalType::DECIMAL:
+      *out = MakeDecimalType(node);
+      break;
     default:
       return Status::NotImplemented("Unhandled logical type for int64");
       break;