Skip to content

Commit

Permalink
ARROW-87: [C++] Add all four possible ways to encode Decimals in Parq…
Browse files Browse the repository at this point in the history
…uet to schema conversion

See also: https://github.com/Parquet/parquet-format/blob/master/LogicalTypes.md#decimal

Author: Uwe L. Korn <uwelk@xhochy.com>

Closes #48 from xhochy/arrow-87 and squashes the following commits:

05ca3be [Uwe L. Korn] Use parquet:: namespace instead of parquet_cpp
6bafc5f [Uwe L. Korn] ARROW-87: [C++] Add all four possible ways to encode Decimals in Parquet to schema conversion
  • Loading branch information
xhochy authored and wesm committed Mar 28, 2016
1 parent 38897ee commit 2d8627c
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 0 deletions.
36 changes: 36 additions & 0 deletions cpp/src/arrow/parquet/parquet-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

#include "arrow/test-util.h"
#include "arrow/type.h"
#include "arrow/types/decimal.h"
#include "arrow/util/status.h"

#include "arrow/parquet/schema.h"
Expand All @@ -46,6 +47,7 @@ const auto DOUBLE = std::make_shared<DoubleType>();
// Shared Arrow type instances that the schema-conversion tests in this file
// use as expected field types.
const auto UTF8 = std::make_shared<StringType>();
// Binary data is modelled here as a list whose unnamed child field is UINT8.
const auto BINARY = std::make_shared<ListType>(
std::make_shared<Field>("", UINT8));
// Decimal type built with arguments (8, 4); presumably precision 8 and
// scale 4 -- confirm against the DecimalType constructor.
const auto DECIMAL_8_4 = std::make_shared<DecimalType>(8, 4);

class TestConvertParquetSchema : public ::testing::Test {
public:
Expand Down Expand Up @@ -119,6 +121,40 @@ TEST_F(TestConvertParquetSchema, ParquetFlatPrimitives) {
CheckFlatSchema(arrow_schema);
}

// Checks that a DECIMAL logical annotation on each of four Parquet physical
// types (FIXED_LEN_BYTE_ARRAY, BYTE_ARRAY, INT32, INT64) converts to an Arrow
// field of type DECIMAL_8_4.
TEST_F(TestConvertParquetSchema, ParquetFlatDecimals) {
  // Trailing arguments handed to PrimitiveNode::Make for every column.
  // NOTE(review): the order is presumed to be (length, precision, scale),
  // which lines up with DECIMAL_8_4 being DecimalType(8, 4) -- confirm
  // against the PrimitiveNode::Make declaration.
  const int kPrecision = 8;
  const int kScale = 4;
  const int kFlbaLength = 4;
  // Only the FIXED_LEN_BYTE_ARRAY column passes a real length; the others
  // pass -1.
  const int kNoLength = -1;

  std::vector<NodePtr> parquet_fields;
  std::vector<std::shared_ptr<Field>> arrow_fields;

  // FIXED_LEN_BYTE_ARRAY-backed decimal.
  auto flba_node = PrimitiveNode::Make(
      "flba-decimal", Repetition::OPTIONAL, ParquetType::FIXED_LEN_BYTE_ARRAY,
      LogicalType::DECIMAL, kFlbaLength, kPrecision, kScale);
  parquet_fields.push_back(flba_node);
  arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));

  // BYTE_ARRAY-backed decimal.
  auto binary_node = PrimitiveNode::Make(
      "binary-decimal", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY,
      LogicalType::DECIMAL, kNoLength, kPrecision, kScale);
  parquet_fields.push_back(binary_node);
  arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));

  // INT32-backed decimal.
  auto int32_node = PrimitiveNode::Make(
      "int32-decimal", Repetition::OPTIONAL, ParquetType::INT32,
      LogicalType::DECIMAL, kNoLength, kPrecision, kScale);
  parquet_fields.push_back(int32_node);
  arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));

  // INT64-backed decimal.
  auto int64_node = PrimitiveNode::Make(
      "int64-decimal", Repetition::OPTIONAL, ParquetType::INT64,
      LogicalType::DECIMAL, kNoLength, kPrecision, kScale);
  parquet_fields.push_back(int64_node);
  arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));

  // Run the conversion first, then compare the result with the expected
  // Arrow schema built from the fields collected above.
  ASSERT_OK(ConvertSchema(parquet_fields));
  const auto expected_schema = std::make_shared<Schema>(arrow_fields);

  CheckFlatSchema(expected_schema);
}

TEST_F(TestConvertParquetSchema, UnsupportedThings) {
std::vector<NodePtr> unsupported_nodes;

Expand Down
9 changes: 9 additions & 0 deletions cpp/src/arrow/parquet/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ static Status FromByteArray(const PrimitiveNode* node, TypePtr* out) {
case LogicalType::UTF8:
*out = UTF8;
break;
case LogicalType::DECIMAL:
*out = MakeDecimalType(node);
break;
default:
// BINARY
*out = BINARY;
Expand Down Expand Up @@ -86,6 +89,9 @@ static Status FromInt32(const PrimitiveNode* node, TypePtr* out) {
case LogicalType::NONE:
*out = INT32;
break;
case LogicalType::DECIMAL:
*out = MakeDecimalType(node);
break;
default:
return Status::NotImplemented("Unhandled logical type for int32");
break;
Expand All @@ -98,6 +104,9 @@ static Status FromInt64(const PrimitiveNode* node, TypePtr* out) {
case LogicalType::NONE:
*out = INT64;
break;
case LogicalType::DECIMAL:
*out = MakeDecimalType(node);
break;
default:
return Status::NotImplemented("Unhandled logical type for int64");
break;
Expand Down

0 comments on commit 2d8627c

Please sign in to comment.