Skip to content

Commit

Permalink
Add more types
Browse files Browse the repository at this point in the history
  • Loading branch information
xhochy committed May 1, 2016
1 parent 8a0293e commit 9a6c876
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 57 deletions.
35 changes: 5 additions & 30 deletions cpp/src/arrow/parquet/parquet-schema-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,7 @@ class TestConvertArrowSchema : public ::testing::Test {
const GroupNode* result_schema_node =
static_cast<const GroupNode*>(result_schema_->schema().get());

ASSERT_EQ(expected_schema_node->field_count(),
result_schema_node->field_count());
ASSERT_EQ(expected_schema_node->field_count(), result_schema_node->field_count());

for (int i = 0; i < expected_schema_node->field_count(); i++) {
auto lhs = result_schema_node->field(i);
Expand Down Expand Up @@ -217,18 +216,9 @@ TEST_F(TestConvertArrowSchema, ParquetFlatPrimitives) {
arrow_fields.push_back(std::make_shared<Field>("double", DOUBLE));

// TODO: String types need to be clarified a bit more in the Arrow spec
// parquet_fields.push_back(PrimitiveNode::Make(
// "string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8));
// arrow_fields.push_back(std::make_shared<Field>("string", UTF8));

// TODO: At the moment we do not have enough information in the BINARY type in Arrow
// parquet_fields.push_back(
// PrimitiveNode::Make("binary", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY));
// arrow_fields.push_back(std::make_shared<Field>("binary", BINARY));

// parquet_fields.push_back(PrimitiveNode::Make("flba-binary", Repetition::OPTIONAL,
// ParquetType::FIXED_LEN_BYTE_ARRAY, LogicalType::NONE, 12));
// arrow_fields.push_back(std::make_shared<Field>("flba-binary", BINARY));
parquet_fields.push_back(PrimitiveNode::Make(
"string", Repetition::OPTIONAL, ParquetType::BYTE_ARRAY, LogicalType::UTF8));
arrow_fields.push_back(std::make_shared<Field>("string", UTF8));

ASSERT_OK(ConvertSchema(arrow_fields));

Expand All @@ -239,23 +229,8 @@ TEST_F(TestConvertArrowSchema, ParquetFlatDecimals) {
std::vector<NodePtr> parquet_fields;
std::vector<std::shared_ptr<Field>> arrow_fields;

/*parquet_fields.push_back(PrimitiveNode::Make("flba-decimal", Repetition::OPTIONAL,
ParquetType::FIXED_LEN_BYTE_ARRAY, LogicalType::DECIMAL, 4, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("flba-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("binary-decimal", Repetition::OPTIONAL,
ParquetType::BYTE_ARRAY, LogicalType::DECIMAL, -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("binary-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int32-decimal", Repetition::OPTIONAL,
ParquetType::INT32, LogicalType::DECIMAL, -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int32-decimal", DECIMAL_8_4));
parquet_fields.push_back(PrimitiveNode::Make("int64-decimal", Repetition::OPTIONAL,
ParquetType::INT64, LogicalType::DECIMAL, -1, 8, 4));
arrow_fields.push_back(std::make_shared<Field>("int64-decimal", DECIMAL_8_4));
// TODO: Test Decimal Arrow -> Parquet conversion

CheckFlatSchema(arrow_schema);*/
ASSERT_OK(ConvertSchema(arrow_fields));

CheckFlatSchema(parquet_fields);
Expand Down
62 changes: 35 additions & 27 deletions cpp/src/arrow/parquet/schema.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "parquet/api/schema.h"

#include "arrow/types/decimal.h"
#include "arrow/types/string.h"
#include "arrow/util/status.h"

using parquet::Repetition;
Expand Down Expand Up @@ -187,9 +188,8 @@ Status FieldToNode(const std::shared_ptr<Field>& field, NodePtr* out) {
LogicalType::type logical_type = LogicalType::NONE;
ParquetType::type type;
Repetition::type repetition = Repetition::REQUIRED;
if (field->nullable) {
repetition = Repetition::OPTIONAL;
}
if (field->nullable) { repetition = Repetition::OPTIONAL; }
int length = -1;

switch (field->type->type) {
// TODO:
Expand Down Expand Up @@ -234,35 +234,44 @@ Status FieldToNode(const std::shared_ptr<Field>& field, NodePtr* out) {
case Type::DOUBLE:
type = ParquetType::DOUBLE;
break;
// // CHAR(N): fixed-length UTF8 string with length N
// CHAR = 12,
// // UTF8 variable-length string as List<Char>
// STRING = 13,
// // VARCHAR(N): Null-terminated string type embedded in a CHAR(N + 1)
// VARCHAR = 14,

// // Variable-length bytes (no guarantee of UTF8-ness)
// BINARY = 15,
// By default, int32 days since the UNIX epoch
// DATE = 16,
// Exact timestamp encoded with int64 since UNIX epoch
// Default unit millisecond
// TIMESTAMP = 17,
// Timestamp as double seconds since the UNIX epoch
// TIMESTAMP_DOUBLE = 18,
// Exact time encoded with int64, default unit millisecond
// TIME = 19,
case Type::CHAR:
type = ParquetType::FIXED_LEN_BYTE_ARRAY;
logical_type = LogicalType::UTF8;
length = static_cast<CharType*>(field->type.get())->size;
break;
case Type::STRING:
type = ParquetType::BYTE_ARRAY;
logical_type = LogicalType::UTF8;
break;
case Type::BINARY:
type = ParquetType::BYTE_ARRAY;
break;
case Type::DATE:
type = ParquetType::INT32;
logical_type = LogicalType::DATE;
break;
case Type::TIMESTAMP:
type = ParquetType::INT64;
logical_type = LogicalType::TIMESTAMP_MILLIS;
break;
case Type::TIMESTAMP_DOUBLE:
type = ParquetType::INT64;
// This is specified as seconds since the UNIX epoch
// TODO: Converted type in Parquet?
// logical_type = LogicalType::TIMESTAMP_MILLIS;
break;
case Type::TIME:
type = ParquetType::INT64;
logical_type = LogicalType::TIME_MILLIS;
// Precision- and scale-based decimal type. Storage type depends on the
// parameters.
// DECIMAL = 20,
// Decimal value encoded as a text string
// DECIMAL_TEXT = 21,
default:
// TODO: LIST, STRUCT, DENSE_UNION, SPARSE_UNION, JSON_SCALAR
// TODO: LIST, STRUCT, DENSE_UNION, SPARSE_UNION, JSON_SCALAR, DECIMAL_TEXT, VARCHAR
return Status::NotImplemented("unhandled type");
}
// TODO: handle required, repeated
*out = PrimitiveNode::Make(field->name, repetition, type, logical_type);
// TODO: handle repeated
*out = PrimitiveNode::Make(field->name, repetition, type, logical_type, length);
return Status::OK();
}

Expand All @@ -280,7 +289,6 @@ Status ToParquetSchema(
return Status::OK();
}


} // namespace parquet

} // namespace arrow

0 comments on commit 9a6c876

Please sign in to comment.