From 42ed0eae447df098084e71e4ca42e8a86d3a4926 Mon Sep 17 00:00:00 2001 From: "Uwe L. Korn" Date: Sun, 1 May 2016 11:02:11 +0200 Subject: [PATCH] Add struct conversion --- cpp/src/arrow/parquet/schema.cc | 25 ++++++++++++++++++++----- cpp/src/arrow/parquet/schema.h | 2 ++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/cpp/src/arrow/parquet/schema.cc b/cpp/src/arrow/parquet/schema.cc index f921b05599955..f807029ec7ee7 100644 --- a/cpp/src/arrow/parquet/schema.cc +++ b/cpp/src/arrow/parquet/schema.cc @@ -184,6 +184,20 @@ Status FromParquetSchema( return Status::OK(); } +Status StructToNode(const std::shared_ptr<StructType>& type, const std::string& name, + bool nullable, NodePtr* out) { + Repetition::type repetition = Repetition::REQUIRED; + if (nullable) { repetition = Repetition::OPTIONAL; } + + std::vector<NodePtr> children(type->num_children()); + for (int i = 0; i < type->num_children(); i++) { + RETURN_NOT_OK(FieldToNode(type->child(i), &children[i])); + } + + *out = GroupNode::Make(name, repetition, children); + return Status::OK(); +} + Status FieldToNode(const std::shared_ptr<Field>& field, NodePtr* out) { LogicalType::type logical_type = LogicalType::NONE; ParquetType::type type; @@ -263,14 +277,15 @@ Status FieldToNode(const std::shared_ptr<Field>& field, NodePtr* out) { case Type::TIME: type = ParquetType::INT64; logical_type = LogicalType::TIME_MILLIS; - // Precision- and scale-based decimal type. Storage type depends on the - // parameters.
- // DECIMAL = 20, + break; + case Type::STRUCT: { + auto struct_type = std::static_pointer_cast<StructType>(field->type); + return StructToNode(struct_type, field->name, field->nullable, out); + } break; default: - // TODO: LIST, STRUCT, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL_TEXT, VARCHAR + // TODO: LIST, DENSE_UNION, SPARE_UNION, JSON_SCALAR, DECIMAL, DECIMAL_TEXT, VARCHAR return Status::NotImplemented("unhandled type"); } - // TODO: handle repeated *out = PrimitiveNode::Make(field->name, repetition, type, logical_type, length); return Status::OK(); } diff --git a/cpp/src/arrow/parquet/schema.h b/cpp/src/arrow/parquet/schema.h index 12a240364c928..bfc7d21138154 100644 --- a/cpp/src/arrow/parquet/schema.h +++ b/cpp/src/arrow/parquet/schema.h @@ -36,6 +36,8 @@ Status NodeToField(const ::parquet::schema::NodePtr& node, std::shared_ptr<Field>* out); +Status FieldToNode(const std::shared_ptr<Field>& field, ::parquet::schema::NodePtr* out); + Status ToParquetSchema( const Schema* arrow_schema, std::shared_ptr<::parquet::SchemaDescriptor>* out);