diff --git a/cpp/src/parquet/schema/converter.cc b/cpp/src/parquet/schema/converter.cc index 08eeb66677b6f..3b18af3e60805 100644 --- a/cpp/src/parquet/schema/converter.cc +++ b/cpp/src/parquet/schema/converter.cc @@ -96,8 +96,6 @@ class SchemaVisitor : public Node::ConstVisitor { void Visit(const Node* node) override { format::SchemaElement element; node->ToParquet(&element); - // Override field_id here as we can get user-generated Nodes without a valid id - element.__set_field_id(elements_->size()); elements_->push_back(element); if (node->is_group()) { diff --git a/cpp/src/parquet/schema/test-util.h b/cpp/src/parquet/schema/test-util.h index ee9e2256d3e16..752b8f387d686 100644 --- a/cpp/src/parquet/schema/test-util.h +++ b/cpp/src/parquet/schema/test-util.h @@ -42,11 +42,6 @@ static inline SchemaElement NewPrimitive(const std::string& name, result.__set_repetition_type(repetition); result.__set_type(type); result.__set_num_children(0); - result.__set_field_id(id); - // Set default (non-set) values - result.__set_type_length(-1); - result.__set_precision(-1); - result.__set_scale(-1); return result; } @@ -57,7 +52,6 @@ static inline SchemaElement NewGroup(const std::string& name, result.__set_name(name); result.__set_repetition_type(repetition); result.__set_num_children(num_children); - result.__set_field_id(id); return result; } diff --git a/cpp/src/parquet/schema/types.cc b/cpp/src/parquet/schema/types.cc index 043ef00408726..7d452c32c9310 100644 --- a/cpp/src/parquet/schema/types.cc +++ b/cpp/src/parquet/schema/types.cc @@ -86,16 +86,16 @@ PrimitiveNode::PrimitiveNode(const std::string& name, Repetition::type repetitio physical_type_(type), type_length_(length) { std::stringstream ss; - decimal_metadata_.isset = false; + + // PARQUET-842: In an earlier revision, decimal_metadata_.isset was being + // set to true, but Impala will raise an incompatible metadata in such cases + memset(&decimal_metadata_, 0, sizeof(decimal_metadata_)); + // Check if the physical and logical types match // Mapping referred from Apache parquet-mr as on 2016-02-22 switch (logical_type) { case LogicalType::NONE: // Logical type not set - // Clients should be able to read these values - decimal_metadata_.isset = true; - decimal_metadata_.precision = precision; - decimal_metadata_.scale = scale; break; case LogicalType::UTF8: case LogicalType::JSON: @@ -289,7 +289,6 @@ void GroupNode::ToParquet(void* opaque_element) const { if (logical_type_ != LogicalType::NONE) { element->__set_converted_type(ToThrift(logical_type_)); } - // FIXME: SchemaFlattener does this for us: element->__set_field_id(id_); } void PrimitiveNode::ToParquet(void* opaque_element) const { @@ -302,8 +301,9 @@ void PrimitiveNode::ToParquet(void* opaque_element) const { element->__set_converted_type(ToThrift(logical_type_)); } element->__set_type(ToThrift(physical_type_)); - // FIXME: SchemaFlattener does this for us: element->__set_field_id(id_); - element->__set_type_length(type_length_); + if (physical_type_ == Type::FIXED_LEN_BYTE_ARRAY) { + element->__set_type_length(type_length_); + } if (decimal_metadata_.isset) { element->__set_precision(decimal_metadata_.precision); element->__set_scale(decimal_metadata_.scale);