
Commit 564000c
chore: update Python and docs
wjones127 committed Apr 17, 2023
1 parent e75a5c3 commit 564000c
Showing 6 changed files with 17 additions and 19 deletions.

cpp/src/parquet/properties.h (4 changes: 2 additions & 2 deletions)

@@ -877,13 +877,13 @@ class PARQUET_EXPORT ArrowWriterProperties {
     /// list types (default "item"), will use "entries", as is specified in
     /// the Parquet spec.
     ///
-    /// This is disabled by default, but will be enabled by default in future.
+    /// This is enabled by default.
     Builder* enable_compliant_nested_types() {
      compliant_nested_types_ = true;
      return this;
     }
 
-    /// Preserve Arrow list field name (default behavior).
+    /// Preserve Arrow list field name.
     Builder* disable_compliant_nested_types() {
      compliant_nested_types_ = false;
      return this;
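
The effect of the new C++ default is easiest to see from Python. Below is a minimal sketch (the file path is illustrative; the compliant layout names list elements per the Parquet LIST spec instead of Arrow's default "item"):

import pyarrow as pa
import pyarrow.parquet as pq

# A table with a nested (list) column.
table = pa.table({"scores": pa.array([[1, 2], [3]], type=pa.list_(pa.int64()))})

# Compliant nested types are now written by default; no keyword needed.
pq.write_table(table, "scores.parquet")  # "scores.parquet" is an illustrative path

# Inspect the physical Parquet schema: the list element is named per the
# Parquet spec rather than with Arrow's default "item".
print(pq.ParquetFile("scores.parquet").schema)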

docs/source/cpp/parquet.rst (1 change: 0 additions & 1 deletion)

@@ -317,7 +317,6 @@ There are also Arrow-specific settings that can be configured with
    std::shared_ptr<ArrowWriterProperties> arrow_props = ArrowWriterProperties::Builder()
        .enable_deprecated_int96_timestamps() // default False
        ->store_schema() // default False
-       ->enable_compliant_nested_types() // default False
        ->build();
 
 These options mostly dictate how Arrow types are converted to Parquet types.

python/pyarrow/_dataset_parquet.pyx (2 changes: 1 addition & 1 deletion)

@@ -596,7 +596,7 @@ cdef class ParquetFileWriteOptions(FileWriteOptions):
             use_deprecated_int96_timestamps=False,
             coerce_timestamps=None,
             allow_truncated_timestamps=False,
-            use_compliant_nested_type=False,
+            use_compliant_nested_type=True,
         )
         self._set_properties()
         self._set_arrow_properties()
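
Because `ParquetFileWriteOptions` seeds the same keyword, dataset writes follow suit. A sketch of picking up the new default and opting out at the dataset layer (assuming `make_write_options` passes the keyword through as usual):

import pyarrow.dataset as ds

fmt = ds.ParquetFileFormat()

# Dataset writes now produce compliant list names by default ...
default_opts = fmt.make_write_options()

# ... and the old Arrow naming remains one keyword away.
legacy_opts = fmt.make_write_options(use_compliant_nested_type=False)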

python/pyarrow/_parquet.pyx (4 changes: 2 additions & 2 deletions)

@@ -1607,7 +1607,7 @@ cdef shared_ptr[ArrowWriterProperties] _create_arrow_writer_properties(
         coerce_timestamps=None,
         allow_truncated_timestamps=False,
         writer_engine_version=None,
-        use_compliant_nested_type=False,
+        use_compliant_nested_type=True,
         store_schema=True) except *:
     """Arrow writer properties"""
     cdef:
@@ -1704,7 +1704,7 @@ cdef class ParquetWriter(_Weakrefable):
             column_encoding=None,
             writer_engine_version=None,
             data_page_version=None,
-            use_compliant_nested_type=False,
+            use_compliant_nested_type=True,
             encryption_properties=None,
             write_batch_size=None,
             dictionary_pagesize_limit=None,
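
`ParquetWriter` picks up the same default, so callers that passed `use_compliant_nested_type=True` explicitly can now drop the keyword. A short sketch (path is illustrative):

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"values": [[1, 2], [3]]})

# No keyword needed any more; pass use_compliant_nested_type=False to opt out.
with pq.ParquetWriter("out.parquet", table.schema) as writer:
    writer.write_table(table)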

python/pyarrow/parquet/core.py (6 changes: 3 additions & 3 deletions)

@@ -822,7 +822,7 @@ def _sanitize_table(table, new_schema, flavor):
     The serialized Parquet data page format version to write, defaults to
     1.0. This does not impact the file schema logical types and Arrow to
     Parquet type casting behavior; for that use the "version" option.
-use_compliant_nested_type : bool, default False
+use_compliant_nested_type : bool, default True
     Whether to write compliant Parquet nested type (lists) as defined
     `here <https://github.com/apache/parquet-format/blob/master/
     LogicalTypes.md#nested-types>`_, defaults to ``False``.
@@ -954,7 +954,7 @@ def __init__(self, where, schema, filesystem=None,
                  column_encoding=None,
                  writer_engine_version=None,
                  data_page_version='1.0',
-                 use_compliant_nested_type=False,
+                 use_compliant_nested_type=True,
                  encryption_properties=None,
                  write_batch_size=None,
                  dictionary_pagesize_limit=None,
@@ -3072,7 +3072,7 @@ def write_table(table, where, row_group_size=None, version='2.4',
                 use_byte_stream_split=False,
                 column_encoding=None,
                 data_page_version='1.0',
-                use_compliant_nested_type=False,
+                use_compliant_nested_type=True,
                 encryption_properties=None,
                 write_batch_size=None,
                 dictionary_pagesize_limit=None,
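
At the top-level `pyarrow.parquet` API the change looks like this (a minimal sketch; paths are illustrative):

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"values": [[1, 2], [3]]})

# New default: spec-compliant nested type naming.
pq.write_table(table, "compliant.parquet")

# Restore the previous Arrow naming ("item") explicitly.
pq.write_table(table, "arrow_names.parquet", use_compliant_nested_type=False)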

python/pyarrow/tests/parquet/test_compliant_nested_type.py (19 changes: 9 additions & 10 deletions)

@@ -64,17 +64,15 @@ def test_write_compliant_nested_type_enable(tempdir,
                                             use_legacy_dataset, test_data):
     # prepare dataframe for testing
     df = pd.DataFrame(data=test_data)
-    # verify that we can read/write pandas df with new flag
+    # verify that we can read/write pandas df with new flag (default behaviour)
     _roundtrip_pandas_dataframe(df,
-                                write_kwargs={
-                                    'use_compliant_nested_type': True},
+                                write_kwargs={},
                                 use_legacy_dataset=use_legacy_dataset)
 
     # Write to a parquet file with compliant nested type
     table = pa.Table.from_pandas(df, preserve_index=False)
     path = str(tempdir / 'data.parquet')
     with pq.ParquetWriter(path, table.schema,
-                          use_compliant_nested_type=True,
                           version='2.6') as writer:
         writer.write_table(table)
     # Read back as a table
@@ -86,8 +84,7 @@ def test_write_compliant_nested_type_enable(tempdir,
 
     # Verify that the new table can be read/written correctly
     _check_roundtrip(new_table,
-                     use_legacy_dataset=use_legacy_dataset,
-                     use_compliant_nested_type=True)
+                     use_legacy_dataset=use_legacy_dataset)
 
 
 @pytest.mark.pandas
@@ -97,14 +94,16 @@ def test_write_compliant_nested_type_disable(tempdir,
                                              use_legacy_dataset, test_data):
     # prepare dataframe for testing
     df = pd.DataFrame(data=test_data)
-    # verify that we can read/write with new flag disabled (default behaviour)
-    _roundtrip_pandas_dataframe(df, write_kwargs={},
-                                use_legacy_dataset=use_legacy_dataset)
+    # verify that we can read/write with new flag disabled
+    _roundtrip_pandas_dataframe(df, write_kwargs={
+                                    'use_compliant_nested_type': False},
+                                use_legacy_dataset=use_legacy_dataset)
 
     # Write to a parquet file while disabling compliant nested type
     table = pa.Table.from_pandas(df, preserve_index=False)
     path = str(tempdir / 'data.parquet')
-    with pq.ParquetWriter(path, table.schema, version='2.6') as writer:
+    with pq.ParquetWriter(path, table.schema, version='2.6',
+                          use_compliant_nested_type=False) as writer:
         writer.write_table(table)
     new_table = _read_table(path)
 
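
One reason the tests can drop the explicit keyword without losing coverage: `store_schema` defaults to `True` (visible in `_create_arrow_writer_properties` above), so the original Arrow schema is embedded in the file and restored on read. A sketch of that round trip, with the assertion stated as an assumption about the stored-schema behavior:

import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({"values": [[1, 2], [3]]})
pq.write_table(table, "roundtrip.parquet")  # illustrative path

# The physical file uses compliant list naming, but the stored Arrow schema
# lets pyarrow restore the original field names on read (assumed behavior
# given store_schema=True).
assert pq.read_table("roundtrip.parquet").schema.equals(table.schema)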
