diff --git a/python/deltalake/table.py b/python/deltalake/table.py index 5cd0d252cf..c9a4600c52 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -162,6 +162,7 @@ def __init__( ] ] = None, compression_level: Optional[int] = None, + statistics_truncate_length: Optional[int] = None, ): """Create a Writer Properties instance for the Rust parquet writer: @@ -176,6 +177,7 @@ def __init__( GZIP: levels (1-9), BROTLI: levels (1-11), ZSTD: levels (1-22), + statistics_truncate_length: maximum length of truncated min/max values in statistics. """ self.data_page_size_limit = data_page_size_limit self.dictionary_page_size_limit = dictionary_page_size_limit @@ -183,6 +185,7 @@ def __init__( self.write_batch_size = write_batch_size self.max_row_group_size = max_row_group_size self.compression = None + self.statistics_truncate_length = statistics_truncate_length if compression_level is not None and compression is None: raise ValueError( @@ -211,7 +214,7 @@ def __str__(self) -> str: return ( f"WriterProperties(data_page_size_limit: {self.data_page_size_limit}, dictionary_page_size_limit: {self.dictionary_page_size_limit}, " f"data_page_row_count_limit: {self.data_page_row_count_limit}, write_batch_size: {self.write_batch_size}, " - f"max_row_group_size: {self.max_row_group_size}, compression: {self.compression})" + f"max_row_group_size: {self.max_row_group_size}, compression: {self.compression}, statistics_truncate_length: {self.statistics_truncate_length})" ) def _to_dict(self) -> Dict[str, Optional[str]]: diff --git a/python/src/lib.rs b/python/src/lib.rs index 3947216285..9b32a465fd 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -1362,6 +1362,7 @@ fn set_writer_properties( let write_batch_size = writer_properties.get("write_batch_size"); let max_row_group_size = writer_properties.get("max_row_group_size"); let compression = writer_properties.get("compression"); + let statistics_truncate_length = 
writer_properties.get("statistics_truncate_length"); if let Some(Some(data_page_size)) = data_page_size_limit { properties = properties.set_data_page_size_limit(data_page_size.parse::<usize>().unwrap()); @@ -1380,6 +1381,9 @@ fn set_writer_properties( if let Some(Some(row_group_size)) = max_row_group_size { properties = properties.set_max_row_group_size(row_group_size.parse::<usize>().unwrap()); } + if let Some(Some(statistics_truncate_length)) = statistics_truncate_length { + properties = properties.set_statistics_truncate_length(statistics_truncate_length.parse::<usize>().ok()); + } if let Some(Some(compression)) = compression { let compress: Compression = compression diff --git a/python/tests/test_writerproperties.py b/python/tests/test_writerproperties.py index 63f12879e6..89f8e02690 100644 --- a/python/tests/test_writerproperties.py +++ b/python/tests/test_writerproperties.py @@ -15,6 +15,7 @@ def test_writer_properties_all_filled(): write_batch_size=400, max_row_group_size=500, compression="SNAPPY", + statistics_truncate_length=600, ) expected = { @@ -24,6 +25,7 @@ def test_writer_properties_all_filled(): "write_batch_size": "400", "max_row_group_size": "500", "compression": "SNAPPY", + "statistics_truncate_length": "600", } assert wp._to_dict() == expected @@ -39,6 +41,7 @@ def test_writer_properties_lower_case_compression(): "write_batch_size": None, "max_row_group_size": None, "compression": "SNAPPY", + "statistics_truncate_length": None, } assert wp._to_dict() == expected