diff --git a/python/deltalake/table.py b/python/deltalake/table.py index d4e4dd192e..f8357c3700 100644 --- a/python/deltalake/table.py +++ b/python/deltalake/table.py @@ -217,22 +217,25 @@ class ColumnProperties: def __init__( self, dictionary_enabled: Optional[bool] = None, - max_statistics_size: Optional[int] = None, + statistics_enabled: Optional[Literal["NONE", "CHUNK", "PAGE"]] = None, bloom_filter_properties: Optional[BloomFilterProperties] = None, ): """Create a Column Properties instance for the Rust parquet writer: Args: dictionary_enabled: Enable dictionary encoding for the column. - max_statistics_size: Maximum size of statistics for the column. + statistics_enabled: Statistics level for the column. bloom_filter_properties: Bloom Filter Properties for the column. """ self.dictionary_enabled = dictionary_enabled - self.max_statistics_size = max_statistics_size + self.statistics_enabled = statistics_enabled self.bloom_filter_properties = bloom_filter_properties def __str__(self) -> str: - return f"dictionary_enabled: {self.dictionary_enabled}, max_statistics_size: {self.max_statistics_size}, bloom_filter_properties: {self.bloom_filter_properties}" + return ( + f"dictionary_enabled: {self.dictionary_enabled}, statistics_enabled: {self.statistics_enabled}, " + f"bloom_filter_properties: {self.bloom_filter_properties}" + ) @dataclass(init=True) diff --git a/python/src/lib.rs b/python/src/lib.rs index 8ea08158e8..b91874616d 100644 --- a/python/src/lib.rs +++ b/python/src/lib.rs @@ -52,7 +52,7 @@ use deltalake::operations::vacuum::VacuumBuilder; use deltalake::operations::{collect_sendable_stream, CustomExecuteHandler}; use deltalake::parquet::basic::Compression; use deltalake::parquet::errors::ParquetError; -use deltalake::parquet::file::properties::WriterProperties; +use deltalake::parquet::file::properties::{EnabledStatistics, WriterProperties}; use deltalake::partitions::PartitionFilter; use deltalake::protocol::{DeltaOperation, SaveMode}; use deltalake::storage::{IORuntime, ObjectStoreRef}; @@ -1566,8 +1566,12 @@ fn set_writer_properties(writer_properties: PyWriterProperties) -> DeltaResult DeltaResult, - pub max_statistics_size: Option, + pub statistics_enabled: Option, pub bloom_filter_properties: Option, } diff --git a/python/tests/test_writerproperties.py b/python/tests/test_writerproperties.py index 30c25548ad..822ad82c02 100644 --- a/python/tests/test_writerproperties.py +++ b/python/tests/test_writerproperties.py @@ -28,14 +28,14 @@ def test_writer_properties_all_filled(): column_properties={ "a": ColumnProperties( dictionary_enabled=True, - max_statistics_size=40, + statistics_enabled="CHUNK", bloom_filter_properties=BloomFilterProperties( set_bloom_filter_enabled=True, fpp=0.2, ndv=30 ), ), "b": ColumnProperties( dictionary_enabled=True, - max_statistics_size=400, + statistics_enabled="PAGE", bloom_filter_properties=BloomFilterProperties( set_bloom_filter_enabled=False, fpp=0.2, ndv=30 ),