Skip to content

Commit

Permalink
added statistics_truncate_length
Browse files Browse the repository at this point in the history
  • Loading branch information
sherlockbeard committed Aug 16, 2024
1 parent 597a5ae commit d979e78
Show file tree
Hide file tree
Showing 3 changed files with 11 additions and 1 deletion.
5 changes: 4 additions & 1 deletion python/deltalake/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ def __init__(
]
] = None,
compression_level: Optional[int] = None,
statistics_truncate_length: Optional[int] = None,
):
"""Create a Writer Properties instance for the Rust parquet writer:
Expand All @@ -176,13 +177,15 @@ def __init__(
GZIP: levels (1-9),
BROTLI: levels (1-11),
ZSTD: levels (1-22),
statistics_truncate_length: maximum length to which statistics min/max values are truncated.
"""
self.data_page_size_limit = data_page_size_limit
self.dictionary_page_size_limit = dictionary_page_size_limit
self.data_page_row_count_limit = data_page_row_count_limit
self.write_batch_size = write_batch_size
self.max_row_group_size = max_row_group_size
self.compression = None
self.statistics_truncate_length = statistics_truncate_length

if compression_level is not None and compression is None:
raise ValueError(
Expand Down Expand Up @@ -211,7 +214,7 @@ def __str__(self) -> str:
return (
f"WriterProperties(data_page_size_limit: {self.data_page_size_limit}, dictionary_page_size_limit: {self.dictionary_page_size_limit}, "
f"data_page_row_count_limit: {self.data_page_row_count_limit}, write_batch_size: {self.write_batch_size}, "
f"max_row_group_size: {self.max_row_group_size}, compression: {self.compression})"
f"max_row_group_size: {self.max_row_group_size}, compression: {self.compression}, statistics_truncate_length: {self.statistics_truncate_length})"
)

def _to_dict(self) -> Dict[str, Optional[str]]:
Expand Down
4 changes: 4 additions & 0 deletions python/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1362,6 +1362,7 @@ fn set_writer_properties(
let write_batch_size = writer_properties.get("write_batch_size");
let max_row_group_size = writer_properties.get("max_row_group_size");
let compression = writer_properties.get("compression");
let statistics_truncate_length = writer_properties.get("statistics_truncate_length");

if let Some(Some(data_page_size)) = data_page_size_limit {
properties = properties.set_data_page_size_limit(data_page_size.parse::<usize>().unwrap());
Expand All @@ -1380,6 +1381,9 @@ fn set_writer_properties(
if let Some(Some(row_group_size)) = max_row_group_size {
properties = properties.set_max_row_group_size(row_group_size.parse::<usize>().unwrap());
}
if let Some(Some(statistics_truncate_length)) = statistics_truncate_length {
properties = properties.set_statistics_truncate_length(statistics_truncate_length.parse::<usize>().ok());
}

if let Some(Some(compression)) = compression {
let compress: Compression = compression
Expand Down
3 changes: 3 additions & 0 deletions python/tests/test_writerproperties.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def test_writer_properties_all_filled():
write_batch_size=400,
max_row_group_size=500,
compression="SNAPPY",
statistics_truncate_length=600,
)

expected = {
Expand All @@ -24,6 +25,7 @@ def test_writer_properties_all_filled():
"write_batch_size": "400",
"max_row_group_size": "500",
"compression": "SNAPPY",
"statistics_truncate_length": "600",
}

assert wp._to_dict() == expected
Expand All @@ -39,6 +41,7 @@ def test_writer_properties_lower_case_compression():
"write_batch_size": None,
"max_row_group_size": None,
"compression": "SNAPPY",
"statistics_truncate_length": None,
}

assert wp._to_dict() == expected
Expand Down

0 comments on commit d979e78

Please sign in to comment.