Skip to content

Commit

Permalink
Add `Statistics::distinct_count_opt` and deprecate `Statistics::distinct_count` (#6259)
Browse files Browse the repository at this point in the history
  • Loading branch information
alamb authored Aug 20, 2024
1 parent c2d2311 commit e5d9816
Show file tree
Hide file tree
Showing 2 changed files with 17 additions and 10 deletions.
18 changes: 9 additions & 9 deletions parquet/src/column/writer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1842,7 +1842,7 @@ mod tests {
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::Int32(stats) = stats {
assert_eq!(stats.min_opt().unwrap(), &1);
assert_eq!(stats.max_opt().unwrap(), &4);
Expand Down Expand Up @@ -1968,7 +1968,7 @@ mod tests {
assert_eq!(metadata.dictionary_page_offset(), Some(0));
if let Some(stats) = metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count().unwrap_or(0), 55);
assert_eq!(stats.distinct_count_opt().unwrap_or(0), 55);
if let Statistics::Int32(stats) = stats {
assert_eq!(stats.min_opt().unwrap(), &-17);
assert_eq!(stats.max_opt().unwrap(), &9000);
Expand Down Expand Up @@ -1999,7 +1999,7 @@ mod tests {
assert_eq!(stats.min_bytes_opt().unwrap(), 1_i32.to_le_bytes());
assert_eq!(stats.max_bytes_opt().unwrap(), 7_i32.to_le_bytes());
assert_eq!(stats.null_count_opt(), Some(0));
assert!(stats.distinct_count().is_none());
assert!(stats.distinct_count_opt().is_none());

drop(write);

Expand Down Expand Up @@ -2031,7 +2031,7 @@ mod tests {
7_i32.to_le_bytes()
);
assert_eq!(page_statistics.null_count_opt(), Some(0));
assert!(page_statistics.distinct_count().is_none());
assert!(page_statistics.distinct_count_opt().is_none());
}

#[test]
Expand Down Expand Up @@ -2698,7 +2698,7 @@ mod tests {

if let Some(stats) = r.metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::Int32(stats) = stats {
// first page is [1,2,3,4]
// second page is [-5,2,4,8]
Expand Down Expand Up @@ -2758,7 +2758,7 @@ mod tests {

if let Some(stats) = r.metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::FixedLenByteArray(stats) = stats {
let column_index_min_value = &column_index.min_values[0];
let column_index_max_value = &column_index.max_values[0];
Expand Down Expand Up @@ -2830,7 +2830,7 @@ mod tests {

if let Some(stats) = r.metadata.statistics() {
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::FixedLenByteArray(_stats) = stats {
let column_index_min_value = &column_index.min_values[0];
let column_index_max_value = &column_index.max_values[0];
Expand Down Expand Up @@ -2951,7 +2951,7 @@ mod tests {

let stats = r.metadata.statistics().expect("statistics");
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::ByteArray(_stats) = stats {
let min_value = _stats.min_opt().unwrap();
let max_value = _stats.max_opt().unwrap();
Expand Down Expand Up @@ -3003,7 +3003,7 @@ mod tests {

let stats = r.metadata.statistics().expect("statistics");
assert_eq!(stats.null_count_opt(), Some(0));
assert_eq!(stats.distinct_count(), None);
assert_eq!(stats.distinct_count_opt(), None);
if let Statistics::FixedLenByteArray(_stats) = stats {
let min_value = _stats.min_opt().unwrap();
let max_value = _stats.max_opt().unwrap();
Expand Down
9 changes: 8 additions & 1 deletion parquet/src/file/statistics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ pub fn to_thrift(stats: Option<&Statistics>) -> Option<TStatistics> {
max: None,
min: None,
null_count,
distinct_count: stats.distinct_count().map(|value| value as i64),
distinct_count: stats.distinct_count_opt().map(|value| value as i64),
max_value: None,
min_value: None,
is_max_value_exact: None,
Expand Down Expand Up @@ -380,7 +380,14 @@ impl Statistics {

/// Returns the optional number of distinct values occurring.
/// When it is `None`, the value should be ignored.
///
/// Deprecated: this method only forwards to [`Self::distinct_count_opt`]
/// and returns its result unchanged, so callers can switch to that method
/// without any change in the value they receive.
#[deprecated(since = "53.0.0", note = "Use `distinct_count_opt` method instead")]
pub fn distinct_count(&self) -> Option<u64> {
self.distinct_count_opt()
}

/// Returns the optional number of distinct values occurring.
/// When it is `None`, the value should be ignored.
///
/// This is the non-deprecated accessor for the distinct count; the
/// deprecated `distinct_count` method delegates here.
pub fn distinct_count_opt(&self) -> Option<u64> {
// NOTE(review): `statistics_enum_func!` is defined elsewhere; it presumably
// matches on the `Statistics` variant and calls `distinct_count` on the
// inner typed statistics — confirm against the macro definition.
statistics_enum_func![self, distinct_count]
}

Expand Down

0 comments on commit e5d9816

Please sign in to comment.