-
Notifications
You must be signed in to change notification settings - Fork 851
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Optimize max_boolean
by operating on u64 chunks
#6098
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -371,11 +371,26 @@ pub fn max_boolean(array: &BooleanArray) -> Option<bool> { | |
} | ||
|
||
// Note the max bool is true (1), so short circuit as soon as we see it | ||
array | ||
.iter() | ||
.find(|&b| b == Some(true)) | ||
.flatten() | ||
.or(Some(false)) | ||
match array.nulls() { | ||
None => array | ||
.values() | ||
.bit_chunks() | ||
.iter_padded() | ||
// We found a true if any bit is set | ||
.map(|x| x != 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder, what is the "penalty" for supporting short circuiting? |
||
.find(|b| *b) | ||
Comment on lines
+380
to
+381
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it a good idea to use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do you mean like this? It's a personal preference, but I prefer the existing map + find There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm thinking about |
||
.or(Some(false)), | ||
Some(nulls) => { | ||
let validity_chunks = nulls.inner().bit_chunks().iter_padded(); | ||
let value_chunks = array.values().bit_chunks().iter_padded(); | ||
value_chunks | ||
.zip(validity_chunks) | ||
// We found a true if the value bit is 1, AND the validity bit is 1 for any bits in the chunk | ||
.map(|(value_bits, validity_bits)| (value_bits & validity_bits) != 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The same question. |
||
.find(|b| *b) | ||
.or(Some(false)) | ||
} | ||
} | ||
} | ||
|
||
/// Helper to compute min/max of [`ArrayAccessor`]. | ||
|
@@ -748,6 +763,7 @@ where | |
mod tests { | ||
use super::*; | ||
use arrow_array::types::*; | ||
use builder::BooleanBuilder; | ||
use std::sync::Arc; | ||
|
||
#[test] | ||
|
@@ -1318,6 +1334,14 @@ mod tests { | |
let a = BooleanArray::from(vec![Some(false), Some(true), None, Some(false), None]); | ||
assert_eq!(Some(false), min_boolean(&a)); | ||
assert_eq!(Some(true), max_boolean(&a)); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we please add some tests that have more than 64 values to exercise this new code? I am thinking:
Both with/without nulls There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for the feedback. |
||
let a = BooleanArray::from(vec![Some(true), None]); | ||
assert_eq!(Some(true), min_boolean(&a)); | ||
assert_eq!(Some(true), max_boolean(&a)); | ||
|
||
let a = BooleanArray::from(vec![Some(false), None]); | ||
assert_eq!(Some(false), min_boolean(&a)); | ||
assert_eq!(Some(false), max_boolean(&a)); | ||
} | ||
|
||
#[test] | ||
|
@@ -1339,6 +1363,92 @@ mod tests { | |
assert_eq!(Some(true), max_boolean(&a)); | ||
} | ||
|
||
#[test] | ||
fn test_boolean_min_max_64_true_64_false() { | ||
let mut no_nulls = BooleanBuilder::new(); | ||
no_nulls.append_slice(&[true; 64]); | ||
no_nulls.append_slice(&[false; 64]); | ||
let no_nulls = no_nulls.finish(); | ||
|
||
assert_eq!(Some(false), min_boolean(&no_nulls)); | ||
assert_eq!(Some(true), max_boolean(&no_nulls)); | ||
|
||
let mut with_nulls = BooleanBuilder::new(); | ||
with_nulls.append_slice(&[true; 31]); | ||
with_nulls.append_null(); | ||
with_nulls.append_slice(&[true; 32]); | ||
with_nulls.append_slice(&[false; 1]); | ||
with_nulls.append_nulls(63); | ||
let with_nulls = with_nulls.finish(); | ||
|
||
assert_eq!(Some(false), min_boolean(&with_nulls)); | ||
assert_eq!(Some(true), max_boolean(&with_nulls)); | ||
} | ||
|
||
#[test] | ||
fn test_boolean_min_max_64_false_64_true() { | ||
let mut no_nulls = BooleanBuilder::new(); | ||
no_nulls.append_slice(&[false; 64]); | ||
no_nulls.append_slice(&[true; 64]); | ||
let no_nulls = no_nulls.finish(); | ||
|
||
assert_eq!(Some(false), min_boolean(&no_nulls)); | ||
assert_eq!(Some(true), max_boolean(&no_nulls)); | ||
|
||
let mut with_nulls = BooleanBuilder::new(); | ||
with_nulls.append_slice(&[false; 31]); | ||
with_nulls.append_null(); | ||
with_nulls.append_slice(&[false; 32]); | ||
with_nulls.append_slice(&[true; 1]); | ||
with_nulls.append_nulls(63); | ||
let with_nulls = with_nulls.finish(); | ||
|
||
assert_eq!(Some(false), min_boolean(&with_nulls)); | ||
assert_eq!(Some(true), max_boolean(&with_nulls)); | ||
} | ||
|
||
#[test] | ||
fn test_boolean_min_max_96_true() { | ||
let mut no_nulls = BooleanBuilder::new(); | ||
no_nulls.append_slice(&[true; 96]); | ||
let no_nulls = no_nulls.finish(); | ||
|
||
assert_eq!(Some(true), min_boolean(&no_nulls)); | ||
assert_eq!(Some(true), max_boolean(&no_nulls)); | ||
|
||
let mut with_nulls = BooleanBuilder::new(); | ||
with_nulls.append_slice(&[true; 31]); | ||
with_nulls.append_null(); | ||
with_nulls.append_slice(&[true; 32]); | ||
with_nulls.append_slice(&[true; 31]); | ||
with_nulls.append_null(); | ||
let with_nulls = with_nulls.finish(); | ||
|
||
assert_eq!(Some(true), min_boolean(&with_nulls)); | ||
assert_eq!(Some(true), max_boolean(&with_nulls)); | ||
} | ||
|
||
#[test] | ||
fn test_boolean_min_max_96_false() { | ||
let mut no_nulls = BooleanBuilder::new(); | ||
no_nulls.append_slice(&[false; 96]); | ||
let no_nulls = no_nulls.finish(); | ||
|
||
assert_eq!(Some(false), min_boolean(&no_nulls)); | ||
assert_eq!(Some(false), max_boolean(&no_nulls)); | ||
|
||
let mut with_nulls = BooleanBuilder::new(); | ||
with_nulls.append_slice(&[false; 31]); | ||
with_nulls.append_null(); | ||
with_nulls.append_slice(&[false; 32]); | ||
with_nulls.append_slice(&[false; 31]); | ||
with_nulls.append_null(); | ||
let with_nulls = with_nulls.finish(); | ||
|
||
assert_eq!(Some(false), min_boolean(&with_nulls)); | ||
assert_eq!(Some(false), max_boolean(&with_nulls)); | ||
} | ||
|
||
#[test] | ||
fn test_sum_dyn() { | ||
let values = Int8Array::from_iter_values([10_i8, 11, 12, 13, 14, 15, 16, 17]); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Very cool! I wonder if we can do similar to
min_boolean
where the condition is.map(|x| x != u64::MAX)
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Absolutely! I think I'll do it in a follow-up PR