From f9afbaa7176b7fd2c75185d958e01bb640042703 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sat, 13 Jul 2024 11:25:01 -0400 Subject: [PATCH 1/3] add --- arrow-arith/src/aggregate.rs | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs index 190685ff9df7..e582f9f06ccc 100644 --- a/arrow-arith/src/aggregate.rs +++ b/arrow-arith/src/aggregate.rs @@ -415,21 +415,41 @@ pub fn max_binary(array: &GenericBinaryArray) -> Option<& min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b) } +/// Returns the maximum value in the binary view array, according to the natural order. +pub fn max_binary_view(array: &BinaryViewArray) -> Option<&[u8]> { + min_max_helper::<&[u8], _, _>(array, |a, b| *a < *b) +} + /// Returns the minimum value in the binary array, according to the natural order. pub fn min_binary(array: &GenericBinaryArray) -> Option<&[u8]> { min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b) } +/// Returns the minimum value in the binary view array, according to the natural order. +pub fn min_binary_view(array: &BinaryViewArray) -> Option<&[u8]> { + min_max_helper::<&[u8], _, _>(array, |a, b| *a > *b) +} + /// Returns the maximum value in the string array, according to the natural order. pub fn max_string(array: &GenericStringArray) -> Option<&str> { min_max_helper::<&str, _, _>(array, |a, b| *a < *b) } +/// Returns the maximum value in the string view array, according to the natural order. +pub fn max_string_view(array: &StringViewArray) -> Option<&str> { + min_max_helper::<&str, _, _>(array, |a, b| *a < *b) +} + /// Returns the minimum value in the string array, according to the natural order. pub fn min_string(array: &GenericStringArray) -> Option<&str> { min_max_helper::<&str, _, _>(array, |a, b| *a > *b) } +/// Returns the minimum value in the string view array, according to the natural order. +pub fn min_string_view(array: &StringViewArray) -> Option<&str> { + min_max_helper::<&str, _, _>(array, |a, b| *a > *b) +} + /// Returns the sum of values in the array. /// /// This doesn't detect overflow. Once overflowing, the result will wrap around. From c62e71c0ce0f37638bb483589a7875b792d68bf2 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Sun, 14 Jul 2024 14:48:06 -0400 Subject: [PATCH 2/3] implement min max support for string/binary view --- arrow-arith/src/aggregate.rs | 157 +++++++++++++++++++++++------------ 1 file changed, 106 insertions(+), 51 deletions(-) diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs index e582f9f06ccc..d29cfa05cafd 100644 --- a/arrow-arith/src/aggregate.rs +++ b/arrow-arith/src/aggregate.rs @@ -1152,61 +1152,116 @@ mod tests { assert!(max(&a).unwrap().is_nan()); } - #[test] - fn test_binary_min_max_with_nulls() { - let a = BinaryArray::from(vec![ - Some("b".as_bytes()), + macro_rules! test_binary { + ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => { + #[test] + fn $NAME() { + let binary = BinaryArray::from($ARRAY); + assert_eq!($EXPECTED_MIN, min_binary(&binary)); + assert_eq!($EXPECTED_MAX, max_binary(&binary)); + + let large_binary = LargeBinaryArray::from($ARRAY); + assert_eq!($EXPECTED_MIN, min_binary(&large_binary)); + assert_eq!($EXPECTED_MAX, max_binary(&large_binary)); + + let binary_view = BinaryViewArray::from($ARRAY); + assert_eq!($EXPECTED_MIN, min_binary_view(&binary_view)); + assert_eq!($EXPECTED_MAX, max_binary_view(&binary_view)); + } + }; + } + + test_binary!( + test_binary_min_max_with_nulls, + vec![ + Some("b01234567890123".as_bytes()), // long bytes None, None, Some(b"a"), Some(b"c"), - ]); - assert_eq!(Some("a".as_bytes()), min_binary(&a)); - assert_eq!(Some("c".as_bytes()), max_binary(&a)); - } - - #[test] - fn test_binary_min_max_no_null() { - let a = BinaryArray::from(vec![Some("b".as_bytes()), Some(b"a"), Some(b"c")]); - assert_eq!(Some("a".as_bytes()), min_binary(&a)); - assert_eq!(Some("c".as_bytes()), max_binary(&a)); - } - - #[test] - fn test_binary_min_max_all_nulls() { - let a = BinaryArray::from(vec![None, None]); - assert_eq!(None, min_binary(&a)); - assert_eq!(None, max_binary(&a)); - } - - #[test] - fn test_binary_min_max_1() { - let a = BinaryArray::from(vec![None, None, Some("b".as_bytes()), Some(b"a")]); - assert_eq!(Some("a".as_bytes()), min_binary(&a)); - assert_eq!(Some("b".as_bytes()), max_binary(&a)); - } - - #[test] - fn test_string_min_max_with_nulls() { - let a = StringArray::from(vec![Some("b"), None, None, Some("a"), Some("c")]); - assert_eq!(Some("a"), min_string(&a)); - assert_eq!(Some("c"), max_string(&a)); - } - - #[test] - fn test_string_min_max_all_nulls() { - let v: Vec> = vec![None, None]; - let a = StringArray::from(v); - assert_eq!(None, min_string(&a)); - assert_eq!(None, max_string(&a)); - } - - #[test] - fn test_string_min_max_1() { - let a = StringArray::from(vec![None, None, Some("b"), Some("a")]); - assert_eq!(Some("a"), min_string(&a)); - assert_eq!(Some("b"), max_string(&a)); - } + ], + Some("a".as_bytes()), + Some("c".as_bytes()) + ); + + test_binary!( + test_binary_min_max_no_null, + vec![ + Some("b".as_bytes()), + Some(b"abcdefghijklmnopqrst"), // long bytes + Some(b"c") + ], + Some("abcdefghijklmnopqrst".as_bytes()), + Some("c".as_bytes()) + ); + + test_binary!(test_binary_min_max_all_nulls, vec![None, None], None, None); + + test_binary!( + test_binary_min_max_1, + vec![ + None, + None, + Some("b01234567890123435".as_bytes()), + Some(b"a") + ], + Some("a".as_bytes()), + Some("b01234567890123435".as_bytes()) + ); + + macro_rules! test_string { + ($NAME:ident, $ARRAY:expr, $EXPECTED_MIN:expr, $EXPECTED_MAX: expr) => { + #[test] + fn $NAME() { + let string = StringArray::from($ARRAY); + assert_eq!($EXPECTED_MIN, min_string(&string)); + assert_eq!($EXPECTED_MAX, max_string(&string)); + + let large_string = LargeStringArray::from($ARRAY); + assert_eq!($EXPECTED_MIN, min_string(&large_string)); + assert_eq!($EXPECTED_MAX, max_string(&large_string)); + + let string_view = StringViewArray::from($ARRAY); + assert_eq!($EXPECTED_MIN, min_string_view(&string_view)); + assert_eq!($EXPECTED_MAX, max_string_view(&string_view)); + } + }; + } + + test_string!( + test_string_min_max_with_nulls, + vec![Some("b012345678901234"), None, None, Some("a"), Some("c")], + Some("a"), + Some("c") + ); + + test_string!( + test_string_min_max_no_null, + vec![Some("b"), Some("a"), Some("c12345678901234")], + Some("a"), + Some("c12345678901234") + ); + + test_string!( + test_string_min_max_all_nulls, + Vec::>::from_iter([None, None]), + None, + None + ); + + test_string!( + test_string_min_max_1, + vec![None, None, Some("b"), Some("a12345678901234")], + Some("a12345678901234"), + Some("b") + ); + + test_string!( + test_string_min_max_empty, + Vec::>::new(), + None, + None + ); #[test] fn test_boolean_min_max_empty() { From 3a73a8a64b3cd810c653aca14afec85865eed837 Mon Sep 17 00:00:00 2001 From: Xiangpeng Hao Date: Mon, 15 Jul 2024 10:03:02 -0400 Subject: [PATCH 3/3] update tests --- arrow-arith/src/aggregate.rs | 39 +++++++++++++++++++++++++++--------- 1 file changed, 30 insertions(+), 9 deletions(-) diff --git a/arrow-arith/src/aggregate.rs b/arrow-arith/src/aggregate.rs index d29cfa05cafd..0e4d31eee7a1 100644 --- a/arrow-arith/src/aggregate.rs +++ b/arrow-arith/src/aggregate.rs @@ -1179,6 +1179,7 @@ mod tests { None, Some(b"a"), Some(b"c"), + Some(b"abcdedfg0123456"), ], Some("a".as_bytes()), Some("c".as_bytes()) @@ -1189,7 +1190,8 @@ mod tests { vec![ Some("b".as_bytes()), Some(b"abcdefghijklmnopqrst"), // long bytes - Some(b"c") + Some(b"c"), + Some(b"b01234567890123"), // long bytes for view types ], Some("abcdefghijklmnopqrst".as_bytes()), Some("c".as_bytes()) @@ -1201,12 +1203,13 @@ mod tests { test_binary_min_max_1, vec![ None, + Some("b01234567890123435".as_bytes()), // long bytes for view types None, - Some("b01234567890123435".as_bytes()), + Some(b"b0123xxxxxxxxxxx"), Some(b"a") ], Some("a".as_bytes()), - Some("b01234567890123435".as_bytes()) + Some("b0123xxxxxxxxxxx".as_bytes()) ); macro_rules! test_string { @@ -1230,16 +1233,28 @@ mod tests { test_string!( test_string_min_max_with_nulls, - vec![Some("b012345678901234"), None, None, Some("a"), Some("c")], + vec![ + Some("b012345678901234"), // long bytes for view types + None, + None, + Some("a"), + Some("c"), + Some("b0123xxxxxxxxxxx") + ], Some("a"), Some("c") ); test_string!( test_string_min_max_no_null, - vec![Some("b"), Some("a"), Some("c12345678901234")], + vec![ + Some("b"), + Some("b012345678901234"), // long bytes for view types + Some("a"), + Some("b012xxxxxxxxxxxx") + ], Some("a"), - Some("c12345678901234") + Some("b012xxxxxxxxxxxx") ); test_string!( @@ -1251,9 +1266,15 @@ mod tests { test_string!( test_string_min_max_1, - vec![None, None, Some("b"), Some("a12345678901234")], - Some("a12345678901234"), - Some("b") + vec![ + None, + Some("c12345678901234"), // long bytes for view types + None, + Some("b"), + Some("c1234xxxxxxxxxx") + ], + Some("b"), + Some("c1234xxxxxxxxxx") ); test_string!(