From c6c2ae622f353386835e50c5ead5bc51bb4e2271 Mon Sep 17 00:00:00 2001 From: Ritchie Vink Date: Sun, 21 Jan 2024 10:44:16 +0100 Subject: [PATCH] perf: improve binview filter (#13878) --- .../src/array/growable/binview.rs | 22 +++++++++++++++++++ crates/polars-arrow/src/compute/filter.rs | 12 +++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/crates/polars-arrow/src/array/growable/binview.rs b/crates/polars-arrow/src/array/growable/binview.rs index 3f597f809219..3db541e9c593 100644 --- a/crates/polars-arrow/src/array/growable/binview.rs +++ b/crates/polars-arrow/src/array/growable/binview.rs @@ -114,6 +114,28 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { } })); } + + #[inline] + pub(crate) unsafe fn extend_unchecked_no_buffers( + &mut self, + index: usize, + start: usize, + len: usize, + ) { + let array = *self.arrays.get_unchecked(index); + + extend_validity(&mut self.validity, array, start, len); + + let range = start..start + len; + + self.views + .extend(array.views().get_unchecked(range).iter().map(|view| { + let len = (*view as u32) as usize; + self.total_bytes_len += len; + + *view + })) + } } impl<'a, T: ViewType + ?Sized> Growable<'a> for GrowableBinaryViewArray<'a, T> { diff --git a/crates/polars-arrow/src/compute/filter.rs b/crates/polars-arrow/src/compute/filter.rs index abe9fc58bbd9..09ab1c4645a2 100644 --- a/crates/polars-arrow/src/compute/filter.rs +++ b/crates/polars-arrow/src/compute/filter.rs @@ -274,14 +274,14 @@ pub fn filter(array: &dyn Array, filter: &BooleanArray) -> PolarsResult { let iter = SlicesIterator::new(filter.values()); - let mut mutable = growable::GrowableBinaryViewArray::new( - vec![array.as_any().downcast_ref::().unwrap()], - false, - iter.slots(), - ); + let array = array.as_any().downcast_ref::().unwrap(); + let mut mutable = + growable::GrowableBinaryViewArray::new(vec![array], false, iter.slots()); unsafe { - iter.for_each(|(start, len)| mutable.extend_unchecked(0, start, len)); + // We don't have to correct buffers as there is only one array. + iter.for_each(|(start, len)| mutable.extend_unchecked_no_buffers(0, start, len)); } + Ok(mutable.as_box()) }, // Should go via BinaryView