feat: Improved list arithmetic support (#19162)

pola-rs · Oct 14, 2024 · f7c6a05 · f7c6a05
1 parent 44cf3ad
commit f7c6a05
Show file tree

Hide file tree

Showing 18 changed files with 2,034 additions and 283 deletions.
diff --git a/crates/polars-arrow/src/bitmap/immutable.rs b/crates/polars-arrow/src/bitmap/immutable.rs
@@ -344,6 +344,7 @@ impl Bitmap {
     /// Unsound iff `i >= self.len()`.
     #[inline]
     pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
+        debug_assert!(i < self.len()); // checked only when debug assertions are enabled
         get_bit_unchecked(&self.storage, self.offset + i) // `i` is shifted by `self.offset`
     }
 

diff --git a/crates/polars-arrow/src/bitmap/mutable.rs b/crates/polars-arrow/src/bitmap/mutable.rs
@@ -362,6 +362,7 @@ impl MutableBitmap {
     /// Caller must ensure that `index < self.len()`
     #[inline]
     pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
+        debug_assert!(index < self.len()); // checked only when debug assertions are enabled
         let byte = self.buffer.get_unchecked_mut(index / 8); // byte that holds bit `index`
         *byte = set_bit_in_byte(*byte, index % 8, value); // rewrite bit `index % 8` of that byte
     }

diff --git a/crates/polars-arrow/src/offset.rs b/crates/polars-arrow/src/offset.rs
@@ -415,7 +415,7 @@ impl<O: Offset> OffsetsBuffer<O> {
         &self.0
     }
 
-    /// Returns the length an array with these offsets would be.
+    /// Returns the length that an array with these offsets would have.
     #[inline]
     pub fn len_proxy(&self) -> usize {
         self.0.len() - 1
@@ -513,6 +513,53 @@ impl<O: Offset> OffsetsBuffer<O> {
         self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
     }
 
+    /// Returns an iterator of `(offset, len)` pairs, one for each pair of adjacent offsets.
+    #[inline]
+    pub fn offset_and_length_iter(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
+        self.windows(2).map(|x| {
+            let [l, r] = x else { unreachable!() }; // `windows(2)` only yields 2-element slices
+            let l = l.to_usize();
+            let r = r.to_usize();
+            (l, r - l) // start offset, and the distance to the next offset as the length
+        })
+    }
+
+    /// Returns, for every outer row, the range of the innermost (leaf) array covered by that row,
+    /// given one offsets buffer per nesting level, outermost first. Panics if `offsets` is empty.
+    pub fn leaf_ranges_iter(
+        offsets: &[Self],
+    ) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
+        let others = &offsets[1..]; // offsets of the deeper nesting levels
+
+        offsets[0].windows(2).map(move |x| {
+            let [l, r] = x else { unreachable!() }; // `windows(2)` only yields 2-element slices
+            let mut l = l.to_usize();
+            let mut r = r.to_usize();
+
+            for o in others {
+                let slc = o.as_slice();
+                l = slc[l].to_usize(); // translate both bounds through the next level's offsets
+                r = slc[r].to_usize();
+            }
+
+            l..r
+        })
+    }
+
+    /// Returns the full range of the leaf array used by the list; panics if `offsets` is empty.
+    pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
+        let mut l = offsets[0].first().to_usize(); // start from the first level's first/last offset
+        let mut r = offsets[0].last().to_usize();
+
+        for o in &offsets[1..] {
+            let slc = o.as_slice();
+            l = slc[l].to_usize(); // translate both bounds through the next level's offsets
+            r = slc[r].to_usize();
+        }
+
+        l..r
+    }
+
     /// Returns the inner [`Buffer`].
     #[inline]
     pub fn into_inner(self) -> Buffer<O> {

diff --git a/crates/polars-compute/src/arithmetic/mod.rs b/crates/polars-compute/src/arithmetic/mod.rs
@@ -141,5 +141,6 @@ impl<T: HasPrimitiveArithmeticKernel> ArithmeticKernel for PrimitiveArray<T> {
 }
 
 mod float;
+pub mod pl_num;
 mod signed;
 mod unsigned;
diff --git a/crates/polars-compute/src/arithmetic/pl_num.rs b/crates/polars-compute/src/arithmetic/pl_num.rs
@@ -0,0 +1,229 @@
+use core::any::TypeId;
+
+use arrow::types::NativeType;
+use polars_utils::floor_divmod::FloorDivMod;
+
+/// Implements basic arithmetic between scalars with the same behavior as `ArithmeticKernel`.
+///
+/// Note, however, that the caller is responsible for setting the validity of
+/// results for e.g. div/mod operations with 0 in the denominator.
+///
+/// This is intended as a low-level utility for custom arithmetic loops
+/// (e.g. in list arithmetic). In most cases prefer using `ArithmeticKernel` or
+/// `ArithmeticChunked` instead.
+pub trait PlNumArithmetic: Sized + Copy + 'static {
+    type TrueDivT: NativeType; // result type of `true_div`
+
+    fn wrapping_abs(self) -> Self;
+    fn wrapping_neg(self) -> Self;
+    fn wrapping_add(self, rhs: Self) -> Self;
+    fn wrapping_sub(self, rhs: Self) -> Self;
+    fn wrapping_mul(self, rhs: Self) -> Self;
+    fn wrapping_floor_div(self, rhs: Self) -> Self;
+    fn wrapping_trunc_div(self, rhs: Self) -> Self;
+    fn wrapping_mod(self, rhs: Self) -> Self;
+
+    fn true_div(self, rhs: Self) -> Self::TrueDivT;
+
+    #[inline(always)]
+    fn legacy_div(self, rhs: Self) -> Self { // `true_div` when it yields `Self`, else floor division
+        if TypeId::of::<Self>() == TypeId::of::<Self::TrueDivT>() {
+            let ret = self.true_div(rhs);
+            unsafe { core::mem::transmute_copy(&ret) } // SAFETY: TypeId check => `TrueDivT` is `Self`
+        } else {
+            self.wrapping_floor_div(rhs)
+        }
+    }
+}
+
+macro_rules! impl_signed_pl_num_arith { // implements `PlNumArithmetic` for one signed integer type
+    ($T:ty) => {
+        impl PlNumArithmetic for $T {
+            type TrueDivT = f64;
+
+            #[inline(always)]
+            fn wrapping_abs(self) -> Self {
+                self.wrapping_abs() // inherent integer method; inherent methods win over trait ones
+            }
+
+            #[inline(always)]
+            fn wrapping_neg(self) -> Self {
+                self.wrapping_neg()
+            }
+
+            #[inline(always)]
+            fn wrapping_add(self, rhs: Self) -> Self {
+                self.wrapping_add(rhs)
+            }
+
+            #[inline(always)]
+            fn wrapping_sub(self, rhs: Self) -> Self {
+                self.wrapping_sub(rhs)
+            }
+
+            #[inline(always)]
+            fn wrapping_mul(self, rhs: Self) -> Self {
+                self.wrapping_mul(rhs)
+            }
+
+            #[inline(always)]
+            fn wrapping_floor_div(self, rhs: Self) -> Self {
+                self.wrapping_floor_div_mod(rhs).0 // first element of the `FloorDivMod` result
+            }
+
+            #[inline(always)]
+            fn wrapping_trunc_div(self, rhs: Self) -> Self {
+                if rhs != 0 {
+                    self.wrapping_div(rhs)
+                } else {
+                    0 // zero divisor yields 0 instead of panicking; validity is the caller's job
+                }
+            }
+
+            #[inline(always)]
+            fn wrapping_mod(self, rhs: Self) -> Self {
+                self.wrapping_floor_div_mod(rhs).1 // second element of the `FloorDivMod` result
+            }
+
+            #[inline(always)]
+            fn true_div(self, rhs: Self) -> Self::TrueDivT {
+                self as f64 / rhs as f64 // both operands are cast to f64 before dividing
+            }
+        }
+    };
+}
+
+impl_signed_pl_num_arith!(i8);
+impl_signed_pl_num_arith!(i16);
+impl_signed_pl_num_arith!(i32);
+impl_signed_pl_num_arith!(i64);
+impl_signed_pl_num_arith!(i128);
+
+macro_rules! impl_unsigned_pl_num_arith { // implements `PlNumArithmetic` for one unsigned integer type
+    ($T:ty) => {
+        impl PlNumArithmetic for $T {
+            type TrueDivT = f64;
+
+            #[inline(always)]
+            fn wrapping_abs(self) -> Self {
+                self // unsigned values are never negative
+            }
+
+            #[inline(always)]
+            fn wrapping_neg(self) -> Self {
+                self.wrapping_neg() // inherent integer method; inherent methods win over trait ones
+            }
+
+            #[inline(always)]
+            fn wrapping_add(self, rhs: Self) -> Self {
+                self.wrapping_add(rhs)
+            }
+
+            #[inline(always)]
+            fn wrapping_sub(self, rhs: Self) -> Self {
+                self.wrapping_sub(rhs)
+            }
+
+            #[inline(always)]
+            fn wrapping_mul(self, rhs: Self) -> Self {
+                self.wrapping_mul(rhs)
+            }
+
+            #[inline(always)]
+            fn wrapping_floor_div(self, rhs: Self) -> Self {
+                if rhs != 0 {
+                    self / rhs
+                } else {
+                    0 // zero divisor yields 0 instead of panicking; validity is the caller's job
+                }
+            }
+
+            #[inline(always)]
+            fn wrapping_trunc_div(self, rhs: Self) -> Self {
+                self.wrapping_floor_div(rhs) // floor and truncation agree for non-negative operands
+            }
+
+            #[inline(always)]
+            fn wrapping_mod(self, rhs: Self) -> Self {
+                if rhs != 0 {
+                    self % rhs
+                } else {
+                    0 // zero divisor yields 0 instead of panicking; validity is the caller's job
+                }
+            }
+
+            #[inline(always)]
+            fn true_div(self, rhs: Self) -> Self::TrueDivT {
+                self as f64 / rhs as f64 // both operands are cast to f64 before dividing
+            }
+        }
+    };
+}
+
+impl_unsigned_pl_num_arith!(u8);
+impl_unsigned_pl_num_arith!(u16);
+impl_unsigned_pl_num_arith!(u32);
+impl_unsigned_pl_num_arith!(u64);
+impl_unsigned_pl_num_arith!(u128);
+
+macro_rules! impl_float_pl_num_arith { // implements `PlNumArithmetic` for one float type
+    ($T:ty) => {
+        impl PlNumArithmetic for $T {
+            type TrueDivT = $T; // true division of floats stays in the same type
+
+            #[inline(always)]
+            fn wrapping_abs(self) -> Self {
+                self.abs()
+            }
+
+            #[inline(always)]
+            fn wrapping_neg(self) -> Self {
+                -self
+            }
+
+            #[inline(always)]
+            fn wrapping_add(self, rhs: Self) -> Self {
+                self + rhs
+            }
+
+            #[inline(always)]
+            fn wrapping_sub(self, rhs: Self) -> Self {
+                self - rhs
+            }
+
+            #[inline(always)]
+            fn wrapping_mul(self, rhs: Self) -> Self {
+                self * rhs
+            }
+
+            #[inline(always)]
+            fn wrapping_floor_div(self, rhs: Self) -> Self {
+                let l = self;
+                let r = rhs;
+                (l / r).floor() // quotient rounded towards negative infinity
+            }
+
+            #[inline(always)]
+            fn wrapping_trunc_div(self, rhs: Self) -> Self {
+                let l = self;
+                let r = rhs;
+                (l / r).trunc() // quotient rounded towards zero
+            }
+
+            #[inline(always)]
+            fn wrapping_mod(self, rhs: Self) -> Self {
+                let l = self;
+                let r = rhs;
+                l - r * (l / r).floor() // remainder relative to the floored quotient
+            }
+
+            #[inline(always)]
+            fn true_div(self, rhs: Self) -> Self::TrueDivT {
+                self / rhs
+            }
+        }
+    };
+}
+
+impl_float_pl_num_arith!(f32);
+impl_float_pl_num_arith!(f64);