Skip to content

Commit

Permalink
feat: Improved list arithmetic support (#19162)
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion authored Oct 14, 2024
1 parent 44cf3ad commit f7c6a05
Show file tree
Hide file tree
Showing 18 changed files with 2,034 additions and 283 deletions.
1 change: 1 addition & 0 deletions crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ impl Bitmap {
/// Unsound iff `i >= self.len()`.
#[inline]
pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
    // Debug builds verify the caller's contract; release builds perform no check.
    debug_assert!(i < self.len());
    // `i` is relative to this bitmap, so shift it by `self.offset` before
    // reading from the backing storage.
    let position = self.offset + i;
    get_bit_unchecked(&self.storage, position)
}

Expand Down
1 change: 1 addition & 0 deletions crates/polars-arrow/src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ impl MutableBitmap {
/// Caller must ensure that `index < self.len()`
#[inline]
pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
    // Debug builds verify the caller's contract; release builds perform no check.
    debug_assert!(index < self.len());
    // Split the bit index into the byte that holds it and the bit position
    // inside that byte.
    let (byte_index, bit_index) = (index / 8, index % 8);
    let slot = self.buffer.get_unchecked_mut(byte_index);
    *slot = set_bit_in_byte(*slot, bit_index, value);
}
Expand Down
49 changes: 48 additions & 1 deletion crates/polars-arrow/src/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ impl<O: Offset> OffsetsBuffer<O> {
&self.0
}

/// Returns the length an array with these offsets would be.
/// Returns the length that an array with these offsets would have.
#[inline]
pub fn len_proxy(&self) -> usize {
self.0.len() - 1
Expand Down Expand Up @@ -513,6 +513,53 @@ impl<O: Offset> OffsetsBuffer<O> {
self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
}

/// Iterates over every pair of adjacent offsets, yielding `(offset, len)`:
/// the first offset of the pair and its distance to the second.
#[inline]
pub fn offset_and_length_iter(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
    // `windows(2)` only ever yields slices of exactly two elements.
    self.windows(2).map(|pair| {
        let start = pair[0].to_usize();
        let end = pair[1].to_usize();
        (start, end - start)
    })
}

/// For every outer row of a doubly (or deeper) nested list, yields the range
/// that row occupies in the primitive (leaf) array.
///
/// `offsets[0]` holds the outermost offsets; each following entry holds the
/// offsets of the next nesting level. The start and end of every outer row are
/// looked up through each level in turn.
///
/// # Panics
///
/// Panics if `offsets` is empty, or if an offset points past the end of the
/// next level's offsets.
pub fn leaf_ranges_iter(
    offsets: &[Self],
) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
    let inner_levels = &offsets[1..];

    offsets[0].windows(2).map(move |bounds| {
        // `windows(2)` only ever yields slices of exactly two elements.
        let outer = (bounds[0].to_usize(), bounds[1].to_usize());

        // Translate the outer bounds level by level down to the leaf array.
        let (start, end) = inner_levels.iter().fold(outer, |(start, end), level| {
            let level_offsets = level.as_slice();
            (
                level_offsets[start].to_usize(),
                level_offsets[end].to_usize(),
            )
        });

        start..end
    })
}

/// Returns the full range of the leaf array that the nested list covers.
///
/// Starts from the first and last offset of `offsets[0]` and looks both up
/// through every following nesting level.
///
/// # Panics
///
/// Panics if `offsets` is empty, or if an offset points past the end of the
/// next level's offsets.
pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
    let outer = (
        offsets[0].first().to_usize(),
        offsets[0].last().to_usize(),
    );

    // Translate the outer bounds level by level down to the leaf array.
    let (start, end) = offsets[1..].iter().fold(outer, |(start, end), level| {
        let level_offsets = level.as_slice();
        (
            level_offsets[start].to_usize(),
            level_offsets[end].to_usize(),
        )
    });

    start..end
}

/// Returns the inner [`Buffer`].
#[inline]
pub fn into_inner(self) -> Buffer<O> {
Expand Down
1 change: 1 addition & 0 deletions crates/polars-compute/src/arithmetic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,6 @@ impl<T: HasPrimitiveArithmeticKernel> ArithmeticKernel for PrimitiveArray<T> {
}

mod float;
pub mod pl_num;
mod signed;
mod unsigned;
229 changes: 229 additions & 0 deletions crates/polars-compute/src/arithmetic/pl_num.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
use core::any::TypeId;

use arrow::types::NativeType;
use polars_utils::floor_divmod::FloorDivMod;

/// Scalar arithmetic that behaves the same way as `ArithmeticKernel`.
///
/// The caller remains responsible for setting the validity of results, e.g.
/// for div/mod operations with 0 in the denominator.
///
/// This is a low-level building block for hand-written arithmetic loops
/// (e.g. in list arithmetic). In most cases `ArithmeticKernel` or
/// `ArithmeticChunked` should be preferred.
pub trait PlNumArithmetic: Sized + Copy + 'static {
    /// Result type of [`true_div`](Self::true_div).
    type TrueDivT: NativeType;

    /// Absolute value.
    fn wrapping_abs(self) -> Self;
    /// Negation.
    fn wrapping_neg(self) -> Self;
    /// Addition.
    fn wrapping_add(self, rhs: Self) -> Self;
    /// Subtraction.
    fn wrapping_sub(self, rhs: Self) -> Self;
    /// Multiplication.
    fn wrapping_mul(self, rhs: Self) -> Self;
    /// Division rounded towards negative infinity.
    fn wrapping_floor_div(self, rhs: Self) -> Self;
    /// Division rounded towards zero.
    fn wrapping_trunc_div(self, rhs: Self) -> Self;
    /// Remainder matching [`wrapping_floor_div`](Self::wrapping_floor_div).
    fn wrapping_mod(self, rhs: Self) -> Self;

    /// Division producing a [`Self::TrueDivT`].
    fn true_div(self, rhs: Self) -> Self::TrueDivT;

    /// Division whose result has the operand type: true division when `Self`
    /// is its own `TrueDivT`, floor division otherwise.
    #[inline(always)]
    fn legacy_div(self, rhs: Self) -> Self {
        if TypeId::of::<Self>() != TypeId::of::<Self::TrueDivT>() {
            return self.wrapping_floor_div(rhs);
        }

        let quotient = self.true_div(rhs);
        // SAFETY: the `TypeId`s compared equal above, so `Self::TrueDivT` and
        // `Self` are the same type and this is a plain bitwise copy.
        unsafe { core::mem::transmute_copy(&quotient) }
    }
}

macro_rules! impl_signed_pl_num_arith {
($T:ty) => {
impl PlNumArithmetic for $T {
type TrueDivT = f64;

#[inline(always)]
fn wrapping_abs(self) -> Self {
self.wrapping_abs()
}

#[inline(always)]
fn wrapping_neg(self) -> Self {
self.wrapping_neg()
}

#[inline(always)]
fn wrapping_add(self, rhs: Self) -> Self {
self.wrapping_add(rhs)
}

#[inline(always)]
fn wrapping_sub(self, rhs: Self) -> Self {
self.wrapping_sub(rhs)
}

#[inline(always)]
fn wrapping_mul(self, rhs: Self) -> Self {
self.wrapping_mul(rhs)
}

#[inline(always)]
fn wrapping_floor_div(self, rhs: Self) -> Self {
self.wrapping_floor_div_mod(rhs).0
}

#[inline(always)]
fn wrapping_trunc_div(self, rhs: Self) -> Self {
if rhs != 0 {
self.wrapping_div(rhs)
} else {
0
}
}

#[inline(always)]
fn wrapping_mod(self, rhs: Self) -> Self {
self.wrapping_floor_div_mod(rhs).1
}

#[inline(always)]
fn true_div(self, rhs: Self) -> Self::TrueDivT {
self as f64 / rhs as f64
}
}
};
}

impl_signed_pl_num_arith!(i8);
impl_signed_pl_num_arith!(i16);
impl_signed_pl_num_arith!(i32);
impl_signed_pl_num_arith!(i64);
impl_signed_pl_num_arith!(i128);

/// Implements `PlNumArithmetic` for one or more unsigned integer types.
///
/// Division and modulo by zero return 0 instead of panicking; masking such
/// results is the caller's job.
macro_rules! impl_unsigned_pl_num_arith {
    ($($T:ty),+ $(,)?) => {
        $(
            impl PlNumArithmetic for $T {
                type TrueDivT = f64;

                #[inline(always)]
                fn wrapping_abs(self) -> Self {
                    // Unsigned values are returned unchanged.
                    self
                }

                // The `wrapping_*` calls on `self` in the next four methods
                // resolve to the inherent integer methods of the same name,
                // which take priority over the trait methods being defined.

                #[inline(always)]
                fn wrapping_neg(self) -> Self {
                    self.wrapping_neg()
                }

                #[inline(always)]
                fn wrapping_add(self, rhs: Self) -> Self {
                    self.wrapping_add(rhs)
                }

                #[inline(always)]
                fn wrapping_sub(self, rhs: Self) -> Self {
                    self.wrapping_sub(rhs)
                }

                #[inline(always)]
                fn wrapping_mul(self, rhs: Self) -> Self {
                    self.wrapping_mul(rhs)
                }

                #[inline(always)]
                fn wrapping_floor_div(self, rhs: Self) -> Self {
                    // For unsigned integers `checked_div` is `None` only when
                    // the divisor is zero.
                    self.checked_div(rhs).unwrap_or(0)
                }

                #[inline(always)]
                fn wrapping_trunc_div(self, rhs: Self) -> Self {
                    // Delegates to the floor division defined above.
                    <Self as PlNumArithmetic>::wrapping_floor_div(self, rhs)
                }

                #[inline(always)]
                fn wrapping_mod(self, rhs: Self) -> Self {
                    // For unsigned integers `checked_rem` is `None` only when
                    // the divisor is zero.
                    self.checked_rem(rhs).unwrap_or(0)
                }

                #[inline(always)]
                fn true_div(self, rhs: Self) -> Self::TrueDivT {
                    let numerator = self as f64;
                    let denominator = rhs as f64;
                    numerator / denominator
                }
            }
        )+
    };
}

impl_unsigned_pl_num_arith!(u8, u16, u32, u64, u128);

macro_rules! impl_float_pl_num_arith {
($T:ty) => {
impl PlNumArithmetic for $T {
type TrueDivT = $T;

#[inline(always)]
fn wrapping_abs(self) -> Self {
self.abs()
}

#[inline(always)]
fn wrapping_neg(self) -> Self {
-self
}

#[inline(always)]
fn wrapping_add(self, rhs: Self) -> Self {
self + rhs
}

#[inline(always)]
fn wrapping_sub(self, rhs: Self) -> Self {
self - rhs
}

#[inline(always)]
fn wrapping_mul(self, rhs: Self) -> Self {
self * rhs
}

#[inline(always)]
fn wrapping_floor_div(self, rhs: Self) -> Self {
let l = self;
let r = rhs;
(l / r).floor()
}

#[inline(always)]
fn wrapping_trunc_div(self, rhs: Self) -> Self {
let l = self;
let r = rhs;
(l / r).trunc()
}

#[inline(always)]
fn wrapping_mod(self, rhs: Self) -> Self {
let l = self;
let r = rhs;
l - r * (l / r).floor()
}

#[inline(always)]
fn true_div(self, rhs: Self) -> Self::TrueDivT {
self / rhs
}
}
};
}

impl_float_pl_num_arith!(f32);
impl_float_pl_num_arith!(f64);
Loading

0 comments on commit f7c6a05

Please sign in to comment.