Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Improved list arithmetic support #19162

Merged
merged 3 commits into from
Oct 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,7 @@ impl Bitmap {
/// Unsound iff `i >= self.len()`.
#[inline]
pub unsafe fn get_bit_unchecked(&self, i: usize) -> bool {
    // Debug builds verify the caller's contract; nothing is checked otherwise.
    debug_assert!(i < self.len());
    // `i` is relative to this bitmap, so shift it by the bitmap's own offset
    // before reading from the storage.
    let position = self.offset + i;
    get_bit_unchecked(&self.storage, position)
}

Expand Down
1 change: 1 addition & 0 deletions crates/polars-arrow/src/bitmap/mutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -362,6 +362,7 @@ impl MutableBitmap {
/// Caller must ensure that `index < self.len()`
#[inline]
pub unsafe fn set_unchecked(&mut self, index: usize, value: bool) {
    // Debug builds verify the caller's contract; nothing is checked otherwise.
    debug_assert!(index < self.len());
    // Split the bit index into the byte holding it and the position inside that byte.
    let (byte_pos, bit_pos) = (index / 8, index % 8);
    let slot = self.buffer.get_unchecked_mut(byte_pos);
    *slot = set_bit_in_byte(*slot, bit_pos, value);
}
Expand Down
49 changes: 48 additions & 1 deletion crates/polars-arrow/src/offset.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ impl<O: Offset> OffsetsBuffer<O> {
&self.0
}

/// Returns the length an array with these offsets would be.
/// Returns the length that an array with these offsets would have.
#[inline]
pub fn len_proxy(&self) -> usize {
self.0.len() - 1
Expand Down Expand Up @@ -513,6 +513,53 @@ impl<O: Offset> OffsetsBuffer<O> {
self.0.windows(2).map(|w| (w[1] - w[0]).to_usize())
}

/// Iterates over `(offset, len)` pairs, one per pair of adjacent offsets.
///
/// `offset` is the first value of the pair and `len` is the difference
/// between the second value and the first.
#[inline]
pub fn offset_and_length_iter(&self) -> impl Iterator<Item = (usize, usize)> + '_ {
    self.windows(2).map(|pair| match pair {
        [start, end] => {
            let start = start.to_usize();
            let end = end.to_usize();
            (start, end - start)
        },
        // `windows(2)` only ever yields slices of exactly two elements.
        _ => unreachable!(),
    })
}

/// Returns, for every outer row, the range of the primitive (leaf) array that the row covers
/// in a double+ nested list.
///
/// The windows of `offsets[0]` supply each row's start and end; every following entry of
/// `offsets` is then used as a lookup table that maps those two positions one level further.
///
/// # Panics
/// Panics if `offsets` is empty, or (while iterating) if a looked-up position is out of
/// bounds for one of the following entries.
pub fn leaf_ranges_iter(
offsets: &[Self],
) -> impl Iterator<Item = core::ops::Range<usize>> + '_ {
// Every entry after the first; the closure below walks these for each row.
let others = &offsets[1..];

offsets[0].windows(2).map(move |x| {
let [l, r] = x else { unreachable!() };
let mut l = l.to_usize();
let mut r = r.to_usize();

for o in others {
let slc = o.as_slice();
l = slc[l].to_usize();
// No `+ 1` here: row `i` spans `offsets[i]..offsets[i + 1]`, so `r` is already an
// exclusive end and `slc[r]` is the matching exclusive end one level further.
r = slc[r].to_usize();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not 100% sure, but I feel like this should be r + 1. Might be completely wrong though.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be correct as it is - Arrow list offsets are defined as

1st row : offsets[0]..offsets[1]
2nd row : offsets[1]..offsets[2]
..and so on

}

l..r
})
}

/// Return the full range of the leaf array used by the list.
///
/// Starts from the first and last offset of `offsets[0]` and maps both positions through
/// every following entry of `offsets` in turn.
///
/// # Panics
/// Panics if `offsets` is empty, or if a looked-up position is out of bounds for one of
/// the following entries.
pub fn leaf_full_start_end(offsets: &[Self]) -> core::ops::Range<usize> {
let mut l = offsets[0].first().to_usize();
let mut r = offsets[0].last().to_usize();

for o in &offsets[1..] {
let slc = o.as_slice();
l = slc[l].to_usize();
// `r` is the exclusive end of the returned range, so it is mapped directly
// (no `+ 1`).
r = slc[r].to_usize();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

idem.

}

l..r
}

/// Returns the inner [`Buffer`].
#[inline]
pub fn into_inner(self) -> Buffer<O> {
Expand Down
1 change: 1 addition & 0 deletions crates/polars-compute/src/arithmetic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,5 +141,6 @@ impl<T: HasPrimitiveArithmeticKernel> ArithmeticKernel for PrimitiveArray<T> {
}

mod float;
pub mod pl_num;
mod signed;
mod unsigned;
229 changes: 229 additions & 0 deletions crates/polars-compute/src/arithmetic/pl_num.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
use core::any::TypeId;

use arrow::types::NativeType;
use polars_utils::floor_divmod::FloorDivMod;

/// Scalar arithmetic primitives that behave the same way as `ArithmeticKernel`.
///
/// Validity is *not* handled here: the caller is responsible for setting the
/// validity of results for e.g. div/mod operations with 0 in the denominator.
///
/// This is a low-level building block for hand-written arithmetic loops
/// (e.g. in list arithmetic). In most cases `ArithmeticKernel` or
/// `ArithmeticChunked` is the better choice.
pub trait PlNumArithmetic: Sized + Copy + 'static {
    /// Result type of [`PlNumArithmetic::true_div`].
    type TrueDivT: NativeType;

    fn wrapping_abs(self) -> Self;
    fn wrapping_neg(self) -> Self;
    fn wrapping_add(self, rhs: Self) -> Self;
    fn wrapping_sub(self, rhs: Self) -> Self;
    fn wrapping_mul(self, rhs: Self) -> Self;
    fn wrapping_floor_div(self, rhs: Self) -> Self;
    fn wrapping_trunc_div(self, rhs: Self) -> Self;
    fn wrapping_mod(self, rhs: Self) -> Self;

    /// Division whose result is of type `Self::TrueDivT`.
    fn true_div(self, rhs: Self) -> Self::TrueDivT;

    /// Division that stays within `Self`: `true_div` when `Self` and
    /// `Self::TrueDivT` are the same type, `wrapping_floor_div` otherwise.
    #[inline(always)]
    fn legacy_div(self, rhs: Self) -> Self {
        if TypeId::of::<Self>() != TypeId::of::<Self::TrueDivT>() {
            return self.wrapping_floor_div(rhs);
        }

        let quotient = self.true_div(rhs);
        // SAFETY: the `TypeId` comparison above established that `Self` and
        // `Self::TrueDivT` are the same type, so this is a bitwise copy of a
        // value into its own type.
        unsafe { core::mem::transmute_copy::<Self::TrueDivT, Self>(&quotient) }
    }
}

// Implements `PlNumArithmetic` for a signed integer type by forwarding to the
// type's inherent wrapping operations and to `FloorDivMod`.
macro_rules! impl_signed_pl_num_arith {
    ($T:ty) => {
        impl PlNumArithmetic for $T {
            type TrueDivT = f64;

            #[inline(always)]
            fn wrapping_abs(self) -> Self {
                <$T>::wrapping_abs(self)
            }

            #[inline(always)]
            fn wrapping_neg(self) -> Self {
                <$T>::wrapping_neg(self)
            }

            #[inline(always)]
            fn wrapping_add(self, rhs: Self) -> Self {
                <$T>::wrapping_add(self, rhs)
            }

            #[inline(always)]
            fn wrapping_sub(self, rhs: Self) -> Self {
                <$T>::wrapping_sub(self, rhs)
            }

            #[inline(always)]
            fn wrapping_mul(self, rhs: Self) -> Self {
                <$T>::wrapping_mul(self, rhs)
            }

            #[inline(always)]
            fn wrapping_floor_div(self, rhs: Self) -> Self {
                // First component of the combined floor-div/mod result.
                let div_mod = self.wrapping_floor_div_mod(rhs);
                div_mod.0
            }

            #[inline(always)]
            fn wrapping_trunc_div(self, rhs: Self) -> Self {
                // A zero denominator yields 0 instead of panicking.
                match rhs {
                    0 => 0,
                    _ => <$T>::wrapping_div(self, rhs),
                }
            }

            #[inline(always)]
            fn wrapping_mod(self, rhs: Self) -> Self {
                // Second component of the combined floor-div/mod result.
                let div_mod = self.wrapping_floor_div_mod(rhs);
                div_mod.1
            }

            #[inline(always)]
            fn true_div(self, rhs: Self) -> Self::TrueDivT {
                // Both operands are converted to `f64` before dividing.
                let (numerator, denominator) = (self as f64, rhs as f64);
                numerator / denominator
            }
        }
    };
}

impl_signed_pl_num_arith!(i8);
impl_signed_pl_num_arith!(i16);
impl_signed_pl_num_arith!(i32);
impl_signed_pl_num_arith!(i64);
impl_signed_pl_num_arith!(i128);

// Implements `PlNumArithmetic` for an unsigned integer type. Division and
// remainder by zero yield 0 instead of panicking.
macro_rules! impl_unsigned_pl_num_arith {
    ($T:ty) => {
        impl PlNumArithmetic for $T {
            type TrueDivT = f64;

            #[inline(always)]
            fn wrapping_abs(self) -> Self {
                // Unsigned values are returned unchanged.
                self
            }

            #[inline(always)]
            fn wrapping_neg(self) -> Self {
                <$T>::wrapping_neg(self)
            }

            #[inline(always)]
            fn wrapping_add(self, rhs: Self) -> Self {
                <$T>::wrapping_add(self, rhs)
            }

            #[inline(always)]
            fn wrapping_sub(self, rhs: Self) -> Self {
                <$T>::wrapping_sub(self, rhs)
            }

            #[inline(always)]
            fn wrapping_mul(self, rhs: Self) -> Self {
                <$T>::wrapping_mul(self, rhs)
            }

            #[inline(always)]
            fn wrapping_floor_div(self, rhs: Self) -> Self {
                // For unsigned integers `checked_div` is `None` only when `rhs == 0`.
                self.checked_div(rhs).unwrap_or(0)
            }

            #[inline(always)]
            fn wrapping_trunc_div(self, rhs: Self) -> Self {
                // Same operation as the floor division above.
                <Self as PlNumArithmetic>::wrapping_floor_div(self, rhs)
            }

            #[inline(always)]
            fn wrapping_mod(self, rhs: Self) -> Self {
                // For unsigned integers `checked_rem` is `None` only when `rhs == 0`.
                self.checked_rem(rhs).unwrap_or(0)
            }

            #[inline(always)]
            fn true_div(self, rhs: Self) -> Self::TrueDivT {
                // Both operands are converted to `f64` before dividing.
                let (numerator, denominator) = (self as f64, rhs as f64);
                numerator / denominator
            }
        }
    };
}

impl_unsigned_pl_num_arith!(u8);
impl_unsigned_pl_num_arith!(u16);
impl_unsigned_pl_num_arith!(u32);
impl_unsigned_pl_num_arith!(u64);
impl_unsigned_pl_num_arith!(u128);

// Implements `PlNumArithmetic` for a floating-point type. The "wrapping"
// operations are the ordinary float operators; `TrueDivT` is the type itself.
macro_rules! impl_float_pl_num_arith {
    ($T:ty) => {
        impl PlNumArithmetic for $T {
            type TrueDivT = $T;

            #[inline(always)]
            fn wrapping_abs(self) -> Self {
                <$T>::abs(self)
            }

            #[inline(always)]
            fn wrapping_neg(self) -> Self {
                -self
            }

            #[inline(always)]
            fn wrapping_add(self, rhs: Self) -> Self {
                self + rhs
            }

            #[inline(always)]
            fn wrapping_sub(self, rhs: Self) -> Self {
                self - rhs
            }

            #[inline(always)]
            fn wrapping_mul(self, rhs: Self) -> Self {
                self * rhs
            }

            #[inline(always)]
            fn wrapping_floor_div(self, rhs: Self) -> Self {
                let quotient = self / rhs;
                quotient.floor()
            }

            #[inline(always)]
            fn wrapping_trunc_div(self, rhs: Self) -> Self {
                let quotient = self / rhs;
                quotient.trunc()
            }

            #[inline(always)]
            fn wrapping_mod(self, rhs: Self) -> Self {
                // self - rhs * floor(self / rhs)
                let floored_quotient = (self / rhs).floor();
                self - rhs * floored_quotient
            }

            #[inline(always)]
            fn true_div(self, rhs: Self) -> Self::TrueDivT {
                self / rhs
            }
        }
    };
}

impl_float_pl_num_arith!(f32);
impl_float_pl_num_arith!(f64);
Loading