-
-
Notifications
You must be signed in to change notification settings - Fork 2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(rust, python): allow arithmetic operations between numeric Series and list Series #18901
Changes from all commits
c1872cc
944d4ae
e3141fd
835f9fa
ef43739
3ddfbc6
9f984ee
2b115dc
11a0d06
4000b0f
d25c9b8
cbc2de1
3d14608
e916b56
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -53,23 +53,124 @@ fn lists_same_shapes(left: &ArrayRef, right: &ArrayRef) -> bool { | |
} | ||
} | ||
|
||
/// Arithmetic operations that can be applied to a Series | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there some existing code that does this? There's a whole bunch of similar things but I may've missed an actual implementation. |
||
#[derive(Clone, Copy)] | ||
enum Op { | ||
Add, | ||
Subtract, | ||
Multiply, | ||
Divide, | ||
Remainder, | ||
} | ||
|
||
impl Op { | ||
fn apply<T, U>(&self, lhs: T, rhs: U) -> <T as Add<U>>::Output | ||
where | ||
T: Add<U> + Sub<U> + Mul<U> + Div<U> + Rem<U>, | ||
{ | ||
{ | ||
// This should be all const, optimized away | ||
assert_eq!( | ||
[core::mem::align_of::<<T as Add<U>>::Output>(); 4], | ||
[ | ||
core::mem::align_of::<<T as Sub<U>>::Output>(), | ||
core::mem::align_of::<<T as Mul<U>>::Output>(), | ||
core::mem::align_of::<<T as Div<U>>::Output>(), | ||
core::mem::align_of::<<T as Rem<U>>::Output>(), | ||
] | ||
); | ||
} | ||
|
||
{ | ||
// Safety: All operations return the same type | ||
macro_rules! wrap { | ||
($e:expr) => { | ||
// Safety: This performs a `Copy`, but `$e` could be a `Series`, | ||
// so we need to wrap in `ManuallyDrop` to avoid double-free. | ||
unsafe { core::mem::transmute_copy(&core::mem::ManuallyDrop::new($e)) } | ||
}; | ||
} | ||
|
||
use Op::*; | ||
match self { | ||
Add => lhs + rhs, | ||
Subtract => wrap!(lhs - rhs), | ||
Multiply => wrap!(lhs * rhs), | ||
Divide => wrap!(lhs / rhs), | ||
Remainder => wrap!(lhs % rhs), | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can make |
||
} | ||
} | ||
} | ||
|
||
impl ListChunked { | ||
/// Helper function for NumOpsDispatchInner implementation for ListChunked. | ||
/// | ||
/// Run the given `op` on `self` and `rhs`, for cases where `rhs` has a | ||
/// primitive numeric dtype. | ||
fn arithm_helper_numeric(&self, rhs: &Series, op: Op) -> PolarsResult<Series> { | ||
let mut result = AnonymousListBuilder::new( | ||
self.name().clone(), | ||
self.len(), | ||
Some(self.inner_dtype().clone()), | ||
); | ||
macro_rules! combine { | ||
($ca:expr) => {{ | ||
self.amortized_iter() | ||
.zip($ca.iter()) | ||
.map(|(a, b)| { | ||
let (Some(a_owner), Some(b)) = (a, b) else { | ||
// Operations with nulls always result in nulls: | ||
return Ok(None); | ||
}; | ||
let a = a_owner.as_ref().rechunk(); | ||
let leaf_result = op.apply(&a.get_leaf_array(), b); | ||
let result = | ||
reshape_list_based_on(&leaf_result.chunks()[0], &a.chunks()[0]); | ||
Ok(Some(result)) | ||
}) | ||
.collect::<PolarsResult<Vec<Option<Box<dyn Array>>>>>()? | ||
}}; | ||
} | ||
let combined = downcast_as_macro_arg_physical!(rhs, combine); | ||
for arr in combined.iter() { | ||
if let Some(arr) = arr { | ||
result.append_array(arr.as_ref()); | ||
} else { | ||
result.append_null(); | ||
} | ||
} | ||
Ok(result.finish().into()) | ||
} | ||
|
||
/// Helper function for NumOpsDispatchInner implementation for ListChunked. | ||
/// | ||
/// Run the given `op` on `self` and `rhs`. | ||
fn arithm_helper( | ||
&self, | ||
rhs: &Series, | ||
op: &dyn Fn(&Series, &Series) -> PolarsResult<Series>, | ||
has_nulls: Option<bool>, | ||
) -> PolarsResult<Series> { | ||
fn arithm_helper(&self, rhs: &Series, op: Op, has_nulls: Option<bool>) -> PolarsResult<Series> { | ||
polars_ensure!( | ||
self.dtype().leaf_dtype().is_numeric() && rhs.dtype().leaf_dtype().is_numeric(), | ||
InvalidOperation: "List Series can only do arithmetic operations if they and other Series are numeric, left and right dtypes are {:?} and {:?}", | ||
self.dtype(), | ||
rhs.dtype() | ||
); | ||
polars_ensure!( | ||
self.len() == rhs.len(), | ||
InvalidOperation: "can only do arithmetic operations on Series of the same size; got {} and {}", | ||
self.len(), | ||
rhs.len() | ||
); | ||
|
||
if rhs.dtype().is_numeric() { | ||
return self.arithm_helper_numeric(rhs, op); | ||
} | ||
|
||
polars_ensure!( | ||
self.dtype() == rhs.dtype(), | ||
InvalidOperation: "List Series doing arithmetic operations to each other should have same dtype; got {:?} and {:?}", | ||
self.dtype(), | ||
rhs.dtype() | ||
); | ||
|
||
let mut has_nulls = has_nulls.unwrap_or(false); | ||
if !has_nulls { | ||
for chunk in self.chunks().iter() { | ||
|
@@ -118,7 +219,7 @@ impl ListChunked { | |
// along. | ||
a_listchunked.arithm_helper(b, op, Some(true)) | ||
} else { | ||
op(a, b) | ||
op.apply(a, b) | ||
}; | ||
chunk_result.map(Some) | ||
}).collect::<PolarsResult<Vec<Option<Series>>>>()?; | ||
|
@@ -139,8 +240,7 @@ impl ListChunked { | |
InvalidOperation: "can only do arithmetic operations on lists of the same size" | ||
); | ||
|
||
let result = op(&l_leaf_array, &r_leaf_array)?; | ||
|
||
let result = op.apply(&l_leaf_array, &r_leaf_array)?; | ||
// We now need to wrap the Arrow arrays with the metadata that turns | ||
// them into lists: | ||
// TODO is there a way to do this without cloning the underlying data? | ||
|
@@ -160,18 +260,18 @@ impl ListChunked { | |
|
||
impl NumOpsDispatchInner for ListType { | ||
fn add_to(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> { | ||
lhs.arithm_helper(rhs, &|l, r| l.add_to(r), None) | ||
lhs.arithm_helper(rhs, Op::Add, None) | ||
} | ||
fn subtract(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> { | ||
lhs.arithm_helper(rhs, &|l, r| l.subtract(r), None) | ||
lhs.arithm_helper(rhs, Op::Subtract, None) | ||
} | ||
fn multiply(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> { | ||
lhs.arithm_helper(rhs, &|l, r| l.multiply(r), None) | ||
lhs.arithm_helper(rhs, Op::Multiply, None) | ||
} | ||
fn divide(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> { | ||
lhs.arithm_helper(rhs, &|l, r| l.divide(r), None) | ||
lhs.arithm_helper(rhs, Op::Divide, None) | ||
} | ||
fn remainder(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> { | ||
lhs.arithm_helper(rhs, &|l, r| l.remainder(r), None) | ||
lhs.arithm_helper(rhs, Op::Remainder, None) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,55 +47,6 @@ fn is_cat_str_binary(type_left: &DataType, type_right: &DataType) -> bool { | |
} | ||
} | ||
|
||
fn process_list_arithmetic( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. remove logic from the |
||
type_left: DataType, | ||
type_right: DataType, | ||
node_left: Node, | ||
node_right: Node, | ||
op: Operator, | ||
expr_arena: &mut Arena<AExpr>, | ||
) -> PolarsResult<Option<AExpr>> { | ||
match (&type_left, &type_right) { | ||
(DataType::List(_), _) => { | ||
let leaf = type_left.leaf_dtype(); | ||
if type_right != *leaf { | ||
let new_node_right = expr_arena.add(AExpr::Cast { | ||
expr: node_right, | ||
dtype: type_left.cast_leaf(leaf.clone()), | ||
options: CastOptions::NonStrict, | ||
}); | ||
|
||
Ok(Some(AExpr::BinaryExpr { | ||
left: node_left, | ||
op, | ||
right: new_node_right, | ||
})) | ||
} else { | ||
Ok(None) | ||
} | ||
}, | ||
(_, DataType::List(_)) => { | ||
let leaf = type_right.leaf_dtype(); | ||
if type_left != *leaf { | ||
let new_node_left = expr_arena.add(AExpr::Cast { | ||
expr: node_left, | ||
dtype: type_right.cast_leaf(leaf.clone()), | ||
options: CastOptions::NonStrict, | ||
}); | ||
|
||
Ok(Some(AExpr::BinaryExpr { | ||
left: new_node_left, | ||
op, | ||
right: node_right, | ||
})) | ||
} else { | ||
Ok(None) | ||
} | ||
}, | ||
_ => unreachable!(), | ||
} | ||
} | ||
|
||
#[cfg(feature = "dtype-struct")] | ||
// Ensure we don't cast to supertype | ||
// otherwise we will fill a struct with null fields | ||
|
@@ -265,11 +216,6 @@ pub(super) fn process_binary( | |
(String, a) | (a, String) if a.is_numeric() => { | ||
polars_bail!(InvalidOperation: "arithmetic on string and numeric not allowed, try an explicit cast first") | ||
}, | ||
(List(_), _) | (_, List(_)) => { | ||
return process_list_arithmetic( | ||
type_left, type_right, node_left, node_right, op, expr_arena, | ||
) | ||
}, | ||
(Datetime(_, _), _) | ||
| (_, Datetime(_, _)) | ||
| (Date, _) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think we should do that. I want those casts to be explicit for now.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By explicit do you mean "explicitly cast list to array" and vice versa as needed? This section is definitely necessary in some form, if I remove it:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, the user should cast explicitly. And we shuold return an error instead of panicking if they pass wrong types.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I can do that, but given this will involving changing the behavior of those two tests (from e.g. "these two things are equal" to "this now errors asking for a cast"), this is a backwards incompatible change. So as long as that's OK.