pola-rs · itamarst · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024 · Sep 24, 2024
@@ -392,23 +392,35 @@ pub(crate) fn coerce_lhs_rhs<'a>(
     if let Some(result) = coerce_time_units(lhs, rhs) {
         return Ok(result);
     }
-    let dtype = match (lhs.dtype(), rhs.dtype()) {
+    let (left_dtype, right_dtype) = (lhs.dtype(), rhs.dtype());
+    let leaf_super_dtype = match (left_dtype, right_dtype) {
         #[cfg(feature = "dtype-struct")]
         (DataType::Struct(_), DataType::Struct(_)) => {
             return Ok((Cow::Borrowed(lhs), Cow::Borrowed(rhs)))
         },
-        _ => try_get_supertype(lhs.dtype(), rhs.dtype())?,
+        _ => try_get_supertype(left_dtype.leaf_dtype(), right_dtype.leaf_dtype())?,
     };
 
-    let left = if lhs.dtype() == &dtype {
+    let mut new_left_dtype = left_dtype.cast_leaf(leaf_super_dtype.clone());
+    let mut new_right_dtype = right_dtype.cast_leaf(leaf_super_dtype);
+
+    // If we have e.g. Array and List, we want to convert those too.
+    if (left_dtype.is_list() && right_dtype.is_array())
+        || (left_dtype.is_array() && right_dtype.is_list())
+    {
+        new_left_dtype = try_get_supertype(&new_left_dtype, &new_right_dtype)?;
+        new_right_dtype = new_left_dtype.clone();
+    }
+
+    let left = if lhs.dtype() == &new_left_dtype {
         Cow::Borrowed(lhs)
     } else {
-        Cow::Owned(lhs.cast(&dtype)?)
+        Cow::Owned(lhs.cast(&new_left_dtype)?)
     };
-    let right = if rhs.dtype() == &dtype {
+    let right = if rhs.dtype() == &new_right_dtype {
         Cow::Borrowed(rhs)
     } else {
-        Cow::Owned(rhs.cast(&dtype)?)
+        Cow::Owned(rhs.cast(&new_right_dtype)?)
     };
     Ok((left, right))
 }
@@ -522,6 +534,12 @@ impl Add for &Series {
             (DataType::Struct(_), DataType::Struct(_)) => {
                 _struct_arithmetic(self, rhs, |a, b| a.add(b))
             },
+            (left_dtype, DataType::List(_)) if left_dtype.is_numeric() => {
+                // Lists have implementation logic for rhs numeric:
+                let mut result = (rhs + self)?;
+                result.rename(self.name().clone());
+                Ok(result)
+            },
             _ => {
                 let (lhs, rhs) = coerce_lhs_rhs(self, rhs)?;
                 lhs.add_to(rhs.as_ref())
@@ -574,6 +592,12 @@ impl Mul for &Series {
                 let out = rhs.multiply(self)?;
                 Ok(out.with_name(self.name().clone()))
             },
+            (left_dtype, DataType::List(_)) if left_dtype.is_numeric() => {
+                // Lists have implementation logic for rhs numeric:
+                let mut result = (rhs * self)?;
+                result.rename(self.name().clone());
+                Ok(result)
+            },
             _ => {
                 let (lhs, rhs) = coerce_lhs_rhs(self, rhs)?;
                 lhs.multiply(rhs.as_ref())

@@ -53,23 +53,124 @@ fn lists_same_shapes(left: &ArrayRef, right: &ArrayRef) -> bool {
     }
 }
 
+/// Selects which binary operator `Op::apply` evaluates: `+`, `-`, `*`, `/` or `%`, in variant order.
+#[derive(Clone, Copy)]
+enum Op {
+    Add,
+    Subtract,
+    Multiply,
+    Divide,
+    Remainder,
+}
+
+impl Op {
+    /// Evaluates `lhs <op> rhs`, where `<op>` is the operator this variant names.
+    ///
+    /// The `where` clause requires `Sub`, `Mul`, `Div` and `Rem` to produce the
+    /// same `Output` type as `Add` for the given operands, so every match arm
+    /// already has the declared return type. The compiler verifies this at each
+    /// call site, which means no `unsafe` reinterpretation of the result and no
+    /// runtime layout assertion is needed.
+    fn apply<T, U>(&self, lhs: T, rhs: U) -> <T as Add<U>>::Output
+    where
+        T: Add<U>
+            + Sub<U, Output = <T as Add<U>>::Output>
+            + Mul<U, Output = <T as Add<U>>::Output>
+            + Div<U, Output = <T as Add<U>>::Output>
+            + Rem<U, Output = <T as Add<U>>::Output>,
+    {
+        // `Self::` paths keep the `Add` variant from being confused with the
+        // `Add` trait named in the signature.
+        match self {
+            Self::Add => lhs + rhs,
+            Self::Subtract => lhs - rhs,
+            Self::Multiply => lhs * rhs,
+            Self::Divide => lhs / rhs,
+            Self::Remainder => lhs % rhs,
+        }
+    }
+}
+
 impl ListChunked {
+    /// Scalar-rhs variant of the list arithmetic helper.
+    ///
+    /// Pairs every sub-list of `self` with the value of `rhs` at the same
+    /// position (`rhs` must have a primitive numeric dtype) and applies `op`
+    /// to the sub-list's leaf values. If either side of a pair is null, the
+    /// corresponding output entry is null.
+    fn arithm_helper_numeric(&self, rhs: &Series, op: Op) -> PolarsResult<Series> {
+        macro_rules! combine {
+            ($ca:expr) => {{
+                self.amortized_iter()
+                    .zip($ca.iter())
+                    .map(|pair| match pair {
+                        (Some(list_owner), Some(scalar)) => {
+                            let sublist = list_owner.as_ref().rechunk();
+                            let leaf_result = op.apply(&sublist.get_leaf_array(), scalar);
+                            // Hand the leaf-level result back together with the
+                            // original sub-list chunk so it can be given the
+                            // sub-list's shape again (presumably restoring any
+                            // nesting; see `reshape_list_based_on`).
+                            Some(reshape_list_based_on(
+                                &leaf_result.chunks()[0],
+                                &sublist.chunks()[0],
+                            ))
+                        },
+                        // Operations with nulls always result in nulls:
+                        _ => None,
+                    })
+                    .collect::<Vec<Option<Box<dyn Array>>>>()
+            }};
+        }
+        // One optional array per row, computed for whichever physical numeric
+        // type `rhs` holds.
+        let per_row = downcast_as_macro_arg_physical!(rhs, combine);
+
+        let mut builder = AnonymousListBuilder::new(
+            self.name().clone(),
+            self.len(),
+            Some(self.inner_dtype().clone()),
+        );
+        for maybe_arr in per_row.iter() {
+            match maybe_arr {
+                Some(arr) => {
+                    builder.append_array(arr.as_ref());
+                },
+                None => {
+                    builder.append_null();
+                },
+            }
+        }
+        Ok(builder.finish().into())
+    }
+
     /// Helper function for NumOpsDispatchInner implementation for ListChunked.
     ///
     /// Run the given `op` on `self` and `rhs`.
-    fn arithm_helper(
-        &self,
-        rhs: &Series,
-        op: &dyn Fn(&Series, &Series) -> PolarsResult<Series>,
-        has_nulls: Option<bool>,
-    ) -> PolarsResult<Series> {
+    fn arithm_helper(&self, rhs: &Series, op: Op, has_nulls: Option<bool>) -> PolarsResult<Series> {
+        polars_ensure!(
+            self.dtype().leaf_dtype().is_numeric() && rhs.dtype().leaf_dtype().is_numeric(),
+            InvalidOperation: "List Series can only do arithmetic operations if they and other Series are numeric, left and right dtypes are {:?} and {:?}",
+            self.dtype(),
+            rhs.dtype()
+        );
         polars_ensure!(
             self.len() == rhs.len(),
             InvalidOperation: "can only do arithmetic operations on Series of the same size; got {} and {}",
             self.len(),
             rhs.len()
         );
 
+        if rhs.dtype().is_numeric() {
+            return self.arithm_helper_numeric(rhs, op);
+        }
+
+        polars_ensure!(
+            self.dtype() == rhs.dtype(),
+            InvalidOperation: "List Series doing arithmetic operations to each other should have same dtype; got {:?} and {:?}",
+            self.dtype(),
+            rhs.dtype()
+        );
+
         let mut has_nulls = has_nulls.unwrap_or(false);
         if !has_nulls {
             for chunk in self.chunks().iter() {
@@ -118,7 +219,7 @@ impl ListChunked {
                         // along.
                         a_listchunked.arithm_helper(b, op, Some(true))
                     } else {
-                        op(a, b)
+                        op.apply(a, b)
                     };
                     chunk_result.map(Some)
                 }).collect::<PolarsResult<Vec<Option<Series>>>>()?;
@@ -139,8 +240,7 @@ impl ListChunked {
             InvalidOperation: "can only do arithmetic operations on lists of the same size"
         );
 
-        let result = op(&l_leaf_array, &r_leaf_array)?;
-
+        let result = op.apply(&l_leaf_array, &r_leaf_array)?;
         // We now need to wrap the Arrow arrays with the metadata that turns
         // them into lists:
         // TODO is there a way to do this without cloning the underlying data?
@@ -160,18 +260,18 @@ impl ListChunked {
 
 impl NumOpsDispatchInner for ListType {
     fn add_to(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
-        lhs.arithm_helper(rhs, &|l, r| l.add_to(r), None)
+        lhs.arithm_helper(rhs, Op::Add, None)
     }
     fn subtract(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
-        lhs.arithm_helper(rhs, &|l, r| l.subtract(r), None)
+        lhs.arithm_helper(rhs, Op::Subtract, None)
     }
     fn multiply(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
-        lhs.arithm_helper(rhs, &|l, r| l.multiply(r), None)
+        lhs.arithm_helper(rhs, Op::Multiply, None)
     }
     fn divide(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
-        lhs.arithm_helper(rhs, &|l, r| l.divide(r), None)
+        lhs.arithm_helper(rhs, Op::Divide, None)
     }
     fn remainder(lhs: &ListChunked, rhs: &Series) -> PolarsResult<Series> {
-        lhs.arithm_helper(rhs, &|l, r| l.remainder(r), None)
+        lhs.arithm_helper(rhs, Op::Remainder, None)
     }
 }
@@ -372,6 +372,12 @@ fn get_arithmetic_field(
                 (_, Time) | (Time, _) => {
                     polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type)
                 },
+                (list_dtype @ List(_), prim_dtype) if prim_dtype.is_primitive() => {
+                    list_dtype.cast_leaf(try_get_supertype(list_dtype.leaf_dtype(), prim_dtype)?)
+                },
+                (prim_dtype, list_dtype @ List(_)) if prim_dtype.is_primitive() => {
+                    list_dtype.cast_leaf(try_get_supertype(list_dtype.leaf_dtype(), prim_dtype)?)
+                },
                 (left, right) => try_get_supertype(left, right)?,
             }
         },
@@ -397,6 +403,12 @@ fn get_arithmetic_field(
                     polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type)
                 },
                 (Boolean, Boolean) => IDX_DTYPE,
+                (list_dtype @ List(_), prim_dtype) if prim_dtype.is_primitive() => {
+                    list_dtype.cast_leaf(try_get_supertype(list_dtype.leaf_dtype(), prim_dtype)?)
+                },
+                (prim_dtype, list_dtype @ List(_)) if prim_dtype.is_primitive() => {
+                    list_dtype.cast_leaf(try_get_supertype(list_dtype.leaf_dtype(), prim_dtype)?)
+                },
                 (left, right) => try_get_supertype(left, right)?,
             }
         },
@@ -429,6 +441,18 @@ fn get_arithmetic_field(
                         polars_bail!(InvalidOperation: "{} not allowed on {} and {}", op, left_field.dtype, right_type)
                     },
                 },
+                (list_dtype @ List(_), prim_dtype) if prim_dtype.is_primitive() => {
+                    let dtype = list_dtype
+                        .cast_leaf(try_get_supertype(list_dtype.leaf_dtype(), prim_dtype)?);
+                    left_field.coerce(dtype);
+                    return Ok(left_field);
+                },
+                (prim_dtype, list_dtype @ List(_)) if prim_dtype.is_primitive() => {
+                    let dtype = list_dtype
+                        .cast_leaf(try_get_supertype(list_dtype.leaf_dtype(), prim_dtype)?);
+                    left_field.coerce(dtype);
+                    return Ok(left_field);
+                },
                 _ => {
                     // Avoid needlessly type casting numeric columns during arithmetic
                     // with literals.

@@ -47,55 +47,6 @@ fn is_cat_str_binary(type_left: &DataType, type_right: &DataType) -> bool {
     }
 }
 
-fn process_list_arithmetic(
-    type_left: DataType,
-    type_right: DataType,
-    node_left: Node,
-    node_right: Node,
-    op: Operator,
-    expr_arena: &mut Arena<AExpr>,
-) -> PolarsResult<Option<AExpr>> {
-    match (&type_left, &type_right) {
-        (DataType::List(_), _) => {
-            let leaf = type_left.leaf_dtype();
-            if type_right != *leaf {
-                let new_node_right = expr_arena.add(AExpr::Cast {
-                    expr: node_right,
-                    dtype: type_left.cast_leaf(leaf.clone()),
-                    options: CastOptions::NonStrict,
-                });
-
-                Ok(Some(AExpr::BinaryExpr {
-                    left: node_left,
-                    op,
-                    right: new_node_right,
-                }))
-            } else {
-                Ok(None)
-            }
-        },
-        (_, DataType::List(_)) => {
-            let leaf = type_right.leaf_dtype();
-            if type_left != *leaf {
-                let new_node_left = expr_arena.add(AExpr::Cast {
-                    expr: node_left,
-                    dtype: type_right.cast_leaf(leaf.clone()),
-                    options: CastOptions::NonStrict,
-                });
-
-                Ok(Some(AExpr::BinaryExpr {
-                    left: new_node_left,
-                    op,
-                    right: node_right,
-                }))
-            } else {
-                Ok(None)
-            }
-        },
-        _ => unreachable!(),
-    }
-}
-
 #[cfg(feature = "dtype-struct")]
 // Ensure we don't cast to supertype
 // otherwise we will fill a struct with null fields
@@ -265,11 +216,6 @@ pub(super) fn process_binary(
             (String, a) | (a, String) if a.is_numeric() => {
                 polars_bail!(InvalidOperation: "arithmetic on string and numeric not allowed, try an explicit cast first")
             },
-            (List(_), _) | (_, List(_)) => {
-                return process_list_arithmetic(
-                    type_left, type_right, node_left, node_right, op, expr_arena,
-                )
-            },
             (Datetime(_, _), _)
             | (_, Datetime(_, _))
             | (Date, _)