diff --git a/crates/polars-core/src/datatypes/dtype.rs b/crates/polars-core/src/datatypes/dtype.rs
index 79b1c8f5145b..2d0969d136bd 100644
--- a/crates/polars-core/src/datatypes/dtype.rs
+++ b/crates/polars-core/src/datatypes/dtype.rs
@@ -227,6 +227,20 @@ impl DataType {
         prev
     }
 
+    /// Cast the leaf types of Lists/Arrays and keep the nesting.
+    ///
+    /// `List` and (with the `dtype-array` feature) `Array` wrappers are rebuilt around
+    /// the recursively cast inner type, keeping the `Array` width; any other dtype is
+    /// replaced by `to`.
+    pub fn cast_leaf(&self, to: DataType) -> DataType {
+        use DataType::*;
+        match self {
+            List(inner) => List(Box::new(inner.cast_leaf(to))),
+            #[cfg(feature = "dtype-array")]
+            Array(inner, size) => Array(Box::new(inner.cast_leaf(to)), *size),
+            _ => to,
+        }
+    }
+
     /// Convert to the physical data type
     #[must_use]
     pub fn to_physical(&self) -> DataType {
diff --git a/crates/polars-core/src/series/arithmetic/borrowed.rs b/crates/polars-core/src/series/arithmetic/borrowed.rs
index 38a9254821f1..37a30a3ffc45 100644
--- a/crates/polars-core/src/series/arithmetic/borrowed.rs
+++ b/crates/polars-core/src/series/arithmetic/borrowed.rs
@@ -113,6 +113,80 @@ impl NumOpsDispatchInner for BooleanType {
     }
 }
 
+/// Collect the fixed width of every nested `Array` level of `dt`, outermost first.
+///
+/// When `infer` is true a leading `-1` is emitted ahead of the widths
+/// (presumably the "infer this dimension" marker of `reshape_array` -- confirm there).
+#[cfg(feature = "dtype-array")]
+fn array_shape(dt: &DataType, infer: bool) -> Vec<i64> {
+    fn inner(dt: &DataType, buf: &mut Vec<i64>) {
+        if let DataType::Array(inner_dt, size) = dt {
+            buf.push(*size as i64);
+            // `get_leaf_array` recurses through every nesting level, so each level's
+            // width is needed to rebuild the shape, not just the outermost one.
+            inner(inner_dt, buf)
+        }
+    }
+
+    let mut buf = vec![];
+    if infer {
+        buf.push(-1)
+    }
+    inner(dt, &mut buf);
+    buf
+}
+
+#[cfg(feature = "dtype-array")]
+impl ArrayChunked {
+    /// Apply the binary arithmetic `op` to the flattened leaf values of `self` and `rhs`,
+    /// then reshape the result with the shape derived from the dtype of `self`.
+    ///
+    /// A numeric `rhs` of length 1 is handed to `op` unchanged; otherwise `rhs` must
+    /// have exactly the same dtype as `self` and its leaf values are used.
+    ///
+    /// # Errors
+    /// Returns `InvalidOperation` when the dtypes differ, and forwards any error from
+    /// `op` or `reshape_array`.
+    fn arithm_helper(
+        &self,
+        rhs: &Series,
+        op: &dyn Fn(Series, Series) -> PolarsResult<Series>,
+    ) -> PolarsResult<Series> {
+        let l_leaf_array = self.clone().into_series().get_leaf_array();
+        let shape = array_shape(self.dtype(), true);
+
+        // A single numeric value needs no flattening before it is passed to `op`.
+        let r_leaf_array = if rhs.dtype().is_numeric() && rhs.len() == 1 {
+            rhs.clone()
+        } else {
+            polars_ensure!(self.dtype() == rhs.dtype(), InvalidOperation: "can only do arithmetic of array's of the same type and shape; got {} and {}", self.dtype(), rhs.dtype());
+            rhs.get_leaf_array()
+        };
+
+        let out = op(l_leaf_array, r_leaf_array)?;
+        out.reshape_array(&shape)
+    }
+}
+ +#[cfg(feature = "dtype-array")] +impl NumOpsDispatchInner for FixedSizeListType { + fn add_to(lhs: &ArrayChunked, rhs: &Series) -> PolarsResult { + lhs.arithm_helper(rhs, &|l, r| l.add_to(&r)) + } + fn subtract(lhs: &ArrayChunked, rhs: &Series) -> PolarsResult { + lhs.arithm_helper(rhs, &|l, r| l.subtract(&r)) + } + fn multiply(lhs: &ArrayChunked, rhs: &Series) -> PolarsResult { + lhs.arithm_helper(rhs, &|l, r| l.multiply(&r)) + } + fn divide(lhs: &ArrayChunked, rhs: &Series) -> PolarsResult { + lhs.arithm_helper(rhs, &|l, r| l.divide(&r)) + } + fn remainder(lhs: &ArrayChunked, rhs: &Series) -> PolarsResult { + lhs.arithm_helper(rhs, &|l, r| l.remainder(&r)) + } +} + #[cfg(feature = "checked_arithmetic")] pub mod checked { use num_traits::{CheckedDiv, One, ToPrimitive, Zero}; diff --git a/crates/polars-core/src/series/arithmetic/owned.rs b/crates/polars-core/src/series/arithmetic/owned.rs index 2540b181999c..c041929135d7 100644 --- a/crates/polars-core/src/series/arithmetic/owned.rs +++ b/crates/polars-core/src/series/arithmetic/owned.rs @@ -18,6 +18,10 @@ pub fn coerce_lhs_rhs_owned(lhs: Series, rhs: Series) -> PolarsResult<(Series, S Ok((left, right)) } +fn is_eligible(lhs: &DataType, rhs: &DataType) -> bool { + !lhs.is_logical() && lhs.to_physical().is_numeric() && rhs.to_physical().is_numeric() +} + #[cfg(feature = "performant")] fn apply_operation_mut(mut lhs: Series, mut rhs: Series, op: F) -> Series where @@ -43,10 +47,7 @@ macro_rules! 
impl_operation { #[cfg(feature = "performant")] { // only physical numeric values take the mutable path - if !self.dtype().is_logical() - && self.dtype().to_physical().is_numeric() - && rhs.dtype().to_physical().is_numeric() - { + if is_eligible(self.dtype(), rhs.dtype()) { let (lhs, rhs) = coerce_lhs_rhs_owned(self, rhs).unwrap(); let (lhs, rhs) = align_chunks_binary_owned_series(lhs, rhs); use DataType::*; @@ -84,3 +85,29 @@ impl_operation!(Add, add, |a, b| a.add(b)); impl_operation!(Sub, sub, |a, b| a.sub(b)); impl_operation!(Mul, mul, |a, b| a.mul(b)); impl_operation!(Div, div, |a, b| a.div(b)); + +impl Series { + pub fn try_add_owned(self, other: Self) -> PolarsResult { + if is_eligible(self.dtype(), other.dtype()) { + Ok(self + other) + } else { + self.try_add(&other) + } + } + + pub fn try_sub_owned(self, other: Self) -> PolarsResult { + if is_eligible(self.dtype(), other.dtype()) { + Ok(self - other) + } else { + self.try_sub(&other) + } + } + + pub fn try_mul_owned(self, other: Self) -> PolarsResult { + if is_eligible(self.dtype(), other.dtype()) { + Ok(self * other) + } else { + self.try_mul(&other) + } + } +} diff --git a/crates/polars-core/src/series/implementations/array.rs b/crates/polars-core/src/series/implementations/array.rs index d36d65d72124..a660c68db7a2 100644 --- a/crates/polars-core/src/series/implementations/array.rs +++ b/crates/polars-core/src/series/implementations/array.rs @@ -51,6 +51,24 @@ impl private::PrivateSeries for SeriesWrap { fn group_tuples(&self, multithreaded: bool, sorted: bool) -> PolarsResult { IntoGroupsProxy::group_tuples(&self.0, multithreaded, sorted) } + + fn add_to(&self, rhs: &Series) -> PolarsResult { + self.0.add_to(rhs) + } + + fn subtract(&self, rhs: &Series) -> PolarsResult { + self.0.subtract(rhs) + } + + fn multiply(&self, rhs: &Series) -> PolarsResult { + self.0.multiply(rhs) + } + fn divide(&self, rhs: &Series) -> PolarsResult { + self.0.divide(rhs) + } + fn remainder(&self, rhs: &Series) -> PolarsResult 
{ + self.0.remainder(rhs) + } } impl SeriesTrait for SeriesWrap { diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index e6b392686d69..13d2f5f8ec62 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -286,6 +286,10 @@ impl Series { true } + pub fn from_arrow_chunks(name: &str, arrays: Vec) -> PolarsResult { + Self::try_from((name, arrays)) + } + pub fn from_arrow(name: &str, array: ArrayRef) -> PolarsResult { Self::try_from((name, array)) } diff --git a/crates/polars-core/src/series/ops/mod.rs b/crates/polars-core/src/series/ops/mod.rs index 1988936bf219..ad927e834f9c 100644 --- a/crates/polars-core/src/series/ops/mod.rs +++ b/crates/polars-core/src/series/ops/mod.rs @@ -3,6 +3,7 @@ mod extend; mod null; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +mod reshape; #[derive(Copy, Clone, Hash, Eq, PartialEq, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] diff --git a/crates/polars-ops/src/series/ops/reshape.rs b/crates/polars-core/src/series/ops/reshape.rs similarity index 92% rename from crates/polars-ops/src/series/ops/reshape.rs rename to crates/polars-core/src/series/ops/reshape.rs index 635294132d71..f02697ffb014 100644 --- a/crates/polars-ops/src/series/ops/reshape.rs +++ b/crates/polars-core/src/series/ops/reshape.rs @@ -5,14 +5,13 @@ use std::collections::VecDeque; use arrow::array::*; use arrow::legacy::kernels::list::array_to_unit_list; use arrow::offset::Offsets; -use polars_core::chunked_array::builder::get_list_builder; -use polars_core::datatypes::{DataType, ListChunked}; -use polars_core::prelude::{IntoSeries, Series}; use polars_error::{polars_bail, polars_ensure, PolarsResult}; #[cfg(feature = "dtype-array")] use polars_utils::format_tuple; -use crate::prelude::*; +use crate::chunked_array::builder::get_list_builder; +use crate::datatypes::{DataType, ListChunked}; +use crate::prelude::{IntoSeries, Series, *}; fn 
reshape_fast_path(name: &str, s: &Series) -> Series { let mut ca = match s.dtype() { @@ -30,10 +29,10 @@ fn reshape_fast_path(name: &str, s: &Series) -> Series { ca.into_series() } -pub trait SeriesReshape: SeriesSealed { +impl Series { /// Recurse nested types until we are at the leaf array. - fn get_leaf_array(&self) -> Series { - let s = self.as_series(); + pub fn get_leaf_array(&self) -> Series { + let s = self; match s.dtype() { #[cfg(feature = "dtype-array")] DataType::Array(dtype, _) => { @@ -62,8 +61,8 @@ pub trait SeriesReshape: SeriesSealed { /// Convert the values of this Series to a ListChunked with a length of 1, /// so a Series of `[1, 2, 3]` becomes `[[1, 2, 3]]`. - fn implode(&self) -> PolarsResult { - let s = self.as_series(); + pub fn implode(&self) -> PolarsResult { + let s = self; let s = s.rechunk(); let values = s.array_ref(0); @@ -89,7 +88,7 @@ pub trait SeriesReshape: SeriesSealed { } #[cfg(feature = "dtype-array")] - fn reshape_array(&self, dimensions: &[i64]) -> PolarsResult { + pub fn reshape_array(&self, dimensions: &[i64]) -> PolarsResult { let mut dims = dimensions.iter().copied().collect::>(); let leaf_array = self.get_leaf_array(); @@ -136,8 +135,8 @@ pub trait SeriesReshape: SeriesSealed { }) } - fn reshape_list(&self, dimensions: &[i64]) -> PolarsResult { - let s = self.as_series(); + pub fn reshape_list(&self, dimensions: &[i64]) -> PolarsResult { + let s = self; if dimensions.is_empty() { polars_bail!(ComputeError: "reshape `dimensions` cannot be empty") @@ -212,13 +211,10 @@ pub trait SeriesReshape: SeriesSealed { } } -impl SeriesReshape for Series {} - #[cfg(test)] mod test { - use polars_core::prelude::*; - use super::*; + use crate::prelude::*; #[test] fn test_to_list() -> PolarsResult<()> { diff --git a/crates/polars-core/src/series/series_trait.rs b/crates/polars-core/src/series/series_trait.rs index ea9ba3364ad1..cc897c775537 100644 --- a/crates/polars-core/src/series/series_trait.rs +++ 
b/crates/polars-core/src/series/series_trait.rs @@ -136,23 +136,23 @@ pub(crate) mod private { } fn subtract(&self, _rhs: &Series) -> PolarsResult { - invalid_operation_panic!(sub, self) + polars_bail!(opq = subtract, self._dtype()); } fn add_to(&self, _rhs: &Series) -> PolarsResult { - invalid_operation_panic!(add, self) + polars_bail!(opq = add, self._dtype()); } fn multiply(&self, _rhs: &Series) -> PolarsResult { - invalid_operation_panic!(mul, self) + polars_bail!(opq = multiply, self._dtype()); } fn divide(&self, _rhs: &Series) -> PolarsResult { - invalid_operation_panic!(div, self) + polars_bail!(opq = divide, self._dtype()); } fn remainder(&self, _rhs: &Series) -> PolarsResult { - invalid_operation_panic!(rem, self) + polars_bail!(opq = remainder, self._dtype()); } #[cfg(feature = "algorithm_group_by")] fn group_tuples(&self, _multithreaded: bool, _sorted: bool) -> PolarsResult { - invalid_operation_panic!(group_tuples, self) + polars_bail!(opq = group_tuples, self._dtype()); } #[cfg(feature = "zip_with")] fn zip_with_same_type( @@ -160,7 +160,7 @@ pub(crate) mod private { _mask: &BooleanChunked, _other: &Series, ) -> PolarsResult { - invalid_operation_panic!(zip_with_same_type, self) + polars_bail!(opq = zip_with_same_type, self._dtype()); } #[allow(unused_variables)] diff --git a/crates/polars-expr/src/expressions/binary.rs b/crates/polars-expr/src/expressions/binary.rs index 9013f85b1646..20a2c6ba35f4 100644 --- a/crates/polars-expr/src/expressions/binary.rs +++ b/crates/polars-expr/src/expressions/binary.rs @@ -37,9 +37,9 @@ impl BinaryExpr { /// Can partially do operations in place. 
fn apply_operator_owned(left: Series, right: Series, op: Operator) -> PolarsResult { match op { - Operator::Plus => Ok(left + right), - Operator::Minus => Ok(left - right), - Operator::Multiply => Ok(left * right), + Operator::Plus => left.try_add_owned(right), + Operator::Minus => left.try_sub_owned(right), + Operator::Multiply => left.try_mul_owned(right), _ => apply_operator(&left, &right, op), } } @@ -61,6 +61,12 @@ pub fn apply_operator(left: &Series, right: &Series, op: Operator) -> PolarsResu #[cfg(feature = "dtype-decimal")] Decimal(_, _) => left.try_div(right), Date | Datetime(_, _) | Float32 | Float64 => left.try_div(right), + #[cfg(feature = "dtype-array")] + dt @ Array(_, _) => { + let left_dt = dt.cast_leaf(Float64); + let right_dt = right.dtype().cast_leaf(Float64); + left.cast(&left_dt)?.try_div(&right.cast(&right_dt)?) + }, _ => left.cast(&Float64)?.try_div(&right.cast(&Float64)?), }, Operator::FloorDivide => { diff --git a/crates/polars-expr/src/expressions/mod.rs b/crates/polars-expr/src/expressions/mod.rs index 1271dfa9de0a..6fc8021b9886 100644 --- a/crates/polars-expr/src/expressions/mod.rs +++ b/crates/polars-expr/src/expressions/mod.rs @@ -34,7 +34,6 @@ pub(crate) use gather::*; pub(crate) use literal::*; use polars_core::prelude::*; use polars_io::predicates::PhysicalIoExpr; -use polars_ops::prelude::*; use polars_plan::prelude::*; #[cfg(feature = "dynamic_group_by")] pub(crate) use rolling::RollingExpr; diff --git a/crates/polars-ops/src/chunked_array/list/namespace.rs b/crates/polars-ops/src/chunked_array/list/namespace.rs index 45bdad8d3dee..15b32c55a7ed 100644 --- a/crates/polars-ops/src/chunked_array/list/namespace.rs +++ b/crates/polars-ops/src/chunked_array/list/namespace.rs @@ -19,7 +19,7 @@ use crate::chunked_array::list::sum_mean::sum_with_nulls; #[cfg(feature = "diff")] use crate::prelude::diff; use crate::prelude::list::sum_mean::{mean_list_numerical, sum_list_numerical}; -use crate::series::{ArgAgg, SeriesReshape}; +use 
crate::series::ArgAgg; pub(super) fn has_inner_nulls(ca: &ListChunked) -> bool { for arr in ca.downcast_iter() { diff --git a/crates/polars-ops/src/series/ops/mod.rs b/crates/polars-ops/src/series/ops/mod.rs index e17f7495a015..75c40c6d500d 100644 --- a/crates/polars-ops/src/series/ops/mod.rs +++ b/crates/polars-ops/src/series/ops/mod.rs @@ -50,7 +50,6 @@ mod rank; mod reinterpret; #[cfg(feature = "replace")] mod replace; -mod reshape; #[cfg(feature = "rle")] mod rle; #[cfg(feature = "rolling_window")] @@ -138,7 +137,6 @@ pub use unique::*; pub use various::*; mod not; pub use not::*; -pub use reshape::*; pub trait SeriesSealed { fn as_series(&self) -> &Series; diff --git a/crates/polars-sql/src/sql_expr.rs b/crates/polars-sql/src/sql_expr.rs index e5349d3252bd..8e2de9ebacb7 100644 --- a/crates/polars-sql/src/sql_expr.rs +++ b/crates/polars-sql/src/sql_expr.rs @@ -4,7 +4,6 @@ use polars_core::export::regex; use polars_core::prelude::*; use polars_error::to_compute_err; use polars_lazy::prelude::*; -use polars_ops::series::SeriesReshape; use polars_plan::prelude::typed_lit; use polars_plan::prelude::LiteralValue::Null; use polars_time::Duration; diff --git a/py-polars/src/series/mod.rs b/py-polars/src/series/mod.rs index 872dbaf2252a..9205e4483ad6 100644 --- a/py-polars/src/series/mod.rs +++ b/py-polars/src/series/mod.rs @@ -126,7 +126,6 @@ impl PySeries { } fn reshape(&self, dims: Vec, is_list: bool) -> PyResult { - use polars_ops::prelude::SeriesReshape; let out = if is_list { self.series.reshape_list(&dims) } else { diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 53a93f8e0b32..77c8d57ecd5e 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -1834,84 +1834,6 @@ def test_shrink_to_fit() -> None: assert_frame_equal(df.shrink_to_fit(in_place=False), df) -def test_arithmetic() -> None: - df = pl.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]}) - - for df_mul in 
(df * 2, 2 * df): - expected = pl.DataFrame({"a": [2.0, 4.0], "b": [6.0, 8.0]}) - assert_frame_equal(df_mul, expected) - - for df_plus in (df + 2, 2 + df): - expected = pl.DataFrame({"a": [3.0, 4.0], "b": [5.0, 6.0]}) - assert_frame_equal(df_plus, expected) - - df_div = df / 2 - expected = pl.DataFrame({"a": [0.5, 1.0], "b": [1.5, 2.0]}) - assert_frame_equal(df_div, expected) - - df_minus = df - 2 - expected = pl.DataFrame({"a": [-1.0, 0.0], "b": [1.0, 2.0]}) - assert_frame_equal(df_minus, expected) - - df_mod = df % 2 - expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]}) - assert_frame_equal(df_mod, expected) - - df2 = pl.DataFrame({"c": [10]}) - - out = df + df2 - expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_columns( - pl.col("b").cast(pl.Float64) - ) - assert_frame_equal(out, expected) - - out = df - df2 - expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_columns( - pl.col("b").cast(pl.Float64) - ) - assert_frame_equal(out, expected) - - out = df / df2 - expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_columns( - pl.col("b").cast(pl.Float64) - ) - assert_frame_equal(out, expected) - - out = df * df2 - expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_columns( - pl.col("b").cast(pl.Float64) - ) - assert_frame_equal(out, expected) - - out = df % df2 - expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_columns( - pl.col("b").cast(pl.Float64) - ) - assert_frame_equal(out, expected) - - # cannot do arithmetic with a sequence - with pytest.raises(TypeError, match="operation not supported"): - _ = df + [1] # type: ignore[operator] - - -def test_df_series_division() -> None: - df = pl.DataFrame( - { - "a": [2, 2, 4, 4, 6, 6], - "b": [2, 2, 10, 5, 6, 6], - } - ) - s = pl.Series([2, 2, 2, 2, 2, 2]) - assert (df / s).to_dict(as_series=False) == { - "a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0], - "b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0], - } - assert (df // 
s).to_dict(as_series=False) == { - "a": [1, 1, 2, 2, 3, 3], - "b": [1, 1, 5, 2, 3, 3], - } - - def test_add_string() -> None: df = pl.DataFrame({"a": ["hi", "there"], "b": ["hello", "world"]}) expected = pl.DataFrame( diff --git a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py index c0d6ce80e5d4..67e9be665db4 100644 --- a/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py +++ b/py-polars/tests/unit/operations/arithmetic/test_arithmetic.py @@ -6,6 +6,18 @@ import pytest import polars as pl +from polars import ( + Date, + Float64, + Int8, + Int16, + Int32, + Int64, + UInt8, + UInt16, + UInt32, + UInt64, +) from polars.datatypes import FLOAT_DTYPES, INTEGER_DTYPES from polars.testing import assert_frame_equal, assert_series_equal @@ -303,3 +315,269 @@ def test_bool_floordiv() -> None: match="floor_div operation not supported for dtype `bool`", ): df.with_columns(pl.col("x").floordiv(2)) + + +def test_arithmetic_in_aggregation_3739() -> None: + def demean_dot() -> pl.Expr: + x = pl.col("x") + y = pl.col("y") + x1 = x - x.mean() + y1 = y - y.mean() + return (x1 * y1).sum().alias("demean_dot") + + assert ( + pl.DataFrame( + { + "key": ["a", "a", "a", "a"], + "x": [4, 2, 2, 4], + "y": [2, 0, 2, 0], + } + ) + .group_by("key") + .agg( + [ + demean_dot(), + ] + ) + ).to_dict(as_series=False) == {"key": ["a"], "demean_dot": [0.0]} + + +def test_arithmetic_on_df() -> None: + df = pl.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0]}) + + for df_mul in (df * 2, 2 * df): + expected = pl.DataFrame({"a": [2.0, 4.0], "b": [6.0, 8.0]}) + assert_frame_equal(df_mul, expected) + + for df_plus in (df + 2, 2 + df): + expected = pl.DataFrame({"a": [3.0, 4.0], "b": [5.0, 6.0]}) + assert_frame_equal(df_plus, expected) + + df_div = df / 2 + expected = pl.DataFrame({"a": [0.5, 1.0], "b": [1.5, 2.0]}) + assert_frame_equal(df_div, expected) + + df_minus = df - 2 + expected = pl.DataFrame({"a": [-1.0, 0.0], "b": 
[1.0, 2.0]}) + assert_frame_equal(df_minus, expected) + + df_mod = df % 2 + expected = pl.DataFrame({"a": [1.0, 0.0], "b": [1.0, 0.0]}) + assert_frame_equal(df_mod, expected) + + df2 = pl.DataFrame({"c": [10]}) + + out = df + df2 + expected = pl.DataFrame({"a": [11.0, None], "b": [None, None]}).with_columns( + pl.col("b").cast(pl.Float64) + ) + assert_frame_equal(out, expected) + + out = df - df2 + expected = pl.DataFrame({"a": [-9.0, None], "b": [None, None]}).with_columns( + pl.col("b").cast(pl.Float64) + ) + assert_frame_equal(out, expected) + + out = df / df2 + expected = pl.DataFrame({"a": [0.1, None], "b": [None, None]}).with_columns( + pl.col("b").cast(pl.Float64) + ) + assert_frame_equal(out, expected) + + out = df * df2 + expected = pl.DataFrame({"a": [10.0, None], "b": [None, None]}).with_columns( + pl.col("b").cast(pl.Float64) + ) + assert_frame_equal(out, expected) + + out = df % df2 + expected = pl.DataFrame({"a": [1.0, None], "b": [None, None]}).with_columns( + pl.col("b").cast(pl.Float64) + ) + assert_frame_equal(out, expected) + + # cannot do arithmetic with a sequence + with pytest.raises(TypeError, match="operation not supported"): + _ = df + [1] # type: ignore[operator] + + +def test_df_series_division() -> None: + df = pl.DataFrame( + { + "a": [2, 2, 4, 4, 6, 6], + "b": [2, 2, 10, 5, 6, 6], + } + ) + s = pl.Series([2, 2, 2, 2, 2, 2]) + assert (df / s).to_dict(as_series=False) == { + "a": [1.0, 1.0, 2.0, 2.0, 3.0, 3.0], + "b": [1.0, 1.0, 5.0, 2.5, 3.0, 3.0], + } + assert (df // s).to_dict(as_series=False) == { + "a": [1, 1, 2, 2, 3, 3], + "b": [1, 1, 5, 2, 3, 3], + } + + +@pytest.mark.parametrize( + "s", [pl.Series([1, 2], dtype=Int64), pl.Series([1, 2], dtype=Float64)] +) +def test_arithmetic_series(s: pl.Series) -> None: + a = s + b = s + + assert ((a * b) == [1, 4]).sum() == 2 + assert ((a / b) == [1.0, 1.0]).sum() == 2 + assert ((a + b) == [2, 4]).sum() == 2 + assert ((a - b) == [0, 0]).sum() == 2 + assert ((a + 1) == [2, 3]).sum() == 2 + 
assert ((a - 1) == [0, 1]).sum() == 2 + assert ((a / 1) == [1.0, 2.0]).sum() == 2 + assert ((a // 2) == [0, 1]).sum() == 2 + assert ((a * 2) == [2, 4]).sum() == 2 + assert ((2 + a) == [3, 4]).sum() == 2 + assert ((1 - a) == [0, -1]).sum() == 2 + assert ((2 * a) == [2, 4]).sum() == 2 + + # integer division + assert_series_equal(1 / a, pl.Series([1.0, 0.5])) + expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5]) + assert_series_equal(1 // a, expected) + # modulo + assert ((1 % a) == [0, 1]).sum() == 2 + assert ((a % 1) == [0, 0]).sum() == 2 + # negate + assert (-a == [-1, -2]).sum() == 2 + # unary plus + assert (+a == a).all() + # wrong dtypes in rhs operands + assert ((1.0 - a) == [0.0, -1.0]).sum() == 2 + assert ((1.0 / a) == [1.0, 0.5]).sum() == 2 + assert ((1.0 * a) == [1, 2]).sum() == 2 + assert ((1.0 + a) == [2, 3]).sum() == 2 + assert ((1.0 % a) == [0, 1]).sum() == 2 + + +def test_arithmetic_datetime() -> None: + a = pl.Series("a", [datetime(2021, 1, 1)]) + with pytest.raises(TypeError): + a // 2 + with pytest.raises(TypeError): + a / 2 + with pytest.raises(TypeError): + a * 2 + with pytest.raises(TypeError): + a % 2 + with pytest.raises( + pl.InvalidOperationError, + ): + a**2 + with pytest.raises(TypeError): + 2 / a + with pytest.raises(TypeError): + 2 // a + with pytest.raises(TypeError): + 2 * a + with pytest.raises(TypeError): + 2 % a + with pytest.raises( + pl.InvalidOperationError, + ): + 2**a + + +def test_power_series() -> None: + a = pl.Series([1, 2], dtype=Int64) + b = pl.Series([None, 2.0], dtype=Float64) + c = pl.Series([date(2020, 2, 28), date(2020, 3, 1)], dtype=Date) + d = pl.Series([1, 2], dtype=UInt8) + e = pl.Series([1, 2], dtype=Int8) + f = pl.Series([1, 2], dtype=UInt16) + g = pl.Series([1, 2], dtype=Int16) + h = pl.Series([1, 2], dtype=UInt32) + i = pl.Series([1, 2], dtype=Int32) + j = pl.Series([1, 2], dtype=UInt64) + k = pl.Series([1, 2], dtype=Int64) + m = pl.Series([2**33, 2**33], dtype=UInt64) + + # pow + 
assert_series_equal(a**2, pl.Series([1, 4], dtype=Int64)) + assert_series_equal(b**3, pl.Series([None, 8.0], dtype=Float64)) + assert_series_equal(a**a, pl.Series([1, 4], dtype=Int64)) + assert_series_equal(b**b, pl.Series([None, 4.0], dtype=Float64)) + assert_series_equal(a**b, pl.Series([None, 4.0], dtype=Float64)) + assert_series_equal(d**d, pl.Series([1, 4], dtype=UInt8)) + assert_series_equal(e**d, pl.Series([1, 4], dtype=Int8)) + assert_series_equal(f**d, pl.Series([1, 4], dtype=UInt16)) + assert_series_equal(g**d, pl.Series([1, 4], dtype=Int16)) + assert_series_equal(h**d, pl.Series([1, 4], dtype=UInt32)) + assert_series_equal(i**d, pl.Series([1, 4], dtype=Int32)) + assert_series_equal(j**d, pl.Series([1, 4], dtype=UInt64)) + assert_series_equal(k**d, pl.Series([1, 4], dtype=Int64)) + + with pytest.raises( + pl.InvalidOperationError, + match="`pow` operation not supported for dtype `null` as exponent", + ): + a ** pl.lit(None) + + with pytest.raises( + pl.InvalidOperationError, + match="`pow` operation not supported for dtype `date` as base", + ): + c**2 + with pytest.raises( + pl.InvalidOperationError, + match="`pow` operation not supported for dtype `date` as exponent", + ): + 2**c + + with pytest.raises(pl.ColumnNotFoundError): + a ** "hi" # type: ignore[operator] + + # Raising to UInt64: raises if can't be downcast safely to UInt32... + with pytest.raises(pl.ComputeError, match="conversion from `u64` to `u32` failed"): + a**m + # ... but succeeds otherwise. 
+ assert_series_equal(a**j, pl.Series([1, 4], dtype=Int64)) + + # rpow + assert_series_equal(2.0**a, pl.Series("literal", [2.0, 4.0], dtype=Float64)) + assert_series_equal(2**b, pl.Series("literal", [None, 4.0], dtype=Float64)) + + with pytest.raises(pl.ColumnNotFoundError): + "hi" ** a + + # Series.pow() method + assert_series_equal(a.pow(2), pl.Series([1, 4], dtype=Int64)) + + +@pytest.mark.parametrize( + ("expected", "expr"), + [ + ( + np.array([[2, 4], [6, 8]]), + pl.col("a") + pl.col("a"), + ), + ( + np.array([[0, 0], [0, 0]]), + pl.col("a") - pl.col("a"), + ), + ( + np.array([[1, 4], [9, 16]]), + pl.col("a") * pl.col("a"), + ), + ( + np.array([[1.0, 1.0], [1.0, 1.0]]), + pl.col("a") / pl.col("a"), + ), + ], +) +def test_array_arithmetic_same_size(expected: Any, expr: pl.Expr) -> None: + df = pl.Series("a", np.array([[1, 2], [3, 4]])).to_frame() + + assert_frame_equal( + df.select(expr), + pl.Series("a", expected).to_frame(), + ) diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index c5a590507dce..484100d9d8e7 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -14,17 +14,12 @@ import polars as pl from polars._utils.construction import iterable_to_pyseries from polars.datatypes import ( - Date, Datetime, Field, Float64, - Int8, - Int16, Int32, Int64, Time, - UInt8, - UInt16, UInt32, UInt64, Unknown, @@ -371,139 +366,6 @@ def test_categorical_agg(s: pl.Series, min: str | None, max: str | None) -> None assert s.max() == max -@pytest.mark.parametrize( - "s", [pl.Series([1, 2], dtype=Int64), pl.Series([1, 2], dtype=Float64)] -) -def test_arithmetic(s: pl.Series) -> None: - a = s - b = s - - assert ((a * b) == [1, 4]).sum() == 2 - assert ((a / b) == [1.0, 1.0]).sum() == 2 - assert ((a + b) == [2, 4]).sum() == 2 - assert ((a - b) == [0, 0]).sum() == 2 - assert ((a + 1) == [2, 3]).sum() == 2 - assert ((a - 1) == [0, 1]).sum() == 2 - assert ((a / 1) == [1.0, 
2.0]).sum() == 2 - assert ((a // 2) == [0, 1]).sum() == 2 - assert ((a * 2) == [2, 4]).sum() == 2 - assert ((2 + a) == [3, 4]).sum() == 2 - assert ((1 - a) == [0, -1]).sum() == 2 - assert ((2 * a) == [2, 4]).sum() == 2 - - # integer division - assert_series_equal(1 / a, pl.Series([1.0, 0.5])) - expected = pl.Series([1, 0]) if s.dtype == Int64 else pl.Series([1.0, 0.5]) - assert_series_equal(1 // a, expected) - # modulo - assert ((1 % a) == [0, 1]).sum() == 2 - assert ((a % 1) == [0, 0]).sum() == 2 - # negate - assert (-a == [-1, -2]).sum() == 2 - # unary plus - assert (+a == a).all() - # wrong dtypes in rhs operands - assert ((1.0 - a) == [0.0, -1.0]).sum() == 2 - assert ((1.0 / a) == [1.0, 0.5]).sum() == 2 - assert ((1.0 * a) == [1, 2]).sum() == 2 - assert ((1.0 + a) == [2, 3]).sum() == 2 - assert ((1.0 % a) == [0, 1]).sum() == 2 - - -def test_arithmetic_datetime() -> None: - a = pl.Series("a", [datetime(2021, 1, 1)]) - with pytest.raises(TypeError): - a // 2 - with pytest.raises(TypeError): - a / 2 - with pytest.raises(TypeError): - a * 2 - with pytest.raises(TypeError): - a % 2 - with pytest.raises( - pl.InvalidOperationError, - ): - a**2 - with pytest.raises(TypeError): - 2 / a - with pytest.raises(TypeError): - 2 // a - with pytest.raises(TypeError): - 2 * a - with pytest.raises(TypeError): - 2 % a - with pytest.raises( - pl.InvalidOperationError, - ): - 2**a - - -def test_power() -> None: - a = pl.Series([1, 2], dtype=Int64) - b = pl.Series([None, 2.0], dtype=Float64) - c = pl.Series([date(2020, 2, 28), date(2020, 3, 1)], dtype=Date) - d = pl.Series([1, 2], dtype=UInt8) - e = pl.Series([1, 2], dtype=Int8) - f = pl.Series([1, 2], dtype=UInt16) - g = pl.Series([1, 2], dtype=Int16) - h = pl.Series([1, 2], dtype=UInt32) - i = pl.Series([1, 2], dtype=Int32) - j = pl.Series([1, 2], dtype=UInt64) - k = pl.Series([1, 2], dtype=Int64) - m = pl.Series([2**33, 2**33], dtype=UInt64) - - # pow - assert_series_equal(a**2, pl.Series([1, 4], dtype=Int64)) - 
assert_series_equal(b**3, pl.Series([None, 8.0], dtype=Float64)) - assert_series_equal(a**a, pl.Series([1, 4], dtype=Int64)) - assert_series_equal(b**b, pl.Series([None, 4.0], dtype=Float64)) - assert_series_equal(a**b, pl.Series([None, 4.0], dtype=Float64)) - assert_series_equal(d**d, pl.Series([1, 4], dtype=UInt8)) - assert_series_equal(e**d, pl.Series([1, 4], dtype=Int8)) - assert_series_equal(f**d, pl.Series([1, 4], dtype=UInt16)) - assert_series_equal(g**d, pl.Series([1, 4], dtype=Int16)) - assert_series_equal(h**d, pl.Series([1, 4], dtype=UInt32)) - assert_series_equal(i**d, pl.Series([1, 4], dtype=Int32)) - assert_series_equal(j**d, pl.Series([1, 4], dtype=UInt64)) - assert_series_equal(k**d, pl.Series([1, 4], dtype=Int64)) - - with pytest.raises( - pl.InvalidOperationError, - match="`pow` operation not supported for dtype `null` as exponent", - ): - a ** pl.lit(None) - - with pytest.raises( - pl.InvalidOperationError, - match="`pow` operation not supported for dtype `date` as base", - ): - c**2 - with pytest.raises( - pl.InvalidOperationError, - match="`pow` operation not supported for dtype `date` as exponent", - ): - 2**c - - with pytest.raises(pl.ColumnNotFoundError): - a ** "hi" # type: ignore[operator] - - # Raising to UInt64: raises if can't be downcast safely to UInt32... - with pytest.raises(pl.ComputeError, match="conversion from `u64` to `u32` failed"): - a**m - # ... but succeeds otherwise. 
- assert_series_equal(a**j, pl.Series([1, 4], dtype=Int64)) - - # rpow - assert_series_equal(2.0**a, pl.Series("literal", [2.0, 4.0], dtype=Float64)) - assert_series_equal(2**b, pl.Series("literal", [None, 4.0], dtype=Float64)) - - with pytest.raises(pl.ColumnNotFoundError): - "hi" ** a - - # Series.pow() method - assert_series_equal(a.pow(2), pl.Series([1, 4], dtype=Int64)) - - def test_add_string() -> None: s = pl.Series(["hello", "weird"]) result = s + " world" diff --git a/py-polars/tests/unit/test_errors.py b/py-polars/tests/unit/test_errors.py index 1e577d169324..fc00fdd574c8 100644 --- a/py-polars/tests/unit/test_errors.py +++ b/py-polars/tests/unit/test_errors.py @@ -651,3 +651,10 @@ def test_fill_null_invalid_supertype() -> None: def test_raise_array_of_cats() -> None: with pytest.raises(pl.InvalidOperationError, match="is not yet supported"): pl.Series([["a", "b"], ["a", "c"]], dtype=pl.Array(pl.Categorical, 2)) + + +def test_raise_invalid_arithmetic() -> None: + df = pl.Series("a", [object()]).to_frame() + + with pytest.raises(pl.InvalidOperationError): + df.select(pl.col("a") - pl.col("a")) diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index b71bbdc18e01..7b9531accd25 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -172,31 +172,6 @@ def test_group_by_agg_equals_zero_3535() -> None: } -def test_arithmetic_in_aggregation_3739() -> None: - def demean_dot() -> pl.Expr: - x = pl.col("x") - y = pl.col("y") - x1 = x - x.mean() - y1 = y - y.mean() - return (x1 * y1).sum().alias("demean_dot") - - assert ( - pl.DataFrame( - { - "key": ["a", "a", "a", "a"], - "x": [4, 2, 2, 4], - "y": [2, 0, 2, 0], - } - ) - .group_by("key") - .agg( - [ - demean_dot(), - ] - ) - ).to_dict(as_series=False) == {"key": ["a"], "demean_dot": [0.0]} - - def test_dtype_concat_3735() -> None: for dt in [ pl.Int8,