diff --git a/crates/polars-lazy/Cargo.toml b/crates/polars-lazy/Cargo.toml index 4d32f0fbb42ef..99ce52bb1ba37 100644 --- a/crates/polars-lazy/Cargo.toml +++ b/crates/polars-lazy/Cargo.toml @@ -84,6 +84,7 @@ repeat_by = ["polars-plan/repeat_by"] round_series = ["polars-plan/round_series", "polars-ops/round_series"] is_first_distinct = ["polars-plan/is_first_distinct"] is_last_distinct = ["polars-plan/is_last_distinct"] +is_between = ["polars-plan/is_between"] is_unique = ["polars-plan/is_unique"] cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"] asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"] diff --git a/crates/polars-ops/Cargo.toml b/crates/polars-ops/Cargo.toml index a3ee86a39418d..d5b113a5b1aee 100644 --- a/crates/polars-ops/Cargo.toml +++ b/crates/polars-ops/Cargo.toml @@ -66,6 +66,7 @@ is_first_distinct = [] is_last_distinct = [] is_unique = [] unique_counts = [] +is_between = [] approx_unique = [] fused = [] cutqcut = ["dtype-categorical", "dtype-struct"] diff --git a/crates/polars-ops/src/series/ops/is_between.rs b/crates/polars-ops/src/series/ops/is_between.rs new file mode 100644 index 0000000000000..053493d552f66 --- /dev/null +++ b/crates/polars-ops/src/series/ops/is_between.rs @@ -0,0 +1,34 @@ +use std::ops::BitAnd; + +use polars_core::prelude::*; +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum ClosedInterval { + #[default] + Both, + Left, + Right, + None, +} + +pub fn is_between( + s: &Series, + lower: &Series, + upper: &Series, + closed: ClosedInterval, +) -> PolarsResult { + let left_cmp_op = match closed { + ClosedInterval::None | ClosedInterval::Right => Series::gt, + ClosedInterval::Both | ClosedInterval::Left => Series::gt_eq, + }; + let right_cmp_op = match closed { + ClosedInterval::None | ClosedInterval::Left => Series::lt, + ClosedInterval::Both | ClosedInterval::Right => Series::lt_eq, + }; + let left = left_cmp_op(s, lower)?; + let right = right_cmp_op(s, upper)?; + Ok(left.bitand(right)) +} diff --git a/crates/polars-ops/src/series/ops/mod.rs b/crates/polars-ops/src/series/ops/mod.rs index df92959c9efec..9d3d4dd1356d9 100644 --- a/crates/polars-ops/src/series/ops/mod.rs +++ b/crates/polars-ops/src/series/ops/mod.rs @@ -20,6 +20,8 @@ mod fused; mod horizontal; #[cfg(feature = "convert_index")] mod index; +#[cfg(feature = "is_between")] +mod is_between; #[cfg(feature = "is_first_distinct")] mod is_first_distinct; #[cfg(feature = "is_in")] @@ -72,6 +74,8 @@ pub use fused::*; pub use horizontal::*; #[cfg(feature = "convert_index")] pub use index::*; +#[cfg(feature = "is_between")] +pub use is_between::*; #[cfg(feature = "is_first_distinct")] pub use is_first_distinct::*; #[cfg(feature = "is_in")] diff --git a/crates/polars-plan/Cargo.toml b/crates/polars-plan/Cargo.toml index 660b6875a1481..41c10fefee8d0 100644 --- a/crates/polars-plan/Cargo.toml +++ b/crates/polars-plan/Cargo.toml @@ -95,6 +95,7 @@ round_series = ["polars-ops/round_series"] is_first_distinct = ["polars-core/is_first_distinct", "polars-ops/is_first_distinct"] is_last_distinct = ["polars-core/is_last_distinct", "polars-ops/is_last_distinct"] is_unique = ["polars-ops/is_unique"] +is_between = ["polars-ops/is_between"] cross_join = ["polars-ops/cross_join"] asof_join = ["polars-core/asof_join", "polars-time", "polars-ops/asof_join"] concat_str = [] diff --git a/crates/polars-plan/src/dsl/function_expr/boolean.rs b/crates/polars-plan/src/dsl/function_expr/boolean.rs index ed829773ce66c..7c6e0cdad3fc4 100644 --- a/crates/polars-plan/src/dsl/function_expr/boolean.rs +++ b/crates/polars-plan/src/dsl/function_expr/boolean.rs @@ -26,6 +26,10 @@ pub enum BooleanFunction { IsUnique, #[cfg(feature = "is_unique")] IsDuplicated, + #[cfg(feature = "is_between")] + IsBetween { + closed: ClosedInterval, + }, #[cfg(feature = "is_in")] IsIn, AllHorizontal, @@ -64,6 +68,8 @@ impl Display for BooleanFunction { IsUnique => "is_unique", #[cfg(feature = "is_unique")] IsDuplicated => "is_duplicated", + #[cfg(feature = "is_between")] + IsBetween { .. } => "is_between", #[cfg(feature = "is_in")] IsIn => "is_in", AnyHorizontal => "any_horizontal", @@ -94,6 +100,8 @@ impl From for SpecialEq> { IsUnique => map!(is_unique), #[cfg(feature = "is_unique")] IsDuplicated => map!(is_duplicated), + #[cfg(feature = "is_between")] + IsBetween { closed } => map_as_slice!(is_between, closed), #[cfg(feature = "is_in")] IsIn => wrap!(is_in), AllHorizontal => map_as_slice!(all_horizontal), @@ -171,6 +179,14 @@ fn is_duplicated(s: &Series) -> PolarsResult { polars_ops::prelude::is_duplicated(s).map(|ca| ca.into_series()) } +#[cfg(feature = "is_between")] +fn is_between(s: &[Series], closed: ClosedInterval) -> PolarsResult { + let ser = &s[0]; + let lower = &s[1]; + let upper = &s[2]; + polars_ops::prelude::is_between(ser, lower, upper, closed).map(|ca| ca.into_series()) +} + #[cfg(feature = "is_in")] fn is_in(s: &mut [Series]) -> PolarsResult> { let left = &s[0]; diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 01e3d6a296187..c106365fa6e22 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -1018,6 +1018,17 @@ impl Expr { self.apply_private(BooleanFunction::IsDuplicated.into()) } + #[allow(clippy::wrong_self_convention)] + #[cfg(feature = "is_between")] + pub fn is_between>(self, lower: E, upper: E, closed: ClosedInterval) -> Self { + self.map_many_private( + BooleanFunction::IsBetween { closed }.into(), + &[lower.into(), upper.into()], + false, + true, + ) + } + /// Get a mask of unique values. #[allow(clippy::wrong_self_convention)] #[cfg(feature = "is_unique")] diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index ab98a9dc0d855..0d6be622d6e17 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -119,6 +119,7 @@ repeat_by = ["polars-ops/repeat_by", "polars-lazy?/repeat_by"] is_first_distinct = ["polars-lazy?/is_first_distinct", "polars-ops/is_first_distinct"] is_last_distinct = ["polars-lazy?/is_last_distinct", "polars-ops/is_last_distinct"] is_unique = ["polars-lazy?/is_unique", "polars-ops/is_unique"] +is_between = ["polars-lazy?/is_between", "polars-ops/is_between"] asof_join = ["polars-core/asof_join", "polars-lazy?/asof_join", "polars-ops/asof_join"] cross_join = ["polars-lazy?/cross_join", "polars-ops/cross_join"] dot_product = ["polars-core/dot_product"] @@ -308,6 +309,7 @@ docs-selection = [ "checked_arithmetic", "ndarray", "repeat_by", + "is_between", "is_first_distinct", "is_last_distinct", "asof_join", diff --git a/crates/polars/src/lib.rs b/crates/polars/src/lib.rs index 90f800056ae8a..57db9df9299ee 100644 --- a/crates/polars/src/lib.rs +++ b/crates/polars/src/lib.rs @@ -240,6 +240,7 @@ //! - `repeat_by` - [Repeat element in an Array N times, where N is given by another array. //! - `is_first_distinct` - Check if element is first unique value. //! - `is_last_distinct` - Check if element is last unique value. +//! - `is_between` - Check if this expression is between the given lower and upper bounds. //! - `checked_arithmetic` - checked arithmetic/ returning [`None`] on invalid operations. //! - `dot_product` - Dot/inner product on [`Series`] and [`Expr`]. //! - `concat_str` - Concat string data in linear time. diff --git a/py-polars/Cargo.toml b/py-polars/Cargo.toml index 685fcc3b1dadc..b83cada1db3ff 100644 --- a/py-polars/Cargo.toml +++ b/py-polars/Cargo.toml @@ -56,6 +56,7 @@ features = [ "is_first_distinct", "is_last_distinct", "is_unique", + "is_between", "lazy", "list_eval", "list_to_struct", diff --git a/py-polars/docs/source/reference/expressions/boolean.rst b/py-polars/docs/source/reference/expressions/boolean.rst index 73c68917d5157..2575c6426ae62 100644 --- a/py-polars/docs/source/reference/expressions/boolean.rst +++ b/py-polars/docs/source/reference/expressions/boolean.rst @@ -17,6 +17,7 @@ Boolean Expr.is_infinite Expr.is_last Expr.is_last_distinct + Expr.is_between Expr.is_nan Expr.is_not Expr.is_not_nan diff --git a/py-polars/docs/source/reference/series/descriptive.rst b/py-polars/docs/source/reference/series/descriptive.rst index 6ec39e326b9f8..b5c952deffbf0 100644 --- a/py-polars/docs/source/reference/series/descriptive.rst +++ b/py-polars/docs/source/reference/series/descriptive.rst @@ -22,6 +22,7 @@ Descriptive Series.is_integer Series.is_last Series.is_last_distinct + Series.is_between Series.is_nan Series.is_not_nan Series.is_not_null diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 37ccffaa8e147..5bd9fefbc920b 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -5223,7 +5223,7 @@ def is_between( closed: ClosedInterval = "both", ) -> Self: """ - Check if this expression is between the given start and end values. + Check if this expression is between the given lower and upper bounds. Parameters ---------- @@ -5300,22 +5300,12 @@ def is_between( └─────┴────────────┘ """ - lower_bound = self._from_pyexpr(parse_as_expression(lower_bound)) - upper_bound = self._from_pyexpr(parse_as_expression(upper_bound)) + lower_bound = parse_as_expression(lower_bound) + upper_bound = parse_as_expression(upper_bound) - if closed == "none": - return (self > lower_bound) & (self < upper_bound) - elif closed == "both": - return (self >= lower_bound) & (self <= upper_bound) - elif closed == "right": - return (self > lower_bound) & (self <= upper_bound) - elif closed == "left": - return (self >= lower_bound) & (self < upper_bound) - else: - raise ValueError( - "`closed` must be one of {'left', 'right', 'both', 'none'}," - f" got {closed!r}" - ) + return self._from_pyexpr( + self._pyexpr.is_between(lower_bound, upper_bound, closed) + ) def hash( self, diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 2f8ba274d11da..7e35be088a562 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -3894,7 +3894,7 @@ def is_between( closed: ClosedInterval = "both", ) -> Series: """ - Get a boolean mask of the values that fall between the given start/end values. + Get a boolean mask of the values that are between the given lower/upper bounds. Parameters ---------- diff --git a/py-polars/src/conversion.rs b/py-polars/src/conversion.rs index cdc0e9b69b8cb..2e9e14e6e89df 100644 --- a/py-polars/src/conversion.rs +++ b/py-polars/src/conversion.rs @@ -1424,6 +1424,23 @@ impl FromPyObject<'_> for Wrap { } } +impl FromPyObject<'_> for Wrap { + fn extract(ob: &PyAny) -> PyResult { + let parsed = match ob.extract::<&str>()? { + "both" => ClosedInterval::Both, + "left" => ClosedInterval::Left, + "right" => ClosedInterval::Right, + "none" => ClosedInterval::None, + v => { + return Err(PyValueError::new_err(format!( + "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}", + ))) + }, + }; + Ok(Wrap(parsed)) + } +} + impl FromPyObject<'_> for Wrap { fn extract(ob: &PyAny) -> PyResult { let parsed = match ob.extract::<&str>()? { diff --git a/py-polars/src/expr/general.rs b/py-polars/src/expr/general.rs index d6e335c7945b4..fff48ca9e3a0b 100644 --- a/py-polars/src/expr/general.rs +++ b/py-polars/src/expr/general.rs @@ -397,6 +397,13 @@ impl PyExpr { self.inner.clone().is_unique().into() } + fn is_between(&self, lower: Self, upper: Self, closed: Wrap) -> Self { + self.clone() + .inner + .is_between(lower.inner, upper.inner, closed.0) + .into() + } + fn approx_n_unique(&self) -> Self { self.inner.clone().approx_n_unique().into() }