Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Implement is_between in Rust #11945

Merged
merged 3 commits into from
Jan 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/polars-lazy/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ repeat_by = ["polars-plan/repeat_by"]
round_series = ["polars-plan/round_series", "polars-ops/round_series"]
is_first_distinct = ["polars-plan/is_first_distinct"]
is_last_distinct = ["polars-plan/is_last_distinct"]
is_between = ["polars-plan/is_between"]
is_unique = ["polars-plan/is_unique"]
cross_join = ["polars-plan/cross_join", "polars-pipe?/cross_join", "polars-ops/cross_join"]
asof_join = ["polars-plan/asof_join", "polars-time", "polars-ops/asof_join"]
Expand Down
1 change: 1 addition & 0 deletions crates/polars-ops/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ is_first_distinct = []
is_last_distinct = []
is_unique = []
unique_counts = []
is_between = []
approx_unique = []
fused = []
cutqcut = ["dtype-categorical", "dtype-struct"]
Expand Down
34 changes: 34 additions & 0 deletions crates/polars-ops/src/series/ops/is_between.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
use std::ops::BitAnd;

use polars_core::prelude::*;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// Selects which bounds of an interval are inclusive when testing membership
/// with `is_between`.
///
/// The default is [`ClosedInterval::Both`].
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ClosedInterval {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Too bad we can't reuse the existing ClosedWindow enum as it's in the polars-time crate which is not available here. But I guess it's fine this way.

/// Lower and upper bounds are both inclusive (`lower <= x <= upper`).
#[default]
Both,
/// Only the lower bound is inclusive (`lower <= x < upper`).
Left,
/// Only the upper bound is inclusive (`lower < x <= upper`).
Right,
/// Neither bound is inclusive (`lower < x < upper`).
None,
}

/// Build a boolean mask telling whether `s` lies between `lower` and `upper`.
///
/// `closed` selects the comparison used on each side: the lower bound is
/// checked with `>=` for `Both`/`Left` and with `>` for `None`/`Right`; the
/// upper bound is checked with `<=` for `Both`/`Right` and with `<` for
/// `None`/`Left`. The two resulting masks are combined with a bitwise AND.
///
/// # Errors
///
/// Returns the error of whichever `Series` comparison fails first (the lower
/// bound is compared before the upper bound).
pub fn is_between(
s: &Series,
lower: &Series,
upper: &Series,
closed: ClosedInterval,
) -> PolarsResult<BooleanChunked> {
// Pick strict vs. non-strict comparison for the lower bound.
let left_cmp_op = match closed {
ClosedInterval::None | ClosedInterval::Right => Series::gt,
ClosedInterval::Both | ClosedInterval::Left => Series::gt_eq,
};
// Pick strict vs. non-strict comparison for the upper bound.
let right_cmp_op = match closed {
ClosedInterval::None | ClosedInterval::Left => Series::lt,
ClosedInterval::Both | ClosedInterval::Right => Series::lt_eq,
};
let left = left_cmp_op(s, lower)?;
let right = right_cmp_op(s, upper)?;
// A value is "between" only when it satisfies both bound checks.
Ok(left.bitand(right))
orlp marked this conversation as resolved.
Show resolved Hide resolved
}
4 changes: 4 additions & 0 deletions crates/polars-ops/src/series/ops/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ mod floor_divide;
mod fused;
mod horizontal;
mod index;
#[cfg(feature = "is_between")]
mod is_between;
#[cfg(feature = "is_first_distinct")]
mod is_first_distinct;
#[cfg(feature = "is_in")]
Expand Down Expand Up @@ -74,6 +76,8 @@ pub use floor_divide::*;
pub use fused::*;
pub use horizontal::*;
pub use index::*;
#[cfg(feature = "is_between")]
pub use is_between::*;
#[cfg(feature = "is_first_distinct")]
pub use is_first_distinct::*;
#[cfg(feature = "is_in")]
Expand Down
1 change: 1 addition & 0 deletions crates/polars-plan/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ round_series = ["polars-ops/round_series"]
is_first_distinct = ["polars-core/is_first_distinct", "polars-ops/is_first_distinct"]
is_last_distinct = ["polars-core/is_last_distinct", "polars-ops/is_last_distinct"]
is_unique = ["polars-ops/is_unique"]
is_between = ["polars-ops/is_between"]
cross_join = ["polars-ops/cross_join"]
asof_join = ["polars-core/asof_join", "polars-time", "polars-ops/asof_join"]
concat_str = []
Expand Down
16 changes: 16 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/boolean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,10 @@ pub enum BooleanFunction {
IsUnique,
#[cfg(feature = "is_unique")]
IsDuplicated,
#[cfg(feature = "is_between")]
IsBetween {
closed: ClosedInterval,
},
#[cfg(feature = "is_in")]
IsIn,
AllHorizontal,
Expand Down Expand Up @@ -61,6 +65,8 @@ impl Display for BooleanFunction {
IsUnique => "is_unique",
#[cfg(feature = "is_unique")]
IsDuplicated => "is_duplicated",
#[cfg(feature = "is_between")]
IsBetween { .. } => "is_between",
#[cfg(feature = "is_in")]
IsIn => "is_in",
AnyHorizontal => "any_horizontal",
Expand Down Expand Up @@ -91,6 +97,8 @@ impl From<BooleanFunction> for SpecialEq<Arc<dyn SeriesUdf>> {
IsUnique => map!(is_unique),
#[cfg(feature = "is_unique")]
IsDuplicated => map!(is_duplicated),
#[cfg(feature = "is_between")]
IsBetween { closed } => map_as_slice!(is_between, closed),
#[cfg(feature = "is_in")]
IsIn => wrap!(is_in),
AllHorizontal => map_as_slice!(all_horizontal),
Expand Down Expand Up @@ -168,6 +176,14 @@ fn is_duplicated(s: &Series) -> PolarsResult<Series> {
polars_ops::prelude::is_duplicated(s).map(|ca| ca.into_series())
}

/// Slice-based adapter around `polars_ops::prelude::is_between`.
///
/// Expects `s` to hold, in order, the values to test, the lower bound and the
/// upper bound; indexing panics if fewer than three series are supplied.
/// The boolean mask returned by the kernel is converted into a `Series`.
#[cfg(feature = "is_between")]
fn is_between(s: &[Series], closed: ClosedInterval) -> PolarsResult<Series> {
    // Positional layout of the inputs: [values, lower, upper].
    let (values, lower_bound, upper_bound) = (&s[0], &s[1], &s[2]);
    let mask = polars_ops::prelude::is_between(values, lower_bound, upper_bound, closed)?;
    Ok(mask.into_series())
}

#[cfg(feature = "is_in")]
fn is_in(s: &mut [Series]) -> PolarsResult<Option<Series>> {
let left = &s[0];
Expand Down
11 changes: 11 additions & 0 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -971,6 +971,17 @@ impl Expr {
self.apply_private(BooleanFunction::IsDuplicated.into())
}

/// Check if this expression is between the given lower and upper bounds.
///
/// `closed` is carried in the `BooleanFunction::IsBetween` variant; `lower`
/// and `upper` are converted to expressions and passed as the additional
/// inputs of the function.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_between")]
pub fn is_between<E: Into<Expr>>(self, lower: E, upper: E, closed: ClosedInterval) -> Self {
    let function = BooleanFunction::IsBetween { closed }.into();
    let bounds: [Expr; 2] = [lower.into(), upper.into()];
    // NOTE(review): the two boolean flags are interpreted by `map_many_private`,
    // whose definition is not visible here — confirm their meaning there.
    self.map_many_private(function, &bounds, false, true)
}

/// Get a mask of unique values.
#[allow(clippy::wrong_self_convention)]
#[cfg(feature = "is_unique")]
Expand Down
2 changes: 2 additions & 0 deletions crates/polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ find_many = ["polars-plan/find_many"]
fused = ["polars-ops/fused", "polars-lazy?/fused"]
group_by_list = ["polars-core/group_by_list", "polars-ops/group_by_list"]
interpolate = ["polars-ops/interpolate", "polars-lazy?/interpolate"]
is_between = ["polars-lazy?/is_between", "polars-ops/is_between"]
is_first_distinct = ["polars-lazy?/is_first_distinct", "polars-ops/is_first_distinct"]
is_in = ["polars-lazy?/is_in"]
is_last_distinct = ["polars-lazy?/is_last_distinct", "polars-ops/is_last_distinct"]
Expand Down Expand Up @@ -310,6 +311,7 @@ docs-selection = [
"checked_arithmetic",
"ndarray",
"repeat_by",
"is_between",
"is_first_distinct",
"is_last_distinct",
"asof_join",
Expand Down
1 change: 1 addition & 0 deletions crates/polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@
//! - `repeat_by` - Repeat element in an Array N times, where N is given by another array.
//! - `is_first_distinct` - Check if element is first unique value.
//! - `is_last_distinct` - Check if element is last unique value.
//! - `is_between` - Check if this expression is between the given lower and upper bounds.
//! - `checked_arithmetic` - checked arithmetic/ returning [`None`] on invalid operations.
//! - `dot_product` - Dot/inner product on [`Series`] and [`Expr`].
//! - `concat_str` - Concat string data in linear time.
Expand Down
1 change: 1 addition & 0 deletions py-polars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ features = [
"is_first_distinct",
"is_last_distinct",
"is_unique",
"is_between",
"lazy",
"list_eval",
"list_to_struct",
Expand Down
25 changes: 7 additions & 18 deletions py-polars/polars/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -5275,7 +5275,7 @@ def is_between(
closed: ClosedInterval = "both",
) -> Self:
"""
Check if this expression is between the given start and end values.
Check if this expression is between the given lower and upper bounds.

Parameters
----------
Expand Down Expand Up @@ -5351,23 +5351,12 @@ def is_between(
│ e ┆ false │
└─────┴────────────┘
"""
lower_bound = self._from_pyexpr(parse_as_expression(lower_bound))
upper_bound = self._from_pyexpr(parse_as_expression(upper_bound))

if closed == "none":
return (self > lower_bound) & (self < upper_bound)
elif closed == "both":
return (self >= lower_bound) & (self <= upper_bound)
elif closed == "right":
return (self > lower_bound) & (self <= upper_bound)
elif closed == "left":
return (self >= lower_bound) & (self < upper_bound)
else:
msg = (
"`closed` must be one of {'left', 'right', 'both', 'none'},"
f" got {closed!r}"
)
raise ValueError(msg)
lower_bound = parse_as_expression(lower_bound)
upper_bound = parse_as_expression(upper_bound)

return self._from_pyexpr(
self._pyexpr.is_between(lower_bound, upper_bound, closed)
)

def hash(
self,
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4188,7 +4188,7 @@ def is_between(
closed: ClosedInterval = "both",
) -> Series:
"""
Get a boolean mask of the values that fall between the given start/end values.
Get a boolean mask of the values that are between the given lower/upper bounds.

Parameters
----------
Expand Down
17 changes: 17 additions & 0 deletions py-polars/src/conversion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1545,6 +1545,23 @@ impl FromPyObject<'_> for Wrap<SearchSortedSide> {
}
}

/// Parses a Python string (`"both"`, `"left"`, `"right"` or `"none"`) into a
/// wrapped [`ClosedInterval`]; any other string yields a `PyValueError`.
impl FromPyObject<'_> for Wrap<ClosedInterval> {
    fn extract(ob: &PyAny) -> PyResult<Self> {
        // Extraction fails early if the Python object is not a string.
        let name = ob.extract::<&str>()?;
        match name {
            "both" => Ok(Wrap(ClosedInterval::Both)),
            "left" => Ok(Wrap(ClosedInterval::Left)),
            "right" => Ok(Wrap(ClosedInterval::Right)),
            "none" => Ok(Wrap(ClosedInterval::None)),
            v => Err(PyValueError::new_err(format!(
                "`closed` must be one of {{'both', 'left', 'right', 'none'}}, got {v}",
            ))),
        }
    }
}

impl FromPyObject<'_> for Wrap<WindowMapping> {
fn extract(ob: &PyAny) -> PyResult<Self> {
let parsed = match ob.extract::<&str>()? {
Expand Down
7 changes: 7 additions & 0 deletions py-polars/src/expr/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,13 @@ impl PyExpr {
self.inner.clone().is_unique().into()
}

fn is_between(&self, lower: Self, upper: Self, closed: Wrap<ClosedInterval>) -> Self {
self.inner
.clone()
.is_between(lower.inner, upper.inner, closed.0)
.into()
}

fn approx_n_unique(&self) -> Self {
self.inner.clone().approx_n_unique().into()
}
Expand Down