From b0161d405ad752175d39ea6db83733daf5894e83 Mon Sep 17 00:00:00 2001 From: Alexander Beedie Date: Thu, 2 Feb 2023 07:07:55 +0000 Subject: [PATCH] docs(python): add example of using "is_between" with string bounds, and extend test coverage for the same --- py-polars/polars/internals/expr/expr.py | 33 ++++++++++++++++++++----- py-polars/tests/unit/test_lazy.py | 19 ++++++++++++-- 2 files changed, 44 insertions(+), 8 deletions(-) diff --git a/py-polars/polars/internals/expr/expr.py b/py-polars/polars/internals/expr/expr.py index a04a9b24ace2..da345961e942 100644 --- a/py-polars/polars/internals/expr/expr.py +++ b/py-polars/polars/internals/expr/expr.py @@ -3391,19 +3391,19 @@ def repeat_by(self, by: Expr | str) -> Expr: def is_between( self, - start: Expr | datetime | date | time | int | float, - end: Expr | datetime | date | time | int | float, + start: Expr | datetime | date | time | int | float | str, + end: Expr | datetime | date | time | int | float | str, closed: ClosedInterval = "both", ) -> Expr: """ - Check if this expression is between start and end. + Check if this expression is between the given start and end values. Parameters ---------- start - Lower bound as primitive type or datetime. + Lower bound value (can be an expression or literal). end - Upper bound as primitive type or datetime. + Upper bound value (can be an expression or literal). closed : {'both', 'left', 'right', 'none'} Define which sides of the interval are closed (inclusive). @@ -3428,7 +3428,7 @@ def is_between( │ 5 ┆ false │ └─────┴────────────┘ - Use the ``closed`` argument to include or exclude the values at the bounds. 
+ Use the ``closed`` argument to include or exclude the values at the bounds: >>> df.with_columns(pl.col("num").is_between(2, 4, closed="left")) shape: (5, 2) @@ -3444,9 +3444,30 @@ def is_between( │ 5 ┆ false │ └─────┴────────────┘ + String bounds are supported in addition to numeric/temporal values (note: a + plain string is interpreted as a column name, so wrap string literals with + ``lit`` to compare against the literal value): + + >>> df = pl.DataFrame({"a": ["a", "b", "c", "d", "e"]}) + >>> df.with_columns( + ... pl.col("a").is_between(pl.lit("a"), pl.lit("c"), closed="both") + ... ) + shape: (5, 2) + ┌─────┬────────────┐ + │ a ┆ is_between │ + │ --- ┆ --- │ + │ str ┆ bool │ + ╞═════╪════════════╡ + │ a ┆ true │ + │ b ┆ true │ + │ c ┆ true │ + │ d ┆ false │ + │ e ┆ false │ + └─────┴────────────┘ """ start = expr_to_lit_or_expr(start, str_to_lit=False) end = expr_to_lit_or_expr(end, str_to_lit=False) + if closed == "none": return ((self > start) & (self < end)).alias("is_between") elif closed == "both": diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index d7fbea34c64c..f01e47239b4d 100644 --- a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1,6 +1,6 @@ from __future__ import annotations -from datetime import date, datetime +from datetime import date, datetime, time from functools import reduce from operator import add from string import ascii_letters @@ -1217,12 +1217,14 @@ def test_is_between_data_types() -> None: { "flt": [1.4, 1.2, 2.5], "int": [2, 3, 4], + "str": ["xyz", "str", "abc"], "date": [date(2020, 1, 1), date(2020, 2, 2), date(2020, 3, 3)], "datetime": [ datetime(2020, 1, 1, 0, 0, 0), datetime(2020, 1, 1, 10, 0, 0), datetime(2020, 1, 1, 12, 0, 0), ], + "tm": [time(10, 30), time(0, 45), time(15, 15)], } ) @@ -1235,7 +1237,6 @@ def test_is_between_data_types() -> None: df.select(pl.col("int").is_between(1.5, 3))[:, 0], pl.Series("is_between", [True, True, False]), ) - assert_series_equal(
df.select(pl.col("date").is_between(date(2019, 1, 1), date(2020, 2, 5)))[:, 0], pl.Series("is_between", [True, True, False]), @@ -1248,6 +1249,20 @@ def test_is_between_data_types() -> None: )[:, 0], pl.Series("is_between", [False, True, False]), ) + assert_series_equal( + df.select( + pl.col("str").is_between(pl.lit("str"), pl.lit("zzz"), closed="left") + )[:, 0], + pl.Series("is_between", [True, True, False]), + ) + assert_series_equal( + df.select( + pl.col("tm") + .is_between(time(0, 45), time(10, 30), closed="right") + .alias("tm_between") + )[:, 0], + pl.Series("tm_between", [True, False, False]), + ) def test_unique() -> None: