From f65366d0d8e6d7f0c31ab65f8e9f9696114a0020 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sun, 14 Jan 2024 15:48:36 +0100 Subject: [PATCH] Deprecate `cum_count` function --- crates/polars-plan/src/dsl/mod.rs | 12 ----- py-polars/polars/functions/lazy.py | 52 ++++++------------- py-polars/src/functions/lazy.rs | 5 -- py-polars/src/lib.rs | 2 - .../tests/unit/functions/test_cum_count.py | 3 +- 5 files changed, 17 insertions(+), 57 deletions(-) diff --git a/crates/polars-plan/src/dsl/mod.rs b/crates/polars-plan/src/dsl/mod.rs index 6aa5fe8a39643..516e45dee066d 100644 --- a/crates/polars-plan/src/dsl/mod.rs +++ b/crates/polars-plan/src/dsl/mod.rs @@ -1765,18 +1765,6 @@ pub fn len() -> Expr { Expr::Len } -/// Return the cumulative count of the context. -#[cfg(feature = "range")] -pub fn cum_count(reverse: bool) -> Expr { - let start = lit(1 as IdxSize); - let end = len() + lit(1 as IdxSize); - let mut range = int_range(start, end, 1, IDX_DTYPE); - if reverse { - range = range.reverse() - } - range.alias("cum_count") -} - /// First column in DataFrame. pub fn first() -> Expr { Expr::Nth(0) diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index 33c0f578ba7fc..e723ca8c43fb6 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -5,7 +5,7 @@ import polars._reexport as pl import polars.functions as F -from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64 +from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64, UInt32 from polars.utils._async import _AioDataFrameResult, _GeventDataFrameResult from polars.utils._parse_expr_input import ( parse_as_expression, @@ -93,7 +93,7 @@ def count(*columns: str) -> Expr: """ Return the number of non-null values in the column. - This function is syntactic sugar for `col(column).count()`. + This function is syntactic sugar for `col(columns).count()`. Calling this function without any arguments returns the number of rows in the context. **This way of using the function is deprecated. Please use :func:`len` @@ -168,13 +168,13 @@ def count(*columns: str) -> Expr: def cum_count(*columns: str, reverse: bool = False) -> Expr: """ - Return the cumulative count of the non-null values in the column or of the context. + Return the cumulative count of the non-null values in the column. + + This function is syntactic sugar for `col(columns).cum_count()`. If no arguments are passed, returns the cumulative count of a context. Rows containing null values count towards the result. - Otherwise, this function is syntactic sugar for `col(names).cum_count()`. - Parameters ---------- *columns @@ -184,24 +184,7 @@ def cum_count(*columns: str, reverse: bool = False) -> Expr: Examples -------- - Return the row numbers of a context. Note that rows containing null values are - counted towards the total. - >>> df = pl.DataFrame({"a": [1, 2, None], "b": [3, None, None]}) - >>> df.select(pl.cum_count()) - shape: (3, 1) - ┌───────────┐ - │ cum_count │ - │ --- │ - │ u32 │ - ╞═══════════╡ - │ 1 │ - │ 2 │ - │ 3 │ - └───────────┘ - - Return the cumulative count of non-null values in a column. - >>> df.select(pl.cum_count("a")) shape: (3, 1) ┌─────┐ @@ -213,23 +196,18 @@ def cum_count(*columns: str, reverse: bool = False) -> Expr: │ 2 │ │ 2 │ └─────┘ - - Add row numbers to a DataFrame. - - >>> df.select(pl.cum_count().alias("row_number"), pl.all()) - shape: (3, 3) - ┌────────────┬──────┬──────┐ - │ row_number ┆ a ┆ b │ - │ --- ┆ --- ┆ --- │ - │ u32 ┆ i64 ┆ i64 │ - ╞════════════╪══════╪══════╡ - │ 1 ┆ 1 ┆ 3 │ - │ 2 ┆ 2 ┆ null │ - │ 3 ┆ null ┆ null │ - └────────────┴──────┴──────┘ """ if not columns: - return wrap_expr(plr.cum_count(reverse=reverse)) + issue_deprecation_warning( + "`pl.cum_count()` is deprecated. The same result can be achieved using" + " `pl.int_range(1, pl.len() + 1, dtype=pl.UInt32)`," + " or `int_range(pl.len(), 0, -1, dtype=pl.UInt32)` when `reverse=True`.", + version="0.20.5", + ) + if reverse: + return F.int_range(F.len(), 0, step=-1, dtype=UInt32).alias("cum_count") + else: + return F.int_range(1, F.len() + 1, dtype=UInt32).alias("cum_count") return F.col(*columns).cum_count(reverse=reverse) diff --git a/py-polars/src/functions/lazy.rs b/py-polars/src/functions/lazy.rs index 95e17d79601c1..bedcf6739cc78 100644 --- a/py-polars/src/functions/lazy.rs +++ b/py-polars/src/functions/lazy.rs @@ -187,11 +187,6 @@ pub fn len() -> PyExpr { dsl::len().into() } -#[pyfunction] -pub fn cum_count(reverse: bool) -> PyExpr { - dsl::cum_count(reverse).into() -} - #[pyfunction] pub fn cov(a: PyExpr, b: PyExpr, ddof: u8) -> PyExpr { dsl::cov(a.inner, b.inner, ddof).into() diff --git a/py-polars/src/lib.rs b/py-polars/src/lib.rs index 04475b87d0306..86b88ff65a31f 100644 --- a/py-polars/src/lib.rs +++ b/py-polars/src/lib.rs @@ -150,8 +150,6 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> { .unwrap(); m.add_wrapped(wrap_pyfunction!(functions::len)).unwrap(); m.add_wrapped(wrap_pyfunction!(functions::cov)).unwrap(); - m.add_wrapped(wrap_pyfunction!(functions::cum_count)) - .unwrap(); m.add_wrapped(wrap_pyfunction!(functions::cum_fold)) .unwrap(); m.add_wrapped(wrap_pyfunction!(functions::cum_reduce)) diff --git a/py-polars/tests/unit/functions/test_cum_count.py b/py-polars/tests/unit/functions/test_cum_count.py index 3850624af00ae..bbedad60d5986 100644 --- a/py-polars/tests/unit/functions/test_cum_count.py +++ b/py-polars/tests/unit/functions/test_cum_count.py @@ -9,7 +9,8 @@ @pytest.mark.parametrize(("reverse", "output"), [(False, [1, 2, 3]), (True, [3, 2, 1])]) def test_cum_count_no_args(reverse: bool, output: list[int]) -> None: df = pl.DataFrame({"a": [5, 5, None]}) - result = df.select(pl.cum_count(reverse=reverse)) + with pytest.deprecated_call(): + result = df.select(pl.cum_count(reverse=reverse)) expected = pl.Series("cum_count", output, dtype=pl.UInt32).to_frame() assert_frame_equal(result, expected)