Skip to content

Commit

Permalink
Deprecate cum_count function
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Jan 14, 2024
1 parent 6071d65 commit f65366d
Show file tree
Hide file tree
Showing 5 changed files with 17 additions and 57 deletions.
12 changes: 0 additions & 12 deletions crates/polars-plan/src/dsl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1765,18 +1765,6 @@ pub fn len() -> Expr {
Expr::Len
}

/// Return the cumulative count of the context.
///
/// The expression is an `int_range` starting at `1` with upper bound
/// `len() + 1` and step `1`, typed as `IDX_DTYPE`. When `reverse` is `true`
/// the range is reversed. The output column is named `"cum_count"`.
#[cfg(feature = "range")]
pub fn cum_count(reverse: bool) -> Expr {
    let first = lit(1 as IdxSize);
    let stop = len() + lit(1 as IdxSize);
    let forward = int_range(first, stop, 1, IDX_DTYPE);
    // Pick the direction as an expression instead of mutating a binding.
    let counts = if reverse { forward.reverse() } else { forward };
    counts.alias("cum_count")
}

/// First column in DataFrame.
pub fn first() -> Expr {
Expr::Nth(0)
Expand Down
52 changes: 15 additions & 37 deletions py-polars/polars/functions/lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import polars._reexport as pl
import polars.functions as F
from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64
from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64, UInt32
from polars.utils._async import _AioDataFrameResult, _GeventDataFrameResult
from polars.utils._parse_expr_input import (
parse_as_expression,
Expand Down Expand Up @@ -93,7 +93,7 @@ def count(*columns: str) -> Expr:
"""
Return the number of non-null values in the column.
This function is syntactic sugar for `col(column).count()`.
This function is syntactic sugar for `col(columns).count()`.
Calling this function without any arguments returns the number of rows in the
context. **This way of using the function is deprecated. Please use :func:`len`
Expand Down Expand Up @@ -168,13 +168,13 @@ def count(*columns: str) -> Expr:

def cum_count(*columns: str, reverse: bool = False) -> Expr:
"""
Return the cumulative count of the non-null values in the column or of the context.
Return the cumulative count of the non-null values in the column.
This function is syntactic sugar for `col(columns).cum_count()`.
If no arguments are passed, returns the cumulative count of a context.
Rows containing null values count towards the result.
Otherwise, this function is syntactic sugar for `col(names).cum_count()`.
Parameters
----------
*columns
Expand All @@ -184,24 +184,7 @@ def cum_count(*columns: str, reverse: bool = False) -> Expr:
Examples
--------
Return the row numbers of a context. Note that rows containing null values are
counted towards the total.
>>> df = pl.DataFrame({"a": [1, 2, None], "b": [3, None, None]})
>>> df.select(pl.cum_count())
shape: (3, 1)
┌───────────┐
│ cum_count │
│ --- │
│ u32 │
╞═══════════╡
│ 1 │
│ 2 │
│ 3 │
└───────────┘
Return the cumulative count of non-null values in a column.
>>> df.select(pl.cum_count("a"))
shape: (3, 1)
┌─────┐
Expand All @@ -213,23 +196,18 @@ def cum_count(*columns: str, reverse: bool = False) -> Expr:
│ 2 │
│ 2 │
└─────┘
Add row numbers to a DataFrame.
>>> df.select(pl.cum_count().alias("row_number"), pl.all())
shape: (3, 3)
┌────────────┬──────┬──────┐
│ row_number ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════════╪══════╪══════╡
│ 1 ┆ 1 ┆ 3 │
│ 2 ┆ 2 ┆ null │
│ 3 ┆ null ┆ null │
└────────────┴──────┴──────┘
"""
if not columns:
return wrap_expr(plr.cum_count(reverse=reverse))
issue_deprecation_warning(
"`pl.cum_count()` is deprecated. The same result can be achieved using"
" `pl.int_range(1, pl.len() + 1, dtype=pl.UInt32)`,"
" or `int_range(pl.len(), 0, -1, dtype=pl.UInt32)` when `reverse=True`.",
version="0.20.5",
)
if reverse:
return F.int_range(F.len(), 0, step=-1, dtype=UInt32).alias("cum_count")
else:
return F.int_range(1, F.len() + 1, dtype=UInt32).alias("cum_count")
return F.col(*columns).cum_count(reverse=reverse)


Expand Down
5 changes: 0 additions & 5 deletions py-polars/src/functions/lazy.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,6 @@ pub fn len() -> PyExpr {
dsl::len().into()
}

// Python-facing wrapper: builds the DSL `cum_count` expression and converts
// it into a `PyExpr`.
#[pyfunction]
pub fn cum_count(reverse: bool) -> PyExpr {
    let expr = dsl::cum_count(reverse);
    expr.into()
}

#[pyfunction]
pub fn cov(a: PyExpr, b: PyExpr, ddof: u8) -> PyExpr {
dsl::cov(a.inner, b.inner, ddof).into()
Expand Down
2 changes: 0 additions & 2 deletions py-polars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ fn polars(py: Python, m: &PyModule) -> PyResult<()> {
.unwrap();
m.add_wrapped(wrap_pyfunction!(functions::len)).unwrap();
m.add_wrapped(wrap_pyfunction!(functions::cov)).unwrap();
m.add_wrapped(wrap_pyfunction!(functions::cum_count))
.unwrap();
m.add_wrapped(wrap_pyfunction!(functions::cum_fold))
.unwrap();
m.add_wrapped(wrap_pyfunction!(functions::cum_reduce))
Expand Down
3 changes: 2 additions & 1 deletion py-polars/tests/unit/functions/test_cum_count.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
@pytest.mark.parametrize(("reverse", "output"), [(False, [1, 2, 3]), (True, [3, 2, 1])])
def test_cum_count_no_args(reverse: bool, output: list[int]) -> None:
df = pl.DataFrame({"a": [5, 5, None]})
result = df.select(pl.cum_count(reverse=reverse))
with pytest.deprecated_call():
result = df.select(pl.cum_count(reverse=reverse))
expected = pl.Series("cum_count", output, dtype=pl.UInt32).to_frame()
assert_frame_equal(result, expected)

Expand Down

0 comments on commit f65366d

Please sign in to comment.