Skip to content

Commit

Permalink
depr(python): Rename GroupBy.apply to map_groups (#10799)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Aug 30, 2023
1 parent 6759b53 commit efa2641
Show file tree
Hide file tree
Showing 22 changed files with 658 additions and 610 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe/group_by.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This namespace is available after calling :code:`DataFrame.group_by(...)`.
GroupBy.first
GroupBy.head
GroupBy.last
GroupBy.map_groups
GroupBy.max
GroupBy.mean
GroupBy.median
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Miscellaneous
DataFrame.corr
DataFrame.frame_equal
DataFrame.lazy
DataFrame.map_rows
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/expressions/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ These functions are available from the polars module root and can be used as exp
last
lit
map
map_groups
max
max_horizontal
mean
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/lazyframe/group_by.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This namespace becomes available by calling `LazyFrame.group_by(..)`.
LazyGroupBy.first
LazyGroupBy.head
LazyGroupBy.last
LazyGroupBy.map_groups
LazyGroupBy.max
LazyGroupBy.mean
LazyGroupBy.median
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
last,
lit,
map,
map_groups,
max,
max_horizontal,
mean,
Expand Down Expand Up @@ -333,6 +334,7 @@
"last",
"lit",
"map",
"map_groups",
"mean",
"median",
"n_unique",
Expand Down
209 changes: 78 additions & 131 deletions py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars._reexport as pl
from polars import functions as F
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.deprecation import deprecate_renamed_function

if TYPE_CHECKING:
import sys
Expand Down Expand Up @@ -242,7 +243,7 @@ def agg(
.collect(no_optimization=True)
)

def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
def map_groups(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
"""
Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
Expand Down Expand Up @@ -273,30 +274,16 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
Examples
--------
For each color group sample two rows:
>>> df = pl.DataFrame(
... {
... "id": [0, 1, 2, 3, 4],
... "color": ["red", "green", "green", "red", "red"],
... "shape": ["square", "triangle", "square", "triangle", "square"],
... }
... )
>>> df
shape: (5, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 0 ┆ red ┆ square │
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 3 ┆ red ┆ triangle │
│ 4 ┆ red ┆ square │
└─────┴───────┴──────────┘
For each color group sample two rows:
>>> df.group_by("color").apply(
>>> df.group_by("color").map_groups(
... lambda group_df: group_df.sample(2)
... ) # doctest: +IGNORE_RESULT
shape: (4, 3)
Expand Down Expand Up @@ -325,15 +312,15 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
elif isinstance(self.by, Iterable) and all(isinstance(c, str) for c in self.by):
by = list(self.by) # type: ignore[arg-type]
else:
raise TypeError("cannot call `apply` when grouping by an expression")
raise TypeError("cannot call `map_groups` when grouping by an expression")

if all(isinstance(c, str) for c in self.more_by):
by.extend(self.more_by) # type: ignore[arg-type]
else:
raise TypeError("cannot call `apply` when grouping by an expression")
raise TypeError("cannot call `map_groups` when grouping by an expression")

return self.df.__class__._from_pydf(
self.df._df.group_by_apply(by, function, self.maintain_order)
self.df._df.group_by_map_groups(by, function, self.maintain_order)
)

def head(self, n: int = 5) -> DataFrame:
Expand Down Expand Up @@ -760,6 +747,22 @@ def sum(self) -> DataFrame:
"""
return self.agg(F.all().sum())

@deprecate_renamed_function("map_groups", version="0.19.0")
def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
    """
    Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.

    .. deprecated:: 0.19.0
        This method has been renamed to :func:`GroupBy.map_groups`.

    Parameters
    ----------
    function
        Custom function. It is forwarded unchanged to ``map_groups``.

    Returns
    -------
    DataFrame
        The result of calling ``map_groups`` with the same ``function``.

    """
    # Backward-compatibility alias: all of the work is done by `map_groups`.
    result = self.map_groups(function)
    return result


class RollingGroupBy:
"""
Expand Down Expand Up @@ -866,7 +869,7 @@ def agg(
.collect(no_optimization=True)
)

def apply(
def map_groups(
self,
function: Callable[[DataFrame], DataFrame],
schema: SchemaDict | None,
Expand All @@ -883,7 +886,7 @@ def apply(
The idiomatic way to apply custom functions over multiple columns is using:
`pl.struct([my_columns]).apply(lambda struct_series: ..)`
`pl.struct([my_columns]).map_elements(lambda struct_series: ..)`
Parameters
----------
Expand All @@ -894,58 +897,6 @@ def apply(
given schema is incorrect, this is a bug in the caller's query and may
lead to errors. If set to None, polars assumes the schema is unchanged.
Examples
--------
>>> df = pl.DataFrame(
... {
... "id": [0, 1, 2, 3, 4],
... "color": ["red", "green", "green", "red", "red"],
... "shape": ["square", "triangle", "square", "triangle", "square"],
... }
... )
>>> df
shape: (5, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 0 ┆ red ┆ square │
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 3 ┆ red ┆ triangle │
│ 4 ┆ red ┆ square │
└─────┴───────┴──────────┘
For each color group sample two rows:
>>> (
... df.lazy()
... .group_by("color")
... .apply(lambda group_df: group_df.sample(2), schema=None)
... .collect()
... ) # doctest: +IGNORE_RESULT
shape: (4, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 4 ┆ red ┆ square │
│ 3 ┆ red ┆ triangle │
└─────┴───────┴──────────┘
It is better to implement this with an expression:
>>> (
... df.lazy()
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
"""
return (
self.df.lazy()
Expand All @@ -957,10 +908,34 @@ def apply(
by=self.by,
check_sorted=self.check_sorted,
)
.apply(function, schema)
.map_groups(function, schema)
.collect(no_optimization=True)
)

@deprecate_renamed_function("map_groups", version="0.19.0")
def apply(
    self,
    function: Callable[[DataFrame], DataFrame],
    schema: SchemaDict | None,
) -> DataFrame:
    """
    Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

    .. deprecated:: 0.19.0
        This method has been renamed to :func:`RollingGroupBy.map_groups`.

    Parameters
    ----------
    function
        Function to apply over each group; it receives a ``DataFrame`` and
        must return a ``DataFrame``.
    schema
        Schema of the output function. This has to be known statically. If the
        given schema is incorrect, this is a bug in the caller's query and may
        lead to errors. If set to None, polars assumes the schema is unchanged.

    Returns
    -------
    DataFrame
        The result of calling ``map_groups`` with the same arguments.

    """
    # Backward-compatibility alias: both arguments are passed straight
    # through to `map_groups`, in the same positional order.
    result = self.map_groups(function, schema)
    return result


class DynamicGroupBy:
"""
Expand Down Expand Up @@ -1084,7 +1059,7 @@ def agg(
.collect(no_optimization=True)
)

def apply(
def map_groups(
self,
function: Callable[[DataFrame], DataFrame],
schema: SchemaDict | None,
Expand All @@ -1101,7 +1076,7 @@ def apply(
The idiomatic way to apply custom functions over multiple columns is using:
`pl.struct([my_columns]).apply(lambda struct_series: ..)`
`pl.struct([my_columns]).map_elements(lambda struct_series: ..)`
Parameters
----------
Expand All @@ -1112,58 +1087,6 @@ def apply(
given schema is incorrect, this is a bug in the caller's query and may
lead to errors. If set to None, polars assumes the schema is unchanged.
Examples
--------
>>> df = pl.DataFrame(
... {
... "id": [0, 1, 2, 3, 4],
... "color": ["red", "green", "green", "red", "red"],
... "shape": ["square", "triangle", "square", "triangle", "square"],
... }
... )
>>> df
shape: (5, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 0 ┆ red ┆ square │
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 3 ┆ red ┆ triangle │
│ 4 ┆ red ┆ square │
└─────┴───────┴──────────┘
For each color group sample two rows:
>>> (
... df.lazy()
... .group_by("color")
... .apply(lambda group_df: group_df.sample(2), schema=None)
... .collect()
... ) # doctest: +IGNORE_RESULT
shape: (4, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 4 ┆ red ┆ square │
│ 3 ┆ red ┆ triangle │
└─────┴───────┴──────────┘
It is better to implement this with an expression:
>>> (
... df.lazy()
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
"""
return (
self.df.lazy()
Expand All @@ -1179,6 +1102,30 @@ def apply(
start_by=self.start_by,
check_sorted=self.check_sorted,
)
.apply(function, schema)
.map_groups(function, schema)
.collect(no_optimization=True)
)

@deprecate_renamed_function("map_groups", version="0.19.0")
def apply(
    self,
    function: Callable[[DataFrame], DataFrame],
    schema: SchemaDict | None,
) -> DataFrame:
    """
    Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

    .. deprecated:: 0.19.0
        This method has been renamed to :func:`DynamicGroupBy.map_groups`.

    Parameters
    ----------
    function
        Function to apply over each group; it receives a ``DataFrame`` and
        must return a ``DataFrame``.
    schema
        Schema of the output function. This has to be known statically. If the
        given schema is incorrect, this is a bug in the caller's query and may
        lead to errors. If set to None, polars assumes the schema is unchanged.

    Returns
    -------
    DataFrame
        The result of calling ``map_groups`` with the same arguments.

    """
    # Backward-compatibility alias: both arguments are passed straight
    # through to `map_groups`, in the same positional order.
    result = self.map_groups(function, schema)
    return result
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
implode,
last,
map,
map_groups,
mean,
median,
n_unique,
Expand Down Expand Up @@ -138,6 +139,7 @@
"last",
"lit",
"map",
"map_groups",
"mean",
"median",
"n_unique",
Expand Down
Loading

0 comments on commit efa2641

Please sign in to comment.