Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python): Rename GroupBy.apply to map_groups #10799

Merged
merged 5 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/dataframe/group_by.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This namespace is available after calling :code:`DataFrame.group_by(...)`.
GroupBy.first
GroupBy.head
GroupBy.last
GroupBy.map_groups
GroupBy.max
GroupBy.mean
GroupBy.median
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ Miscellaneous
DataFrame.corr
DataFrame.frame_equal
DataFrame.lazy
DataFrame.map_rows
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ These functions are available from the polars module root and can be used as exp
last
lit
map
map_groups
max
max_horizontal
mean
Expand Down
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/lazyframe/group_by.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ This namespace is available after calling `LazyFrame.group_by(...)`.
LazyGroupBy.first
LazyGroupBy.head
LazyGroupBy.last
LazyGroupBy.map_groups
LazyGroupBy.max
LazyGroupBy.mean
LazyGroupBy.median
Expand Down
2 changes: 2 additions & 0 deletions py-polars/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@
last,
lit,
map,
map_groups,
max,
max_horizontal,
mean,
Expand Down Expand Up @@ -333,6 +334,7 @@
"last",
"lit",
"map",
"map_groups",
"mean",
"median",
"n_unique",
Expand Down
209 changes: 78 additions & 131 deletions py-polars/polars/dataframe/group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import polars._reexport as pl
from polars import functions as F
from polars.utils.convert import _timedelta_to_pl_duration
from polars.utils.deprecation import deprecate_renamed_function

if TYPE_CHECKING:
import sys
Expand Down Expand Up @@ -242,7 +243,7 @@ def agg(
.collect(no_optimization=True)
)

def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
def map_groups(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
"""
Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.

Expand Down Expand Up @@ -273,30 +274,16 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:

Examples
--------
For each color group sample two rows:

>>> df = pl.DataFrame(
... {
... "id": [0, 1, 2, 3, 4],
... "color": ["red", "green", "green", "red", "red"],
... "shape": ["square", "triangle", "square", "triangle", "square"],
... }
... )
>>> df
shape: (5, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 0 ┆ red ┆ square │
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 3 ┆ red ┆ triangle │
│ 4 ┆ red ┆ square │
└─────┴───────┴──────────┘

For each color group sample two rows:

>>> df.group_by("color").apply(
>>> df.group_by("color").map_groups(
... lambda group_df: group_df.sample(2)
... ) # doctest: +IGNORE_RESULT
shape: (4, 3)
Expand Down Expand Up @@ -325,15 +312,15 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
elif isinstance(self.by, Iterable) and all(isinstance(c, str) for c in self.by):
by = list(self.by) # type: ignore[arg-type]
else:
raise TypeError("cannot call `apply` when grouping by an expression")
raise TypeError("cannot call `map_groups` when grouping by an expression")

if all(isinstance(c, str) for c in self.more_by):
by.extend(self.more_by) # type: ignore[arg-type]
else:
raise TypeError("cannot call `apply` when grouping by an expression")
raise TypeError("cannot call `map_groups` when grouping by an expression")

return self.df.__class__._from_pydf(
self.df._df.group_by_apply(by, function, self.maintain_order)
self.df._df.group_by_map_groups(by, function, self.maintain_order)
)

def head(self, n: int = 5) -> DataFrame:
Expand Down Expand Up @@ -760,6 +747,22 @@ def sum(self) -> DataFrame:
"""
return self.agg(F.all().sum())

@deprecate_renamed_function("map_groups", version="0.19.0")
def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
"""
Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.

.. deprecated:: 0.19.0
This method has been renamed to :func:`GroupBy.map_groups`.

This alias forwards ``function`` unchanged to ``map_groups`` and returns
its result.

Parameters
----------
function
Custom function; annotated as taking a ``DataFrame`` and returning a
``DataFrame``.

Returns
-------
DataFrame
The result of calling ``map_groups`` with the same ``function``.

"""
return self.map_groups(function)


class RollingGroupBy:
"""
Expand Down Expand Up @@ -866,7 +869,7 @@ def agg(
.collect(no_optimization=True)
)

def apply(
def map_groups(
self,
function: Callable[[DataFrame], DataFrame],
schema: SchemaDict | None,
Expand All @@ -883,7 +886,7 @@ def apply(

The idiomatic way to apply custom functions over multiple columns is using:

`pl.struct([my_columns]).apply(lambda struct_series: ..)`
`pl.struct([my_columns]).map_elements(lambda struct_series: ..)`

Parameters
----------
Expand All @@ -894,58 +897,6 @@ def apply(
given schema is incorrect, this is a bug in the caller's query and may
lead to errors. If set to None, polars assumes the schema is unchanged.


Examples
--------
>>> df = pl.DataFrame(
... {
... "id": [0, 1, 2, 3, 4],
... "color": ["red", "green", "green", "red", "red"],
... "shape": ["square", "triangle", "square", "triangle", "square"],
... }
... )
>>> df
shape: (5, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 0 ┆ red ┆ square │
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 3 ┆ red ┆ triangle │
│ 4 ┆ red ┆ square │
└─────┴───────┴──────────┘

For each color group sample two rows:

>>> (
... df.lazy()
... .group_by("color")
... .apply(lambda group_df: group_df.sample(2), schema=None)
... .collect()
... ) # doctest: +IGNORE_RESULT
shape: (4, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 4 ┆ red ┆ square │
│ 3 ┆ red ┆ triangle │
└─────┴───────┴──────────┘

It is better to implement this with an expression:

>>> (
... df.lazy()
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT

"""
return (
self.df.lazy()
Expand All @@ -957,10 +908,34 @@ def apply(
by=self.by,
check_sorted=self.check_sorted,
)
.apply(function, schema)
.map_groups(function, schema)
.collect(no_optimization=True)
)

@deprecate_renamed_function("map_groups", version="0.19.0")
def apply(
self,
function: Callable[[DataFrame], DataFrame],
schema: SchemaDict | None,
) -> DataFrame:
"""
Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

.. deprecated:: 0.19.0
This method has been renamed to :func:`RollingGroupBy.map_groups`.

This alias forwards ``function`` and ``schema`` unchanged to ``map_groups``
and returns its result.

Parameters
----------
function
Function to apply over each group; annotated as taking a ``DataFrame``
and returning a ``DataFrame``.
schema
Schema of the output function. This has to be known statically. If the
given schema is incorrect, this is a bug in the caller's query and may
lead to errors. If set to None, polars assumes the schema is unchanged.

Returns
-------
DataFrame
The result of calling ``map_groups`` with the same arguments.

"""
return self.map_groups(function, schema)


class DynamicGroupBy:
"""
Expand Down Expand Up @@ -1084,7 +1059,7 @@ def agg(
.collect(no_optimization=True)
)

def apply(
def map_groups(
self,
function: Callable[[DataFrame], DataFrame],
schema: SchemaDict | None,
Expand All @@ -1101,7 +1076,7 @@ def apply(

The idiomatic way to apply custom functions over multiple columns is using:

`pl.struct([my_columns]).apply(lambda struct_series: ..)`
`pl.struct([my_columns]).map_elements(lambda struct_series: ..)`

Parameters
----------
Expand All @@ -1112,58 +1087,6 @@ def apply(
given schema is incorrect, this is a bug in the caller's query and may
lead to errors. If set to None, polars assumes the schema is unchanged.


Examples
--------
>>> df = pl.DataFrame(
... {
... "id": [0, 1, 2, 3, 4],
... "color": ["red", "green", "green", "red", "red"],
... "shape": ["square", "triangle", "square", "triangle", "square"],
... }
... )
>>> df
shape: (5, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 0 ┆ red ┆ square │
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 3 ┆ red ┆ triangle │
│ 4 ┆ red ┆ square │
└─────┴───────┴──────────┘

For each color group sample two rows:

>>> (
... df.lazy()
... .group_by("color")
... .apply(lambda group_df: group_df.sample(2), schema=None)
... .collect()
... ) # doctest: +IGNORE_RESULT
shape: (4, 3)
┌─────┬───────┬──────────┐
│ id ┆ color ┆ shape │
│ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ str │
╞═════╪═══════╪══════════╡
│ 1 ┆ green ┆ triangle │
│ 2 ┆ green ┆ square │
│ 4 ┆ red ┆ square │
│ 3 ┆ red ┆ triangle │
└─────┴───────┴──────────┘

It is better to implement this with an expression:

>>> (
... df.lazy()
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT

"""
return (
self.df.lazy()
Expand All @@ -1179,6 +1102,30 @@ def apply(
start_by=self.start_by,
check_sorted=self.check_sorted,
)
.apply(function, schema)
.map_groups(function, schema)
.collect(no_optimization=True)
)

@deprecate_renamed_function("map_groups", version="0.19.0")
def apply(
self,
function: Callable[[DataFrame], DataFrame],
schema: SchemaDict | None,
) -> DataFrame:
"""
Apply a custom/user-defined function (UDF) over the groups as a new DataFrame.

.. deprecated:: 0.19.0
This method has been renamed to :func:`DynamicGroupBy.map_groups`.

This alias forwards ``function`` and ``schema`` unchanged to ``map_groups``
and returns its result.

Parameters
----------
function
Function to apply over each group; annotated as taking a ``DataFrame``
and returning a ``DataFrame``.
schema
Schema of the output function. This has to be known statically. If the
given schema is incorrect, this is a bug in the caller's query and may
lead to errors. If set to None, polars assumes the schema is unchanged.

Returns
-------
DataFrame
The result of calling ``map_groups`` with the same arguments.

"""
return self.map_groups(function, schema)
2 changes: 2 additions & 0 deletions py-polars/polars/functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
implode,
last,
map,
map_groups,
mean,
median,
n_unique,
Expand Down Expand Up @@ -138,6 +139,7 @@
"last",
"lit",
"map",
"map_groups",
"mean",
"median",
"n_unique",
Expand Down
Loading