From 8f3af1fdb4d20e620e5ab5769cc904eb54ed57ea Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 3 Jan 2023 14:06:48 +0100 Subject: [PATCH 1/3] Remove Groupby.pivot --- .../polars/internals/dataframe/groupby.py | 66 --------------- py-polars/polars/internals/dataframe/pivot.py | 81 ------------------- 2 files changed, 147 deletions(-) delete mode 100644 py-polars/polars/internals/dataframe/pivot.py diff --git a/py-polars/polars/internals/dataframe/groupby.py b/py-polars/polars/internals/dataframe/groupby.py index 4501b4ab0a54..b737d48f7961 100644 --- a/py-polars/polars/internals/dataframe/groupby.py +++ b/py-polars/polars/internals/dataframe/groupby.py @@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Callable, Generic, Iterator, Sequence, TypeVar import polars.internals as pli -from polars.internals.dataframe.pivot import PivotOps from polars.utils import _timedelta_to_pl_duration, is_str_sequence if TYPE_CHECKING: @@ -361,71 +360,6 @@ def tail(self, n: int = 5) -> DF: ) return self._dataframe_class._from_pydf(df._df) - def pivot( - self, pivot_column: str | list[str], values_column: str | list[str] - ) -> PivotOps[DF]: - """ - Do a pivot operation. - - The pivot operation is based on the group key, a pivot column and an aggregation - function on the values column. - - .. deprecated:: 0.13.23 - `DataFrame.groupby.pivot` will be removed in favour of `DataFrame.pivot`. - - Parameters - ---------- - pivot_column - Column to pivot. - values_column - Column that will be aggregated. - - Notes - ----- - Polars'/arrow memory is not ideal for transposing operations like pivots. - If you have a relatively large table, consider using a groupby over a pivot. - - Examples - -------- - >>> df = pl.DataFrame( - ... { - ... "foo": ["one", "one", "one", "two", "two", "two"], - ... "bar": ["A", "B", "C", "A", "B", "C"], - ... "baz": [1, 2, 3, 4, 5, 6], - ... } - ... ) - >>> df.groupby("foo", maintain_order=True).pivot( # doctest: +SKIP - ... pivot_column="bar", values_column="baz" - ... ).first() - shape: (2, 4) - ┌─────┬─────┬─────┬─────┐ - │ foo ┆ A ┆ B ┆ C │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ str ┆ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╪═════╡ - │ one ┆ 1 ┆ 2 ┆ 3 │ - │ two ┆ 4 ┆ 5 ┆ 6 │ - └─────┴─────┴─────┴─────┘ - - """ - warnings.warn( - "`DataFrame.groupby.pivot` is deprecated and will be removed in a future" - " version. Use `DataFrame.pivot` instead.", - DeprecationWarning, - stacklevel=2, - ) - if isinstance(pivot_column, str): - pivot_column = [pivot_column] - if isinstance(values_column, str): - values_column = [values_column] - return PivotOps( - self._df, - self.by, # type: ignore[arg-type] - pivot_column, - values_column, - dataframe_class=self._dataframe_class, - ) - def first(self) -> pli.DataFrame: """ Aggregate the first values in the group. diff --git a/py-polars/polars/internals/dataframe/pivot.py b/py-polars/polars/internals/dataframe/pivot.py deleted file mode 100644 index 9d3c7679ef26..000000000000 --- a/py-polars/polars/internals/dataframe/pivot.py +++ /dev/null @@ -1,81 +0,0 @@ -from __future__ import annotations - -from typing import TYPE_CHECKING, Generic, TypeVar - -import polars.internals as pli -from polars.internals.type_aliases import PivotAgg - -if TYPE_CHECKING: - from polars.polars import PyDataFrame - -# A type variable used to refer to a polars.DataFrame or any subclass of it. -# Used to annotate DataFrame methods which returns the same type as self. -DF = TypeVar("DF", bound="pli.DataFrame") - - -class PivotOps(Generic[DF]): - """ - Utility class returned in a pivot operation. - - .. deprecated:: 0.13.23 - `PivotOps` will be removed in favour of `DataFrame.pivot`. - - """ - - def __init__( - self, - df: PyDataFrame, - by: str | list[str], - pivot_column: str | list[str], - values_column: str | list[str], - dataframe_class: type[DF], - ): - self._df = df - self.by = by - self.pivot_column = pivot_column - self.values_column = values_column - self._dataframe_class = dataframe_class - - def _execute(self, aggregate_fn: PivotAgg) -> DF: - return self._dataframe_class._from_pydf( - pli.wrap_df(self._df) - .pivot( - index=self.by, - columns=self.pivot_column, - values=self.values_column, - aggregate_fn=aggregate_fn, - ) - ._df - ) - - def first(self) -> DF: - """Get the first value per group.""" - return self._execute("first") - - def sum(self) -> DF: - """Get the sum per group.""" - return self._execute("sum") - - def min(self) -> DF: - """Get the minimal value per group.""" - return self._execute("min") - - def max(self) -> DF: - """Get the maximal value per group.""" - return self._execute("max") - - def mean(self) -> DF: - """Get the mean value per group.""" - return self._execute("mean") - - def count(self) -> DF: - """Count the values per group.""" - return self._execute("count") - - def median(self) -> DF: - """Get the median value per group.""" - return self._execute("median") - - def last(self) -> DF: - """Get the last value per group.""" - return self._execute("last") From b2d565f884f072056425445633c1516c3abe6ec2 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 3 Jan 2023 14:07:04 +0100 Subject: [PATCH 2/3] Update tests --- py-polars/tests/unit/test_df.py | 7 ------- py-polars/tests/unit/test_pivot.py | 28 ---------------------------- 2 files changed, 35 deletions(-) diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py index 0d56c82d0e06..fa93b8d1640b 100644 --- a/py-polars/tests/unit/test_df.py +++ b/py-polars/tests/unit/test_df.py @@ -2214,13 +2214,6 @@ class SubClassedDataFrame(pl.DataFrame): groupby = subclassed_df.groupby("a") assert isinstance(groupby.agg(pl.count()), SubClassedDataFrame) - # Round-trips to PivotOps and back should also preserve subclass - with pytest.deprecated_call(): - assert isinstance( - groupby.pivot(pivot_column="a", values_column="b").first(), - SubClassedDataFrame, - ) - def test_explode_empty() -> None: df = ( diff --git a/py-polars/tests/unit/test_pivot.py b/py-polars/tests/unit/test_pivot.py index 70b3087ff731..2414fb613caf 100644 --- a/py-polars/tests/unit/test_pivot.py +++ b/py-polars/tests/unit/test_pivot.py @@ -73,34 +73,6 @@ def test_pivot_aggregate(agg_fn: PivotAgg, expected_rows: list[tuple[Any]]) -> N assert result.rows() == expected_rows -@pytest.mark.parametrize( - ("agg_fn", "expected_rows"), - [ - ("first", [("a", 2, None, None), ("b", None, None, 10)]), - ("count", [("a", 2, None, None), ("b", None, 2, 1)]), - ("min", [("a", 2, None, None), ("b", None, 8, 10)]), - ("max", [("a", 4, None, None), ("b", None, 8, 10)]), - ("sum", [("a", 6, None, None), ("b", None, 8, 10)]), - ("mean", [("a", 3.0, None, None), ("b", None, 8.0, 10.0)]), - ("median", [("a", 3.0, None, None), ("b", None, 8.0, 10.0)]), - ], -) -def test_pivot_groupby_aggregate( - agg_fn: PivotAgg, expected_rows: list[tuple[Any]] -) -> None: - df = pl.DataFrame( - { - "a": [1, 1, 2, 2, 3], - "b": ["a", "a", "b", "b", "b"], - "c": [2, 4, None, 8, 10], - } - ) - with pytest.deprecated_call(): - pivot = df.groupby("b").pivot(pivot_column="a", values_column="c") - result = getattr(pivot, agg_fn)() - assert result.rows() == expected_rows - - def test_pivot_categorical_3968() -> None: df = pl.DataFrame( { From 3b374c2b5a164982b657da5ef2d296e2f00253c1 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Tue, 3 Jan 2023 17:53:15 +0100 Subject: [PATCH 3/3] Update API reference --- .../source/reference/dataframe/groupby.rst | 1 - .../docs/source/reference/dataframe/index.rst | 1 - .../docs/source/reference/dataframe/pivot.rst | 25 ------------------- 3 files changed, 27 deletions(-) delete mode 100644 py-polars/docs/source/reference/dataframe/pivot.rst diff --git a/py-polars/docs/source/reference/dataframe/groupby.rst b/py-polars/docs/source/reference/dataframe/groupby.rst index b63930a35487..83eef84c5f20 100644 --- a/py-polars/docs/source/reference/dataframe/groupby.rst +++ b/py-polars/docs/source/reference/dataframe/groupby.rst @@ -20,7 +20,6 @@ This namespace is available after calling :code:`DataFrame.groupby(...)`. GroupBy.median GroupBy.min GroupBy.n_unique - GroupBy.pivot GroupBy.quantile GroupBy.sum GroupBy.tail diff --git a/py-polars/docs/source/reference/dataframe/index.rst b/py-polars/docs/source/reference/dataframe/index.rst index 59d6d1ab3250..5fdaebbf9e27 100644 --- a/py-polars/docs/source/reference/dataframe/index.rst +++ b/py-polars/docs/source/reference/dataframe/index.rst @@ -16,7 +16,6 @@ This page gives an overview of all public DataFrame methods. groupby modify_select miscellaneous - pivot .. currentmodule:: polars diff --git a/py-polars/docs/source/reference/dataframe/pivot.rst b/py-polars/docs/source/reference/dataframe/pivot.rst deleted file mode 100644 index 9a9f9871216e..000000000000 --- a/py-polars/docs/source/reference/dataframe/pivot.rst +++ /dev/null @@ -1,25 +0,0 @@ -===== -Pivot -===== - -.. currentmodule:: polars - -This namespace is available after calling :code:`DataFrame.groupby(...).pivot` - -.. deprecated:: 0.13.23 - - Note that this API has been deprecated in favor of :meth:`DataFrame.pivot` - and will be removed in a future version. - -.. currentmodule:: polars.internals.dataframe.pivot -.. autosummary:: - :toctree: api/ - - PivotOps.count - PivotOps.first - PivotOps.last - PivotOps.max - PivotOps.mean - PivotOps.median - PivotOps.min - PivotOps.sum