From 568c1add9979b6ed6c87e08aa8935f26857b9f56 Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 29 Dec 2019 15:09:22 -0500 Subject: [PATCH 1/8] BUG: DataFrame GroupBy indexing with single items DeprecationWarning(#23566) --- pandas/core/groupby/generic.py | 14 +++++++++++++- pandas/tests/groupby/test_grouping.py | 8 +++++--- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 6b110a0c80c07..d44dcf65bbd01 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,6 +24,7 @@ Union, cast, ) +import warnings import numpy as np @@ -325,7 +326,7 @@ def _aggregate_multiple_funcs(self, arg): return DataFrame(results, columns=columns) def _wrap_series_output( - self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index, + self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index ) -> Union[Series, DataFrame]: """ Wraps the output of a SeriesGroupBy operation into the expected result. @@ -1574,6 +1575,17 @@ def filter(self, func, dropna=True, *args, **kwargs): return self._apply_filter(indices, dropna) + def __getitem__(self, key): + # per GH 23566 + if isinstance(key, tuple): + warnings.warn( + "Indexing with individual keys or with a tuple of keys " + "will be deprecated, use a list instead.", + DeprecationWarning, + stacklevel=2, + ) + return super().__getitem__(key) + def _gotitem(self, key, ndim: int, subset=None): """ sub-classes to define diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index f2af397357e4f..2cfa2b987f4ee 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -71,14 +71,16 @@ def test_getitem_list_of_columns(self): ) result = df.groupby("A")[["C", "D"]].mean() - result2 = df.groupby("A")["C", "D"].mean() - result3 = df.groupby("A")[df.columns[2:4]].mean() + result2 = df.groupby("A")[df.columns[2:4]].mean() expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean() tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(result3, expected) + + # per GH 23566 this should raise a deprecation warning + with tm.assert_produces_warning(DeprecationWarning): + df.groupby("A")["C", "D"].mean() def test_getitem_numeric_column_names(self): # GH #13731 From 90abdf0e61e5852948ce27d6ec30122e436b7767 Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 29 Dec 2019 18:22:07 -0500 Subject: [PATCH 2/8] fixed more tests, added to whatsnew in docs --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/tests/groupby/test_grouping.py | 8 +++++--- pandas/tests/groupby/test_transform.py | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 96ea682dd3caf..43346ac534df3 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -523,6 +523,7 @@ Deprecations it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) +- When selecting columns from a :class:`DataFrameGroupBy` object, passing individual items in brackets is deprecated, should pass in a list of items instead. (:issue:`23566`) .. _whatsnew_1000.prior_deprecations: diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 2cfa2b987f4ee..ca2c3e361d435 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -93,14 +93,16 @@ def test_getitem_numeric_column_names(self): } ) result = df.groupby(0)[df.columns[1:3]].mean() - result2 = df.groupby(0)[2, 4].mean() - result3 = df.groupby(0)[[2, 4]].mean() + result2 = df.groupby(0)[[2, 4]].mean() expected = df.loc[:, [0, 2, 4]].groupby(0).mean() tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - tm.assert_frame_equal(result3, expected) + + # per GH 23566 this should raise a deprecation warning + with tm.assert_produces_warning(DeprecationWarning): + df.groupby(0)[2, 4].mean() # grouping diff --git a/pandas/tests/groupby/test_transform.py b/pandas/tests/groupby/test_transform.py index c46180c1d11cd..87bb1e52855c8 100644 --- a/pandas/tests/groupby/test_transform.py +++ b/pandas/tests/groupby/test_transform.py @@ -319,7 +319,7 @@ def test_dispatch_transform(tsframe): def test_transform_select_columns(df): f = lambda x: x.mean() - result = df.groupby("A")["C", "D"].transform(f) + result = df.groupby("A")[["C", "D"]].transform(f) selection = df[["C", "D"]] expected = selection.groupby(df["A"]).transform(f) From c97ca486c9dd0965b1fa767be6f7dd8defaed42c Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 29 Dec 2019 18:23:47 -0500 Subject: [PATCH 3/8] flake8 fix --- pandas/tests/groupby/test_grouping.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index ca2c3e361d435..bc9393b50f058 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -99,7 +99,7 @@ def test_getitem_numeric_column_names(self): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - + # per GH 23566 this should raise a deprecation warning with tm.assert_produces_warning(DeprecationWarning): df.groupby(0)[2, 4].mean() From 7a8f2d7fe991e84ccc5b7b769794cb1acf5f0296 Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Sun, 29 Dec 2019 20:37:32 -0500 Subject: [PATCH 4/8] fixed examples in docs, added test, changed warning to only tuple with len > 1 --- .../comparison/comparison_with_sas.rst | 2 +- .../comparison/comparison_with_stata.rst | 2 +- pandas/core/groupby/generic.py | 6 +++--- pandas/tests/groupby/test_grouping.py | 21 +++++++++++++++++++ 4 files changed, 26 insertions(+), 5 deletions(-) diff --git a/doc/source/getting_started/comparison/comparison_with_sas.rst b/doc/source/getting_started/comparison/comparison_with_sas.rst index 69bb700c97b15..4e284fe7b5968 100644 --- a/doc/source/getting_started/comparison/comparison_with_sas.rst +++ b/doc/source/getting_started/comparison/comparison_with_sas.rst @@ -629,7 +629,7 @@ for more details and examples. .. ipython:: python - tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum() + tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() tips_summed.head() diff --git a/doc/source/getting_started/comparison/comparison_with_stata.rst b/doc/source/getting_started/comparison/comparison_with_stata.rst index db687386329bb..fec6bae1e0330 100644 --- a/doc/source/getting_started/comparison/comparison_with_stata.rst +++ b/doc/source/getting_started/comparison/comparison_with_stata.rst @@ -617,7 +617,7 @@ for more details and examples. .. ipython:: python - tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum() + tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum() tips_summed.head() diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d44dcf65bbd01..23243cff127f0 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1577,10 +1577,10 @@ def filter(self, func, dropna=True, *args, **kwargs): def __getitem__(self, key): # per GH 23566 - if isinstance(key, tuple): + if isinstance(key, tuple) and len(key) > 1: warnings.warn( - "Indexing with individual keys or with a tuple of keys " - "will be deprecated, use a list instead.", + "Indexing with multiple keys (implicitly converted to a tuple " + "of keys) will be deprecated, use a list instead.", DeprecationWarning, stacklevel=2, ) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index bc9393b50f058..88603ad30a555 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -104,6 +104,27 @@ def test_getitem_numeric_column_names(self): with tm.assert_produces_warning(DeprecationWarning): df.groupby(0)[2, 4].mean() + def test_getitem_single_column(self): + df = DataFrame( + { + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": ["one", "one", "two", "three", "two", "two", "one", "three"], + "C": np.random.randn(8), + "D": np.random.randn(8), + "E": np.random.randn(8), + } + ) + + result = df.groupby("A")["C"].mean() + + as_frame = df.loc[:, ["A", "C"]].groupby("A").mean() + as_series = as_frame.iloc[:, 0] + expected = as_series + + assert isinstance(result, Series) + assert not isinstance(result, DataFrame) + tm.assert_series_equal(result, expected) + # grouping # -------------------------------- From 8a66e55f4901993b6c6215e43a81e9d9b4c1ede6 Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Tue, 31 Dec 2019 22:39:09 -0500 Subject: [PATCH 5/8] Changed to FutureWarning, cleaned up tests --- pandas/core/groupby/generic.py | 4 +++- pandas/tests/groupby/test_grouping.py | 15 +++++++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 23243cff127f0..351bbfb1a0c63 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1578,10 +1578,12 @@ def filter(self, func, dropna=True, *args, **kwargs): def __getitem__(self, key): # per GH 23566 if isinstance(key, tuple) and len(key) > 1: + # if len == 1, then it becomes a SeriesGroupBy and this is actually + # valid syntax, so don't raise warning warnings.warn( "Indexing with multiple keys (implicitly converted to a tuple " "of keys) will be deprecated, use a list instead.", - DeprecationWarning, + FutureWarning, stacklevel=2, ) return super().__getitem__(key) diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 88603ad30a555..04c707acafab2 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -78,10 +78,6 @@ def test_getitem_list_of_columns(self): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - # per GH 23566 this should raise a deprecation warning - with tm.assert_produces_warning(DeprecationWarning): - df.groupby("A")["C", "D"].mean() - def test_getitem_numeric_column_names(self): # GH #13731 df = DataFrame( @@ -100,10 +96,15 @@ def test_getitem_numeric_column_names(self): tm.assert_frame_equal(result, expected) tm.assert_frame_equal(result2, expected) - # per GH 23566 this should raise a deprecation warning - with tm.assert_produces_warning(DeprecationWarning): + # per GH 23566 this should raise a FutureWarning + with tm.assert_produces_warning(FutureWarning): df.groupby(0)[2, 4].mean() + def test_getitem_single_list_of_columns(self, df): + # per GH 23566 this should raise a FutureWarning + with tm.assert_produces_warning(FutureWarning): + df.groupby("A")["C", "D"].mean() + def test_getitem_single_column(self): df = DataFrame( { @@ -121,8 +122,6 @@ def test_getitem_single_column(self): as_series = as_frame.iloc[:, 0] expected = as_series - assert isinstance(result, Series) - assert not isinstance(result, DataFrame) tm.assert_series_equal(result, expected) From 991e54c8967c796af1fb0d26450c51a7829812d1 Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Wed, 1 Jan 2020 19:41:00 -0500 Subject: [PATCH 6/8] DOC: Added examples in whatsnew --- doc/source/whatsnew/v1.0.0.rst | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a694e0da225aa..c62a5dfec4b0b 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -581,7 +581,37 @@ Deprecations it is recommended to use ``json_normalize`` as :func:`pandas.json_normalize` instead (:issue:`27586`). - :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`) - The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`) -- When selecting columns from a :class:`DataFrameGroupBy` object, passing individual items in brackets is deprecated, should pass in a list of items instead. (:issue:`23566`) + +**Selecting Columns from a Grouped DataFrame** + +When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated, +a list of items should be used instead. (:issue:`23566`) For example: + +.. code-block:: ipython + + df = pd.DataFrame({ + "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"], + "B": np.random.randn(8), + "C": np.random.randn(8), + }) + g = df.groupby('A') + + # single key, returns SeriesGroupBy + g['B'] + + # tuple of single key, returns SeriesGroupBy + g[('B',)] + + # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning + g[('B','C')] + + # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning + # (implicitly converts the passed strings into a single tuple) + g['B','C'] + + # proper way, returns DataFrameGroupBy + g[['B', 'C']] + .. _whatsnew_1000.prior_deprecations: From 87334d0e86e2359d6d6c0789304c557532aa808d Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Wed, 1 Jan 2020 22:43:39 -0500 Subject: [PATCH 7/8] Trailing whitespace --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index fb4a3408c6efe..4dd5fe30b1a50 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -586,7 +586,7 @@ Deprecations **Selecting Columns from a Grouped DataFrame** -When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated, +When selecting columns from a :class:`DataFrameGroupBy` object, passing individual keys (or a tuple of keys) inside single brackets is deprecated, a list of items should be used instead. (:issue:`23566`) For example: .. code-block:: ipython @@ -607,7 +607,7 @@ a list of items should be used instead. (:issue:`23566`) For example: # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning g[('B','C')] - # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning + # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning # (implicitly converts the passed strings into a single tuple) g['B','C'] From 725e3382fdb0f8967602cff0648fbac3bd0304e7 Mon Sep 17 00:00:00 2001 From: yehoshuadimarsky <24758845+yehoshuadimarsky@users.noreply.github.com> Date: Wed, 1 Jan 2020 23:05:35 -0500 Subject: [PATCH 8/8] Trailing whitespace #2 --- doc/source/whatsnew/v1.0.0.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 4dd5fe30b1a50..2404e60323294 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -605,11 +605,11 @@ a list of items should be used instead. (:issue:`23566`) For example: g[('B',)] # tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning - g[('B','C')] + g[('B', 'C')] # multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning # (implicitly converts the passed strings into a single tuple) - g['B','C'] + g['B', 'C'] # proper way, returns DataFrameGroupBy g[['B', 'C']]