Skip to content

Commit

Permalink
DEPR: DataFrame GroupBy indexing with single items DeprecationWarning (
Browse files Browse the repository at this point in the history
  • Loading branch information
yehoshuadimarsky authored and jreback committed Jan 3, 2020
1 parent 6c1597e commit 6fc9852
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ for more details and examples.

.. ipython:: python
tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ for more details and examples.

.. ipython:: python
tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()
Expand Down
31 changes: 31 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -577,6 +577,37 @@ Deprecations
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)

**Selecting Columns from a Grouped DataFrame**

When selecting columns from a :class:`DataFrameGroupBy` object, passing multiple individual keys (or a tuple of keys) inside single brackets is deprecated;
a list of keys should be used instead (:issue:`23566`). For example:

.. code-block:: ipython
df = pd.DataFrame({
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
"B": np.random.randn(8),
"C": np.random.randn(8),
})
g = df.groupby('A')
# single key, returns SeriesGroupBy
g['B']
# tuple of single key, returns SeriesGroupBy
g[('B',)]
# tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning
g[('B', 'C')]
# multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning
# (implicitly converts the passed strings into a single tuple)
g['B', 'C']
# proper way, returns DataFrameGroupBy
g[['B', 'C']]
.. _whatsnew_1000.prior_deprecations:


Expand Down
16 changes: 15 additions & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Union,
cast,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -326,7 +327,7 @@ def _aggregate_multiple_funcs(self, arg):
return DataFrame(results, columns=columns)

def _wrap_series_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index
) -> Union[Series, DataFrame]:
"""
Wraps the output of a SeriesGroupBy operation into the expected result.
Expand Down Expand Up @@ -1578,6 +1579,19 @@ def filter(self, func, dropna=True, *args, **kwargs):

return self._apply_filter(indices, dropna)

def __getitem__(self, key):
    """
    Select from the grouped object by ``key``.

    Emits a ``FutureWarning`` when ``key`` is a tuple holding more than one
    element, then defers the actual selection to the parent class.
    """
    # GH 23566
    # A one-element tuple is left alone: it becomes a SeriesGroupBy and is
    # valid syntax, so only longer tuples trigger the warning.
    is_multi_key_tuple = isinstance(key, tuple) and len(key) > 1
    if is_multi_key_tuple:
        message = (
            "Indexing with multiple keys (implicitly converted to a tuple "
            "of keys) will be deprecated, use a list instead."
        )
        # stacklevel=2 attributes the warning to the caller's indexing line.
        warnings.warn(message, FutureWarning, stacklevel=2)
    return super().__getitem__(key)

def _gotitem(self, key, ndim: int, subset=None):
"""
sub-classes to define
Expand Down
36 changes: 30 additions & 6 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,12 @@ def test_getitem_list_of_columns(self):
)

result = df.groupby("A")[["C", "D"]].mean()
result2 = df.groupby("A")["C", "D"].mean()
result3 = df.groupby("A")[df.columns[2:4]].mean()
result2 = df.groupby("A")[df.columns[2:4]].mean()

expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean()

tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)

def test_getitem_numeric_column_names(self):
# GH #13731
Expand All @@ -91,14 +89,40 @@ def test_getitem_numeric_column_names(self):
}
)
result = df.groupby(0)[df.columns[1:3]].mean()
result2 = df.groupby(0)[2, 4].mean()
result3 = df.groupby(0)[[2, 4]].mean()
result2 = df.groupby(0)[[2, 4]].mean()

expected = df.loc[:, [0, 2, 4]].groupby(0).mean()

tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)

# per GH 23566 this should raise a FutureWarning
with tm.assert_produces_warning(FutureWarning):
df.groupby(0)[2, 4].mean()

def test_getitem_single_list_of_columns(self, df):
    """Indexing a groupby with several bare keys must emit a FutureWarning."""
    # GH 23566
    grouped = df.groupby("A")
    with tm.assert_produces_warning(FutureWarning):
        grouped["C", "D"].mean()

def test_getitem_single_column(self):
    """Selecting one column by a scalar key matches the frame-based result."""
    data = {
        "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
        "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
        "C": np.random.randn(8),
        "D": np.random.randn(8),
        "E": np.random.randn(8),
    }
    df = DataFrame(data)

    result = df.groupby("A")["C"].mean()

    # Reference value: aggregate the ["A", "C"] sub-frame by "A", then take
    # the first (and only) resulting column as a Series.
    expected = df.loc[:, ["A", "C"]].groupby("A").mean().iloc[:, 0]

    tm.assert_series_equal(result, expected)


# grouping
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def test_dispatch_transform(tsframe):

def test_transform_select_columns(df):
f = lambda x: x.mean()
result = df.groupby("A")["C", "D"].transform(f)
result = df.groupby("A")[["C", "D"]].transform(f)

selection = df[["C", "D"]]
expected = selection.groupby(df["A"]).transform(f)
Expand Down

0 comments on commit 6fc9852

Please sign in to comment.