Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: DataFrame GroupBy indexing with single items DeprecationWarning #30546

Merged
merged 11 commits into from
Jan 3, 2020
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -629,7 +629,7 @@ for more details and examples.

.. ipython:: python
tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ for more details and examples.

.. ipython:: python
tips_summed = tips.groupby(['sex', 'smoker'])['total_bill', 'tip'].sum()
tips_summed = tips.groupby(['sex', 'smoker'])[['total_bill', 'tip']].sum()
tips_summed.head()
Expand Down
31 changes: 31 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -584,6 +584,37 @@ Deprecations
- :meth:`DataFrame.to_stata`, :meth:`DataFrame.to_feather`, and :meth:`DataFrame.to_parquet` argument "fname" is deprecated, use "path" instead (:issue:`23574`)
- The deprecated internal attributes ``_start``, ``_stop`` and ``_step`` of :class:`RangeIndex` now raise a ``FutureWarning`` instead of a ``DeprecationWarning`` (:issue:`26581`)

**Selecting Columns from a Grouped DataFrame**

When selecting columns from a :class:`DataFrameGroupBy` object, passing multiple keys individually (or as a tuple of keys) inside single brackets is deprecated;
a list of keys should be used instead (:issue:`23566`). For example:

.. code-block:: ipython

df = pd.DataFrame({
"A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
"B": np.random.randn(8),
"C": np.random.randn(8),
})
g = df.groupby('A')

# single key, returns SeriesGroupBy
g['B']

# tuple of single key, returns SeriesGroupBy
g[('B',)]

# tuple of multiple keys, returns DataFrameGroupBy, raises FutureWarning
g[('B','C')]

# multiple keys passed directly, returns DataFrameGroupBy, raises FutureWarning
# (implicitly converts the passed strings into a single tuple)
g['B','C']

# proper way, returns DataFrameGroupBy
g[['B', 'C']]


.. _whatsnew_1000.prior_deprecations:


Expand Down
16 changes: 15 additions & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Union,
cast,
)
import warnings

import numpy as np

Expand Down Expand Up @@ -326,7 +327,7 @@ def _aggregate_multiple_funcs(self, arg):
return DataFrame(results, columns=columns)

def _wrap_series_output(
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index,
self, output: Mapping[base.OutputKey, Union[Series, np.ndarray]], index: Index
) -> Union[Series, DataFrame]:
"""
Wraps the output of a SeriesGroupBy operation into the expected result.
Expand Down Expand Up @@ -1578,6 +1579,19 @@ def filter(self, func, dropna=True, *args, **kwargs):

return self._apply_filter(indices, dropna)

def __getitem__(self, key):
    """
    Select column(s) from the grouped object, warning on multi-key tuples.

    A ``FutureWarning`` is emitted when ``key`` is a tuple holding more than
    one element (GH 23566); the lookup itself is always delegated to the
    parent class.

    Parameters
    ----------
    key : object
        The selection passed inside the brackets.

    Returns
    -------
    Whatever the parent class's ``__getitem__`` returns for ``key``.
    """
    # A one-element tuple becomes a SeriesGroupBy and is valid syntax,
    # so only tuples with several keys trigger the warning.
    is_multi_key_tuple = isinstance(key, tuple) and len(key) > 1
    if is_multi_key_tuple:
        message = (
            "Indexing with multiple keys (implicitly converted to a tuple "
            "of keys) will be deprecated, use a list instead."
        )
        # stacklevel=2 attributes the warning to the code doing the indexing.
        warnings.warn(message, FutureWarning, stacklevel=2)
    return super().__getitem__(key)

def _gotitem(self, key, ndim: int, subset=None):
"""
sub-classes to define
Expand Down
36 changes: 30 additions & 6 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,14 +71,12 @@ def test_getitem_list_of_columns(self):
)

result = df.groupby("A")[["C", "D"]].mean()
result2 = df.groupby("A")["C", "D"].mean()
result3 = df.groupby("A")[df.columns[2:4]].mean()
result2 = df.groupby("A")[df.columns[2:4]].mean()

expected = df.loc[:, ["A", "C", "D"]].groupby("A").mean()

tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)

def test_getitem_numeric_column_names(self):
# GH #13731
Expand All @@ -91,14 +89,40 @@ def test_getitem_numeric_column_names(self):
}
)
result = df.groupby(0)[df.columns[1:3]].mean()
result2 = df.groupby(0)[2, 4].mean()
result3 = df.groupby(0)[[2, 4]].mean()
result2 = df.groupby(0)[[2, 4]].mean()

expected = df.loc[:, [0, 2, 4]].groupby(0).mean()

tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result2, expected)
tm.assert_frame_equal(result3, expected)

# per GH 23566 this should raise a FutureWarning
with tm.assert_produces_warning(FutureWarning):
df.groupby(0)[2, 4].mean()

def test_getitem_single_list_of_columns(self, df):
    """Multiple keys inside single brackets must emit a FutureWarning."""
    # per GH 23566
    warning_category = FutureWarning
    with tm.assert_produces_warning(warning_category):
        grouped = df.groupby("A")
        grouped["C", "D"].mean()

def test_getitem_single_column(self):
    """
    Selecting one column with a scalar key and taking the mean must match
    the first column of the equivalent frame-level grouped mean.
    """
    frame = DataFrame(
        {
            "A": ["foo", "bar", "foo", "bar", "foo", "bar", "foo", "foo"],
            "B": ["one", "one", "two", "three", "two", "two", "one", "three"],
            "C": np.random.randn(8),
            "D": np.random.randn(8),
            "E": np.random.randn(8),
        }
    )

    observed = frame.groupby("A")["C"].mean()

    # Build the expectation via the frame path: restrict to the grouping
    # column plus "C", aggregate, then pull out the single result column.
    expected = frame.loc[:, ["A", "C"]].groupby("A").mean().iloc[:, 0]

    tm.assert_series_equal(observed, expected)


# grouping
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ def test_dispatch_transform(tsframe):

def test_transform_select_columns(df):
f = lambda x: x.mean()
result = df.groupby("A")["C", "D"].transform(f)
result = df.groupby("A")[["C", "D"]].transform(f)

selection = df[["C", "D"]]
expected = selection.groupby(df["A"]).transform(f)
Expand Down