Skip to content

Commit

Permalink
whatsnew 1.3.3, move tests, restore mypy
Browse files Browse the repository at this point in the history
  • Loading branch information
Dr-Irv committed Sep 9, 2021
1 parent e141123 commit 45f54d6
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 61 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.3.rst
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ Fixed regressions
- Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`)
- Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
- Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
- Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`)

.. ---------------------------------------------------------------------------
Expand Down
10 changes: 9 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1152,11 +1152,14 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = Fals
def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
"""
Determine subclass-specific default value for 'numeric_only'.
For SeriesGroupBy we want the default to be False (to match Series behavior).
For DataFrameGroupBy we want it to be True (for backwards-compat).
Parameters
----------
numeric_only : bool or lib.no_default
Returns
-------
bool
Expand All @@ -1167,14 +1170,19 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
if self.obj.ndim == 2:
# i.e. DataFrameGroupBy
numeric_only = True
# GH#42395 GH#43108 GH#43154
# Regression from 1.2.5 to 1.3 caused object columns to be dropped
obj = self._obj_with_exclusions
check = obj._get_numeric_data()
if len(obj.columns) and not len(check.columns) and not obj.empty:
numeric_only = False
# TODO: v1.4+ Add FutureWarning

else:
numeric_only = False
return numeric_only
# error: Incompatible return value type (got "Union[bool, NoDefault]",
# expected "bool")
return numeric_only # type: ignore[return-value]

@cache_readonly
def _group_keys_index(self) -> Index:
Expand Down
60 changes: 0 additions & 60 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,8 @@
from pandas import (
DataFrame,
Index,
Int64Index,
MultiIndex,
Series,
Timedelta,
Timestamp,
date_range,
)
Expand Down Expand Up @@ -264,64 +262,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):

tm.assert_index_equal(result.columns, expected_columns)

def test_groupby_aggregation_non_numeric_dtype(self):
# GH #43108
df = DataFrame(
[["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
)

expected = DataFrame(
{
"v": [[1, 1], [10, 20]],
},
index=Index(["M", "W"], dtype="object", name="MW"),
)

gb = df.groupby(by=["MW"])
result = gb.sum()
tm.assert_frame_equal(result, expected)

def test_groupby_aggregation_multi_non_numeric_dtype(self):
# GH #42395
df = DataFrame(
{
"x": [1, 0, 1, 1, 0],
"y": [Timedelta(i, "days") for i in range(1, 6)],
"z": [Timedelta(i * 10, "days") for i in range(1, 6)],
}
)

expected = DataFrame(
{
"y": [Timedelta(i, "days") for i in range(7, 9)],
"z": [Timedelta(i * 10, "days") for i in range(7, 9)],
},
index=Int64Index([0, 1], dtype="int64", name="x"),
)

gb = df.groupby(by=["x"])
result = gb.sum()
tm.assert_frame_equal(result, expected)

def test_groupby_aggregation_numeric_with_non_numeric_dtype(self):
# GH #43108
df = DataFrame(
{
"x": [1, 0, 1, 1, 0],
"y": [Timedelta(i, "days") for i in range(1, 6)],
"z": [i for i in range(1, 6)],
}
)

expected = DataFrame(
{"z": [7, 8]},
index=Int64Index([0, 1], dtype="int64", name="x"),
)

gb = df.groupby(by=["x"])
result = gb.sum()
tm.assert_frame_equal(result, expected)


class TestGroupByNonCythonPaths:
# GH#5610 non-cython calls should not include the grouper
Expand Down
63 changes: 63 additions & 0 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
DataFrame,
Grouper,
Index,
Int64Index,
MultiIndex,
RangeIndex,
Series,
Timedelta,
Timestamp,
date_range,
read_csv,
Expand Down Expand Up @@ -2392,6 +2394,67 @@ def test_groupby_empty_multi_column(as_index, numeric_only):
tm.assert_frame_equal(result, expected)


def test_groupby_aggregation_non_numeric_dtype():
    # GH #43108
    # Object column of lists as the only value column: the expected frame
    # has one row per "MW" key with that key's lists joined together.
    records = [["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]]
    df = DataFrame(records, columns=["MW", "v"])

    expected_index = Index(["M", "W"], dtype="object", name="MW")
    expected = DataFrame({"v": [[1, 1], [10, 20]]}, index=expected_index)

    result = df.groupby(by=["MW"]).sum()
    tm.assert_frame_equal(result, expected)


def test_groupby_aggregation_multi_non_numeric_dtype():
    # GH #42395
    # Every non-key column is timedelta; the default sum() has to keep
    # those columns instead of dropping them.
    df = DataFrame(
        {
            "x": [1, 0, 1, 1, 0],
            "y": [Timedelta(i, "days") for i in range(1, 6)],
            "z": [Timedelta(i * 10, "days") for i in range(1, 6)],
        }
    )

    # x == 0 -> rows 1 and 4 (2 + 5 = 7 days)
    # x == 1 -> rows 0, 2 and 3 (1 + 3 + 4 = 8 days)
    expected = DataFrame(
        {
            "y": [Timedelta(i, "days") for i in range(7, 9)],
            "z": [Timedelta(i * 10, "days") for i in range(7, 9)],
        },
        # Index with an explicit int64 dtype describes the same integer
        # index without depending on the Int64Index class.
        index=Index([0, 1], dtype="int64", name="x"),
    )

    gb = df.groupby(by=["x"])
    result = gb.sum()
    tm.assert_frame_equal(result, expected)


def test_groupby_aggregation_numeric_with_non_numeric_dtype():
# GH #43108
df = DataFrame(
{
"x": [1, 0, 1, 1, 0],
"y": [Timedelta(i, "days") for i in range(1, 6)],
"z": list(range(1, 6)),
}
)

expected = DataFrame(
{"z": [7, 8]},
index=Int64Index([0, 1], dtype="int64", name="x"),
)

gb = df.groupby(by=["x"])
result = gb.sum()
tm.assert_frame_equal(result, expected)


def test_groupby_filtered_df_std():
# GH 16174
dicts = [
Expand Down

0 comments on commit 45f54d6

Please sign in to comment.