whatsnew 1.3.3, move tests, restore mypy

pandas-dev · Sep 9, 2021 · 45f54d6
1 parent e141123
commit 45f54d6
Show file tree

Hide file tree

Showing 4 changed files with 73 additions and 61 deletions.
diff --git a/doc/source/whatsnew/v1.3.3.rst b/doc/source/whatsnew/v1.3.3.rst
@@ -26,6 +26,7 @@ Fixed regressions
 - Fixed regression in :func:`is_list_like` where objects with ``__iter__`` set to ``None`` would be identified as iterable (:issue:`43373`)
 - Fixed regression in :meth:`.Resampler.aggregate` when used after column selection would raise if ``func`` is a list of aggregation functions (:issue:`42905`)
 - Fixed regression in :meth:`DataFrame.corr` where Kendall correlation would produce incorrect results for columns with repeated values (:issue:`43401`)
+- Fixed regression in :meth:`DataFrame.groupby` where aggregation on columns with object types dropped results on those columns (:issue:`42395`, :issue:`43108`)
 
 .. ---------------------------------------------------------------------------
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -1152,11 +1152,14 @@ def _wrap_applied_output(self, data, keys, values, not_indexed_same: bool = Fals
     def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
         """
         Determine subclass-specific default value for 'numeric_only'.
+
         For SeriesGroupBy we want the default to be False (to match Series behavior).
         For DataFrameGroupBy we want it to be True (for backwards-compat).
+
         Parameters
         ----------
         numeric_only : bool or lib.no_default
+
         Returns
         -------
         bool
@@ -1167,14 +1170,19 @@ def _resolve_numeric_only(self, numeric_only: bool | lib.NoDefault) -> bool:
             if self.obj.ndim == 2:
                 # i.e. DataFrameGroupBy
                 numeric_only = True
+                # GH#42395 GH#43108 GH#43154
+                # Regression from 1.2.5 to 1.3 caused object columns to be dropped
                 obj = self._obj_with_exclusions
                 check = obj._get_numeric_data()
                 if len(obj.columns) and not len(check.columns) and not obj.empty:
                     numeric_only = False
+                    # TODO: v1.4+ Add FutureWarning
 
             else:
                 numeric_only = False
-        return numeric_only
+        # error: Incompatible return value type (got "Union[bool, NoDefault]",
+        # expected "bool")
+        return numeric_only  # type: ignore[return-value]
 
     @cache_readonly
     def _group_keys_index(self) -> Index:

diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py
@@ -10,10 +10,8 @@
 from pandas import (
     DataFrame,
     Index,
-    Int64Index,
     MultiIndex,
     Series,
-    Timedelta,
     Timestamp,
     date_range,
 )
@@ -264,64 +262,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric):
 
         tm.assert_index_equal(result.columns, expected_columns)
 
-    def test_groupby_aggregation_non_numeric_dtype(self):
-        # GH #43108
-        df = DataFrame(
-            [["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
-        )
-
-        expected = DataFrame(
-            {
-                "v": [[1, 1], [10, 20]],
-            },
-            index=Index(["M", "W"], dtype="object", name="MW"),
-        )
-
-        gb = df.groupby(by=["MW"])
-        result = gb.sum()
-        tm.assert_frame_equal(result, expected)
-
-    def test_groupby_aggregation_multi_non_numeric_dtype(self):
-        # GH #42395
-        df = DataFrame(
-            {
-                "x": [1, 0, 1, 1, 0],
-                "y": [Timedelta(i, "days") for i in range(1, 6)],
-                "z": [Timedelta(i * 10, "days") for i in range(1, 6)],
-            }
-        )
-
-        expected = DataFrame(
-            {
-                "y": [Timedelta(i, "days") for i in range(7, 9)],
-                "z": [Timedelta(i * 10, "days") for i in range(7, 9)],
-            },
-            index=Int64Index([0, 1], dtype="int64", name="x"),
-        )
-
-        gb = df.groupby(by=["x"])
-        result = gb.sum()
-        tm.assert_frame_equal(result, expected)
-
-    def test_groupby_aggregation_numeric_with_non_numeric_dtype(self):
-        # GH #43108
-        df = DataFrame(
-            {
-                "x": [1, 0, 1, 1, 0],
-                "y": [Timedelta(i, "days") for i in range(1, 6)],
-                "z": [i for i in range(1, 6)],
-            }
-        )
-
-        expected = DataFrame(
-            {"z": [7, 8]},
-            index=Int64Index([0, 1], dtype="int64", name="x"),
-        )
-
-        gb = df.groupby(by=["x"])
-        result = gb.sum()
-        tm.assert_frame_equal(result, expected)
-
 
 class TestGroupByNonCythonPaths:
     # GH#5610 non-cython calls should not include the grouper

diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py
@@ -14,9 +14,11 @@
     DataFrame,
     Grouper,
     Index,
+    Int64Index,
     MultiIndex,
     RangeIndex,
     Series,
+    Timedelta,
     Timestamp,
     date_range,
     read_csv,
@@ -2392,6 +2394,67 @@ def test_groupby_empty_multi_column(as_index, numeric_only):
     tm.assert_frame_equal(result, expected)
 
 
+def test_groupby_aggregation_non_numeric_dtype():
+    # GH #43108
+    df = DataFrame(
+        [["M", [1]], ["M", [1]], ["W", [10]], ["W", [20]]], columns=["MW", "v"]
+    )
+
+    expected = DataFrame(
+        {
+            "v": [[1, 1], [10, 20]],
+        },
+        index=Index(["M", "W"], dtype="object", name="MW"),
+    )
+
+    gb = df.groupby(by=["MW"])
+    result = gb.sum()
+    tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_aggregation_multi_non_numeric_dtype():
+    # GH #42395
+    df = DataFrame(
+        {
+            "x": [1, 0, 1, 1, 0],
+            "y": [Timedelta(i, "days") for i in range(1, 6)],
+            "z": [Timedelta(i * 10, "days") for i in range(1, 6)],
+        }
+    )
+
+    expected = DataFrame(
+        {
+            "y": [Timedelta(i, "days") for i in range(7, 9)],
+            "z": [Timedelta(i * 10, "days") for i in range(7, 9)],
+        },
+        index=Int64Index([0, 1], dtype="int64", name="x"),
+    )
+
+    gb = df.groupby(by=["x"])
+    result = gb.sum()
+    tm.assert_frame_equal(result, expected)
+
+
+def test_groupby_aggregation_numeric_with_non_numeric_dtype():
+    # GH #43108
+    df = DataFrame(
+        {
+            "x": [1, 0, 1, 1, 0],
+            "y": [Timedelta(i, "days") for i in range(1, 6)],
+            "z": list(range(1, 6)),
+        }
+    )
+
+    expected = DataFrame(
+        {"z": [7, 8]},
+        index=Int64Index([0, 1], dtype="int64", name="x"),
+    )
+
+    gb = df.groupby(by=["x"])
+    result = gb.sum()
+    tm.assert_frame_equal(result, expected)
+
+
 def test_groupby_filtered_df_std():
     # GH 16174
     dicts = [