From 477d813b5a630bb792196391f19adac6012016f1 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 1 Apr 2021 20:20:24 -0400 Subject: [PATCH 01/16] BUG: groupby.agg/transform downcasts UDF results --- doc/source/whatsnew/v1.3.0.rst | 1 + pandas/core/groupby/generic.py | 12 +---- pandas/core/groupby/groupby.py | 3 -- .../tests/groupby/aggregate/test_aggregate.py | 52 +++++++++++++++++-- pandas/tests/groupby/aggregate/test_cython.py | 3 ++ pandas/tests/groupby/test_categorical.py | 2 +- pandas/tests/groupby/test_function.py | 5 +- pandas/tests/groupby/test_groupby.py | 6 +-- .../tests/groupby/transform/test_transform.py | 8 +-- pandas/tests/resample/test_datetime_index.py | 5 ++ .../tests/resample/test_resampler_grouper.py | 2 +- pandas/tests/resample/test_timedelta.py | 2 +- pandas/tests/reshape/test_crosstab.py | 1 + pandas/tests/reshape/test_pivot.py | 1 - 14 files changed, 74 insertions(+), 29 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 5e95cd6e5ee10..1f5a3d2e5db1b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -753,6 +753,7 @@ Groupby/resample/rolling - Bug in :class:`core.window.ewm.ExponentialMovingWindow` when calling ``__getitem__`` would not retain ``com``, ``span``, ``alpha`` or ``halflife`` attributes (:issue:`40164`) - :class:`core.window.ewm.ExponentialMovingWindow` now raises a ``NotImplementedError`` when specifying ``times`` with ``adjust=False`` due to an incorrect calculation (:issue:`40098`) - Bug in :meth:`Series.asfreq` and :meth:`DataFrame.asfreq` dropping rows when the index is not sorted (:issue:`39805`) +- Bug in :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`DataFrameGroupBy.transform`, and :meth:`SeriesGroupBy.transform` would possibly change the result dtype when ``func`` is callable (:issue:`21240`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 537772125e5fe..92df9b9b420e9 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -50,7 +50,6 @@ ) from pandas.core.dtypes.cast import ( - find_common_type, maybe_cast_result_dtype, maybe_downcast_numeric, ) @@ -61,7 +60,6 @@ is_dict_like, is_integer_dtype, is_interval_dtype, - is_numeric_dtype, is_scalar, needs_i8_conversion, ) @@ -562,8 +560,9 @@ def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): def _transform_general(self, func, *args, **kwargs): """ - Transform with a non-str `func`. + Transform with a callable func`. 
""" + assert callable(func) klass = type(self._selected_obj) results = [] @@ -584,13 +583,6 @@ def _transform_general(self, func, *args, **kwargs): result = self._set_result_index_ordered(concatenated) else: result = self.obj._constructor(dtype=np.float64) - # we will only try to coerce the result type if - # we have a numeric dtype, as these are *always* user-defined funcs - # the cython take a different path (and casting) - if is_numeric_dtype(result.dtype): - common_dtype = find_common_type([self._selected_obj.dtype, result.dtype]) - if common_dtype is result.dtype: - result = maybe_downcast_numeric(result, self._selected_obj.dtype) result.name = self._selected_obj.name return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index a6c3cb3ff5d0b..fd70ef1ac8c04 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1232,9 +1232,6 @@ def _python_agg_general(self, func, *args, **kwargs): assert result is not None key = base.OutputKey(label=name, position=idx) - if is_numeric_dtype(obj.dtype): - result = maybe_downcast_numeric(result, obj.dtype) - if self.grouper._filter_empty_groups: mask = counts.ravel() > 0 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index fc0b4d86e81bf..96c8b243daf2c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -235,10 +235,10 @@ def test_aggregate_item_by_item(df): # GH5782 # odd comparisons can result here, so cast to make easy - exp = Series(np.array([foo] * K), index=list("BCD"), dtype=np.float64, name="foo") + exp = Series(np.array([foo] * K), index=list("BCD"), name="foo") tm.assert_series_equal(result.xs("foo"), exp) - exp = Series(np.array([bar] * K), index=list("BCD"), dtype=np.float64, name="bar") + exp = Series(np.array([bar] * K), index=list("BCD"), name="bar") tm.assert_almost_equal(result.xs("bar"), exp) def aggfun(ser): @@ -442,6 +442,48 @@ def test_bool_agg_dtype(op): assert is_integer_dtype(result) +@pytest.mark.parametrize( + "keys, agg_index", + [ + (["a"], Index([1], name="a")), + (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), + ], +) +@pytest.mark.parametrize("input", [True, 1, 1.0]) +@pytest.mark.parametrize("dtype", [bool, int, float]) +@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) +def test_callable_result_dtype_frame(keys, agg_index, input, dtype, method): + # GH 21240 + df = DataFrame({"a": [1], "b": [2], "c": [input]}) + op = getattr(df.groupby(keys)[["c"]], method) + result = op(lambda x: x.astype(dtype).iloc[0]) + expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index + expected = DataFrame({"c": [df["c"].iloc[0]]}, index=expected_index).astype(dtype) + if method == "apply": + expected.columns.names = [0] + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "keys, agg_index", + [ + (["a"], Index([1], name="a")), + (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), + ], +) +@pytest.mark.parametrize("input", [True, 1, 1.0]) +@pytest.mark.parametrize("dtype", [bool, int, float]) +@pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) +def test_callable_result_dtype_series(keys, agg_index, input, dtype, method): + # GH 21240 + df = DataFrame({"a": [1], "b": [2], "c": [input]}) + op = getattr(df.groupby(keys)["c"], method) + result = op(lambda x: x.astype(dtype).iloc[0]) + expected_index = pd.RangeIndex(0, 1) if method 
== "transform" else agg_index + expected = Series([df["c"].iloc[0]], index=expected_index, name="c").astype(dtype) + tm.assert_series_equal(result, expected) + + def test_order_aggregate_multiple_funcs(): # GH 25692 df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]}) @@ -849,7 +891,11 @@ def test_multiindex_custom_func(func): data = [[1, 4, 2], [5, 7, 1]] df = DataFrame(data, columns=MultiIndex.from_arrays([[1, 1, 2], [3, 4, 3]])) result = df.groupby(np.array([0, 1])).agg(func) - expected_dict = {(1, 3): {0: 1, 1: 5}, (1, 4): {0: 4, 1: 7}, (2, 3): {0: 2, 1: 1}} + expected_dict = { + (1, 3): {0: 1.0, 1: 5.0}, + (1, 4): {0: 4.0, 1: 7.0}, + (2, 3): {0: 2.0, 1: 1.0}, + } expected = DataFrame(expected_dict) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 4a8aabe41b754..a6eabac77c9bf 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -196,6 +196,9 @@ def test_cython_agg_empty_buckets(op, targop, observed): g = df.groupby(pd.cut(df[0], grps), observed=observed) expected = g.agg(lambda x: targop(x)) + if observed and op not in ("min", "max"): + # TODO: cython_agg_general with mean/var should be float64 + expected = expected.astype("int64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index da438826a939a..a522f9625a80a 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -1597,7 +1597,7 @@ def test_aggregate_categorical_with_isnan(): index = MultiIndex.from_arrays([[1, 1], [1, 2]], names=("A", "B")) expected = DataFrame( data={ - "numerical_col": [1.0, 0.0], + "numerical_col": [1, 0], "object_col": [0, 0], "categorical_col": [0, 0], }, diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 515774eae009b..caf2ae752dbf0 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -440,6 +440,9 @@ def test_median_empty_bins(observed): result = df.groupby(bins, observed=observed).median() expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) + if observed: + # TODO: groupby(..).median should be float64 + expected = expected.astype("int64") tm.assert_frame_equal(result, expected) @@ -616,7 +619,7 @@ def test_ops_general(op, targop): df = DataFrame(np.random.randn(1000)) labels = np.random.randint(0, 50, size=1000).astype(float) - result = getattr(df.groupby(labels), op)().astype(float) + result = getattr(df.groupby(labels), op)() expected = df.groupby(labels).agg(targop) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index de508b8cd78ec..5d1eb8a034960 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -302,10 +302,8 @@ def f(x): return float(len(x)) agged = grouped.agg(f) - expected = Series([4, 2], index=["bar", "foo"]) - - tm.assert_series_equal(agged, expected, check_dtype=False) - assert issubclass(agged.dtype.type, np.dtype(dtype).type) + expected = Series([4.0, 2.0], index=["bar", "foo"]) + tm.assert_series_equal(agged, expected) def test_indices_concatenation_order(): diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 9350a3fcd3036..aa0f90e83ab5b 100644 --- 
a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -242,7 +242,7 @@ def test_transform_bug(): # transforming on a datetime column df = DataFrame({"A": Timestamp("20130101"), "B": np.arange(5)}) result = df.groupby("A")["B"].transform(lambda x: x.rank(ascending=False)) - expected = Series(np.arange(5, 0, step=-1), name="B") + expected = Series(np.arange(5, 0, step=-1), name="B", dtype="float64") tm.assert_series_equal(result, expected) @@ -493,7 +493,7 @@ def test_groupby_transform_with_int(): ) with np.errstate(all="ignore"): result = df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) - expected = DataFrame({"B": np.nan, "C": [-1, 0, 1, -1, 0, 1]}) + expected = DataFrame({"B": np.nan, "C": [-1.0, 0.0, 1.0, -1.0, 0.0, 1.0]}) tm.assert_frame_equal(result, expected) # int that needs float conversion @@ -509,9 +509,9 @@ def test_groupby_transform_with_int(): expected = DataFrame({"B": np.nan, "C": concat([s1, s2])}) tm.assert_frame_equal(result, expected) - # int downcasting + # int doesn't get downcasted result = df.groupby("A").transform(lambda x: x * 2 / 2) - expected = DataFrame({"B": 1, "C": [2, 3, 4, 10, 5, -1]}) + expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]}) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 71e6aa38d60e5..a8edb2fddfaab 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1204,6 +1204,9 @@ def test_resample_median_bug_1688(): result = df.resample("T").apply(lambda x: x.mean()) exp = df.asfreq("T") + if dtype == "float32": + # TODO: fastpath for apply comes back at float64 + exp = exp.astype("float64") tm.assert_frame_equal(result, exp) result = df.resample("T").median() @@ -1684,6 +1687,8 @@ def f(data, add_arg): df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10)) result = df.groupby("A").resample("D").agg(f, multiplier) expected = df.groupby("A").resample("D").mean().multiply(multiplier) + # TODO: resample(...).mean should be a float64 + expected = expected.astype("float64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index 999d8a6c90ba2..b688594c388b5 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -289,7 +289,7 @@ def test_apply_columns_multilevel(): agg_dict = {col: (np.sum if col[3] == "one" else np.mean) for col in df.columns} result = df.resample("H").apply(lambda x: agg_dict[x.name](x)) expected = DataFrame( - np.array([0] * 4).reshape(2, 2), + 2 * [[0, 0.0]], index=date_range(start="2017-01-01", freq="1H", periods=2), columns=pd.MultiIndex.from_tuples( [("A", "a", "", "one"), ("B", "b", "i", "two")] diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index c6ee295208607..b1560623cd871 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -162,7 +162,7 @@ def test_resample_with_timedelta_yields_no_empty_groups(): result = df.loc["1s":, :].resample("3s").apply(lambda x: len(x)) expected = DataFrame( - [[768.0] * 4] * 12 + [[528.0] * 4], + [[768] * 4] * 12 + [[528] * 4], index=timedelta_range(start="1s", periods=13, freq="3s"), ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_crosstab.py 
b/pandas/tests/reshape/test_crosstab.py index 5cc65feee869b..fe77e7fbd6737 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -559,6 +559,7 @@ def test_crosstab_with_numpy_size(self): expected = DataFrame( expected_data, index=expected_index, columns=expected_column ) + expected["All"] = expected["All"].astype("int64") tm.assert_frame_equal(result, expected) def test_crosstab_duplicate_names(self): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8d8a83c233444..669c281c03ac1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -988,7 +988,6 @@ def test_margins_dtype(self): tm.assert_frame_equal(expected, result) - @pytest.mark.xfail(reason="GH#17035 (len of floats is casted back to floats)") def test_margins_dtype_len(self): mi_val = list(product(["bar", "foo"], ["one", "two"])) + [("All", "")] mi = MultiIndex.from_tuples(mi_val, names=("A", "B")) From f2069a7b214ea0ac15b75ea854d09af825cac47d Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 10 Apr 2021 10:48:35 -0400 Subject: [PATCH 02/16] Reverted behavior change when input and output are the same kind --- pandas/core/dtypes/cast.py | 7 ++++++- pandas/core/groupby/generic.py | 8 ++++++++ pandas/core/groupby/groupby.py | 3 +++ pandas/tests/groupby/test_groupby.py | 6 +++++- pandas/tests/resample/test_datetime_index.py | 3 --- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f62aa95e1e814..f642ffa8384f5 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -304,7 +304,7 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi def maybe_downcast_numeric( - result: ArrayLike, dtype: DtypeObj, do_round: bool = False + result: ArrayLike, dtype: DtypeObj, do_round: bool = False, same_kind: bool = False ) -> ArrayLike: """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. @@ -314,6 +314,9 @@ def maybe_downcast_numeric( result : ndarray or ExtensionArray dtype : np.dtype or ExtensionDtype do_round : bool + same_kind: bool + Whether to only possibly downcast when result.dtype is the same kind + as dtype. 
Returns ------- @@ -332,6 +335,8 @@ def trans(x): # don't allow upcasts here (except if empty) if result.dtype.itemsize <= dtype.itemsize and result.size: return result + elif same_kind: + return result if is_bool_dtype(dtype) or is_integer_dtype(dtype): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 1b8cd7a2cb21b..ae0987a5dd70c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -55,6 +55,7 @@ is_dict_like, is_integer_dtype, is_interval_dtype, + is_numeric_dtype, is_scalar, needs_i8_conversion, ) @@ -578,6 +579,13 @@ def _transform_general(self, func, *args, **kwargs): result = self._set_result_index_ordered(concatenated) else: result = self.obj._constructor(dtype=np.float64) + # we will only try to coerce the result type if + # we have a numeric dtype, as these are *always* user-defined funcs + # the cython take a different path (and casting) + if is_numeric_dtype(result.dtype): + result = maybe_downcast_numeric( + result, self._selected_obj.dtype, same_kind=True + ) result.name = self._selected_obj.name return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 8b87ad4538c6b..b63c4320a6b38 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1226,6 +1226,9 @@ def _python_agg_general(self, func, *args, **kwargs): assert result is not None key = base.OutputKey(label=name, position=idx) + if is_numeric_dtype(obj.dtype): + result = maybe_downcast_numeric(result, obj.dtype, same_kind=True) + if self.grouper._filter_empty_groups: mask = counts.ravel() > 0 diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 951c0d994a864..f639d7edf2b87 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -302,7 +302,11 @@ def f(x): return float(len(x)) agged = grouped.agg(f) - expected = Series([4.0, 2.0], index=["bar", "foo"]) + + # precision will only be preserved when the input dtype is the same kind as output + expected = Series( + [4.0, 2.0], index=["bar", "foo"], dtype=dtype if dtype == "float32" else None + ) tm.assert_series_equal(agged, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index a8edb2fddfaab..bf9d8624c363d 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1204,9 +1204,6 @@ def test_resample_median_bug_1688(): result = df.resample("T").apply(lambda x: x.mean()) exp = df.asfreq("T") - if dtype == "float32": - # TODO: fastpath for apply comes back at float64 - exp = exp.astype("float64") tm.assert_frame_equal(result, exp) result = df.resample("T").median() From 35c789f580c6fd204155d498fed2be41d4a9f876 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 10 Apr 2021 15:04:00 -0400 Subject: [PATCH 03/16] Patch via maybe_convert_objects --- pandas/_libs/lib.pyx | 63 +++++++++++-------- pandas/core/dtypes/cast.py | 7 +-- pandas/core/groupby/generic.py | 5 -- pandas/core/groupby/groupby.py | 3 - .../frame/constructors/test_from_records.py | 2 +- pandas/tests/frame/test_constructors.py | 2 +- .../tests/groupby/aggregate/test_aggregate.py | 21 +++++-- pandas/tests/groupby/test_groupby.py | 10 +-- pandas/tests/indexing/test_coercion.py | 2 +- pandas/tests/resample/test_datetime_index.py | 3 + 10 files changed, 61 insertions(+), 57 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e816bd4cd4026..ab8021018b363 100644 --- 
a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -2211,7 +2211,7 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False, Array of converted object values to more specific dtypes if applicable. """ cdef: - Py_ssize_t i, n + Py_ssize_t i, n, itemsize = 0 ndarray[float64_t] floats ndarray[complex128_t] complexes ndarray[int64_t] ints @@ -2244,6 +2244,12 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False, for i in range(n): val = objects[i] + if ( + hasattr(val, "dtype") + and hasattr(val.dtype, "itemsize") + and val.dtype.itemsize > itemsize + ): + itemsize = val.dtype.itemsize if val is None: seen.null_ = True @@ -2345,50 +2351,51 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False, seen.object_ = True if not seen.object_: + result = None if not safe: if seen.null_ or seen.nan_: if seen.is_float_or_complex: if seen.complex_: - return complexes + result = complexes elif seen.float_: - return floats + result = floats elif seen.int_: if convert_to_nullable_integer: from pandas.core.arrays import IntegerArray - return IntegerArray(ints, mask) + result = IntegerArray(ints, mask) else: - return floats + result = floats elif seen.nan_: - return floats + result = floats else: if not seen.bool_: if seen.datetime_: if not seen.numeric_ and not seen.timedelta_: - return datetimes + result = datetimes elif seen.timedelta_: if not seen.numeric_: - return timedeltas + result = timedeltas elif seen.nat_: if not seen.numeric_: if convert_datetime and convert_timedelta: # TODO: array full of NaT ambiguity resolve here needed pass elif convert_datetime: - return datetimes + result = datetimes elif convert_timedelta: - return timedeltas + result = timedeltas else: if seen.complex_: - return complexes + result = complexes elif seen.float_: - return floats + result = floats elif seen.int_: if seen.uint_: - return uints + result = uints else: - return ints + result = ints elif seen.is_bool: - return bools.view(np.bool_) + result = bools.view(np.bool_) else: # don't cast int to float, etc. 
@@ -2396,41 +2403,45 @@ def maybe_convert_objects(ndarray[object] objects, bint try_float=False, if seen.is_float_or_complex: if seen.complex_: if not seen.int_: - return complexes + result = complexes elif seen.float_ or seen.nan_: if not seen.int_: - return floats + result = floats else: if not seen.bool_: if seen.datetime_: if not seen.numeric_ and not seen.timedelta_: - return datetimes + result = datetimes elif seen.timedelta_: if not seen.numeric_: - return timedeltas + result = timedeltas elif seen.nat_: if not seen.numeric_: if convert_datetime and convert_timedelta: # TODO: array full of NaT ambiguity resolve here needed pass elif convert_datetime: - return datetimes + result = datetimes elif convert_timedelta: - return timedeltas + result = timedeltas else: if seen.complex_: if not seen.int_: - return complexes + result = complexes elif seen.float_ or seen.nan_: if not seen.int_: - return floats + result = floats elif seen.int_: if seen.uint_: - return uints + result = uints else: - return ints + result = ints elif seen.is_bool and not seen.nan_: - return bools.view(np.bool_) + result = bools.view(np.bool_) + if result is not None: + if itemsize > 0 and itemsize != result.dtype.itemsize: + result = result.astype(result.dtype.kind + str(itemsize)) + return result return objects diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f642ffa8384f5..f62aa95e1e814 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -304,7 +304,7 @@ def maybe_downcast_to_dtype(result: ArrayLike, dtype: str | np.dtype) -> ArrayLi def maybe_downcast_numeric( - result: ArrayLike, dtype: DtypeObj, do_round: bool = False, same_kind: bool = False + result: ArrayLike, dtype: DtypeObj, do_round: bool = False ) -> ArrayLike: """ Subset of maybe_downcast_to_dtype restricted to numeric dtypes. @@ -314,9 +314,6 @@ def maybe_downcast_numeric( result : ndarray or ExtensionArray dtype : np.dtype or ExtensionDtype do_round : bool - same_kind: bool - Whether to only possibly downcast when result.dtype is the same kind - as dtype. 
Returns ------- @@ -335,8 +332,6 @@ def trans(x): # don't allow upcasts here (except if empty) if result.dtype.itemsize <= dtype.itemsize and result.size: return result - elif same_kind: - return result if is_bool_dtype(dtype) or is_integer_dtype(dtype): diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ae0987a5dd70c..e02d96e6dd3f3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -55,7 +55,6 @@ is_dict_like, is_integer_dtype, is_interval_dtype, - is_numeric_dtype, is_scalar, needs_i8_conversion, ) @@ -582,10 +581,6 @@ def _transform_general(self, func, *args, **kwargs): # we will only try to coerce the result type if # we have a numeric dtype, as these are *always* user-defined funcs # the cython take a different path (and casting) - if is_numeric_dtype(result.dtype): - result = maybe_downcast_numeric( - result, self._selected_obj.dtype, same_kind=True - ) result.name = self._selected_obj.name return result diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b63c4320a6b38..8b87ad4538c6b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1226,9 +1226,6 @@ def _python_agg_general(self, func, *args, **kwargs): assert result is not None key = base.OutputKey(label=name, position=idx) - if is_numeric_dtype(obj.dtype): - result = maybe_downcast_numeric(result, obj.dtype, same_kind=True) - if self.grouper._filter_empty_groups: mask = counts.ravel() > 0 diff --git a/pandas/tests/frame/constructors/test_from_records.py b/pandas/tests/frame/constructors/test_from_records.py index e8d0a789e7cbd..35ad9f3e9693b 100644 --- a/pandas/tests/frame/constructors/test_from_records.py +++ b/pandas/tests/frame/constructors/test_from_records.py @@ -117,7 +117,7 @@ def test_from_records_sequencelike(self): result = DataFrame.from_records(tuples, exclude=exclude) result.columns = [columns[i] for i in sorted(columns_to_test)] tm.assert_series_equal(result["C"], df["C"]) - tm.assert_series_equal(result["E1"], df["E1"].astype("float64")) + tm.assert_series_equal(result["E1"], df["E1"]) def test_from_records_sequencelike_empty(self): # empty case diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index ca68885fdc470..966467dd878e2 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -1929,7 +1929,7 @@ def test_constructor_for_list_with_dtypes(self): df = DataFrame([np.array(np.arange(5), dtype="int32") for x in range(5)]) result = df.dtypes - expected = Series([np.dtype("int64")] * 5) + expected = Series([np.dtype("int32")] * 5) tm.assert_series_equal(result, expected) # overflow issue? 
(we always expected int64 upcasting here) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 96c8b243daf2c..4398cdc3f7b5d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -449,16 +449,25 @@ def test_bool_agg_dtype(op): (["a", "b"], MultiIndex([[1], [2]], [[0], [0]], names=["a", "b"])), ], ) -@pytest.mark.parametrize("input", [True, 1, 1.0]) -@pytest.mark.parametrize("dtype", [bool, int, float]) +@pytest.mark.parametrize( + "input_dtype", ["bool", "int32", "int64", "float32", "float64"] +) +@pytest.mark.parametrize( + "result_dtype", ["bool", "int32", "int64", "float32", "float64"] +) @pytest.mark.parametrize("method", ["apply", "aggregate", "transform"]) -def test_callable_result_dtype_frame(keys, agg_index, input, dtype, method): +def test_callable_result_dtype_frame( + keys, agg_index, input_dtype, result_dtype, method +): # GH 21240 - df = DataFrame({"a": [1], "b": [2], "c": [input]}) + df = DataFrame({"a": [1], "b": [2], "c": [True]}) + df["c"] = df["c"].astype(input_dtype) op = getattr(df.groupby(keys)[["c"]], method) - result = op(lambda x: x.astype(dtype).iloc[0]) + result = op(lambda x: x.astype(result_dtype).iloc[0]) expected_index = pd.RangeIndex(0, 1) if method == "transform" else agg_index - expected = DataFrame({"c": [df["c"].iloc[0]]}, index=expected_index).astype(dtype) + expected = DataFrame({"c": [df["c"].iloc[0]]}, index=expected_index).astype( + result_dtype + ) if method == "apply": expected.columns.names = [0] tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f639d7edf2b87..a26efa5bbb3f5 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -99,10 +99,7 @@ def max_value(group): applied = df.groupby("A").apply(max_value) result = applied.dtypes - expected = Series( - [np.dtype("object")] * 2 + [np.dtype("float64")] * 2 + [np.dtype("int64")], - index=["A", "B", "C", "D", "value"], - ) + expected = df.dtypes tm.assert_series_equal(result, expected) @@ -303,10 +300,7 @@ def f(x): agged = grouped.agg(f) - # precision will only be preserved when the input dtype is the same kind as output - expected = Series( - [4.0, 2.0], index=["bar", "foo"], dtype=dtype if dtype == "float32" else None - ) + expected = Series([4.0, 2.0], index=["bar", "foo"]) tm.assert_series_equal(agged, expected) diff --git a/pandas/tests/indexing/test_coercion.py b/pandas/tests/indexing/test_coercion.py index 7642f78076dcb..2bb9b51df2285 100644 --- a/pandas/tests/indexing/test_coercion.py +++ b/pandas/tests/indexing/test_coercion.py @@ -641,7 +641,7 @@ def test_where_series_complex128(self, fill_val, exp_dtype): values = klass([True, False, True, True]) else: values = klass(x * fill_val for x in [5, 6, 7, 8]) - exp = klass([1 + 1j, values[1], 3 + 3j, values[3]]) + exp = klass([1 + 1j, values[1], 3 + 3j, values[3]], dtype=exp_dtype) self._assert_where_conversion(obj, cond, values, exp, exp_dtype) @pytest.mark.parametrize( diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index bf9d8624c363d..6f6ba29dde358 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1204,6 +1204,9 @@ def test_resample_median_bug_1688(): result = df.resample("T").apply(lambda x: x.mean()) exp = df.asfreq("T") + if dtype == "float32": + # x.mean() is 
returning a python float + exp = exp.astype("float64") tm.assert_frame_equal(result, exp) result = df.resample("T").median() From 1cb216ed24c63d971337e1f14fca96d76c88dd1e Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 21 Apr 2021 22:33:37 -0400 Subject: [PATCH 04/16] fixups --- pandas/tests/groupby/aggregate/test_aggregate.py | 1 - pandas/tests/resample/test_datetime_index.py | 4 ++-- pandas/tests/reshape/test_crosstab.py | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 267e4ef89797d..ad8f5514eb7fb 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -234,7 +234,6 @@ def test_aggregate_item_by_item(df): K = len(result.columns) # GH5782 - # odd comparisons can result here, so cast to make easy exp = Series(np.array([foo] * K), index=list("BCD"), name="foo") tm.assert_series_equal(result.xs("foo"), exp) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 11d4c19b9d0bc..5d4f0f53200ee 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1205,7 +1205,7 @@ def test_resample_median_bug_1688(): result = df.resample("T").apply(lambda x: x.mean()) exp = df.asfreq("T") if dtype == "float32": - # Empty groups cause x.mean() to return float64 + # TODO: Empty groups cause x.mean() to return float64 exp = exp.astype("float64") tm.assert_frame_equal(result, exp) @@ -1687,7 +1687,7 @@ def f(data, add_arg): df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10)) result = df.groupby("A").resample("D").agg(f, multiplier) expected = df.groupby("A").resample("D").mean().multiply(multiplier) - # TODO: resample(...).mean should be a float64 + # TODO: resample(...).mean should be a float instead of int expected = expected.astype("float64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 1cbca3aae0c70..62fd93026d5e2 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -559,6 +559,7 @@ def test_crosstab_with_numpy_size(self): expected = DataFrame( expected_data, index=expected_index, columns=expected_column ) + # aggfunc is np.size, resulting in integers expected["All"] = expected["All"].astype("int64") tm.assert_frame_equal(result, expected) From 0cafcee7f3853277f3db9e65d4c9a1aabf0dedb6 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 21 Apr 2021 23:29:44 -0400 Subject: [PATCH 05/16] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 8ce0a8f42a629..0094a08d5694f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -298,6 +298,36 @@ Preserve dtypes in :meth:`~pandas.DataFrame.combine_first` combined.dtypes +Group by methods agg and transform no longer changes return dtype for callables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Previously the methods :meth:`.DataFrameGroupBy.aggregate`, +:meth:`.SeriesGroupBy.aggregate`, :meth:`.DataFrameGroupBy.transform`, and +:meth:`.SeriesGroupBy.transform` might cast the result dtype when the argument ``func`` +is callable, possibly leading to undesirable results 
(:issue:`21240`). The cast would +occur if the result is numeric and casting back to the input dtype does not change any +values as measured by ``np.allclose``. Now no such casting occurs. + +.. ipython:: python + + df = pd.DataFrame({'key': [1, 1], 'a': [True, False], 'b': [True, True]}) + df + +*pandas 1.2.x* + +.. code-block:: ipython + + In [5]: df.groupby('key').agg(lambda x: x.sum()) + Out[5]: + a b + key + 1 True 2 + +*pandas 1.3.0* + +.. ipython:: python + + In [5]: df.groupby('key').agg(lambda x: x.sum()) Try operating inplace when setting values with ``loc`` and ``iloc`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -846,7 +876,6 @@ Groupby/resample/rolling - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` incorrectly rounding integer values near the ``int64`` implementations bounds (:issue:`40767`) - Bug in :meth:`.GroupBy.rank` with nullable dtypes incorrectly raising ``TypeError`` (:issue:`41010`) - Bug in :meth:`.GroupBy.cummin` and :meth:`.GroupBy.cummax` computing wrong result with nullable data types too large to roundtrip when casting to float (:issue:`37493`) -- Bug in :meth:`DataFrameGroupBy.aggregate`, :meth:`SeriesGroupBy.aggregate`, :meth:`DataFrameGroupBy.transform`, and :meth:`SeriesGroupBy.transform` would possibly change the result dtype when ``func`` is callable (:issue:`21240`) Reshaping ^^^^^^^^^ From 785ac9d734896d6777e8a9b3e77907ef9ad1ca33 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 23 Apr 2021 00:09:21 -0400 Subject: [PATCH 06/16] dtype test fixes --- pandas/tests/groupby/test_function.py | 3 --- pandas/tests/resample/test_resampler_grouper.py | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 4f55173b6680b..77fc7d360f619 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -408,9 +408,6 @@ def test_median_empty_bins(observed): result = df.groupby(bins, observed=observed).median() expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) - if observed: - # TODO: groupby(..).median should be float64 - expected = expected.astype("int64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index b688594c388b5..0fa9ad60f32e7 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -258,6 +258,8 @@ def f(x): return x.resample("2s").apply(lambda y: y.sum()) result = g.apply(f) + # y.sum() results in int64 instead of int32 on 32-bit architectures + expected = expected.astype("int64") tm.assert_frame_equal(result, expected) From de0f7b5d1d500c5e6e3e43b511401679173ed093 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Fri, 23 Apr 2021 22:43:51 -0400 Subject: [PATCH 07/16] fixup --- pandas/core/groupby/generic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index de018c9beb629..836af9468fa70 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -51,6 +51,7 @@ is_dict_like, is_integer_dtype, is_interval_dtype, + is_numeric_dtype, is_scalar, ) from pandas.core.dtypes.missing import ( From 4ef679463524d255769c97bc1e3ca2498db1a1c8 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 24 Apr 2021 08:36:46 -0400 Subject: [PATCH 08/16] Fixup --- pandas/tests/groupby/test_function.py | 3 ++- 1 file 
changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 77fc7d360f619..edae272238c83 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -408,7 +408,8 @@ def test_median_empty_bins(observed): result = df.groupby(bins, observed=observed).median() expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) - tm.assert_frame_equal(result, expected) + # TODO: .median() should be float + tm.assert_frame_equal(result, expected, check_dtype=False) @pytest.mark.parametrize( From 4f972887d870fabf34b9c1c6773e7cbb2a9ab024 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 24 Apr 2021 10:03:53 -0400 Subject: [PATCH 09/16] Add GH issue to TODOs --- pandas/tests/groupby/aggregate/test_cython.py | 2 +- pandas/tests/groupby/test_function.py | 2 +- pandas/tests/resample/test_datetime_index.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index a6eabac77c9bf..ded10ab11d5a8 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -197,7 +197,7 @@ def test_cython_agg_empty_buckets(op, targop, observed): g = df.groupby(pd.cut(df[0], grps), observed=observed) expected = g.agg(lambda x: targop(x)) if observed and op not in ("min", "max"): - # TODO: cython_agg_general with mean/var should be float64 + # TODO: GH 41137 expected = expected.astype("int64") tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index edae272238c83..3c620b59dbc2a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -408,7 +408,7 @@ def test_median_empty_bins(observed): result = df.groupby(bins, observed=observed).median() expected = df.groupby(bins, observed=observed).agg(lambda x: x.median()) - # TODO: .median() should be float + # TODO: GH 41137 tm.assert_frame_equal(result, expected, check_dtype=False) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index 4c9d20e1a7636..66cb2f2291e98 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1689,7 +1689,7 @@ def f(data, add_arg): df = DataFrame({"A": 1, "B": 2}, index=date_range("2017", periods=10)) result = df.groupby("A").resample("D").agg(f, multiplier) expected = df.groupby("A").resample("D").mean().multiply(multiplier) - # TODO: resample(...).mean should be a float instead of int + # TODO: GH 41137 expected = expected.astype("float64") tm.assert_frame_equal(result, expected) From ad7d9905dfbbeee2c91ddbd9e7ab9a851f6f01db Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 24 Apr 2021 22:27:18 -0400 Subject: [PATCH 10/16] Added docs to user guide, agg docstring --- doc/source/user_guide/gotchas.rst | 2 +- doc/source/user_guide/groupby.rst | 31 +++++++++++++++++++++++++++++-- pandas/core/groupby/generic.py | 21 +++++++++++++++++++-- pandas/core/groupby/groupby.py | 3 +++ 4 files changed, 52 insertions(+), 5 deletions(-) diff --git a/doc/source/user_guide/gotchas.rst b/doc/source/user_guide/gotchas.rst index 180f833a2753d..1de978b195382 100644 --- a/doc/source/user_guide/gotchas.rst +++ b/doc/source/user_guide/gotchas.rst @@ -178,7 +178,7 @@ To test for membership in the values, use the method :meth:`~pandas.Series.isin` For 
``DataFrames``, likewise, ``in`` applies to the column axis, testing for membership in the list of column names. -.. _udf-mutation: +.. _gotchas.udf-mutation: Mutating with User Defined Function (UDF) methods ------------------------------------------------- diff --git a/doc/source/user_guide/groupby.rst b/doc/source/user_guide/groupby.rst index afb2e72cbff07..3f596388ca226 100644 --- a/doc/source/user_guide/groupby.rst +++ b/doc/source/user_guide/groupby.rst @@ -739,6 +739,26 @@ optimized Cython implementations: Of course ``sum`` and ``mean`` are implemented on pandas objects, so the above code would work even without the special versions via dispatching (see below). +.. _groupby.aggregate.udfs: + +Aggregations with User-Defined Functions +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Users can also provide their own functions for custom aggregations. When aggregating +with a User-Defined Function (UDF), the UDF should not mutate the provided ``Series``, see +:ref:`gotchas.udf-mutation` for more information. + +.. ipython:: python + + animals.groupby("kind")[["height"]].agg(lambda x: set(x)) + +The resulting dtype will reflect that of the aggregating function. If the results from different groups have +different dtypes, then a common dtype will be determined in the same way as ``DataFrame`` construction. + +.. ipython:: python + + animals.groupby("kind")[["height"]].agg(lambda x: x.astype(int).sum()) + .. _groupby.transform: Transformation @@ -759,7 +779,11 @@ as the one being grouped. The transform function must: * (Optionally) operates on the entire group chunk. If this is supported, a fast path is used starting from the *second* chunk. -For example, suppose we wished to standardize the data within each group: +Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the +transformation function. If the results from different groups have different dtypes, then +a common dtype will be determined in the same way as ``DataFrame`` construction. + +Suppose we wished to standardize the data within each group: .. ipython:: python @@ -1065,13 +1089,16 @@ that is itself a series, and possibly upcast the result to a DataFrame: s s.apply(f) - .. note:: ``apply`` can act as a reducer, transformer, *or* filter function, depending on exactly what is passed to it. So depending on the path taken, and exactly what you are grouping. Thus the grouped columns(s) may be included in the output as well as set the indices. +Similar to :ref:`groupby.aggregate.udfs`, the resulting dtype will reflect that of the +apply function. If the results from different groups have different dtypes, then +a common dtype will be determined in the same way as ``DataFrame`` construction. + Numba Accelerated Routines -------------------------- diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 836af9468fa70..81278a239907b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -223,7 +223,14 @@ def _selection_name(self): ... ) minimum maximum 1 1 2 - 2 3 4""" + 2 3 4 + + The resulting dtype will reflect that of the aggregating function. + + >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) + 1 1.0 + 2 3.0 + dtype: float64""" ) @Appender( @@ -1017,7 +1024,17 @@ class DataFrameGroupBy(GroupBy[DataFrame]): ``['column', 'aggfunc']`` to make it clearer what the arguments are. As usual, the aggregation can be a callable or a string alias. - See :ref:`groupby.aggregate.named` for more.""" + See :ref:`groupby.aggregate.named` for more. 
+ + The resulting dtype will reflect that of the aggregating function. + + >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) + B + A + 1 1.0 + 2 3.0 + + """ ) @doc(_agg_template, examples=_agg_examples_doc, klass="DataFrame") diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 60b2df81a88f4..19b22fb0ccbcd 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -474,6 +474,9 @@ class providing the base-class of operations. Functions that mutate the passed object can produce unexpected behavior or errors and are not supported. See :ref:`udf-mutation` for more details. + +The resulting dtype will reflect that of the passed ``func``, see the examples +below. """ From 11529e381e6298354f258b678a13160d1d7bdc93 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 25 Apr 2021 08:56:40 -0400 Subject: [PATCH 11/16] Updated docs --- pandas/core/frame.py | 2 +- pandas/core/groupby/generic.py | 8 +++--- pandas/core/groupby/groupby.py | 50 ++++++++++++++++++++++++---------- pandas/core/series.py | 2 +- pandas/core/shared_docs.py | 4 +-- 5 files changed, 43 insertions(+), 23 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7f970a72cb12c..3749325c006a7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8552,7 +8552,7 @@ def apply( Notes ----- Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`udf-mutation` + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. Examples diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 81278a239907b..414df3015e685 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -225,7 +225,7 @@ def _selection_name(self): 1 1 2 2 3 4 - The resulting dtype will reflect that of the aggregating function. + The resulting dtype will reflect the return value of the aggregating function. >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) 1 1.0 @@ -643,7 +643,7 @@ def filter(self, func, dropna=True, *args, **kwargs): Notes ----- Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`udf-mutation` + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. Examples @@ -1026,7 +1026,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]): See :ref:`groupby.aggregate.named` for more. - The resulting dtype will reflect that of the aggregating function. + The resulting dtype will reflect the return value of the aggregating function. >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) B @@ -1598,7 +1598,7 @@ def filter(self, func, dropna=True, *args, **kwargs): which group you are working on. Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`udf-mutation` + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. Examples diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 19b22fb0ccbcd..7735bb4cd9d15 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -158,6 +158,9 @@ class providing the base-class of operations. side-effects, as they will take effect twice for the first group. + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. 
+ Examples -------- {examples} @@ -165,7 +168,7 @@ class providing the base-class of operations. "dataframe_examples": """ >>> df = pd.DataFrame({'A': 'a a b'.split(), ... 'B': [1,2,3], - ... 'C': [4,6, 5]}) + ... 'C': [4,6,5]}) >>> g = df.groupby('A') Notice that ``g`` has two groups, ``a`` and ``b``. @@ -183,13 +186,14 @@ class providing the base-class of operations. Example 2: The function passed to `apply` takes a DataFrame as its argument and returns a Series. `apply` combines the result for - each group together into a new DataFrame: + each group together into a new DataFrame, using the dtype returned + from the function for the result dtype: - >>> g[['B', 'C']].apply(lambda x: x.max() - x.min()) - B C + >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) + B C A - a 1 2 - b 0 0 + a 1.0 2.0 + b 0.0 0.0 Example 3: The function passed to `apply` takes a DataFrame as its argument and returns a scalar. `apply` combines the result for @@ -210,12 +214,13 @@ class providing the base-class of operations. Example 1: The function passed to `apply` takes a Series as its argument and returns a Series. `apply` combines the result for - each group together into a new Series: + each group together into a new Series, using the dtype returned + from the function for the result dtype: - >>> g.apply(lambda x: x*2 if x.name == 'b' else x/2) + >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2) a 0.0 - a 0.5 - b 4.0 + a 1.0 + b 2.0 dtype: float64 Example 2: The function passed to `apply` takes a Series as @@ -367,12 +372,15 @@ class providing the base-class of operations. in the subframe. If f also supports application to the entire subframe, then a fast path is used starting from the second chunk. * f must not mutate groups. Mutation is not supported and may - produce unexpected results. See :ref:`udf-mutation` for more details. + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. When using ``engine='numba'``, there will be no "fall back" behavior internally. The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried. +The resulting dtype will reflect the return value of the passed ``func``, +see the examples below. + Examples -------- @@ -402,6 +410,18 @@ class providing the base-class of operations. 3 3 8.0 4 4 6.0 5 3 8.0 + +The resulting dtype will reflect the return value of the transformation function, +for example: + +>>> grouped[['C', 'D']].transform(lambda x: x.astype(int).max()) + C D +0 5 8 +1 5 9 +2 5 8 +3 5 9 +4 5 8 +5 5 9 """ _agg_template = """ @@ -469,14 +489,14 @@ class providing the base-class of operations. When using ``engine='numba'``, there will be no "fall back" behavior internally. The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried. -{examples} Functions that mutate the passed object can produce unexpected -behavior or errors and are not supported. See :ref:`udf-mutation` +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. -The resulting dtype will reflect that of the passed ``func``, see the examples -below. +The resulting dtype will reflect the return value of the passed ``func``, +see the examples below. 
+{examples} """ diff --git a/pandas/core/series.py b/pandas/core/series.py index 85c30096b1001..5068f87e4c1c2 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4191,7 +4191,7 @@ def apply( Notes ----- Functions that mutate the passed object can produce unexpected - behavior or errors and are not supported. See :ref:`udf-mutation` + behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. Examples diff --git a/pandas/core/shared_docs.py b/pandas/core/shared_docs.py index a4ee4bb636450..a3fa24c7ee1e0 100644 --- a/pandas/core/shared_docs.py +++ b/pandas/core/shared_docs.py @@ -42,7 +42,7 @@ `agg` is an alias for `aggregate`. Use the alias. Functions that mutate the passed object can produce unexpected -behavior or errors and are not supported. See :ref:`udf-mutation` +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. A passed user-defined-function will be passed a Series for evaluation. @@ -303,7 +303,7 @@ Notes ----- Functions that mutate the passed object can produce unexpected -behavior or errors and are not supported. See :ref:`udf-mutation` +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. Examples From a0a2640711a138b5819a930170dd99e62d707232 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 26 Apr 2021 22:54:59 -0400 Subject: [PATCH 12/16] Fixup --- pandas/core/groupby/groupby.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7735bb4cd9d15..091760b1769a9 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -496,8 +496,7 @@ class providing the base-class of operations. The resulting dtype will reflect the return value of the passed ``func``, see the examples below. -{examples} -""" +{examples}""" @final From eb1943a8a1d1414e56b5507f2253ca64ea51666f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Mon, 26 Apr 2021 23:00:59 -0400 Subject: [PATCH 13/16] Fixup --- pandas/core/groupby/groupby.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 091760b1769a9..12eafcd52646d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -217,10 +217,10 @@ class providing the base-class of operations. 
each group together into a new Series, using the dtype returned from the function for the result dtype: - >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2) + >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2) a 0.0 - a 1.0 - b 2.0 + a 2.0 + b 1.0 dtype: float64 Example 2: The function passed to `apply` takes a Series as From 180bc231ef27d23099fcb339dd28e01a43eb11ee Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 29 Apr 2021 16:14:48 -0400 Subject: [PATCH 14/16] docsting fixup --- pandas/core/groupby/generic.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 414df3015e685..722a71d9b11c2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1032,9 +1032,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]): B A 1 1.0 - 2 3.0 - - """ + 2 3.0""" ) @doc(_agg_template, examples=_agg_examples_doc, klass="DataFrame") From 4a0978ed921729adb7e83ef57e8aa7f497997ea2 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 1 May 2021 08:17:12 -0400 Subject: [PATCH 15/16] Add versionchanged --- doc/source/whatsnew/v1.3.0.rst | 2 +- pandas/core/groupby/groupby.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 192729a02c8fe..040d38c3cd67b 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -327,7 +327,7 @@ values as measured by ``np.allclose``. Now no such casting occurs. .. ipython:: python - In [5]: df.groupby('key').agg(lambda x: x.sum()) + df.groupby('key').agg(lambda x: x.sum()) Try operating inplace when setting values with ``loc`` and ``iloc`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 5d61e8ad2ca06..19aa49a05bd5d 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -158,8 +158,9 @@ class providing the base-class of operations. side-effects, as they will take effect twice for the first group. - The resulting dtype will reflect the return value of the passed ``func``, - see the examples below. + .. versionchanged:: 1.2.0 + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. Examples -------- From 6b80c1084d337f448edcb21617be6bee0cf1c49f Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sat, 1 May 2021 09:04:25 -0400 Subject: [PATCH 16/16] Added versionchanged --- pandas/core/groupby/generic.py | 8 ++++++-- pandas/core/groupby/groupby.py | 35 +++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ccb569b39c4cf..6d334dd181216 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -224,7 +224,9 @@ def _selection_name(self): 1 1 2 2 3 4 - The resulting dtype will reflect the return value of the aggregating function. + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the aggregating function. >>> s.groupby([1, 1, 2, 2]).agg(lambda x: x.astype(float).min()) 1 1.0 @@ -1004,7 +1006,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): See :ref:`groupby.aggregate.named` for more. - The resulting dtype will reflect the return value of the aggregating function. + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the aggregating function. 
>>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min()) B diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 7158ebf9c2b20..62b12341fd63c 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -158,7 +158,8 @@ class providing the base-class of operations. side-effects, as they will take effect twice for the first group. - .. versionchanged:: 1.2.0 + .. versionchanged:: 1.3.0 + The resulting dtype will reflect the return value of the passed ``func``, see the examples below. @@ -187,8 +188,11 @@ class providing the base-class of operations. Example 2: The function passed to `apply` takes a DataFrame as its argument and returns a Series. `apply` combines the result for - each group together into a new DataFrame, using the dtype returned - from the function for the result dtype: + each group together into a new DataFrame. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. >>> g[['B', 'C']].apply(lambda x: x.astype(float).max() - x.min()) B C @@ -215,8 +219,11 @@ class providing the base-class of operations. Example 1: The function passed to `apply` takes a Series as its argument and returns a Series. `apply` combines the result for - each group together into a new Series, using the dtype returned - from the function for the result dtype: + each group together into a new Series. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``. >>> g.apply(lambda x: x*2 if x.name == 'a' else x/2) a 0.0 @@ -379,8 +386,10 @@ class providing the base-class of operations. The group data and group index will be passed as numpy arrays to the JITed user defined function, and no alternative execution attempts will be tried. -The resulting dtype will reflect the return value of the passed ``func``, -see the examples below. +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. Examples -------- @@ -412,8 +421,10 @@ class providing the base-class of operations. 4 4 6.0 5 3 8.0 -The resulting dtype will reflect the return value of the transformation function, -for example: +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: >>> grouped[['C', 'D']].transform(lambda x: x.astype(int).max()) C D @@ -495,8 +506,10 @@ class providing the base-class of operations. behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` for more details. -The resulting dtype will reflect the return value of the passed ``func``, -see the examples below. +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. {examples}"""