pandas-dev · jorisvandenbossche · Mar 20, 2020 · Mar 20, 2020 · Mar 20, 2020 · Mar 20, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -1149,6 +1149,7 @@ ExtensionArray
 - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`)
 - Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`)
 - Fixed bug in ``IntegerArray.astype`` to correctly copy the mask as well (:issue:`34931`).
+- Fixed bug where :class:`DataFrame` reductions with ``Int64`` columns cast to ``float64`` (:issue:`32651`)
 
 Other
 ^^^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -8501,6 +8501,22 @@ def _count_level(self, level, axis=0, numeric_only=False):
     def _reduce(
         self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds
     ):
+        """
+        Reduce DataFrame over axis with given operation.
+
+        Parameters
+        ----------
+        op : func
+            The reducing function to be called on the values.
+        name : str
+            The name of the reduction.
+        axis : int
+        numeric_only : bool, optional
+        filter_type : None or "bool"
+            Set to "bool" for ops that only work on boolean values.
+        skipna, **kwds : keywords to pass to the `op` function
+
+        """
 
         assert filter_type is None or filter_type == "bool", filter_type
 
@@ -8551,6 +8567,7 @@ def _get_data(axis_matters):
                 raise NotImplementedError(msg)
             return data
 
+        # special case for block-wise
         if numeric_only is not None and axis in [0, 1]:
             df = self
             if numeric_only is True:
@@ -8579,33 +8596,47 @@ def blk_func(values):
                 out[:] = coerce_to_dtypes(out.values, df.dtypes)
             return out
 
-        if not self._is_homogeneous_type:
-            # try to avoid self.values call
+        def array_func(values):
+            # extension arrays are reduced through their own _reduce, by name
+            if isinstance(values, ExtensionArray):
+                return values._reduce(name, skipna=skipna, **kwds)
+            return op(values, skipna=skipna, **kwds)
 
-            if filter_type is None and axis == 0 and len(self) > 0:
-                # operate column-wise
+        # all other options with axis=0 are done column-array-wise
+        if axis == 0:
 
-                # numeric_only must be None here, as other cases caught above
-                # require len(self) > 0 bc frame_apply messes up empty prod/sum
+            def _constructor(df, result, index=None):
+                if index is None:
+                    index = df.columns
+                if len(result):
+                    return df._constructor_sliced(result, index=index)
+                # set correct dtype for empty result
+                dtype = "bool" if filter_type == "bool" else "float64"
+                return df._constructor_sliced(result, index=index, dtype=dtype)
 
-                # this can end up with a non-reduction
-                # but not always. if the types are mixed
-                # with datelike then need to make sure a series
+            df = self
+            if numeric_only is True:
+                df = _get_data(axis_matters=True)
 
-                # we only end up here if we have not specified
-                # numeric_only and yet we have tried a
-                # column-by-column reduction, where we have mixed type.
-                # So let's just do what we can
-                from pandas.core.apply import frame_apply
+            if numeric_only is not None:
+                result = [array_func(arr) for arr in df._iter_column_arrays()]
+                return _constructor(df, result)
+            else:
+                # with numeric_only=None, need to ignore exceptions per column
+                result = []
+                indices = []
+                for i, arr in enumerate(df._iter_column_arrays()):
+                    try:
+                        res = array_func(arr)
+                    except Exception:
+                        pass
+                    else:
+                        result.append(res)
+                        indices.append(i)
 
-                opa = frame_apply(
-                    self, func=f, result_type="expand", ignore_failures=True
-                )
-                result = opa.get_result()
-                if result.ndim == self.ndim:
-                    result = result.iloc[0].rename(None)
-                return result
+                return _constructor(df, result, index=df.columns[indices])
 
+        # remaining cases for axis=1 or axis=None
         if numeric_only is None:
             data = self
             values = data.values

diff --git a/pandas/core/generic.py b/pandas/core/generic.py
@@ -11402,7 +11402,7 @@ def stat_func(
         if level is not None:
             return self._agg_by_level(name, axis=axis, level=level, skipna=skipna)
         return self._reduce(
-            func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
+            func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only,
         )
 
     return set_function_name(stat_func, name, cls)

diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py
@@ -133,6 +133,15 @@ def test_integer_array_numpy_sum(values, expected):
     assert result == expected
 
 
+@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"])
+def test_dataframe_reductions(op):
+    # https://github.com/pandas-dev/pandas/pull/32867
+    # ensure the integers are not cast to float during each parametrized op
+    df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
+    result = getattr(df, op)()
+    assert isinstance(result["a"], np.int64)
+
+
 # TODO(jreback) - these need testing / are broken
 
 # shift

diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py
@@ -415,7 +415,7 @@ def test_stat_operators_attempt_obj_array(self, method):
         for df in [df1, df2]:
             assert df.values.dtype == np.object_
             result = getattr(df, method)(1)
-            expected = getattr(df.astype("f8"), method)(1)
+            expected = getattr(df, method)(1)
 
             if method in ["sum", "prod"]:
                 tm.assert_series_equal(result, expected)
@@ -1303,3 +1303,26 @@ def test_preserve_timezone(self, initial: str, method):
         df = DataFrame([expected])
         result = getattr(df, method)(axis=1)
         tm.assert_series_equal(result, expected)
+
+
+def test_mixed_frame_with_integer_sum():
+    # https://github.com/pandas-dev/pandas/issues/34520
+    # sum over a frame mixing an object column with a nullable Int64 column
+    frame = pd.DataFrame([["a", 1]], columns=list("ab")).astype({"b": "Int64"})
+    expected = pd.Series(["a", 1], index=["a", "b"])
+    summed = frame.sum()
+    tm.assert_series_equal(summed, expected)
+
+
+@pytest.mark.parametrize("numeric_only", [True, False, None])
+@pytest.mark.parametrize("method", ["min", "max"])
+def test_minmax_extensionarray(method, numeric_only):
+    # https://github.com/pandas-dev/pandas/issues/32651
+    # min/max of a nullable Int64 column holding the int64 extremes and a
+    # missing value should return the exact integer bound
+    bounds = np.iinfo("int64")
+    values = Series([bounds.max, None, bounds.min], dtype=pd.Int64Dtype())
+    reducer = getattr(DataFrame({"Int64": values}), method)
+    extreme = getattr(bounds, method)
+    expected = Series([extreme], index=pd.Index(["Int64"], dtype="object"))
+    tm.assert_series_equal(reducer(numeric_only=numeric_only), expected)