From a9ca0fa52616b725fc5e9d2edd35bdfd095878b0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Mar 2020 14:58:31 +0100 Subject: [PATCH 01/20] ENH/PERF: enable column-wise reductions for EA-backed columns --- pandas/core/frame.py | 65 +++++++++++++++++++++++++++++++ pandas/core/generic.py | 10 ++++- pandas/core/internals/managers.py | 8 ++++ pandas/core/series.py | 1 + 4 files changed, 83 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b9e43b1cd9b05..08877b6d5ef92 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7852,6 +7852,23 @@ def _count_level(self, level, axis=0, numeric_only=False): def _reduce( self, op, name, axis=0, skipna=True, numeric_only=None, filter_type=None, **kwds ): + """ + Reduce DataFrame over axis with given operation. + + Parameters + ---------- + op : func + The reducing function to be called on the values. + name : str + The name of the reduction. + axis : int + numeric_only : bool, optional + filter_type : None or "bool" + Set to "bool" for ops that give boolean results. + skipna, **kwds : keywords to pass to the `op` function + + """ + column_wise = kwds.pop("column_wise", False) assert filter_type is None or filter_type == "bool", filter_type @@ -7898,6 +7915,13 @@ def _get_data(axis_matters): raise NotImplementedError(msg) return data + if axis == 0 and column_wise: + # column-wise reduction + df = self + if numeric_only is True: + df = _get_data(axis_matters=True) + return DataFrame._reduce_columns(df, op, name, skipna=skipna, **kwds) + if numeric_only is not None and axis in [0, 1]: df = self if numeric_only is True: @@ -7994,6 +8018,47 @@ def blk_func(values): result = self._constructor_sliced(result, index=labels) return result + def _reduce_columns(self, op, name, skipna=True, **kwds): + """ + Reduce DataFrame column-wise. + + Parameters + ---------- + op : func + The reducing function to be called on the values. Only used + for columns backed by a numpy ndarray. + name : str + The name of the reduction. + skipna, **kwds : keywords to pass to the `op` function + + Returns + ------- + Series + """ + result = [] + + for arr in self._iter_arrays(): + if isinstance(arr, ExtensionArray): + # dispatch to ExtensionArray interface + val = arr._reduce(name, skipna=skipna, **kwds) + else: + # dispatch to numpy arrays + with np.errstate(all="ignore"): + val = op(arr, skipna=skipna, **kwds) + + result.append(val) + + return self._constructor_sliced(result, index=self.columns) + + def _iter_arrays(self): + """ + Iterate over the arrays of all columns in order. + + This returns the values as stored in the Block (ndarray or ExtensionArray). + """ + for i in range(len(self.columns)): + yield self._data.iget_values(i) + def nunique(self, axis=0, dropna=True) -> Series: """ Count distinct observations over requested axis. 
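A minimal, standalone sketch of the dispatch rule the new column-wise path above implements, using only public API (the helper name `column_wise_sum` and its signature are illustrative, not part of the patch): ExtensionArray-backed columns are reduced through the array's own reduction machinery (`Series.sum` delegates to `ExtensionArray._reduce`), while ndarray-backed columns go through a numpy reducer, so extension dtypes are not silently cast to float64.

import numpy as np
import pandas as pd
from pandas.api.types import is_extension_array_dtype


def column_wise_sum(df: pd.DataFrame, skipna: bool = True) -> pd.Series:
    # Illustrative only: mimics the DataFrame._reduce_columns / blk_func dispatch.
    out = {}
    for col in df.columns:
        ser = df[col]
        if is_extension_array_dtype(ser.dtype):
            # EA path: Series.sum delegates to ExtensionArray._reduce
            out[col] = ser.sum(skipna=skipna)
        else:
            # ndarray path: plain numpy reduction, as op(values, ...) does
            values = ser.to_numpy()
            out[col] = np.nansum(values) if skipna else values.sum()
    return pd.Series(out)


df = pd.DataFrame({"a": pd.array([1, 2, None], dtype="Int64"), "b": [10, 20, 30]})
print(column_wise_sum(df))  # a: 3, b: 60; both columns keep integer results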
diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6b0f7de11a3e7..8d47c1c4494b7 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11067,6 +11067,7 @@ def stat_func( min_count=0, **kwargs, ): + column_wise = kwargs.pop("column_wise", False) if name == "sum": nv.validate_sum(tuple(), kwargs) elif name == "prod": @@ -11088,6 +11089,7 @@ def stat_func( skipna=skipna, numeric_only=numeric_only, min_count=min_count, + column_wise=column_wise, ) return set_function_name(stat_func, name, cls) @@ -11117,6 +11119,7 @@ def _make_stat_function( def stat_func( self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs ): + column_wise = kwargs.pop("column_wise", False) if name == "median": nv.validate_median(tuple(), kwargs) else: @@ -11128,7 +11131,12 @@ def stat_func( if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) return self._reduce( - func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only + func, + name=name, + axis=axis, + skipna=skipna, + numeric_only=numeric_only, + column_wise=column_wise, ) return set_function_name(stat_func, name, cls) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 66e96af05eb71..b41e42fc820ee 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -998,6 +998,14 @@ def iget(self, i: int) -> "SingleBlockManager": fastpath=True, ) + def iget_values(self, i: int): + """ + Return the data for column i as the values (ndarray or ExtensionArray). + """ + block = self.blocks[self.blknos[i]] + values = block.iget(self.blklocs[i]) + return values + def delete(self, item): """ Delete selected item (items if non-unique) in-place. diff --git a/pandas/core/series.py b/pandas/core/series.py index aaaeadc0cf618..c5299f8ee654c 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -3871,6 +3871,7 @@ def _reduce( If we have an ndarray as a value, then simply perform the operation, otherwise delegate to the object. 
""" + kwds.pop("column_wise", None) delegate = self._values if axis is not None: From 21aee0d16e0ef07e91feb1b64b2d489fead9d052 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Mar 2020 15:40:03 +0100 Subject: [PATCH 02/20] fix numeric_only for EAs --- pandas/core/frame.py | 2 +- pandas/core/internals/managers.py | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 08877b6d5ef92..4a0b77d523a7b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7940,7 +7940,7 @@ def blk_func(values): # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - res = df._data.reduce(blk_func) + res = df._data.reduce(blk_func, name, skipna, **kwds) assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index b41e42fc820ee..61a80d0b4a2b6 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -349,16 +349,19 @@ def _verify_integrity(self) -> None: f"tot_items: {tot_items}" ) - def reduce(self, func, *args, **kwargs): + def reduce(self, func, name, skipna=True, **kwds): # If 2D, we assume that we're operating column-wise if self.ndim == 1: # we'll be returning a scalar blk = self.blocks[0] - return func(blk.values, *args, **kwargs) + return func(blk.values) res = {} for blk in self.blocks: - bres = func(blk.values, *args, **kwargs) + if isinstance(blk, ExtensionBlock): + bres = blk.values._reduce(name, skipna=skipna, **kwds) + else: + bres = func(blk.values) if np.ndim(bres) == 0: # EA @@ -366,7 +369,7 @@ def reduce(self, func, *args, **kwargs): new_res = zip(blk.mgr_locs.as_array, [bres]) else: assert bres.ndim == 1, bres.shape - assert blk.shape[0] == len(bres), (blk.shape, bres.shape, args, kwargs) + assert blk.shape[0] == len(bres), (blk.shape, bres.shape) new_res = zip(blk.mgr_locs.as_array, bres) nr = dict(new_res) From 9f83f6e09f3295736f215261b63ee869d423ca3c Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Mar 2020 15:59:26 +0100 Subject: [PATCH 03/20] fix _reduce_columns call --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4a0b77d523a7b..8427bd220231a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7920,7 +7920,7 @@ def _get_data(axis_matters): df = self if numeric_only is True: df = _get_data(axis_matters=True) - return DataFrame._reduce_columns(df, op, name, skipna=skipna, **kwds) + return df._reduce_columns(op, name, skipna=skipna, **kwds) if numeric_only is not None and axis in [0, 1]: df = self From a9706e0dae4ceba83dc859e4a2d4bcffdf28edc0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Mar 2020 16:30:09 +0100 Subject: [PATCH 04/20] move EA._reduce call into blk_func --- pandas/core/frame.py | 10 +++++----- pandas/core/internals/managers.py | 7 ++----- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 8427bd220231a..19692a059c4c7 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7933,14 +7933,14 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None def blk_func(values): - if values.ndim == 1 and not isinstance(values, np.ndarray): - # we can't pass axis=1 - return op(values, axis=0, skipna=skipna, **kwds) - return op(values, 
axis=1, skipna=skipna, **kwds) + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=1, skipna=skipna, **kwds) # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - res = df._data.reduce(blk_func, name, skipna, **kwds) + res = df._data.reduce(blk_func) assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 61a80d0b4a2b6..b8198e49f7dc5 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -349,7 +349,7 @@ def _verify_integrity(self) -> None: f"tot_items: {tot_items}" ) - def reduce(self, func, name, skipna=True, **kwds): + def reduce(self, func): # If 2D, we assume that we're operating column-wise if self.ndim == 1: # we'll be returning a scalar @@ -358,10 +358,7 @@ def reduce(self, func, name, skipna=True, **kwds): res = {} for blk in self.blocks: - if isinstance(blk, ExtensionBlock): - bres = blk.values._reduce(name, skipna=skipna, **kwds) - else: - bres = func(blk.values) + bres = func(blk.values) if np.ndim(bres) == 0: # EA From 07372e38b63fe0c3bca6a771d82ef521833119d6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Mar 2020 16:51:26 +0100 Subject: [PATCH 05/20] reuse blk_func for column-wise, inline _iter_arrays --- pandas/core/frame.py | 42 +++++++++++------------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 19692a059c4c7..1bb6510603831 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7915,12 +7915,18 @@ def _get_data(axis_matters): raise NotImplementedError(msg) return data + def blk_func(values): + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=1, skipna=skipna, **kwds) + if axis == 0 and column_wise: # column-wise reduction df = self if numeric_only is True: df = _get_data(axis_matters=True) - return df._reduce_columns(op, name, skipna=skipna, **kwds) + return df._reduce_columns(blk_func) if numeric_only is not None and axis in [0, 1]: df = self @@ -7932,12 +7938,6 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None - def blk_func(values): - if isinstance(values, ExtensionArray): - return values._reduce(name, skipna=skipna, **kwds) - else: - return op(values, axis=1, skipna=skipna, **kwds) - # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce res = df._data.reduce(blk_func) @@ -8018,18 +8018,14 @@ def blk_func(values): result = self._constructor_sliced(result, index=labels) return result - def _reduce_columns(self, op, name, skipna=True, **kwds): + def _reduce_columns(self, op): """ Reduce DataFrame column-wise. Parameters ---------- op : func - The reducing function to be called on the values. Only used - for columns backed by a numpy ndarray. - name : str - The name of the reduction. - skipna, **kwds : keywords to pass to the `op` function + The reducing function to be called on the values. 
Returns ------- @@ -8037,28 +8033,12 @@ def _reduce_columns(self, op, name, skipna=True, **kwds): """ result = [] - for arr in self._iter_arrays(): - if isinstance(arr, ExtensionArray): - # dispatch to ExtensionArray interface - val = arr._reduce(name, skipna=skipna, **kwds) - else: - # dispatch to numpy arrays - with np.errstate(all="ignore"): - val = op(arr, skipna=skipna, **kwds) - + for i in range(len(self.columns)): + val = op(self._data.iget_values(i)) result.append(val) return self._constructor_sliced(result, index=self.columns) - def _iter_arrays(self): - """ - Iterate over the arrays of all columns in order. - - This returns the values as stored in the Block (ndarray or ExtensionArray). - """ - for i in range(len(self.columns)): - yield self._data.iget_values(i) - def nunique(self, axis=0, dropna=True) -> Series: """ Count distinct observations over requested axis. From 2d084509ffdf51a92992500d21db8a939aa49680 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 20 Mar 2020 20:13:47 +0100 Subject: [PATCH 06/20] temp --- pandas/core/frame.py | 2 +- pandas/core/generic.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 1bb6510603831..8d9d5dcd9672c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7864,7 +7864,7 @@ def _reduce( axis : int numeric_only : bool, optional filter_type : None or "bool" - Set to "bool" for ops that give boolean results. + Set to "bool" for ops that only work on boolean values. skipna, **kwds : keywords to pass to the `op` function """ diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8d47c1c4494b7..451c3954e578d 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6480,7 +6480,7 @@ def replace( raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") self._consolidate_inplace() - + breakpoint() if value is None: # passing a single value that is scalar like # when value is None (GH5319), for compat From 9e2a780a45f1a9a122a45df8e3e50ecccf9b6fea Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Mar 2020 18:15:29 +0100 Subject: [PATCH 07/20] first attempts of going block-wise with numeric_only=None --- pandas/core/frame.py | 47 ++++++++++++++++++++------- pandas/core/generic.py | 1 - pandas/core/internals/managers.py | 48 +++++++++++++++++++++++++--- pandas/tests/frame/test_analytics.py | 2 +- 4 files changed, 80 insertions(+), 18 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 62ac5a51c2e06..f3ce11fde02a3 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8001,29 +8001,54 @@ def blk_func(values): df = _get_data(axis_matters=True) return df._reduce_columns(blk_func) - if numeric_only is not None and axis in [0, 1]: + # if numeric_only is not None and axis in [0, 1]: + if axis in [0, 1]: df = self if numeric_only is True: df = _get_data(axis_matters=True) if axis == 1: df = df.T - axis = 0 + # axis = 0 out_dtype = "bool" if filter_type == "bool" else None # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - res = df._data.reduce(blk_func) + try: + res = df._data.reduce(blk_func, ignore_failures=numeric_only is None) + except TypeError: + # if block-wise fails and numeric_only was None, we try + # again after removing non-numerical columns. 
+ # (got here with mixed float + string frame and axis=1 -> need + # to remove non-numerical columns before transposing) + if numeric_only is None: + df = _get_data(axis_matters=True) + if axis == 1: + df = df.T + else: + raise + res = df._data.reduce(blk_func) + + # breakpoint() assert isinstance(res, dict) - if len(res): - assert len(res) == max(list(res.keys())) + 1, res.keys() - out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype) - out.index = df.columns - if axis == 0 and df.dtypes.apply(needs_i8_conversion).any(): - # FIXME: needs_i8_conversion check is kludge, not sure - # why it is necessary in this case and this case alone - out[:] = coerce_to_dtypes(out.values, df.dtypes) + # if len(res): + # assert len(res) == max(list(res.keys())) + 1, res.keys() + + out = df._constructor_sliced( + res, index=list(res.keys()), dtype=out_dtype + ).sort_index() + if len(res) < len(df.columns): + out.index = df.columns[np.sort(list(res.keys()))] + else: + out.index = df.columns + # if axis == 0 and df.dtypes.apply(needs_i8_conversion).any(): + # # FIXME: needs_i8_conversion check is kludge, not sure + # # why it is necessary in this case and this case alone + # out[:] = coerce_to_dtypes(out.values, df.dtypes) return out + else: + # axis is None + return f(self.values) if numeric_only is None: data = self diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 0424dd4e3ea07..eabe34efd0391 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -6478,7 +6478,6 @@ def replace( raise AssertionError("'to_replace' must be 'None' if 'regex' is not a bool") self._consolidate_inplace() - breakpoint() if value is None: # passing a single value that is scalar like # when value is None (GH5319), for compat diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index a77842ba4b9d0..a8e97b6b42b39 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -41,6 +41,7 @@ CategoricalBlock, DatetimeTZBlock, ExtensionBlock, + ObjectBlock, ObjectValuesExtensionBlock, _extend_blocks, _merge_blocks, @@ -350,7 +351,7 @@ def _verify_integrity(self) -> None: f"tot_items: {tot_items}" ) - def reduce(self, func): + def reduce(self, func, ignore_failures=False): # If 2D, we assume that we're operating column-wise if self.ndim == 1: # we'll be returning a scalar @@ -359,16 +360,53 @@ def reduce(self, func): res = {} for blk in self.blocks: - bres = func(blk.values) + placement = blk.mgr_locs.as_array + if isinstance(blk, CategoricalBlock): + try: + bres = func(blk.values) + except TypeError: + # not all operations (eg any, all) are supported on + # Categorical, so fallback to operating on dense array + # eg pandas/tests/frame/test_analytics.py::TestDataFrameAnalytics::test_any_all_np_func + bres = func(np.asarray(blk.values).reshape(1, len(blk.values))) + elif isinstance(blk, ObjectBlock): + try: + bres = func(blk.values) + except TypeError: + # object dtype can have different type of objects in + # different columns, so for this specific case we need + # to fall back to apply the function column-wise + values = blk.values + n_cols = values.shape[0] + results = [] + locs = [] + for i in range(n_cols): + # need to keep as 2D since the func expects that + col_values = values[[i], :] + try: + col_res = func(col_values) + except TypeError: + if ignore_failures: + pass + else: + raise + else: + results.extend(col_res.tolist()) + locs.append(placement[i]) + bres = np.array(results, dtype=object) + placement = locs + + else: + 
bres = func(blk.values) if np.ndim(bres) == 0: # EA assert blk.shape[0] == 1 - new_res = zip(blk.mgr_locs.as_array, [bres]) + new_res = zip(placement, [bres]) else: assert bres.ndim == 1, bres.shape - assert blk.shape[0] == len(bres), (blk.shape, bres.shape) - new_res = zip(blk.mgr_locs.as_array, bres) + # assert blk.shape[0] == len(bres), (blk.shape, bres.shape) + new_res = zip(placement, bres) nr = dict(new_res) assert not any(key in res for key in nr) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 3a7df29ae9091..b230f4c848264 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -408,7 +408,7 @@ def test_stat_operators_attempt_obj_array(self, method): for df in [df1, df2]: assert df.values.dtype == np.object_ result = getattr(df, method)(1) - expected = getattr(df.astype("f8"), method)(1) + expected = getattr(df, method)(1) if method in ["sum", "prod"]: tm.assert_series_equal(result, expected) From 594d2b0ce7cdd5e3f0f5c57a566d233c34872315 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 3 Apr 2020 09:29:16 +0200 Subject: [PATCH 08/20] TEMP --- pandas/core/frame.py | 16 ++++++++++------ pandas/core/series.py | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 33282e8325e82..d7926dcb47491 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -102,6 +102,7 @@ ABCSeries, ) from pandas.core.dtypes.missing import isna, notna +from pandas.core.dtypes.base import ExtensionDtype from pandas.core import algorithms, common as com, nanops, ops from pandas.core.accessor import CachedAccessor @@ -7967,7 +7968,10 @@ def _reduce( constructor = self._constructor def f(x): - return op(x, axis=axis, skipna=skipna, **kwds) + if isinstance(x.dtype, ExtensionDtype): + return x._values._reduce(name, skipna=skipna, **kwds) + else: + return op(x, axis=axis, skipna=skipna, **kwds) def _get_data(axis_matters): if filter_type is None: @@ -8000,7 +8004,7 @@ def blk_func(values): return df._reduce_columns(blk_func) # if numeric_only is not None and axis in [0, 1]: - if axis in [0, 1]: + if numeric_only is not None and axis in [0, 1]: df = self if numeric_only is True: df = _get_data(axis_matters=True) @@ -8044,11 +8048,11 @@ def blk_func(values): # # why it is necessary in this case and this case alone # out[:] = coerce_to_dtypes(out.values, df.dtypes) return out - else: - # axis is None - return f(self.values) + # else: + # # axis is None + # return f(self.values) - if not self._is_homogeneous_type: + if True: #not self._is_homogeneous_type: # try to avoid self.values call if filter_type is None and axis == 0 and len(self) > 0: diff --git a/pandas/core/series.py b/pandas/core/series.py index 877242523fa93..6a491d01a10eb 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -203,7 +203,7 @@ class Series(base.IndexOpsMixin, generic.NDFrame): def __init__( self, data=None, index=None, dtype=None, name=None, copy=False, fastpath=False ): - + # breakpoint() # we are called internally, so short-circuit if fastpath: From 5b0370e0751301aa7c3175deea76dda9d8193e3a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 May 2020 16:14:18 +0200 Subject: [PATCH 09/20] use iter_column_arrays --- pandas/core/frame.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dd6a8c44dcc35..aea865a6cd6bb 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py 
@@ -67,6 +67,7 @@ validate_percentile, ) +from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( cast_scalar_to_array, coerce_to_dtypes, @@ -112,7 +113,6 @@ ABCSeries, ) from pandas.core.dtypes.missing import isna, notna -from pandas.core.dtypes.base import ExtensionDtype from pandas.core import algorithms, common as com, nanops, ops from pandas.core.accessor import CachedAccessor @@ -8353,7 +8353,7 @@ def blk_func(values): # # axis is None # return f(self.values) - if True: #not self._is_homogeneous_type: + if True: # not self._is_homogeneous_type: # try to avoid self.values call if filter_type is None and axis == 0 and len(self) > 0: @@ -8443,9 +8443,8 @@ def _reduce_columns(self, op): """ result = [] - for i in range(len(self.columns)): - val = op(self._data.iget_values(i)) - result.append(val) + for arr in self._iter_column_arrays(): + result.append(op(arr)) return self._constructor_sliced(result, index=self.columns) From 7088cfcbbf37cff20fe06432dd5062bcabfe1f09 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 May 2020 18:00:06 +0200 Subject: [PATCH 10/20] intermediate clean-up: remove BM.reduce changes + do column-wise for axis=0 --- pandas/core/frame.py | 139 ++++++++++++++++-------------- pandas/core/internals/managers.py | 48 ++--------- 2 files changed, 78 insertions(+), 109 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 441845386cf07..bddf9f1ff3e6e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8507,7 +8507,7 @@ def _reduce( skipna, **kwds : keywords to pass to the `op` function """ - column_wise = kwds.pop("column_wise", False) + # column_wise = kwds.pop("column_wise", False) assert filter_type is None or filter_type == "bool", filter_type @@ -8561,69 +8561,96 @@ def _get_data(axis_matters): raise NotImplementedError(msg) return data - def blk_func(values): - if isinstance(values, ExtensionArray): - return values._reduce(name, skipna=skipna, **kwds) - else: - return op(values, axis=1, skipna=skipna, **kwds) - - if axis == 0 and column_wise: - # column-wise reduction - df = self - if numeric_only is True: - df = _get_data(axis_matters=True) - return df._reduce_columns(blk_func) - - # if numeric_only is not None and axis in [0, 1]: - if numeric_only is not None and axis in [0, 1]: + # special case for block-wise + if ( + not self._mgr.any_extension_types + and numeric_only is not None + and axis in [0, 1] + ): df = self if numeric_only is True: df = _get_data(axis_matters=True) if axis == 1: df = df.T - # axis = 0 + axis = 0 out_dtype = "bool" if filter_type == "bool" else None + def blk_func(values): + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=1, skipna=skipna, **kwds) + # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - try: - res = df._mgr.reduce(blk_func, ignore_failures=numeric_only is None) - except TypeError: - # if block-wise fails and numeric_only was None, we try - # again after removing non-numerical columns. 
- # (got here with mixed float + string frame and axis=1 -> need - # to remove non-numerical columns before transposing) - if numeric_only is None: - df = _get_data(axis_matters=True) - if axis == 1: - df = df.T - else: - raise - res = df._mgr.reduce(blk_func) + res = df._mgr.reduce(blk_func) # breakpoint() assert isinstance(res, dict) + if len(res): + assert len(res) == max(list(res.keys())) + 1, res.keys() + out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype) + out.index = df.columns + if axis == 0 and df.dtypes.apply(needs_i8_conversion).any(): + # FIXME: needs_i8_conversion check is kludge, not sure + # why it is necessary in this case and this case alone + out[:] = coerce_to_dtypes(out.values, df.dtypes) + return out + + def array_func(values): + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, skipna=skipna, **kwds) + + # all other options with axis=1 are done column-array-wise + if axis == 0: + # column-wise reduction - # if len(res): - # assert len(res) == max(list(res.keys())) + 1, res.keys() + def _constructor(df, result, index=None): + index = index if index is not None else df.columns + if len(result): + return df._constructor_sliced(result, index=index) + else: + return df._constructor_sliced(result, index=index, dtype="float64") + + def _reduce_columns(df, op): + result = [op(arr) for arr in df._iter_column_arrays()] + return _constructor(df, result) + + df = self + if numeric_only is True: + df = _get_data(axis_matters=True) - out = df._constructor_sliced( - res, index=list(res.keys()), dtype=out_dtype - ).sort_index() - if len(res) < len(df.columns): - out.index = df.columns[np.sort(list(res.keys()))] + if numeric_only is not None: + return _reduce_columns(df, array_func) else: - out.index = df.columns + # need to catch and ignore exceptions when numeric_ + try: + return _reduce_columns(df, array_func) + except TypeError: + # if column-wise fails and numeric_only was None, we try + # again after removing non-numerical columns. + # (got here with mixed float + string frame and axis=1 -> need + # to remove non-numerical columns before transposing) + + # df = _get_data(axis_matters=True) + # return _reduce_columns(df, array_func) + result = [] + indices = [] + for i, arr in enumerate(df._iter_column_arrays()): + try: + res = array_func(arr) + except Exception: + pass + else: + result.append(res) + indices.append(i) - # if axis == 0 and is_object_dtype(out.dtype): - # out[:] = coerce_to_dtypes(out.values, df.dtypes) - return out - # else: - # # axis is None - # return f(self.values) + return _constructor(df, result, index=df.columns[indices]) - if True: # not self._is_homogeneous_type: + if not self._is_homogeneous_type: # try to avoid self.values call if filter_type is None and axis == 0 and len(self) > 0: @@ -8698,26 +8725,6 @@ def blk_func(values): result = self._constructor_sliced(result, index=labels) return result - def _reduce_columns(self, op): - """ - Reduce DataFrame column-wise. - - Parameters - ---------- - op : func - The reducing function to be called on the values. - - Returns - ------- - Series - """ - result = [] - - for arr in self._iter_column_arrays(): - result.append(op(arr)) - - return self._constructor_sliced(result, index=self.columns) - def nunique(self, axis=0, dropna=True) -> Series: """ Count distinct observations over requested axis. 
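For the numeric_only=None fallback that this restructuring moves from BlockManager.reduce into the column loop of DataFrame._reduce, a rough public-API approximation (the helper name and the caught exception types are assumptions of this sketch; the patch itself catches Exception per column): try the reduction on every column and drop the columns where it raises, instead of pre-selecting numeric data.

import pandas as pd


def reduce_ignoring_failures(df: pd.DataFrame, name: str, skipna: bool = True) -> pd.Series:
    # Sketch of the numeric_only=None behaviour: reduce column by column,
    # silently skipping columns whose reduction raises.
    results, kept = [], []
    for col in df.columns:
        try:
            res = getattr(df[col], name)(skipna=skipna)
        except (TypeError, ValueError):
            continue  # e.g. mean() of a string column
        results.append(res)
        kept.append(col)
    if not results:
        # mirror the empty-result dtype handling added later in the series
        return pd.Series([], index=pd.Index(kept), dtype="float64")
    return pd.Series(results, index=pd.Index(kept))


mixed = pd.DataFrame({"a": ["x", "y"], "b": [1.0, 2.0]})
print(reduce_ignoring_failures(mixed, "mean"))  # only "b" survives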
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8364ee4b9a9a2..e496694ee7899 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -41,7 +41,6 @@ CategoricalBlock, DatetimeTZBlock, ExtensionBlock, - ObjectBlock, ObjectValuesExtensionBlock, _extend_blocks, _safe_reshape, @@ -328,7 +327,7 @@ def _verify_integrity(self) -> None: f"tot_items: {tot_items}" ) - def reduce(self, func, ignore_failures=False): + def reduce(self, func): # If 2D, we assume that we're operating column-wise if self.ndim == 1: # we'll be returning a scalar @@ -337,53 +336,16 @@ def reduce(self, func, ignore_failures=False): res = {} for blk in self.blocks: - placement = blk.mgr_locs.as_array - if isinstance(blk, CategoricalBlock): - try: - bres = func(blk.values) - except TypeError: - # not all operations (eg any, all) are supported on - # Categorical, so fallback to operating on dense array - # eg pandas/tests/frame/test_analytics.py::TestDataFrameAnalytics::test_any_all_np_func - bres = func(np.asarray(blk.values).reshape(1, len(blk.values))) - elif isinstance(blk, ObjectBlock): - try: - bres = func(blk.values) - except TypeError: - # object dtype can have different type of objects in - # different columns, so for this specific case we need - # to fall back to apply the function column-wise - values = blk.values - n_cols = values.shape[0] - results = [] - locs = [] - for i in range(n_cols): - # need to keep as 2D since the func expects that - col_values = values[[i], :] - try: - col_res = func(col_values) - except TypeError: - if ignore_failures: - pass - else: - raise - else: - results.extend(col_res.tolist()) - locs.append(placement[i]) - bres = np.array(results, dtype=object) - placement = locs - - else: - bres = func(blk.values) + bres = func(blk.values) if np.ndim(bres) == 0: # EA assert blk.shape[0] == 1 - new_res = zip(placement, [bres]) + new_res = zip(blk.mgr_locs.as_array, [bres]) else: assert bres.ndim == 1, bres.shape - # assert blk.shape[0] == len(bres), (blk.shape, bres.shape) - new_res = zip(placement, bres) + assert blk.shape[0] == len(bres), (blk.shape, bres.shape) + new_res = zip(blk.mgr_locs.as_array, bres) nr = dict(new_res) assert not any(key in res for key in nr) From 925d660cb2c8d8f9eb7ccb03fd396f9412674418 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 29 May 2020 18:01:43 +0200 Subject: [PATCH 11/20] fixup --- pandas/core/frame.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index bddf9f1ff3e6e..826827e029145 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8508,6 +8508,7 @@ def _reduce( """ # column_wise = kwds.pop("column_wise", False) + kwds.pop("column_wise", False) assert filter_type is None or filter_type == "bool", filter_type From 852331e167e87b4e6e603cc61fc69f36c3f5767e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 30 May 2020 09:45:05 +0200 Subject: [PATCH 12/20] fix dtype of empty result --- pandas/core/frame.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 826827e029145..98dffc630e00d 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8605,16 +8605,16 @@ def array_func(values): else: return op(values, skipna=skipna, **kwds) - # all other options with axis=1 are done column-array-wise + # all other options with axis=0 are done column-array-wise if axis == 0: - # column-wise reduction def _constructor(df, result, 
index=None): index = index if index is not None else df.columns if len(result): return df._constructor_sliced(result, index=index) else: - return df._constructor_sliced(result, index=index, dtype="float64") + dtype = "bool" if filter_type == "bool" else "float64" + return df._constructor_sliced(result, index=index, dtype=dtype) def _reduce_columns(df, op): result = [op(arr) for arr in df._iter_column_arrays()] @@ -8632,9 +8632,7 @@ def _reduce_columns(df, op): return _reduce_columns(df, array_func) except TypeError: # if column-wise fails and numeric_only was None, we try - # again after removing non-numerical columns. - # (got here with mixed float + string frame and axis=1 -> need - # to remove non-numerical columns before transposing) + # again but removing those columns for which it fails # df = _get_data(axis_matters=True) # return _reduce_columns(df, array_func) From 34731f213f8ef3f8841d7b012dfd0162b1df2e0f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 6 Jun 2020 10:23:41 +0200 Subject: [PATCH 13/20] clean-up --- pandas/core/frame.py | 19 ++++--------------- pandas/core/generic.py | 10 +--------- pandas/core/series.py | 1 - 3 files changed, 5 insertions(+), 25 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 932dcb9b12469..4da72782ec02a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -72,7 +72,6 @@ validate_percentile, ) -from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.cast import ( cast_scalar_to_array, coerce_to_dtypes, @@ -8498,8 +8497,6 @@ def _reduce( skipna, **kwds : keywords to pass to the `op` function """ - # column_wise = kwds.pop("column_wise", False) - kwds.pop("column_wise", False) assert filter_type is None or filter_type == "bool", filter_type @@ -8531,10 +8528,7 @@ def _reduce( constructor = self._constructor def f(x): - if isinstance(x.dtype, ExtensionDtype): - return x._values._reduce(name, skipna=skipna, **kwds) - else: - return op(x, axis=axis, skipna=skipna, **kwds) + return op(x, axis=axis, skipna=skipna, **kwds) def _get_data(axis_matters): if filter_type is None: @@ -8578,15 +8572,12 @@ def blk_func(values): # simple case where we can use BlockManager._reduce res = df._mgr.reduce(blk_func) - # breakpoint() assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() out = df._constructor_sliced(res, index=range(len(res)), dtype=out_dtype) out.index = df.columns - if axis == 0 and df.dtypes.apply(needs_i8_conversion).any(): - # FIXME: needs_i8_conversion check is kludge, not sure - # why it is necessary in this case and this case alone + if axis == 0 and is_object_dtype(out.dtype): out[:] = coerce_to_dtypes(out.values, df.dtypes) return out @@ -8604,6 +8595,7 @@ def _constructor(df, result, index=None): if len(result): return df._constructor_sliced(result, index=index) else: + # set correct dtype for empty result dtype = "bool" if filter_type == "bool" else "float64" return df._constructor_sliced(result, index=index, dtype=dtype) @@ -8618,15 +8610,12 @@ def _reduce_columns(df, op): if numeric_only is not None: return _reduce_columns(df, array_func) else: - # need to catch and ignore exceptions when numeric_ + # need to catch and ignore exceptions when numeric_only=None try: return _reduce_columns(df, array_func) except TypeError: # if column-wise fails and numeric_only was None, we try # again but removing those columns for which it fails - - # df = _get_data(axis_matters=True) - # return _reduce_columns(df, array_func) result = [] 
indices = [] for i, arr in enumerate(df._iter_column_arrays()): diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c7c4f9e0fb5b9..41f828bb84705 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11336,7 +11336,6 @@ def stat_func( min_count=0, **kwargs, ): - column_wise = kwargs.pop("column_wise", False) if name == "sum": nv.validate_sum(tuple(), kwargs) elif name == "prod": @@ -11358,7 +11357,6 @@ def stat_func( skipna=skipna, numeric_only=numeric_only, min_count=min_count, - column_wise=column_wise, ) return set_function_name(stat_func, name, cls) @@ -11388,7 +11386,6 @@ def _make_stat_function( def stat_func( self, axis=None, skipna=None, level=None, numeric_only=None, **kwargs ): - column_wise = kwargs.pop("column_wise", False) if name == "median": nv.validate_median(tuple(), kwargs) else: @@ -11400,12 +11397,7 @@ def stat_func( if level is not None: return self._agg_by_level(name, axis=axis, level=level, skipna=skipna) return self._reduce( - func, - name=name, - axis=axis, - skipna=skipna, - numeric_only=numeric_only, - column_wise=column_wise, + func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only, ) return set_function_name(stat_func, name, cls) diff --git a/pandas/core/series.py b/pandas/core/series.py index 4f8d23cb4afd2..6b5ed86027806 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -4135,7 +4135,6 @@ def _reduce( If we have an ndarray as a value, then simply perform the operation, otherwise delegate to the object. """ - kwds.pop("column_wise", None) delegate = self._values if axis is not None: From a8e61d01d97562d4fb51415fc8a3f56990b10561 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sat, 6 Jun 2020 10:27:04 +0200 Subject: [PATCH 14/20] whitespace --- pandas/core/frame.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4da72782ec02a..55da3b7833157 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8571,7 +8571,6 @@ def blk_func(values): # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce res = df._mgr.reduce(blk_func) - assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() From 15ec9b6333591210d1857f18e343e8efd35755ad Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 12 Jul 2020 21:35:44 +0200 Subject: [PATCH 15/20] add test case for GH34520, copied from GH35112 Co-authored-by: Simon Hawkins --- pandas/tests/frame/test_analytics.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 7e67fc40a6d90..cde9420cf3066 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1303,3 +1303,12 @@ def test_preserve_timezone(self, initial: str, method): df = DataFrame([expected]) result = getattr(df, method)(axis=1) tm.assert_series_equal(result, expected) + + +def test_mixed_frame_with_integer_sum(): + # https://github.com/pandas-dev/pandas/issues/34520 + df = pd.DataFrame([["a", 1]], columns=list("ab")) + df = df.astype({"b": "Int64"}) + result = df.sum() + expected = pd.Series(["a", 1], index=["a", "b"]) + tm.assert_series_equal(result, expected) From 2653d0298922a6f93aa46d84390b697e111ca484 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 12 Jul 2020 21:46:38 +0200 Subject: [PATCH 16/20] add test to ensure EA op is used for integer array --- pandas/tests/arrays/integer/test_function.py 
| 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pandas/tests/arrays/integer/test_function.py b/pandas/tests/arrays/integer/test_function.py index 44c3077228e80..a81434339fdae 100644 --- a/pandas/tests/arrays/integer/test_function.py +++ b/pandas/tests/arrays/integer/test_function.py @@ -133,6 +133,15 @@ def test_integer_array_numpy_sum(values, expected): assert result == expected +@pytest.mark.parametrize("op", ["sum", "prod", "min", "max"]) +def test_dataframe_reductions(op): + # https://github.com/pandas-dev/pandas/pull/32867 + # ensure the integers are not cast to float during reductions + df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")}) + result = df.max() + assert isinstance(result["a"], np.int64) + + # TODO(jreback) - these need testing / are broken # shift From 64e0069ea9052415d03523dda25eb4cc1123834b Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 12 Jul 2020 21:59:41 +0200 Subject: [PATCH 17/20] remove try except --- pandas/core/frame.py | 36 ++++++++++++++---------------------- 1 file changed, 14 insertions(+), 22 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 08e5b5115c5de..07b567651dcda 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8618,35 +8618,27 @@ def _constructor(df, result, index=None): dtype = "bool" if filter_type == "bool" else "float64" return df._constructor_sliced(result, index=index, dtype=dtype) - def _reduce_columns(df, op): - result = [op(arr) for arr in df._iter_column_arrays()] - return _constructor(df, result) - df = self if numeric_only is True: df = _get_data(axis_matters=True) if numeric_only is not None: - return _reduce_columns(df, array_func) + result = [op(arr) for arr in df._iter_column_arrays()] + return _constructor(df, result) else: - # need to catch and ignore exceptions when numeric_only=None - try: - return _reduce_columns(df, array_func) - except TypeError: - # if column-wise fails and numeric_only was None, we try - # again but removing those columns for which it fails - result = [] - indices = [] - for i, arr in enumerate(df._iter_column_arrays()): - try: - res = array_func(arr) - except Exception: - pass - else: - result.append(res) - indices.append(i) + # with numeric_only=None, need to ignore exceptions per column + result = [] + indices = [] + for i, arr in enumerate(df._iter_column_arrays()): + try: + res = array_func(arr) + except Exception: + pass + else: + result.append(res) + indices.append(i) - return _constructor(df, result, index=df.columns[indices]) + return _constructor(df, result, index=df.columns[indices]) if not self._is_homogeneous_type: # try to avoid self.values call From bb0a47bae9c709841714484b0fadf42cd85d7833 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 12 Jul 2020 22:21:51 +0200 Subject: [PATCH 18/20] remove unused code --- pandas/core/frame.py | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 07b567651dcda..f92e736fc50d1 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8640,33 +8640,7 @@ def _constructor(df, result, index=None): return _constructor(df, result, index=df.columns[indices]) - if not self._is_homogeneous_type: - # try to avoid self.values call - - if filter_type is None and axis == 0 and len(self) > 0: - # operate column-wise - - # numeric_only must be None here, as other cases caught above - # require len(self) > 0 bc frame_apply messes up empty prod/sum - - # this can end up with a 
non-reduction - # but not always. if the types are mixed - # with datelike then need to make sure a series - - # we only end up here if we have not specified - # numeric_only and yet we have tried a - # column-by-column reduction, where we have mixed type. - # So let's just do what we can - from pandas.core.apply import frame_apply - - opa = frame_apply( - self, func=f, result_type="expand", ignore_failures=True - ) - result = opa.get_result() - if result.ndim == self.ndim: - result = result.iloc[0].rename(None) - return result - + # remaining cases for axis=1 or axis=None if numeric_only is None: data = self values = data.values From 9323f0e74be37cf9239673b2810c2750419051e5 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 12 Jul 2020 22:32:12 +0200 Subject: [PATCH 19/20] add test for GH32651, copied from GH34210 Co-authored-by: Simon Hawkins --- pandas/core/frame.py | 2 +- pandas/tests/frame/test_analytics.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f92e736fc50d1..f8ac2ac65ee55 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8623,7 +8623,7 @@ def _constructor(df, result, index=None): df = _get_data(axis_matters=True) if numeric_only is not None: - result = [op(arr) for arr in df._iter_column_arrays()] + result = [array_func(arr) for arr in df._iter_column_arrays()] return _constructor(df, result) else: # with numeric_only=None, need to ignore exceptions per column diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index cde9420cf3066..7c473fb9c6847 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -1312,3 +1312,17 @@ def test_mixed_frame_with_integer_sum(): result = df.sum() expected = pd.Series(["a", 1], index=["a", "b"]) tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("numeric_only", [True, False, None]) +@pytest.mark.parametrize("method", ["min", "max"]) +def test_minmax_extensionarray(method, numeric_only): + # https://github.com/pandas-dev/pandas/issues/32651 + int64_info = np.iinfo("int64") + ser = Series([int64_info.max, None, int64_info.min], dtype=pd.Int64Dtype()) + df = DataFrame({"Int64": ser}) + result = getattr(df, method)(numeric_only=numeric_only) + expected = Series( + [getattr(int64_info, method)], index=pd.Index(["Int64"], dtype="object") + ) + tm.assert_series_equal(result, expected) From eb33f8630bb476c446cdc8e8c2e85f3ccc516b9f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 12 Jul 2020 22:35:34 +0200 Subject: [PATCH 20/20] remove check for EAs for block-wise path --- doc/source/whatsnew/v1.1.0.rst | 1 + pandas/core/frame.py | 6 +----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5f93e08d51baa..2f23de6a45516 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -1149,6 +1149,7 @@ ExtensionArray - Fixed bug that `DataFrame(columns=.., dtype='string')` would fail (:issue:`27953`, :issue:`33623`) - Bug where :class:`DataFrame` column set to scalar extension type was considered an object type rather than the extension type (:issue:`34832`) - Fixed bug in ``IntegerArray.astype`` to correctly copy the mask as well (:issue:`34931`). 
+- Fixed bug where DataFrame reductions with Int64 columns cast the result to float64 (:issue:`32651`)
 
 Other
 ^^^^^
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index f8ac2ac65ee55..be60c4b504410 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -8568,11 +8568,7 @@ def _get_data(axis_matters):
             return data
 
         # special case for block-wise
-        if (
-            not self._mgr.any_extension_types
-            and numeric_only is not None
-            and axis in [0, 1]
-        ):
+        if numeric_only is not None and axis in [0, 1]:
             df = self
             if numeric_only is True:
                 df = _get_data(axis_matters=True)
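A usage-level sketch of the behaviour this series targets, mirroring the tests added above (test_dataframe_reductions, test_mixed_frame_with_integer_sum, test_minmax_extensionarray); the outputs noted in the comments are the expected results once the patches are applied.

import numpy as np
import pandas as pd

df = pd.DataFrame({"a": pd.array([1, 2], dtype="Int64")})
print(type(df.max()["a"]))  # expected: <class 'numpy.int64'>, not cast to float

mixed = pd.DataFrame([["a", 1]], columns=list("ab")).astype({"b": "Int64"})
print(mixed.sum())  # expected: a -> "a", b -> 1, with "b" not cast to float64

int64_info = np.iinfo("int64")
ser = pd.Series([int64_info.max, None, int64_info.min], dtype="Int64")
print(pd.DataFrame({"Int64": ser}).min())  # expected: the exact int64 minimum, no float rounding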