From 9795f353bc8654803797a50965357aaefb93b97e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 13 Jan 2020 21:29:44 +0100
Subject: [PATCH 01/18] POC masked ops for reductions

---
 pandas/core/arrays/integer.py       |  5 ++++-
 pandas/core/ops/mask_ops.py         | 29 +++++++++++++++++++++++++++++
 pandas/tests/arrays/test_integer.py |  2 +-
 3 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index cb1e7115cd3c2..dad00b4322696 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -24,7 +24,7 @@
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import nanops, ops
-from pandas.core.ops import invalid_comparison
+from pandas.core.ops import invalid_comparison, mask_ops
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.tools.numeric import to_numeric
 
@@ -548,6 +548,9 @@ def _reduce(self, name, skipna=True, **kwargs):
         data = self._data
         mask = self._mask
 
+        if name == "sum":
+            return mask_ops.sum(data, mask, skipna=skipna)
+
         # coerce to a nan-aware float if needed
         if mask.any():
             data = self._data.astype("float64")
diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
index 8fb81faf313d7..fc204a29bdda2 100644
--- a/pandas/core/ops/mask_ops.py
+++ b/pandas/core/ops/mask_ops.py
@@ -6,6 +6,7 @@
 import numpy as np
 
 from pandas._libs import lib, missing as libmissing
+from pandas.compat.numpy import _np_version_under1p17
 
 
 def kleene_or(
@@ -176,3 +177,31 @@ def kleene_and(
 def raise_for_nan(value, method):
     if lib.is_float(value) and np.isnan(value):
         raise ValueError(f"Cannot perform logical '{method}' with floating NaN")
+
+
+def sum(
+    values: np.ndarray, mask: np.ndarray, skipna: bool,
+):
+    """
+    Sum for 1D masked array.
+
+    Parameters
+    ----------
+    values : np.ndarray
+        Numpy array with the values (can be of any dtype that support the
+        operation).
+    mask : np.ndarray
+        Boolean numpy array (False for missing)
+    skipna: bool, default True
+        Whether to skip NA.
+    """
+    if not skipna:
+        if mask.any():
+            return libmissing.NA
+        else:
+            return np.sum(values)
+    else:
+        if _np_version_under1p17:
+            return np.sum(values[mask])
+        else:
+            return np.sum(values, where=~mask)
diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index f1a7cc741603d..9bfaa3b433fce 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -887,7 +887,7 @@ def test_preserve_dtypes(op):
 
     # op
     result = getattr(df.C, op)()
-    assert isinstance(result, int)
+    assert isinstance(result, np.int64)
 
     # groupby
     result = getattr(df.groupby("A"), op)()

From cd2692055521343479d275f5db8912eade6d1b5a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 13 Feb 2020 11:05:52 +0100
Subject: [PATCH 02/18] fix mask for older numpy

---
 pandas/core/ops/mask_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
index fc204a29bdda2..c42f99e9a912a 100644
--- a/pandas/core/ops/mask_ops.py
+++ b/pandas/core/ops/mask_ops.py
@@ -202,6 +202,6 @@ def sum(
             return np.sum(values)
     else:
         if _np_version_under1p17:
-            return np.sum(values[mask])
+            return np.sum(values[~mask])
         else:
             return np.sum(values, where=~mask)

From 28cd331e4d0e4384850b58d9c700c5779d1a6a3c Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 13 Feb 2020 11:10:12 +0100
Subject: [PATCH 03/18] also use in boolean

---
 pandas/core/arrays/boolean.py | 6 +++++-
 pandas/core/arrays/integer.py | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 590b40b0434e5..7ebcbd5fe8417 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -28,6 +28,7 @@
 
 from pandas.core import nanops, ops
 from pandas.core.indexers import check_array_indexer
+from pandas.core.ops import mask_ops
 
 from .masked import BaseMaskedArray
 
@@ -697,6 +698,9 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         data = self._data
         mask = self._mask
 
+        if name == "sum":
+            return mask_ops.sum(data, mask, skipna=skipna)
+
         # coerce to a nan-aware float if needed
         if self._hasna:
             data = self.to_numpy("float64", na_value=np.nan)
@@ -708,7 +712,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
             return libmissing.NA
 
         # if we have numeric op that would result in an int, coerce to int if possible
-        if name in ["sum", "prod"] and notna(result):
+        if name == "prod" and notna(result):
             int_result = np.int64(result)
             if int_result == result:
                 result = int_result
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 3064566a25751..2d3da458f26a5 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -587,7 +587,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
 
         # if we have a preservable numeric op,
         # provide coercion back to an integer type if possible
-        elif name in ["sum", "min", "max", "prod"]:
+        elif name in ["min", "max", "prod"]:
             # GH#31409 more performant than casting-then-checking
             result = com.cast_scalar_indexer(result)
 

From 6298fbd0d136c80cd8788e9094274b31c69eb885 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 13 Feb 2020 12:01:30 +0100
Subject: [PATCH 04/18] add min_count support

---
 pandas/core/arrays/integer.py              |  2 +-
 pandas/core/ops/mask_ops.py                | 28 ++++++++++++++++++++--
 pandas/tests/reductions/test_reductions.py | 27 +++++++++++++--------
 3 files changed, 44 insertions(+), 13 deletions(-)

diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 2d3da458f26a5..7565fd86337c7 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -568,7 +568,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         mask = self._mask
 
         if name == "sum":
-            return mask_ops.sum(data, mask, skipna=skipna)
+            return mask_ops.sum(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         # (we explicitly use NaN within reductions)
diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
index c42f99e9a912a..53a12019ad167 100644
--- a/pandas/core/ops/mask_ops.py
+++ b/pandas/core/ops/mask_ops.py
@@ -180,7 +180,7 @@ def raise_for_nan(value, method):
 
 
 def sum(
-    values: np.ndarray, mask: np.ndarray, skipna: bool,
+    values: np.ndarray, mask: np.ndarray, skipna: bool, min_count: int = 0,
 ):
     """
     Sum for 1D masked array.
@@ -192,16 +192,40 @@ def sum(
         operation).
     mask : np.ndarray
         Boolean numpy array (False for missing)
-    skipna: bool, default True
+    skipna : bool, default True
         Whether to skip NA.
+    min_count : int, default 0
+        The required number of valid values to perform the operation. If fewer than
+        ``min_count`` non-NA values are present the result will be NA.
     """
     if not skipna:
         if mask.any():
             return libmissing.NA
         else:
+            if _below_min_count(values, None, min_count):
+                return libmissing.NA
             return np.sum(values)
     else:
+        if _below_min_count(values, mask, min_count):
+            return libmissing.NA
+
         if _np_version_under1p17:
             return np.sum(values[~mask])
         else:
             return np.sum(values, where=~mask)
+
+
+def _below_min_count(values, mask, min_count):
+    """
+    Check for the `min_count` keyword. Returns True if below `min_count` (when
+    pd.NA should be returned from the reduction).
+    """
+    if min_count > 0:
+        if mask is None:
+            # no missing values, only check size
+            non_nulls = values.size
+        else:
+            non_nulls = mask.size - mask.sum()
+        if non_nulls < min_count:
+            return True
+    return False
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 0b312fe2f8990..0815e0cdb441c 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -528,13 +528,14 @@ def test_sum_inf(self):
         res = nanops.nansum(arr, axis=1)
         assert np.isinf(res).all()
 
+    @pytest.mark.parametrize("dtype", ["float64", "Int64"])
     @pytest.mark.parametrize("use_bottleneck", [True, False])
     @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
-    def test_empty(self, method, unit, use_bottleneck):
+    def test_empty(self, method, unit, use_bottleneck, dtype):
         with pd.option_context("use_bottleneck", use_bottleneck):
             # GH#9422 / GH#18921
             # Entirely empty
-            s = Series([], dtype=object)
+            s = Series([], dtype=dtype)
             # NA by default
             result = getattr(s, method)()
             assert result == unit
@@ -557,8 +558,14 @@ def test_empty(self, method, unit, use_bottleneck):
             result = getattr(s, method)(skipna=True, min_count=1)
             assert pd.isna(result)
 
+            result = getattr(s, method)(skipna=False, min_count=0)
+            assert result == unit
+
+            result = getattr(s, method)(skipna=False, min_count=1)
+            assert pd.isna(result)
+
             # All-NA
-            s = Series([np.nan])
+            s = Series([np.nan], dtype=dtype)
             # NA by default
             result = getattr(s, method)()
             assert result == unit
@@ -582,7 +589,7 @@ def test_empty(self, method, unit, use_bottleneck):
             assert pd.isna(result)
 
             # Mix of valid, empty
-            s = Series([np.nan, 1])
+            s = Series([np.nan, 1], dtype=dtype)
             # Default
             result = getattr(s, method)()
             assert result == 1.0
@@ -601,22 +608,22 @@ def test_empty(self, method, unit, use_bottleneck):
             result = getattr(s, method)(skipna=True, min_count=0)
             assert result == 1.0
 
-            result = getattr(s, method)(skipna=True, min_count=1)
-            assert result == 1.0
-
             # GH#844 (changed in GH#9422)
             df = DataFrame(np.empty((10, 0)))
             assert (getattr(df, method)(1) == unit).all()
 
-            s = pd.Series([1])
+            s = pd.Series([1], dtype=dtype)
             result = getattr(s, method)(min_count=2)
             assert pd.isna(result)
 
-            s = pd.Series([np.nan])
+            result = getattr(s, method)(skipna=False, min_count=2)
+            assert pd.isna(result)
+
+            s = pd.Series([np.nan], dtype=dtype)
             result = getattr(s, method)(min_count=2)
             assert pd.isna(result)
 
-            s = pd.Series([np.nan, 1])
+            s = pd.Series([np.nan, 1], dtype=dtype)
             result = getattr(s, method)(min_count=2)
             assert pd.isna(result)
 

From 735a741c33b72ea5a97bc3c719f74d052c8106dd Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 13 Feb 2020 12:03:56 +0100
Subject: [PATCH 05/18] fix preserve_dtypes test

---
 pandas/tests/arrays/test_integer.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/test_integer.py b/pandas/tests/arrays/test_integer.py
index da9a74296a486..e34da6e45afc3 100644
--- a/pandas/tests/arrays/test_integer.py
+++ b/pandas/tests/arrays/test_integer.py
@@ -911,7 +911,10 @@ def test_preserve_dtypes(op):
 
     # op
     result = getattr(df.C, op)()
-    assert isinstance(result, np.int64)
+    if op == "sum":
+        assert isinstance(result, np.int64)
+    else:
+        assert isinstance(result, int)
 
     # groupby
     result = getattr(df.groupby("A"), op)()

From 6df454fc16b1e38fd336bf71e71015a545b82774 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 13 Feb 2020 12:06:34 +0100
Subject: [PATCH 06/18] passthrough min_count for boolean as well

---
 pandas/core/arrays/boolean.py              | 2 +-
 pandas/tests/reductions/test_reductions.py | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 7ebcbd5fe8417..12cc1f9faacbd 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -699,7 +699,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         mask = self._mask
 
         if name == "sum":
-            return mask_ops.sum(data, mask, skipna=skipna)
+            return mask_ops.sum(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         if self._hasna:
diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index 0815e0cdb441c..7596b88309592 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -528,7 +528,7 @@ def test_sum_inf(self):
         res = nanops.nansum(arr, axis=1)
         assert np.isinf(res).all()
 
-    @pytest.mark.parametrize("dtype", ["float64", "Int64"])
+    @pytest.mark.parametrize("dtype", ["float64", "Int64", "boolean"])
     @pytest.mark.parametrize("use_bottleneck", [True, False])
     @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
     def test_empty(self, method, unit, use_bottleneck, dtype):
@@ -609,7 +609,7 @@ def test_empty(self, method, unit, use_bottleneck, dtype):
             assert result == 1.0
 
             # GH#844 (changed in GH#9422)
-            df = DataFrame(np.empty((10, 0)))
+            df = DataFrame(np.empty((10, 0)), dtype=dtype)
             assert (getattr(df, method)(1) == unit).all()
 
             s = pd.Series([1], dtype=dtype)

From 5eb48d6d6a19d250c792dec53530aaef1998c90d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 13 Feb 2020 23:16:16 +0100
Subject: [PATCH 07/18] fix comment

---
 pandas/core/ops/mask_ops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
index 53a12019ad167..5e760e3d6da93 100644
--- a/pandas/core/ops/mask_ops.py
+++ b/pandas/core/ops/mask_ops.py
@@ -191,7 +191,7 @@ def sum(
         Numpy array with the values (can be of any dtype that support the
         operation).
     mask : np.ndarray
-        Boolean numpy array (False for missing)
+        Boolean numpy array (True values indicate missing values).
     skipna : bool, default True
         Whether to skip NA.
     min_count : int, default 0

From d2230fd6d3c106ba5c672071c0d671931ddd70e3 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 20 Feb 2020 16:51:28 +0100
Subject: [PATCH 08/18] add object to empty reduction test case

---
 pandas/tests/reductions/test_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py
index bf353bdf5621d..562831a912f3b 100644
--- a/pandas/tests/reductions/test_reductions.py
+++ b/pandas/tests/reductions/test_reductions.py
@@ -533,7 +533,7 @@ def test_sum_inf(self):
         res = nanops.nansum(arr, axis=1)
         assert np.isinf(res).all()
 
-    @pytest.mark.parametrize("dtype", ["float64", "Int64", "boolean"])
+    @pytest.mark.parametrize("dtype", ["float64", "Int64", "boolean", "object"])
     @pytest.mark.parametrize("use_bottleneck", [True, False])
     @pytest.mark.parametrize("method, unit", [("sum", 0.0), ("prod", 1.0)])
     def test_empty(self, method, unit, use_bottleneck, dtype):

From 19ac821644ea857ee98685459e458411850ebe90 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 21 Feb 2020 09:55:45 +0100
Subject: [PATCH 09/18] test platform int

---
 pandas/tests/arrays/test_boolean.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py
index d14d6f3ff0c41..58deaa32290c0 100644
--- a/pandas/tests/arrays/test_boolean.py
+++ b/pandas/tests/arrays/test_boolean.py
@@ -827,7 +827,7 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
         s = s.dropna()
 
     if op in ("sum", "prod"):
-        assert isinstance(getattr(s, op)(), np.int64)
+        assert isinstance(getattr(s, op)(), np.intp)
     elif op in ("min", "max"):
         assert isinstance(getattr(s, op)(), np.bool_)
     else:

From 277643694c4bbeed8bd0df2a29999f8c6c8facbb Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 21 Feb 2020 10:37:34 +0100
Subject: [PATCH 10/18] Test sum separately with platform int

---
 pandas/tests/arrays/test_boolean.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/arrays/test_boolean.py b/pandas/tests/arrays/test_boolean.py
index 58deaa32290c0..0872630b6e979 100644
--- a/pandas/tests/arrays/test_boolean.py
+++ b/pandas/tests/arrays/test_boolean.py
@@ -826,8 +826,10 @@ def test_reductions_return_types(dropna, data, all_numeric_reductions):
     if dropna:
         s = s.dropna()
 
-    if op in ("sum", "prod"):
-        assert isinstance(getattr(s, op)(), np.intp)
+    if op == "sum":
+        assert isinstance(getattr(s, op)(), np.int_)
+    elif op == "prod":
+        assert isinstance(getattr(s, op)(), np.int64)
     elif op in ("min", "max"):
         assert isinstance(getattr(s, op)(), np.bool_)
     else:

From 18d5bfa7a4fe35cf3d1efeb1a8030dcfef8d19dc Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Fri, 20 Mar 2020 20:26:21 +0100
Subject: [PATCH 11/18] share min_count checking helper function with nanops

---
 pandas/core/nanops.py       | 22 +++++++++++++++++-----
 pandas/core/ops/mask_ops.py | 22 ++++------------------
 2 files changed, 21 insertions(+), 23 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index a5e70bd279d21..5b9d2509a3f22 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1260,16 +1260,28 @@ def _maybe_null_out(
                 # GH12941, use None to auto cast null
                 result[null_mask] = None
     elif result is not NaT:
-        if mask is not None:
-            null_mask = mask.size - mask.sum()
-        else:
-            null_mask = np.prod(shape)
-        if null_mask < min_count:
+        if _below_min_count(shape, mask, min_count):
             result = np.nan
 
     return result
 
 
+def _below_min_count(shape, mask, min_count):
+    """
+    Check for the `min_count` keyword. Returns True if below `min_count` (when
+    missing value should be returned from the reduction).
+    """
+    if min_count > 0:
+        if mask is None:
+            # no missing values, only check size
+            non_nulls = np.prod(shape)
+        else:
+            non_nulls = mask.size - mask.sum()
+        if non_nulls < min_count:
+            return True
+    return False
+
+
 def _zero_out_fperr(arg):
     # #18044 reference this behavior to fix rolling skew/kurt issue
     if isinstance(arg, np.ndarray):
diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
index 5e760e3d6da93..90e4bf1dece83 100644
--- a/pandas/core/ops/mask_ops.py
+++ b/pandas/core/ops/mask_ops.py
@@ -8,6 +8,8 @@
 from pandas._libs import lib, missing as libmissing
 from pandas.compat.numpy import _np_version_under1p17
 
+from pandas.core.nanops import _below_min_count
+
 
 def kleene_or(
     left: Union[bool, np.ndarray],
@@ -202,30 +204,14 @@ def sum(
         if mask.any():
             return libmissing.NA
         else:
-            if _below_min_count(values, None, min_count):
+            if _below_min_count(values.shape, None, min_count):
                 return libmissing.NA
             return np.sum(values)
     else:
-        if _below_min_count(values, mask, min_count):
+        if _below_min_count(values.shape, mask, min_count):
             return libmissing.NA
 
         if _np_version_under1p17:
             return np.sum(values[~mask])
         else:
             return np.sum(values, where=~mask)
-
-
-def _below_min_count(values, mask, min_count):
-    """
-    Check for the `min_count` keyword. Returns True if below `min_count` (when
-    pd.NA should be returned from the reduction).
-    """
-    if min_count > 0:
-        if mask is None:
-            # no missing values, only check size
-            non_nulls = values.size
-        else:
-            non_nulls = mask.size - mask.sum()
-        if non_nulls < min_count:
-            return True
-    return False

From 4df858fdeab2c624d059a2a22d4d6dadaf7c131a Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Mon, 23 Mar 2020 10:07:59 +0100
Subject: [PATCH 12/18] type + add docstring for min_count

---
 pandas/core/nanops.py | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index ea4333c1e8b3e..c0afb6285715a 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1238,7 +1238,7 @@ def _maybe_null_out(
     result: np.ndarray,
     axis: Optional[int],
     mask: Optional[np.ndarray],
-    shape: Tuple,
+    shape: Tuple[int],
     min_count: int = 1,
 ) -> float:
     """
@@ -1266,10 +1266,23 @@ def _maybe_null_out(
     return result
 
 
-def _below_min_count(shape, mask, min_count):
+def _below_min_count(shape: Tuple[int], mask: Optional[np.ndarray], min_count: int):
     """
     Check for the `min_count` keyword. Returns True if below `min_count` (when
     missing value should be returned from the reduction).
+
+    Parameters
+    ----------
+    shape : tuple
+        The shape of the values (`values.shape`).
+    mask : ndarray or None
+        Boolean numpy array (typically of same shape as `shape`) or None.
+    min_count : int
+        Keyword passed through from sum/prod call.
+
+    Returns
+    -------
+    bool
     """
     if min_count > 0:
         if mask is None:

From 76c5149799dfa00cf5258237f69529010dfee492 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 25 Mar 2020 14:03:26 +0100
Subject: [PATCH 13/18] move sum algo from ops to array_algos

---
 pandas/core/array_algos/masked_reductions.py | 47 ++++++++++++++++++++
 pandas/core/arrays/boolean.py                |  4 +-
 pandas/core/arrays/integer.py                |  5 ++-
 pandas/core/ops/mask_ops.py                  | 39 ----------------
 4 files changed, 52 insertions(+), 43 deletions(-)
 create mode 100644 pandas/core/array_algos/masked_reductions.py

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
new file mode 100644
index 0000000000000..d9fa4cf3fb3f5
--- /dev/null
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -0,0 +1,47 @@
+"""
+masked_reductions.py is for reduction algorithms using a mask-based approach
+for missing values.
+"""
+
+import numpy as np
+
+from pandas._libs import missing as libmissing
+from pandas.compat.numpy import _np_version_under1p17
+
+from pandas.core.nanops import _below_min_count
+
+
+def sum(
+    values: np.ndarray, mask: np.ndarray, skipna: bool, min_count: int = 0,
+):
+    """
+    Sum for 1D masked array.
+
+    Parameters
+    ----------
+    values : np.ndarray
+        Numpy array with the values (can be of any dtype that supports the
+        operation).
+    mask : np.ndarray
+        Boolean numpy array (True values indicate missing values).
+    skipna : bool, default True
+        Whether to skip NA.
+    min_count : int, default 0
+        The required number of valid values to perform the operation. If fewer than
+        ``min_count`` non-NA values are present the result will be NA.
+    """
+    if not skipna:
+        if mask.any():
+            return libmissing.NA
+        else:
+            if _below_min_count(values.shape, None, min_count):
+                return libmissing.NA
+            return np.sum(values)
+    else:
+        if _below_min_count(values.shape, mask, min_count):
+            return libmissing.NA
+
+        if _np_version_under1p17:
+            return np.sum(values[~mask])
+        else:
+            return np.sum(values, where=~mask)
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index dbb6cb212cb97..442d4ca8cef6d 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -27,8 +27,8 @@
 from pandas.core.dtypes.missing import isna, notna
 
 from pandas.core import nanops, ops
+from pandas.core.array_algos import masked_reductions
 from pandas.core.indexers import check_array_indexer
-from pandas.core.ops import mask_ops
 
 from .masked import BaseMaskedArray
 
@@ -697,7 +697,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         mask = self._mask
 
         if name == "sum":
-            return mask_ops.sum(data, mask, skipna=skipna, **kwargs)
+            return masked_reductions.sum(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         if self._hasna:
diff --git a/pandas/core/arrays/integer.py b/pandas/core/arrays/integer.py
index 67c06bd5e9b08..4f3c68aa03b16 100644
--- a/pandas/core/arrays/integer.py
+++ b/pandas/core/arrays/integer.py
@@ -27,9 +27,10 @@
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import nanops, ops
+from pandas.core.array_algos import masked_reductions
 import pandas.core.common as com
 from pandas.core.indexers import check_array_indexer
-from pandas.core.ops import invalid_comparison, mask_ops
+from pandas.core.ops import invalid_comparison
 from pandas.core.ops.common import unpack_zerodim_and_defer
 from pandas.core.tools.numeric import to_numeric
 
@@ -561,7 +562,7 @@ def _reduce(self, name: str, skipna: bool = True, **kwargs):
         mask = self._mask
 
         if name == "sum":
-            return mask_ops.sum(data, mask, skipna=skipna, **kwargs)
+            return masked_reductions.sum(data, mask, skipna=skipna, **kwargs)
 
         # coerce to a nan-aware float if needed
         # (we explicitly use NaN within reductions)
diff --git a/pandas/core/ops/mask_ops.py b/pandas/core/ops/mask_ops.py
index 90e4bf1dece83..8fb81faf313d7 100644
--- a/pandas/core/ops/mask_ops.py
+++ b/pandas/core/ops/mask_ops.py
@@ -6,9 +6,6 @@
 import numpy as np
 
 from pandas._libs import lib, missing as libmissing
-from pandas.compat.numpy import _np_version_under1p17
-
-from pandas.core.nanops import _below_min_count
 
 
 def kleene_or(
@@ -179,39 +176,3 @@ def kleene_and(
 def raise_for_nan(value, method):
     if lib.is_float(value) and np.isnan(value):
         raise ValueError(f"Cannot perform logical '{method}' with floating NaN")
-
-
-def sum(
-    values: np.ndarray, mask: np.ndarray, skipna: bool, min_count: int = 0,
-):
-    """
-    Sum for 1D masked array.
-
-    Parameters
-    ----------
-    values : np.ndarray
-        Numpy array with the values (can be of any dtype that support the
-        operation).
-    mask : np.ndarray
-        Boolean numpy array (True values indicate missing values).
-    skipna : bool, default True
-        Whether to skip NA.
-    min_count : int, default 0
-        The required number of valid values to perform the operation. If fewer than
-        ``min_count`` non-NA values are present the result will be NA.
-    """
-    if not skipna:
-        if mask.any():
-            return libmissing.NA
-        else:
-            if _below_min_count(values.shape, None, min_count):
-                return libmissing.NA
-            return np.sum(values)
-    else:
-        if _below_min_count(values.shape, mask, min_count):
-            return libmissing.NA
-
-        if _np_version_under1p17:
-            return np.sum(values[~mask])
-        else:
-            return np.sum(values, where=~mask)

From b2162dc9f1e88c5670b7cb0f3065076c470fe097 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 26 Mar 2020 10:22:33 +0100
Subject: [PATCH 14/18] add Int64/boolean to some benchmarks

---
 asv_bench/benchmarks/series_methods.py | 25 ++++++++++++++-----------
 asv_bench/benchmarks/stat_ops.py       | 10 ++++++++--
 2 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/asv_bench/benchmarks/series_methods.py b/asv_bench/benchmarks/series_methods.py
index 57c625ced8a43..d78419c12ce0d 100644
--- a/asv_bench/benchmarks/series_methods.py
+++ b/asv_bench/benchmarks/series_methods.py
@@ -223,27 +223,27 @@ def time_series_datetimeindex_repr(self):
 
 class All:
 
-    params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
-    param_names = ["N", "case"]
+    params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
+    param_names = ["N", "case", "dtype"]
 
-    def setup(self, N, case):
+    def setup(self, N, case, dtype):
         val = case != "fast"
-        self.s = Series([val] * N)
+        self.s = Series([val] * N, dtype=dtype)
 
-    def time_all(self, N, case):
+    def time_all(self, N, case, dtype):
         self.s.all()
 
 
 class Any:
 
-    params = [[10 ** 3, 10 ** 6], ["fast", "slow"]]
-    param_names = ["N", "case"]
+    params = [[10 ** 3, 10 ** 6], ["fast", "slow"], ["bool", "boolean"]]
+    param_names = ["N", "case", "dtype"]
 
-    def setup(self, N, case):
+    def setup(self, N, case, dtype):
         val = case == "fast"
-        self.s = Series([val] * N)
+        self.s = Series([val] * N, dtype=dtype)
 
-    def time_any(self, N, case):
+    def time_any(self, N, case, dtype):
         self.s.any()
 
 
@@ -265,11 +265,14 @@ class NanOps:
             "prod",
         ],
         [10 ** 3, 10 ** 6],
-        ["int8", "int32", "int64", "float64"],
+        ["int8", "int32", "int64", "float64", "Int64", "boolean"],
     ]
     param_names = ["func", "N", "dtype"]
 
     def setup(self, func, N, dtype):
+        if func == "argmax" and dtype in {"Int64", "boolean"}:
+            # Skip argmax for nullable dtypes since this doesn't work yet (GH-24382)
+            raise NotImplementedError
         self.s = Series([1] * N, dtype=dtype)
         self.func = getattr(self.s, func)
 
diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py
index ec67394e55a1e..ebbd3c9eddfdb 100644
--- a/asv_bench/benchmarks/stat_ops.py
+++ b/asv_bench/benchmarks/stat_ops.py
@@ -7,11 +7,17 @@
 
 class FrameOps:
 
-    params = [ops, ["float", "int"], [0, 1]]
+    params = [ops, ["float", "int", "Int64"], [0, 1]]
     param_names = ["op", "dtype", "axis"]
 
     def setup(self, op, dtype, axis):
-        df = pd.DataFrame(np.random.randn(100000, 4)).astype(dtype)
+        if op == "mad" and dtype == "Int64" and axis == 1:
+            # GH-33036
+            raise NotImplementedError
+        values = np.random.randn(100000, 4)
+        if dtype == "Int64":
+            values = values.astype(int)
+        df = pd.DataFrame(values).astype(dtype)
         self.df_func = getattr(df, op)
 
     def time_op(self, op, dtype, axis):

From d4746f5586219b5ecd803c1511a2e0adaf8f0cd2 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 26 Mar 2020 10:24:00 +0100
Subject: [PATCH 15/18] add whatsnew

---
 doc/source/whatsnew/v1.1.0.rst | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index 8cb80c7c92f8e..4cfd47894a776 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -229,6 +229,8 @@ Performance improvements
   sparse values from ``scipy.sparse`` matrices using the
   :meth:`DataFrame.sparse.from_spmatrix` constructor (:issue:`32821`,
   :issue:`32825`,  :issue:`32826`, :issue:`32856`, :issue:`32858`).
+- Performance improvement in :meth:`Series.sum` for nullable (integer and boolean) dtypes (:issue:`30982`).
+
 
 .. ---------------------------------------------------------------------------
 

From d9c2cbfcd527242db4bf67622a14fe524021fedb Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 26 Mar 2020 11:20:31 +0100
Subject: [PATCH 16/18] add skipna default in function signature

---
 pandas/core/array_algos/masked_reductions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
index d9fa4cf3fb3f5..44b648b762a86 100644
--- a/pandas/core/array_algos/masked_reductions.py
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -12,7 +12,7 @@
 
 
 def sum(
-    values: np.ndarray, mask: np.ndarray, skipna: bool, min_count: int = 0,
+    values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0,
 ):
     """
     Sum for 1D masked array.

From f8705c269ad5d5432b84bf08c8fe043933b695e4 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 26 Mar 2020 17:16:34 +0100
Subject: [PATCH 17/18] update type hint + deprivatize

---
 pandas/core/array_algos/masked_reductions.py | 6 +++---
 pandas/core/nanops.py                        | 6 ++++--
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/pandas/core/array_algos/masked_reductions.py b/pandas/core/array_algos/masked_reductions.py
index 44b648b762a86..0fb2605b554c2 100644
--- a/pandas/core/array_algos/masked_reductions.py
+++ b/pandas/core/array_algos/masked_reductions.py
@@ -8,7 +8,7 @@
 from pandas._libs import missing as libmissing
 from pandas.compat.numpy import _np_version_under1p17
 
-from pandas.core.nanops import _below_min_count
+from pandas.core.nanops import check_below_min_count
 
 
 def sum(
@@ -34,11 +34,11 @@ def sum(
         if mask.any():
             return libmissing.NA
         else:
-            if _below_min_count(values.shape, None, min_count):
+            if check_below_min_count(values.shape, None, min_count):
                 return libmissing.NA
             return np.sum(values)
     else:
-        if _below_min_count(values.shape, mask, min_count):
+        if check_below_min_count(values.shape, mask, min_count):
             return libmissing.NA
 
         if _np_version_under1p17:
diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index c0afb6285715a..46eec67b0f428 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1260,13 +1260,15 @@ def _maybe_null_out(
                 # GH12941, use None to auto cast null
                 result[null_mask] = None
     elif result is not NaT:
-        if _below_min_count(shape, mask, min_count):
+        if check_below_min_count(shape, mask, min_count):
             result = np.nan
 
     return result
 
 
-def _below_min_count(shape: Tuple[int], mask: Optional[np.ndarray], min_count: int):
+def check_below_min_count(
+    shape: Tuple[int, ...], mask: Optional[np.ndarray], min_count: int
+):
     """
     Check for the `min_count` keyword. Returns True if below `min_count` (when
     missing value should be returned from the reduction).

From 1a43e1058cdc12fc0a05e7a990a16e11f463e45d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 26 Mar 2020 17:17:44 +0100
Subject: [PATCH 18/18] update another type hint

---
 pandas/core/nanops.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py
index 46eec67b0f428..822ab775e7e46 100644
--- a/pandas/core/nanops.py
+++ b/pandas/core/nanops.py
@@ -1238,7 +1238,7 @@ def _maybe_null_out(
     result: np.ndarray,
     axis: Optional[int],
     mask: Optional[np.ndarray],
-    shape: Tuple[int],
+    shape: Tuple[int, ...],
     min_count: int = 1,
 ) -> float:
     """