From c87ce472373922c15981be47210d97f3049aaf47 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 6 Feb 2020 13:01:44 -0600 Subject: [PATCH 01/24] Add reduction tests --- pandas/tests/arrays/string_/test_string.py | 29 ++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 5e2f14af341ab..5700850e79c85 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -215,7 +215,6 @@ def test_from_sequence_no_mutate(copy): @pytest.mark.parametrize("skipna", [True, False]) -@pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce(skipna): arr = pd.Series(["a", "b", "c"], dtype="string") result = arr.sum(skipna=skipna) @@ -223,7 +222,6 @@ def test_reduce(skipna): @pytest.mark.parametrize("skipna", [True, False]) -@pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce_missing(skipna): arr = pd.Series([None, "a", None, "b", "c", None], dtype="string") result = arr.sum(skipna=skipna) @@ -269,3 +267,30 @@ def test_value_counts_na(): result = arr.value_counts(dropna=True) expected = pd.Series([2, 1], index=["a", "b"], dtype="Int64") tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("func", ["min", "max", "sum"]) +def test_reduction(func): + strings = ["x", "y", "z"] + arr = pd.array(strings, dtype="string") + ser = pd.Series(strings) + + result = getattr(arr, func)() + expected = getattr(ser, func)() + + assert result == expected + + +@pytest.mark.parametrize("func", ["min", "max"]) +@pytest.mark.parametrize("skipna", [True, False]) +def test_reduction_with_na(func, skipna): + arr = pd.Series([pd.NA, "y", "z"], dtype="string") + ser = pd.Series(["y", "z"]) + + result = getattr(arr, func)(skipna=skipna) + + if skipna: + expected = getattr(ser, func)() + assert result == expected + else: + assert result is pd.NA From 5365fa1f946e6faa5fddde03f8c661642254b8ce Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 6 Feb 2020 13:45:21 -0600 Subject: [PATCH 02/24] Implement a few reductions --- pandas/core/arrays/string_.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index fcccd8cc14d6b..efa80367cf907 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -274,7 +274,16 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy) def _reduce(self, name, skipna=True, **kwargs): - raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + if name in ["min", "max", "sum"]: + na_mask = isna(self) + if not na_mask.any(): + return getattr(self, name)(skipna=False, **kwargs) + elif skipna: + return getattr(self[~na_mask], name)(skipna=False, **kwargs) + else: + return libmissing.NA + else: + raise TypeError(f"Cannot perform reduction '{name}' with string dtype") def value_counts(self, dropna=False): from pandas import value_counts From 460030f9a51c16626653fb577493d1bcb8411fd0 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 6 Feb 2020 13:47:20 -0600 Subject: [PATCH 03/24] Fix test --- pandas/tests/arrays/string_/test_string.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 5700850e79c85..edf43aa4ee2e6 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -269,7 +269,7 @@ def test_value_counts_na(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("func", ["min", "max", "sum"]) +@pytest.mark.parametrize("func", ["min", "max"]) def test_reduction(func): strings = ["x", "y", "z"] arr = pd.array(strings, dtype="string") From 6497e165c5905c04e30f9497b437db68b575551a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 6 Feb 2020 13:51:09 -0600 Subject: [PATCH 04/24] Add whatsnew --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 07a837829c384..ff17177b95eaa 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -25,7 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed issue where taking the minimum, maximum, or sum of a ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) - .. --------------------------------------------------------------------------- From c052febdf0fdb9b285ec5a6157b2f7309f9530bd Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 6 Feb 2020 13:59:34 -0600 Subject: [PATCH 05/24] skipna --- pandas/tests/arrays/string_/test_string.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index edf43aa4ee2e6..32e072db9b4e5 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -270,13 +270,14 @@ def test_value_counts_na(): @pytest.mark.parametrize("func", ["min", "max"]) +@pytest.mark.parametrize("skipna", [True, False]) def test_reduction(func): strings = ["x", "y", "z"] - arr = pd.array(strings, dtype="string") + arr = pd.Series(strings, dtype="string") ser = pd.Series(strings) - result = getattr(arr, func)() - expected = getattr(ser, func)() + result = getattr(arr, func)(skipna=skipna) + expected = getattr(ser, func)(skipna=skipna) assert result == expected From 4312b4ffb2a040cdade0a61dd70ccb010fe6be30 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 11 Feb 2020 14:56:38 -0600 Subject: [PATCH 06/24] Hardcode expected output --- pandas/tests/arrays/string_/test_string.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 32e072db9b4e5..2137c29b4629f 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -271,13 +271,10 @@ def test_value_counts_na(): @pytest.mark.parametrize("func", ["min", "max"]) @pytest.mark.parametrize("skipna", [True, False]) -def test_reduction(func): - strings = ["x", "y", "z"] - arr = pd.Series(strings, dtype="string") - ser = pd.Series(strings) - - result = getattr(arr, func)(skipna=skipna) - expected = getattr(ser, func)(skipna=skipna) +def test_reduction(func, skipna): + s = pd.Series(["x", "y", "z"], dtype="string") + result = getattr(s, func)(skipna=skipna) + expected = "x" if func == "min" else "z" assert result == expected @@ -285,13 +282,11 @@ def test_reduction(func): @pytest.mark.parametrize("func", ["min", "max"]) @pytest.mark.parametrize("skipna", [True, False]) def test_reduction_with_na(func, skipna): - arr = pd.Series([pd.NA, "y", "z"], dtype="string") - ser = pd.Series(["y", "z"]) - - result = getattr(arr, func)(skipna=skipna) + s = pd.Series([pd.NA, "y", "z"], dtype="string") + result = getattr(s, func)(skipna=skipna) if skipna: - expected = getattr(ser, func)() + expected = "y" if func == "min" else "z" assert result == expected else: assert result is pd.NA From 214995fe0ffeff8153916bbd36c826fd15894dae Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Tue, 11 Feb 2020 19:10:37 -0600 Subject: [PATCH 07/24] Skip tests --- pandas/tests/extension/base/reduce.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 6f433d659575a..2b32b82d1b632 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -25,6 +25,13 @@ class BaseNoReduceTests(BaseReduceTests): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + if isinstance(data, pd.arrays.StringArray) and all_numeric_reductions in [ + "min", + "max", + "sum", + ]: + pytest.skip("These reductions are implemented") + op_name = all_numeric_reductions s = pd.Series(data) From 4082ae3969de8e9c09e606b965fa2b172a573b5a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 15:39:24 -0600 Subject: [PATCH 08/24] No else --- pandas/core/arrays/string_.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index efa80367cf907..f0074a395b9d3 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -282,8 +282,8 @@ def _reduce(self, name, skipna=True, **kwargs): return getattr(self[~na_mask], name)(skipna=False, **kwargs) else: return libmissing.NA - else: - raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + + raise TypeError(f"Cannot perform reduction '{name}' with string dtype") def value_counts(self, dropna=False): from pandas import value_counts From 12a232344a70239e16e4bc3a92f24dcfd06c9df4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 21:19:59 -0600 Subject: [PATCH 09/24] Modify nanminmax to better handle object dtype --- pandas/core/nanops.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 6115c4af41b25..0b3bc05ea371d 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -854,6 +854,8 @@ def reduction( mask: Optional[np.ndarray] = None, ) -> Dtype: + na_mask = isna(values) + values, mask, dtype, dtype_max, fill_value = _get_values( values, skipna, fill_value_typ=fill_value_typ, mask=mask ) @@ -864,6 +866,12 @@ def reduction( result.fill(np.nan) except (AttributeError, TypeError, ValueError): result = np.nan + elif is_object_dtype(dtype) and na_mask.any(): + # Need to explicitly mask NA values for object dtypes + if skipna: + result = getattr(values[~na_mask], meth)(axis) + else: + return np.nan else: result = getattr(values, meth)(axis) From 4a61b81f16f64a0b8d3a1f8a1de4c79263b23e62 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 21:21:30 -0600 Subject: [PATCH 10/24] Delegate to min and max methods --- pandas/core/arrays/string_.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f0074a395b9d3..53c8b50941de4 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -274,17 +274,22 @@ def astype(self, dtype, copy=True): return super().astype(dtype, copy) def _reduce(self, name, skipna=True, **kwargs): - if name in ["min", "max", "sum"]: - na_mask = isna(self) - if not na_mask.any(): - return getattr(self, name)(skipna=False, **kwargs) - elif skipna: - return getattr(self[~na_mask], name)(skipna=False, **kwargs) - else: - return libmissing.NA + if name in ["min", "max"]: + return getattr(self, name)( + skipna=skipna, **kwargs + ) raise TypeError(f"Cannot perform reduction '{name}' with string dtype") + def min(self, axis=None, out=None, keepdims=False, skipna=True): + nv.validate_min((), dict(out=out, keepdims=keepdims)) + return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + + def max(self, axis=None, out=None, keepdims=False, skipna=True): + nv.validate_max((), dict(out=out, keepdims=keepdims)) + return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) + + def value_counts(self, dropna=False): from pandas import value_counts From c6465e5869b6109f2fdc57dc808bff30f6d3884f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 21:29:56 -0600 Subject: [PATCH 11/24] Fixup nanops --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 0b3bc05ea371d..a72f335aa1ee1 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -871,7 +871,7 @@ def reduction( if skipna: result = getattr(values[~na_mask], meth)(axis) else: - return np.nan + result = np.nan else: result = getattr(values, meth)(axis) From 5c1a5fbdf11372000fc7e7c8fd9d7c7956c4211a Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 21:30:37 -0600 Subject: [PATCH 12/24] Import and clean up --- pandas/core/arrays/string_.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index 53c8b50941de4..c30ac82daf934 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -4,6 +4,7 @@ import numpy as np from pandas._libs import lib, missing as libmissing +from pandas.compat.numpy import function as nv from pandas.core.dtypes.base import ExtensionDtype from pandas.core.dtypes.common import pandas_dtype @@ -12,7 +13,7 @@ from pandas.core.dtypes.inference import is_array_like from pandas import compat -from pandas.core import ops +from pandas.core import nanops, ops from pandas.core.arrays import PandasArray from pandas.core.construction import extract_array from pandas.core.indexers import check_array_indexer @@ -275,20 +276,19 @@ def astype(self, dtype, copy=True): def _reduce(self, name, skipna=True, **kwargs): if name in ["min", "max"]: - return getattr(self, name)( - skipna=skipna, **kwargs - ) + return getattr(self, name)(skipna=skipna, **kwargs) raise TypeError(f"Cannot perform reduction '{name}' with string dtype") def min(self, axis=None, out=None, keepdims=False, skipna=True): nv.validate_min((), dict(out=out, keepdims=keepdims)) - return nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + return libmissing.NA if isna(result) else result def max(self, axis=None, out=None, keepdims=False, skipna=True): nv.validate_max((), dict(out=out, keepdims=keepdims)) - return nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) - + result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) + return libmissing.NA if isna(result) else result def value_counts(self, dropna=False): from pandas import value_counts From ca9965077705999e51b6846851a30f813e781147 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 21:35:02 -0600 Subject: [PATCH 13/24] Update tests --- pandas/tests/arrays/string_/test_string.py | 2 ++ pandas/tests/extension/base/reduce.py | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 2137c29b4629f..a582f152a3287 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -215,6 +215,7 @@ def test_from_sequence_no_mutate(copy): @pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce(skipna): arr = pd.Series(["a", "b", "c"], dtype="string") result = arr.sum(skipna=skipna) @@ -222,6 +223,7 @@ def test_reduce(skipna): @pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.xfail(reason="Not implemented StringArray.sum") def test_reduce_missing(skipna): arr = pd.Series([None, "a", None, "b", "c", None], dtype="string") result = arr.sum(skipna=skipna) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 2b32b82d1b632..a42dcd2ca72f5 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -28,7 +28,6 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): if isinstance(data, pd.arrays.StringArray) and all_numeric_reductions in [ "min", "max", - "sum", ]: pytest.skip("These reductions are implemented") From 18800e101a27c824a822be013ba48f1b97f3221b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Thu, 13 Feb 2020 21:38:59 -0600 Subject: [PATCH 14/24] Update whatsnew --- doc/source/whatsnew/v1.0.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index e06bd292ef2fc..625e11721111f 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -30,7 +30,7 @@ Bug fixes **ExtensionArray** -- Fixed issue where taking the minimum, maximum, or sum of a ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) +- Fixed issue where taking the minimum or maximum of a ``StringArray`` or ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) - **Categorical** From 3eddb73fc81b9642abf8386797776217f72f9132 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 08:23:11 -0600 Subject: [PATCH 15/24] Limit to values.ndim == 1 --- pandas/core/nanops.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index a72f335aa1ee1..3839c26dc3214 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -866,7 +866,7 @@ def reduction( result.fill(np.nan) except (AttributeError, TypeError, ValueError): result = np.nan - elif is_object_dtype(dtype) and na_mask.any(): + elif is_object_dtype(dtype) and values.ndim == 1 and na_mask.any(): # Need to explicitly mask NA values for object dtypes if skipna: result = getattr(values[~na_mask], meth)(axis) From 0c1a2a354fd2dc12f08dae17a905f9bf960b8557 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 08:29:55 -0600 Subject: [PATCH 16/24] Sort index in test --- pandas/tests/frame/test_apply.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/frame/test_apply.py b/pandas/tests/frame/test_apply.py index fe6abef97acc4..b982ac7d7f2ed 100644 --- a/pandas/tests/frame/test_apply.py +++ b/pandas/tests/frame/test_apply.py @@ -1406,8 +1406,8 @@ def test_apply_datetime_tz_issue(self): @pytest.mark.parametrize("method", ["min", "max", "sum"]) def test_consistency_of_aggregates_of_columns_with_missing_values(self, df, method): # GH 16832 - none_in_first_column_result = getattr(df[["A", "B"]], method)() - none_in_second_column_result = getattr(df[["B", "A"]], method)() + none_in_first_column_result = getattr(df[["A", "B"]], method)().sort_index() + none_in_second_column_result = getattr(df[["B", "A"]], method)().sort_index() tm.assert_series_equal( none_in_first_column_result, none_in_second_column_result From 5714c06464167095e3743d911a9faf43218fe4c5 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 08:58:58 -0600 Subject: [PATCH 17/24] Box in Series --- pandas/tests/arrays/string_/test_string.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index a582f152a3287..cff5278a23b0a 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -283,9 +283,14 @@ def test_reduction(func, skipna): @pytest.mark.parametrize("func", ["min", "max"]) @pytest.mark.parametrize("skipna", [True, False]) -def test_reduction_with_na(func, skipna): - s = pd.Series([pd.NA, "y", "z"], dtype="string") - result = getattr(s, func)(skipna=skipna) +@pytest.mark.parametrize("box_in_series", [True, False]) +def test_reduction_with_na(func, skipna, box_in_series): + data = pd.array([pd.NA, "y", "z"], dtype="string") + + if box_in_series: + data = pd.Series(data) + + result = getattr(data, func)(skipna=skipna) if skipna: expected = "y" if func == "min" else "z" From 5e01c3f62a6075ef2a775d1af5da4c3f7c53af6b Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 10:52:29 -0600 Subject: [PATCH 18/24] Add Series min / max test --- pandas/tests/reductions/test_reductions.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 0b312fe2f8990..f1ef3d7f3d681 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -168,6 +168,19 @@ def test_numpy_reduction_with_tz_aware_dtype(self, tz_aware_fixture, func): result = getattr(np, func)(expected, expected) tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("func", ["min", "max"]) + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_object_with_na(self, func, skipna): + # https://github.com/pandas-dev/pandas/issues/18588 + s = pd.Series(np.array(["a", "b", np.nan], dtype=object)) + result = getattr(s, func)(skipna=skipna) + + if skipna: + expected = "a" if func == "min" else "b" + assert result == expected + else: + assert result is np.nan + class TestIndexReductions: # Note: the name TestIndexReductions indicates these tests From ed01138bef57e03d0bda3aea5d83a85bec41dd7f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 14:48:19 -0600 Subject: [PATCH 19/24] Move tests --- pandas/tests/extension/base/reduce.py | 6 ------ pandas/tests/extension/test_string.py | 14 +++++++++++++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index a42dcd2ca72f5..6f433d659575a 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -25,12 +25,6 @@ class BaseNoReduceTests(BaseReduceTests): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): - if isinstance(data, pd.arrays.StringArray) and all_numeric_reductions in [ - "min", - "max", - ]: - pytest.skip("These reductions are implemented") - op_name = all_numeric_reductions s = pd.Series(data) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 86aed671f1b88..2e34c73ea6721 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -77,7 +77,19 @@ class TestMissing(base.BaseMissingTests): class TestNoReduce(base.BaseNoReduceTests): - pass + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): + if isinstance(data, pd.arrays.StringArray) and all_numeric_reductions in [ + "min", + "max", + ]: + pytest.skip("These reductions are implemented") + + op_name = all_numeric_reductions + s = pd.Series(data) + + with pytest.raises(TypeError): + getattr(s, op_name)(skipna=skipna) class TestMethods(base.BaseMethodsTests): From 1128950d482050b6c44366fd8e6bf412ae6d6a2f Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 21:02:57 -0600 Subject: [PATCH 20/24] Pass in mask and patch _maybe_get_mask --- pandas/core/arrays/string_.py | 4 ++-- pandas/core/nanops.py | 14 +++++++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index c30ac82daf934..7df4b6d297cb9 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -282,12 +282,12 @@ def _reduce(self, name, skipna=True, **kwargs): def min(self, axis=None, out=None, keepdims=False, skipna=True): nv.validate_min((), dict(out=out, keepdims=keepdims)) - result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmin(self._ndarray, axis=axis, skipna=skipna, mask=isna(self)) return libmissing.NA if isna(result) else result def max(self, axis=None, out=None, keepdims=False, skipna=True): nv.validate_max((), dict(out=out, keepdims=keepdims)) - result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna) + result = nanops.nanmax(self._ndarray, axis=axis, skipna=skipna, mask=isna(self)) return libmissing.NA if isna(result) else result def value_counts(self, dropna=False): diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index 3839c26dc3214..e2f2f0d652ae0 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -228,7 +228,9 @@ def _maybe_get_mask( # Boolean data cannot contain nulls, so signal via mask being None return None - if skipna: + if skipna or is_object_dtype(values.dtype): + # The masking in nanminmax does not work for object dtype, so always + # compute a mask for use later when skipna is False mask = isna(values) return mask @@ -854,8 +856,6 @@ def reduction( mask: Optional[np.ndarray] = None, ) -> Dtype: - na_mask = isna(values) - values, mask, dtype, dtype_max, fill_value = _get_values( values, skipna, fill_value_typ=fill_value_typ, mask=mask ) @@ -866,10 +866,14 @@ def reduction( result.fill(np.nan) except (AttributeError, TypeError, ValueError): result = np.nan - elif is_object_dtype(dtype) and values.ndim == 1 and na_mask.any(): + elif ( + is_object_dtype(dtype) + and values.ndim == 1 + and mask.any() + ): # Need to explicitly mask NA values for object dtypes if skipna: - result = getattr(values[~na_mask], meth)(axis) + result = getattr(values[~mask], meth)(axis) else: result = np.nan else: From 7c0f05dbbfe578f327816b2692372807c830e526 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 21:19:23 -0600 Subject: [PATCH 21/24] Update whatsnew --- doc/source/whatsnew/v1.0.2.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 625e11721111f..81f33051cb1e9 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -30,7 +30,7 @@ Bug fixes **ExtensionArray** -- Fixed issue where taking the minimum or maximum of a ``StringArray`` or ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) +- Fixed issue where taking :meth:`min` or :meth:`max` of a ``StringArray`` or ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) - **Categorical** @@ -42,7 +42,9 @@ Bug fixes - Using ``pd.NA`` with :meth:`DataFrame.to_json` now correctly outputs a null value instead of an empty object (:issue:`31615`) - Fixed bug in parquet roundtrip with nullable unsigned integer dtypes (:issue:`31896`). +**Reduction** +- Fixed bug where :meth:`Series.min` and :meth:`Series.max` would raise a ``TypeError`` in the presence of ``NaN`` for object dtype. (:issue:`18588`) **Experimental dtypes** From 7db205236dd3b79b4a790cdae42998d78aeaf7d4 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Fri, 14 Feb 2020 21:51:09 -0600 Subject: [PATCH 22/24] Try to make mypy happy --- pandas/core/nanops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/nanops.py b/pandas/core/nanops.py index e2f2f0d652ae0..2bf2c69db8a0d 100644 --- a/pandas/core/nanops.py +++ b/pandas/core/nanops.py @@ -869,6 +869,7 @@ def reduction( elif ( is_object_dtype(dtype) and values.ndim == 1 + and mask is not None and mask.any() ): # Need to explicitly mask NA values for object dtypes From e19bbe1f5888439e70d0a7a0aeb96224efdb627f Mon Sep 17 00:00:00 2001 From: Daniel Saxton <2658661+dsaxton@users.noreply.github.com> Date: Sat, 15 Feb 2020 07:46:08 -0600 Subject: [PATCH 23/24] Update doc/source/whatsnew/v1.0.2.rst Co-Authored-By: Joris Van den Bossche --- doc/source/whatsnew/v1.0.2.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.2.rst b/doc/source/whatsnew/v1.0.2.rst index 81f33051cb1e9..ddb5da60f69af 100644 --- a/doc/source/whatsnew/v1.0.2.rst +++ b/doc/source/whatsnew/v1.0.2.rst @@ -30,7 +30,7 @@ Bug fixes **ExtensionArray** -- Fixed issue where taking :meth:`min` or :meth:`max` of a ``StringArray`` or ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) +- Fixed issue where taking ``min`` or ``max`` of a ``StringArray`` or ``Series`` with ``StringDtype`` type would raise. (:issue:`31746`) - **Categorical** @@ -58,4 +58,4 @@ Bug fixes Contributors ~~~~~~~~~~~~ -.. contributors:: v1.0.1..v1.0.2|HEAD \ No newline at end of file +.. contributors:: v1.0.1..v1.0.2|HEAD From df99b4f30ef2c0c96ab3d100ac5a946393404555 Mon Sep 17 00:00:00 2001 From: Daniel Saxton Date: Sat, 15 Feb 2020 07:54:39 -0600 Subject: [PATCH 24/24] Add comment --- pandas/tests/extension/test_string.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 2e34c73ea6721..5a6522efd21ea 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -83,6 +83,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): "min", "max", ]: + # Tested in pandas/tests/arrays/string_/test_string.py pytest.skip("These reductions are implemented") op_name = all_numeric_reductions