Skip to content

Commit

Permalink
API/DEPR: Change default skipna behaviour + deprecate numeric_only in…
Browse files Browse the repository at this point in the history
… Categorical.min and max (#27929)
  • Loading branch information
makbigc authored and jorisvandenbossche committed Dec 2, 2019
1 parent 7e791e4 commit 37526c1
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 57 deletions.
22 changes: 22 additions & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,26 @@ The following methods now also correctly output values for unobserved categories
df.groupby(["cat_1", "cat_2"], observed=False)["value"].count()
By default :meth:`Categorical.min` now returns the minimum instead of np.nan
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

When a :class:`Categorical` contains ``np.nan``,
:meth:`Categorical.min` no longer returns ``np.nan`` by default (``skipna=True``) (:issue:`25303`)

*pandas 0.25.x*

.. code-block:: ipython
In [1]: pd.Categorical([1, 2, np.nan], ordered=True).min()
Out[1]: nan
*pandas 1.0.0*

.. ipython:: python
pd.Categorical([1, 2, np.nan], ordered=True).min()
.. _whatsnew_1000.api_breaking.deps:

Increased minimum versions for dependencies
Expand Down Expand Up @@ -410,6 +430,8 @@ Deprecations
- :func:`is_extension_type` is deprecated, :func:`is_extension_array_dtype` should be used instead (:issue:`29457`)
- :func:`eval` keyword argument "truediv" is deprecated and will be removed in a future version (:issue:`29812`)
- :meth:`Categorical.take_nd` is deprecated, use :meth:`Categorical.take` instead (:issue:`27745`)
- The parameter ``numeric_only`` of :meth:`Categorical.min` and :meth:`Categorical.max` is deprecated and replaced with ``skipna`` (:issue:`25303`)
-

.. _whatsnew_1000.prior_deprecations:

Expand Down
38 changes: 20 additions & 18 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2123,7 +2123,8 @@ def _reduce(self, name, axis=0, **kwargs):
raise TypeError(f"Categorical cannot perform the operation {name}")
return func(**kwargs)

def min(self, numeric_only=None, **kwargs):
@deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna")
def min(self, skipna=True):
"""
The minimum value of the object.
Expand All @@ -2139,17 +2140,18 @@ def min(self, numeric_only=None, **kwargs):
min : the minimum of this `Categorical`
"""
self.check_for_ordered("min")
if numeric_only:
good = self._codes != -1
pointer = self._codes[good].min(**kwargs)
else:
pointer = self._codes.min(**kwargs)
if pointer == -1:
return np.nan
good = self._codes != -1
if not good.all():
if skipna:
pointer = self._codes[good].min()
else:
return np.nan
else:
return self.categories[pointer]
pointer = self._codes.min()
return self.categories[pointer]

def max(self, numeric_only=None, **kwargs):
@deprecate_kwarg(old_arg_name="numeric_only", new_arg_name="skipna")
def max(self, skipna=True):
"""
The maximum value of the object.
Expand All @@ -2165,15 +2167,15 @@ def max(self, numeric_only=None, **kwargs):
max : the maximum of this `Categorical`
"""
self.check_for_ordered("max")
if numeric_only:
good = self._codes != -1
pointer = self._codes[good].max(**kwargs)
else:
pointer = self._codes.max(**kwargs)
if pointer == -1:
return np.nan
good = self._codes != -1
if not good.all():
if skipna:
pointer = self._codes[good].max()
else:
return np.nan
else:
return self.categories[pointer]
pointer = self._codes.max()
return self.categories[pointer]

def mode(self, dropna=True):
"""
Expand Down
4 changes: 1 addition & 3 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -3820,9 +3820,7 @@ def _reduce(
self._get_axis_number(axis)

if isinstance(delegate, Categorical):
# TODO deprecate numeric_only argument for Categorical and use
# skipna as well, see GH25303
return delegate._reduce(name, numeric_only=numeric_only, **kwds)
return delegate._reduce(name, skipna=skipna, **kwds)
elif isinstance(delegate, ExtensionArray):
# dispatch to ExtensionArray interface
return delegate._reduce(name, skipna=skipna, **kwds)
Expand Down
46 changes: 29 additions & 17 deletions pandas/tests/arrays/categorical/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,31 +35,43 @@ def test_min_max(self):
assert _min == "d"
assert _max == "a"

@pytest.mark.parametrize("skipna", [True, False])
def test_min_max_with_nan(self, skipna):
# GH 25303
cat = Categorical(
[np.nan, "b", "c", np.nan], categories=["d", "c", "b", "a"], ordered=True
)
_min = cat.min()
_max = cat.max()
assert np.isnan(_min)
assert _max == "b"
_min = cat.min(skipna=skipna)
_max = cat.max(skipna=skipna)

_min = cat.min(numeric_only=True)
assert _min == "c"
_max = cat.max(numeric_only=True)
assert _max == "b"
if skipna is False:
assert np.isnan(_min)
assert np.isnan(_max)
else:
assert _min == "c"
assert _max == "b"

cat = Categorical(
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
)
_min = cat.min()
_max = cat.max()
assert np.isnan(_min)
assert _max == 1

_min = cat.min(numeric_only=True)
assert _min == 2
_max = cat.max(numeric_only=True)
assert _max == 1
_min = cat.min(skipna=skipna)
_max = cat.max(skipna=skipna)

if skipna is False:
assert np.isnan(_min)
assert np.isnan(_max)
else:
assert _min == 2
assert _max == 1

@pytest.mark.parametrize("method", ["min", "max"])
def test_deprecate_numeric_only_min_max(self, method):
# GH 25303
cat = Categorical(
[np.nan, 1, 2, np.nan], categories=[5, 4, 3, 2, 1], ordered=True
)
with tm.assert_produces_warning(expected_warning=FutureWarning):
getattr(cat, method)(numeric_only=True)

@pytest.mark.parametrize(
"values,categories,exp_mode",
Expand Down
32 changes: 13 additions & 19 deletions pandas/tests/reductions/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1043,7 +1043,7 @@ def test_min_max(self):
)
_min = cat.min()
_max = cat.max()
assert np.isnan(_min)
assert _min == "c"
assert _max == "b"

cat = Series(
Expand All @@ -1053,30 +1053,24 @@ def test_min_max(self):
)
_min = cat.min()
_max = cat.max()
assert np.isnan(_min)
assert _min == 2
assert _max == 1

def test_min_max_numeric_only(self):
# TODO deprecate numeric_only argument for Categorical and use
# skipna as well, see GH25303
@pytest.mark.parametrize("skipna", [True, False])
def test_min_max_skipna(self, skipna):
# GH 25303
cat = Series(
Categorical(["a", "b", np.nan, "a"], categories=["b", "a"], ordered=True)
)
_min = cat.min(skipna=skipna)
_max = cat.max(skipna=skipna)

_min = cat.min()
_max = cat.max()
assert np.isnan(_min)
assert _max == "a"

_min = cat.min(numeric_only=True)
_max = cat.max(numeric_only=True)
assert _min == "b"
assert _max == "a"

_min = cat.min(numeric_only=False)
_max = cat.max(numeric_only=False)
assert np.isnan(_min)
assert _max == "a"
if skipna is True:
assert _min == "b"
assert _max == "a"
else:
assert np.isnan(_min)
assert np.isnan(_max)


class TestSeriesMode:
Expand Down

0 comments on commit 37526c1

Please sign in to comment.