From cbaa585414494812b85f86703c67e67e5f13cc78 Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Mon, 2 Oct 2017 09:00:02 -0400 Subject: [PATCH] DEPR: passing categories or ordered kwargs to Series.astype is deprecated closes #17636 --- doc/source/whatsnew/v0.21.0.txt | 1 + pandas/core/internals.py | 21 +++++++++---- pandas/tests/frame/test_sorting.py | 3 +- pandas/tests/series/test_dtypes.py | 10 ++++++ pandas/tests/series/test_rank.py | 50 +++++++++--------------------- pandas/tests/test_categorical.py | 30 +++++++++--------- 6 files changed, 58 insertions(+), 57 deletions(-) diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt index d69a5c22acc03..28d4be7fd1850 100644 --- a/doc/source/whatsnew/v0.21.0.txt +++ b/doc/source/whatsnew/v0.21.0.txt @@ -572,6 +572,7 @@ Deprecations - :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`) - ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`) - ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation <timeseries.custom-freq-ranges>` for more details (:issue:`17596`) +- passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated, in favor of passing a :ref:`CategoricalDtype <whatsnew_0210.enhancements.categorical_dtype>` (:issue:`17636`) .. 
_whatsnew_0210.deprecations.argmin_min: diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 9e348819ce5a3..71c752c328402 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1,3 +1,4 @@ +import warnings import copy from warnings import catch_warnings import itertools @@ -548,12 +549,20 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, # may need to convert to categorical # this is only called for non-categoricals if self.is_categorical_astype(dtype): - if (('categories' in kwargs or 'ordered' in kwargs) and - isinstance(dtype, CategoricalDtype)): - raise TypeError("Cannot specify a CategoricalDtype and also " - "`categories` or `ordered`. Use " - "`dtype=CategoricalDtype(categories, ordered)`" - " instead.") + + # deprecated 17636 + if ('categories' in kwargs or 'ordered' in kwargs): + if isinstance(dtype, CategoricalDtype): + raise TypeError( + "Cannot specify a CategoricalDtype and also " + "`categories` or `ordered`. Use " + "`dtype=CategoricalDtype(categories, ordered)`" + " instead.") + warnings.warn("specifying 'categories' or 'ordered' in " + ".astype() is deprecated; pass a " + "CategoricalDtype instead", + FutureWarning, stacklevel=7) + kwargs = kwargs.copy() categories = getattr(dtype, 'categories', None) ordered = getattr(dtype, 'ordered', False) diff --git a/pandas/tests/frame/test_sorting.py b/pandas/tests/frame/test_sorting.py index e6f823bf6fac2..a98439797dc28 100644 --- a/pandas/tests/frame/test_sorting.py +++ b/pandas/tests/frame/test_sorting.py @@ -8,6 +8,7 @@ import pandas as pd from pandas.compat import lrange +from pandas.api.types import CategoricalDtype from pandas import (DataFrame, Series, MultiIndex, Timestamp, date_range, NaT, IntervalIndex) @@ -513,7 +514,7 @@ def test_sort_index_categorical_index(self): df = (DataFrame({'A': np.arange(6, dtype='int64'), 'B': Series(list('aabbca')) - .astype('category', categories=list('cab'))}) + .astype(CategoricalDtype(list('cab')))}) 
.set_index('B')) result = df.sort_index() diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index 3099c02e4aabd..b20c1817e5671 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -186,6 +186,16 @@ def test_astype_dict_like(self, dtype_class): with pytest.raises(KeyError): s.astype(dt5) + def test_astype_categories_deprecation(self): + + # deprecated 17636 + s = Series(['a', 'b', 'a']) + expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True)) + with tm.assert_produces_warning(FutureWarning, + check_stacklevel=False): + result = s.astype('category', categories=['a', 'b'], ordered=True) + tm.assert_series_equal(result, expected) + def test_astype_categoricaldtype(self): s = Series(['a', 'b', 'a']) result = s.astype(CategoricalDtype(['a', 'b'], ordered=True)) diff --git a/pandas/tests/series/test_rank.py b/pandas/tests/series/test_rank.py index 128a4cdd845e6..e45acdedbd2a9 100644 --- a/pandas/tests/series/test_rank.py +++ b/pandas/tests/series/test_rank.py @@ -7,7 +7,8 @@ from numpy import nan import numpy as np -from pandas import (Series, date_range, NaT) +from pandas import Series, date_range, NaT +from pandas.api.types import CategoricalDtype from pandas.compat import product from pandas.util.testing import assert_series_equal @@ -123,35 +124,25 @@ def test_rank_categorical(self): exp_desc = Series([6., 5., 4., 3., 2., 1.]) ordered = Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'] - ).astype( - 'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=True - ) + ).astype(CategoricalDtype(categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=True)) assert_series_equal(ordered.rank(), exp) assert_series_equal(ordered.rank(ascending=False), exp_desc) # Unordered categoricals should be ranked as objects - unordered = Series( - ['first', 'second', 'third', 'fourth', 'fifth', 'sixth'], - ).astype( - 
'category', - categories=['first', 'second', 'third', - 'fourth', 'fifth', 'sixth'], - ordered=False - ) + unordered = Series(['first', 'second', 'third', 'fourth', + 'fifth', 'sixth']).astype( + CategoricalDtype(categories=['first', 'second', 'third', + 'fourth', 'fifth', 'sixth'], + ordered=False)) exp_unordered = Series([2., 4., 6., 3., 1., 5.]) res = unordered.rank() assert_series_equal(res, exp_unordered) unordered1 = Series( [1, 2, 3, 4, 5, 6], - ).astype( - 'category', - categories=[1, 2, 3, 4, 5, 6], - ordered=False - ) + ).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False)) exp_unordered1 = Series([1., 2., 3., 4., 5., 6.]) res1 = unordered1.rank() assert_series_equal(res1, exp_unordered1) @@ -159,14 +150,8 @@ def test_rank_categorical(self): # Test na_option for rank data na_ser = Series( ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN] - ).astype( - 'category', - categories=[ - 'first', 'second', 'third', 'fourth', - 'fifth', 'sixth', 'seventh' - ], - ordered=True - ) + ).astype(CategoricalDtype(['first', 'second', 'third', 'fourth', + 'fifth', 'sixth', 'seventh'], True)) exp_top = Series([2., 3., 4., 5., 6., 7., 1.]) exp_bot = Series([1., 2., 3., 4., 5., 6., 7.]) @@ -195,13 +180,8 @@ def test_rank_categorical(self): ) # Test with pct=True - na_ser = Series( - ['first', 'second', 'third', 'fourth', np.NaN], - ).astype( - 'category', - categories=['first', 'second', 'third', 'fourth'], - ordered=True - ) + na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype( + CategoricalDtype(['first', 'second', 'third', 'fourth'], True)) exp_top = Series([0.4, 0.6, 0.8, 1., 0.2]) exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.]) exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN]) diff --git a/pandas/tests/test_categorical.py b/pandas/tests/test_categorical.py index d43901ea091b7..df32437a03f04 100644 --- a/pandas/tests/test_categorical.py +++ b/pandas/tests/test_categorical.py @@ -84,17 +84,17 @@ def test_getitem_category_type(self): # get slice 
result = s.iloc[0:2] - expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) + expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get list of indexes result = s.iloc[[0, 1]] - expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3]) + expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) # get boolean array result = s.iloc[[True, False, False]] - expected = pd.Series([1]).astype('category', categories=[1, 2, 3]) + expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3])) tm.assert_series_equal(result, expected) def test_setitem(self): @@ -2042,12 +2042,12 @@ def test_creation_astype(self): l = ["a", "b", "c", "a"] s = pd.Series(l) exp = pd.Series(Categorical(l, ordered=True)) - res = s.astype('category', ordered=True) + res = s.astype(CategoricalDtype(None, ordered=True)) tm.assert_series_equal(res, exp) exp = pd.Series(Categorical( l, categories=list('abcdef'), ordered=True)) - res = s.astype('category', categories=list('abcdef'), ordered=True) + res = s.astype(CategoricalDtype(list('abcdef'), ordered=True)) tm.assert_series_equal(res, exp) def test_construction_series(self): @@ -4228,11 +4228,11 @@ def test_concat_preserve(self): b = Series(list('aabbca')) df2 = DataFrame({'A': a, - 'B': b.astype('category', categories=list('cab'))}) + 'B': b.astype(CategoricalDtype(list('cab')))}) res = pd.concat([df2, df2]) - exp = DataFrame({'A': pd.concat([a, a]), - 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab'))}) + exp = DataFrame( + {'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))}) tm.assert_frame_equal(res, exp) def test_categorical_index_preserver(self): @@ -4241,13 +4241,13 @@ def test_categorical_index_preserver(self): b = Series(list('aabbca')) df2 = DataFrame({'A': a, - 'B': b.astype('category', categories=list('cab')) + 'B': b.astype(CategoricalDtype(list('cab'))) 
}).set_index('B') result = pd.concat([df2, df2]) - expected = DataFrame({'A': pd.concat([a, a]), - 'B': pd.concat([b, b]).astype( - 'category', categories=list('cab')) - }).set_index('B') + expected = DataFrame( + {'A': pd.concat([a, a]), + 'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab'))) + }).set_index('B') tm.assert_frame_equal(result, expected) # wrong catgories @@ -4290,7 +4290,7 @@ def test_merge(self): cright = right.copy() cright['d'] = cright['d'].astype('category') result = pd.merge(left, cright, how='left', left_on='b', right_on='c') - expected['d'] = expected['d'].astype('category', categories=['null']) + expected['d'] = expected['d'].astype(CategoricalDtype(['null'])) tm.assert_frame_equal(result, expected) # cat-object