Skip to content

Commit

Permalink
DEPR: passing categories or ordered kwargs to Series.astype is deprecated
Browse files Browse the repository at this point in the history
  • Loading branch information
jreback authored and Krzysztof Chomski committed Oct 16, 2017
1 parent 93e2405 commit bdb1a3d
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 57 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -606,6 +606,7 @@ Deprecations
- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`)
- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation <timeseries.custom-freq-ranges>` for more details (:issue:`17596`)
- Passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated in favor of passing a :ref:`CategoricalDtype <whatsnew_0210.enhancements.categorical_dtype>` (:issue:`17636`)

.. _whatsnew_0210.deprecations.argmin_min:

Expand Down
21 changes: 15 additions & 6 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
import copy
from warnings import catch_warnings
import itertools
Expand Down Expand Up @@ -547,12 +548,20 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
# may need to convert to categorical
# this is only called for non-categoricals
if self.is_categorical_astype(dtype):
if (('categories' in kwargs or 'ordered' in kwargs) and
isinstance(dtype, CategoricalDtype)):
raise TypeError("Cannot specify a CategoricalDtype and also "
"`categories` or `ordered`. Use "
"`dtype=CategoricalDtype(categories, ordered)`"
" instead.")

# deprecated 17636
if ('categories' in kwargs or 'ordered' in kwargs):
if isinstance(dtype, CategoricalDtype):
raise TypeError(
"Cannot specify a CategoricalDtype and also "
"`categories` or `ordered`. Use "
"`dtype=CategoricalDtype(categories, ordered)`"
" instead.")
warnings.warn("specifying 'categories' or 'ordered' in "
".astype() is deprecated; pass a "
"CategoricalDtype instead",
FutureWarning, stacklevel=7)

kwargs = kwargs.copy()
categories = getattr(dtype, 'categories', None)
ordered = getattr(dtype, 'ordered', False)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pandas as pd
from pandas.compat import lrange
from pandas.api.types import CategoricalDtype
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
date_range, NaT, IntervalIndex)

Expand Down Expand Up @@ -513,7 +514,7 @@ def test_sort_index_categorical_index(self):

df = (DataFrame({'A': np.arange(6, dtype='int64'),
'B': Series(list('aabbca'))
.astype('category', categories=list('cab'))})
.astype(CategoricalDtype(list('cab')))})
.set_index('B'))

result = df.sort_index()
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,16 @@ def test_astype_dict_like(self, dtype_class):
with pytest.raises(KeyError):
s.astype(dt5)

def test_astype_categories_deprecation(self):

# GH 17636: passing ``categories`` / ``ordered`` as keyword arguments to
# ``Series.astype('category', ...)`` is deprecated. The deprecated call
# must emit a FutureWarning, yet still produce the same Series as
# passing the equivalent CategoricalDtype.
s = Series(['a', 'b', 'a'])
expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
# Only the warning category is asserted; stack-level verification is
# switched off for this call.
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s.astype('category', categories=['a', 'b'], ordered=True)
tm.assert_series_equal(result, expected)

def test_astype_categoricaldtype(self):
s = Series(['a', 'b', 'a'])
result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
Expand Down
50 changes: 15 additions & 35 deletions pandas/tests/series/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from numpy import nan
import numpy as np

from pandas import (Series, date_range, NaT)
from pandas import Series, date_range, NaT
from pandas.api.types import CategoricalDtype

from pandas.compat import product
from pandas.util.testing import assert_series_equal
Expand Down Expand Up @@ -123,50 +124,34 @@ def test_rank_categorical(self):
exp_desc = Series([6., 5., 4., 3., 2., 1.])
ordered = Series(
['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
).astype(
'category',
categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=True
)
).astype(CategoricalDtype(categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=True))
assert_series_equal(ordered.rank(), exp)
assert_series_equal(ordered.rank(ascending=False), exp_desc)

# Unordered categoricals should be ranked as objects
unordered = Series(
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
).astype(
'category',
categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=False
)
unordered = Series(['first', 'second', 'third', 'fourth',
'fifth', 'sixth']).astype(
CategoricalDtype(categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=False))
exp_unordered = Series([2., 4., 6., 3., 1., 5.])
res = unordered.rank()
assert_series_equal(res, exp_unordered)

unordered1 = Series(
[1, 2, 3, 4, 5, 6],
).astype(
'category',
categories=[1, 2, 3, 4, 5, 6],
ordered=False
)
).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
res1 = unordered1.rank()
assert_series_equal(res1, exp_unordered1)

# Test na_option for rank data
na_ser = Series(
['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
).astype(
'category',
categories=[
'first', 'second', 'third', 'fourth',
'fifth', 'sixth', 'seventh'
],
ordered=True
)
).astype(CategoricalDtype(['first', 'second', 'third', 'fourth',
'fifth', 'sixth', 'seventh'], True))

exp_top = Series([2., 3., 4., 5., 6., 7., 1.])
exp_bot = Series([1., 2., 3., 4., 5., 6., 7.])
Expand Down Expand Up @@ -195,13 +180,8 @@ def test_rank_categorical(self):
)

# Test with pct=True
na_ser = Series(
['first', 'second', 'third', 'fourth', np.NaN],
).astype(
'category',
categories=['first', 'second', 'third', 'fourth'],
ordered=True
)
na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype(
CategoricalDtype(['first', 'second', 'third', 'fourth'], True))
exp_top = Series([0.4, 0.6, 0.8, 1., 0.2])
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.])
exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN])
Expand Down
30 changes: 15 additions & 15 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,17 @@ def test_getitem_category_type(self):

# get slice
result = s.iloc[0:2]
expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)

# get list of indexes
result = s.iloc[[0, 1]]
expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)

# get boolean array
result = s.iloc[[True, False, False]]
expected = pd.Series([1]).astype('category', categories=[1, 2, 3])
expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)

def test_setitem(self):
Expand Down Expand Up @@ -2076,12 +2076,12 @@ def test_creation_astype(self):
l = ["a", "b", "c", "a"]
s = pd.Series(l)
exp = pd.Series(Categorical(l, ordered=True))
res = s.astype('category', ordered=True)
res = s.astype(CategoricalDtype(None, ordered=True))
tm.assert_series_equal(res, exp)

exp = pd.Series(Categorical(
l, categories=list('abcdef'), ordered=True))
res = s.astype('category', categories=list('abcdef'), ordered=True)
res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
tm.assert_series_equal(res, exp)

def test_construction_series(self):
Expand Down Expand Up @@ -4262,11 +4262,11 @@ def test_concat_preserve(self):
b = Series(list('aabbca'))

df2 = DataFrame({'A': a,
'B': b.astype('category', categories=list('cab'))})
'B': b.astype(CategoricalDtype(list('cab')))})
res = pd.concat([df2, df2])
exp = DataFrame({'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(
'category', categories=list('cab'))})
exp = DataFrame(
{'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))})
tm.assert_frame_equal(res, exp)

def test_categorical_index_preserver(self):
Expand All @@ -4275,13 +4275,13 @@ def test_categorical_index_preserver(self):
b = Series(list('aabbca'))

df2 = DataFrame({'A': a,
'B': b.astype('category', categories=list('cab'))
'B': b.astype(CategoricalDtype(list('cab')))
}).set_index('B')
result = pd.concat([df2, df2])
expected = DataFrame({'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(
'category', categories=list('cab'))
}).set_index('B')
expected = DataFrame(
{'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
}).set_index('B')
tm.assert_frame_equal(result, expected)

# wrong categories
Expand Down Expand Up @@ -4324,7 +4324,7 @@ def test_merge(self):
cright = right.copy()
cright['d'] = cright['d'].astype('category')
result = pd.merge(left, cright, how='left', left_on='b', right_on='c')
expected['d'] = expected['d'].astype('category', categories=['null'])
expected['d'] = expected['d'].astype(CategoricalDtype(['null']))
tm.assert_frame_equal(result, expected)

# cat-object
Expand Down

0 comments on commit bdb1a3d

Please sign in to comment.