Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: passing categories or ordered kwargs to Series.astype is deprecated #17742

Merged
merged 1 commit into from
Oct 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.21.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -572,6 +572,7 @@ Deprecations
- :func:`DataFrame.as_blocks` is deprecated, as this is exposing the internal implementation (:issue:`17302`)
- ``pd.TimeGrouper`` is deprecated in favor of :class:`pandas.Grouper` (:issue:`16747`)
- ``cdate_range`` has been deprecated in favor of :func:`bdate_range`, which has gained ``weekmask`` and ``holidays`` parameters for building custom frequency date ranges. See the :ref:`documentation <timeseries.custom-freq-ranges>` for more details (:issue:`17596`)
- Passing ``categories`` or ``ordered`` kwargs to :func:`Series.astype` is deprecated in favor of passing a :ref:`CategoricalDtype <whatsnew_0210.enhancements.categorical_dtype>` (:issue:`17636`)

.. _whatsnew_0210.deprecations.argmin_min:

Expand Down
21 changes: 15 additions & 6 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import warnings
import copy
from warnings import catch_warnings
import itertools
Expand Down Expand Up @@ -548,12 +549,20 @@ def _astype(self, dtype, copy=False, errors='raise', values=None,
# may need to convert to categorical
# this is only called for non-categoricals
if self.is_categorical_astype(dtype):
if (('categories' in kwargs or 'ordered' in kwargs) and
isinstance(dtype, CategoricalDtype)):
raise TypeError("Cannot specify a CategoricalDtype and also "
"`categories` or `ordered`. Use "
"`dtype=CategoricalDtype(categories, ordered)`"
" instead.")

# deprecated 17636
if ('categories' in kwargs or 'ordered' in kwargs):
if isinstance(dtype, CategoricalDtype):
raise TypeError(
"Cannot specify a CategoricalDtype and also "
"`categories` or `ordered`. Use "
"`dtype=CategoricalDtype(categories, ordered)`"
" instead.")
warnings.warn("specifying 'categories' or 'ordered' in "
".astype() is deprecated; pass a "
"CategoricalDtype instead",
FutureWarning, stacklevel=7)

kwargs = kwargs.copy()
categories = getattr(dtype, 'categories', None)
ordered = getattr(dtype, 'ordered', False)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_sorting.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import pandas as pd
from pandas.compat import lrange
from pandas.api.types import CategoricalDtype
from pandas import (DataFrame, Series, MultiIndex, Timestamp,
date_range, NaT, IntervalIndex)

Expand Down Expand Up @@ -513,7 +514,7 @@ def test_sort_index_categorical_index(self):

df = (DataFrame({'A': np.arange(6, dtype='int64'),
'B': Series(list('aabbca'))
.astype('category', categories=list('cab'))})
.astype(CategoricalDtype(list('cab')))})
.set_index('B'))

result = df.sort_index()
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/series/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,16 @@ def test_astype_dict_like(self, dtype_class):
with pytest.raises(KeyError):
s.astype(dt5)

def test_astype_categories_deprecation(self):

# deprecated 17636
s = Series(['a', 'b', 'a'])
expected = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
with tm.assert_produces_warning(FutureWarning,
check_stacklevel=False):
result = s.astype('category', categories=['a', 'b'], ordered=True)
tm.assert_series_equal(result, expected)

def test_astype_categoricaldtype(self):
s = Series(['a', 'b', 'a'])
result = s.astype(CategoricalDtype(['a', 'b'], ordered=True))
Expand Down
50 changes: 15 additions & 35 deletions pandas/tests/series/test_rank.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
from numpy import nan
import numpy as np

from pandas import (Series, date_range, NaT)
from pandas import Series, date_range, NaT
from pandas.api.types import CategoricalDtype

from pandas.compat import product
from pandas.util.testing import assert_series_equal
Expand Down Expand Up @@ -123,50 +124,34 @@ def test_rank_categorical(self):
exp_desc = Series([6., 5., 4., 3., 2., 1.])
ordered = Series(
['first', 'second', 'third', 'fourth', 'fifth', 'sixth']
).astype(
'category',
categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=True
)
).astype(CategoricalDtype(categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=True))
assert_series_equal(ordered.rank(), exp)
assert_series_equal(ordered.rank(ascending=False), exp_desc)

# Unordered categoricals should be ranked as objects
unordered = Series(
['first', 'second', 'third', 'fourth', 'fifth', 'sixth'],
).astype(
'category',
categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=False
)
unordered = Series(['first', 'second', 'third', 'fourth',
'fifth', 'sixth']).astype(
CategoricalDtype(categories=['first', 'second', 'third',
'fourth', 'fifth', 'sixth'],
ordered=False))
exp_unordered = Series([2., 4., 6., 3., 1., 5.])
res = unordered.rank()
assert_series_equal(res, exp_unordered)

unordered1 = Series(
[1, 2, 3, 4, 5, 6],
).astype(
'category',
categories=[1, 2, 3, 4, 5, 6],
ordered=False
)
).astype(CategoricalDtype([1, 2, 3, 4, 5, 6], False))
exp_unordered1 = Series([1., 2., 3., 4., 5., 6.])
res1 = unordered1.rank()
assert_series_equal(res1, exp_unordered1)

# Test na_option for rank data
na_ser = Series(
['first', 'second', 'third', 'fourth', 'fifth', 'sixth', np.NaN]
).astype(
'category',
categories=[
'first', 'second', 'third', 'fourth',
'fifth', 'sixth', 'seventh'
],
ordered=True
)
).astype(CategoricalDtype(['first', 'second', 'third', 'fourth',
'fifth', 'sixth', 'seventh'], True))

exp_top = Series([2., 3., 4., 5., 6., 7., 1.])
exp_bot = Series([1., 2., 3., 4., 5., 6., 7.])
Expand Down Expand Up @@ -195,13 +180,8 @@ def test_rank_categorical(self):
)

# Test with pct=True
na_ser = Series(
['first', 'second', 'third', 'fourth', np.NaN],
).astype(
'category',
categories=['first', 'second', 'third', 'fourth'],
ordered=True
)
na_ser = Series(['first', 'second', 'third', 'fourth', np.NaN]).astype(
CategoricalDtype(['first', 'second', 'third', 'fourth'], True))
exp_top = Series([0.4, 0.6, 0.8, 1., 0.2])
exp_bot = Series([0.2, 0.4, 0.6, 0.8, 1.])
exp_keep = Series([0.25, 0.5, 0.75, 1., np.NaN])
Expand Down
30 changes: 15 additions & 15 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,17 +84,17 @@ def test_getitem_category_type(self):

# get slice
result = s.iloc[0:2]
expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)

# get list of indexes
result = s.iloc[[0, 1]]
expected = pd.Series([1, 2]).astype('category', categories=[1, 2, 3])
expected = pd.Series([1, 2]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)

# get boolean array
result = s.iloc[[True, False, False]]
expected = pd.Series([1]).astype('category', categories=[1, 2, 3])
expected = pd.Series([1]).astype(CategoricalDtype([1, 2, 3]))
tm.assert_series_equal(result, expected)

def test_setitem(self):
Expand Down Expand Up @@ -2042,12 +2042,12 @@ def test_creation_astype(self):
l = ["a", "b", "c", "a"]
s = pd.Series(l)
exp = pd.Series(Categorical(l, ordered=True))
res = s.astype('category', ordered=True)
res = s.astype(CategoricalDtype(None, ordered=True))
tm.assert_series_equal(res, exp)

exp = pd.Series(Categorical(
l, categories=list('abcdef'), ordered=True))
res = s.astype('category', categories=list('abcdef'), ordered=True)
res = s.astype(CategoricalDtype(list('abcdef'), ordered=True))
tm.assert_series_equal(res, exp)

def test_construction_series(self):
Expand Down Expand Up @@ -4228,11 +4228,11 @@ def test_concat_preserve(self):
b = Series(list('aabbca'))

df2 = DataFrame({'A': a,
'B': b.astype('category', categories=list('cab'))})
'B': b.astype(CategoricalDtype(list('cab')))})
res = pd.concat([df2, df2])
exp = DataFrame({'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(
'category', categories=list('cab'))})
exp = DataFrame(
{'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))})
tm.assert_frame_equal(res, exp)

def test_categorical_index_preserver(self):
Expand All @@ -4241,13 +4241,13 @@ def test_categorical_index_preserver(self):
b = Series(list('aabbca'))

df2 = DataFrame({'A': a,
'B': b.astype('category', categories=list('cab'))
'B': b.astype(CategoricalDtype(list('cab')))
}).set_index('B')
result = pd.concat([df2, df2])
expected = DataFrame({'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(
'category', categories=list('cab'))
}).set_index('B')
expected = DataFrame(
{'A': pd.concat([a, a]),
'B': pd.concat([b, b]).astype(CategoricalDtype(list('cab')))
}).set_index('B')
tm.assert_frame_equal(result, expected)

# wrong categories
Expand Down Expand Up @@ -4290,7 +4290,7 @@ def test_merge(self):
cright = right.copy()
cright['d'] = cright['d'].astype('category')
result = pd.merge(left, cright, how='left', left_on='b', right_on='c')
expected['d'] = expected['d'].astype('category', categories=['null'])
expected['d'] = expected['d'].astype(CategoricalDtype(['null']))
tm.assert_frame_equal(result, expected)

# cat-object
Expand Down