Skip to content

Commit

Permalink
Fix categorical from codes nan 21767 (pandas-dev#21775)
Browse files Browse the repository at this point in the history
  • Loading branch information
miker985 authored and dberenbaum committed Aug 3, 2018
1 parent 4da257b commit 958ecd3
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 4 deletions.
5 changes: 2 additions & 3 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,7 @@ Deprecations
- :meth:`MultiIndex.to_hierarchical` is deprecated and will be removed in a future version (:issue:`21613`)
- :meth:`Series.ptp` is deprecated. Use ``numpy.ptp`` instead (:issue:`21614`)
- :meth:`Series.compress` is deprecated. Use ``Series[condition]`` instead (:issue:`18262`)
- Passing float values for the ``codes`` argument of :meth:`Categorical.from_codes` is deprecated and will raise a ``ValueError`` in a future version (:issue:`21767`)

.. _whatsnew_0240.prior_deprecations:

Expand Down Expand Up @@ -526,9 +527,7 @@ Bug Fixes
Categorical
^^^^^^^^^^^

-
-
-
- Bug in :meth:`Categorical.from_codes` where ``NaN`` values in ``codes`` were silently converted to ``0`` (:issue:`21767`). Such values now raise a ``ValueError``, as do other non-integer float codes, e.g. ``Categorical.from_codes([1.1, 2.0], ...)``.

Datetimelike
^^^^^^^^^^^^
Expand Down
17 changes: 16 additions & 1 deletion pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
is_timedelta64_dtype,
is_categorical,
is_categorical_dtype,
is_float_dtype,
is_integer_dtype,
is_list_like, is_sequence,
is_scalar, is_iterator,
is_dict_like)
Expand Down Expand Up @@ -633,8 +635,21 @@ def from_codes(cls, codes, categories, ordered=False):
categorical. If not given, the resulting categorical will be
unordered.
"""
codes = np.asarray(codes) # #21767
if not is_integer_dtype(codes):
msg = "codes need to be array-like integers"
if is_float_dtype(codes):
icodes = codes.astype('i8')
if (icodes == codes).all():
msg = None
codes = icodes
warn(("float codes will be disallowed in the future and "
"raise a ValueError"), FutureWarning, stacklevel=2)
if msg:
raise ValueError(msg)

try:
codes = coerce_indexer_dtype(np.asarray(codes), categories)
codes = coerce_indexer_dtype(codes, categories)
except (ValueError, TypeError):
raise ValueError(
"codes need to be convertible to an arrays of integers")
Expand Down
20 changes: 20 additions & 0 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,26 @@ def test_from_codes_with_categorical_categories(self):
with pytest.raises(ValueError):
Categorical.from_codes([0, 1], Categorical(['a', 'b', 'a']))

def test_from_codes_with_nan_code(self):
    """Check that ``from_codes`` rejects a NaN among the codes (GH21767)."""
    category_labels = ['a', 'b', 'c']
    codes_with_nan = [1, 2, np.nan]

    # NaN is not a valid code, so construction must fail.
    with pytest.raises(ValueError):
        Categorical.from_codes(codes_with_nan, category_labels)

def test_from_codes_with_float(self):
    """Check how ``from_codes`` treats float-valued codes (GH21767).

    Whole-number floats are accepted with a ``FutureWarning``; a code
    with a fractional part raises ``ValueError``.
    """
    categories = ['a', 'b', 'c']

    # Integer-valued codes that end up in a float dtype still work, but
    # the call is expected to emit a FutureWarning.
    integral_float_codes = [1.0, 2.0, 0]
    with tm.assert_produces_warning(FutureWarning):
        result = Categorical.from_codes(integral_float_codes, categories)
    expected_codes = np.array([1, 2, 0], dtype='i1')
    tm.assert_numpy_array_equal(result.codes, expected_codes)

    # A genuinely fractional code cannot be used and must raise.
    fractional_codes = [1.1, 2.0, 0]
    with pytest.raises(ValueError):
        Categorical.from_codes(fractional_codes, categories)

@pytest.mark.parametrize('dtype', [None, 'category'])
def test_from_inferred_categories(self, dtype):
cats = ['a', 'b']
Expand Down

0 comments on commit 958ecd3

Please sign in to comment.