Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: disallow scalar in Categorical constructor #38472

Merged
merged 8 commits into from
Dec 23, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ Other API changes

Deprecations
~~~~~~~~~~~~
- Deprecated allowing scalars to be passed to the :class:`Categorical` constructor (:issue:`38433`)
- Deprecated allowing subclass-specific keyword arguments in the :class:`Index` constructor, use the specific subclass directly instead (:issue:`14093`, :issue:`21311`, :issue:`22315`, :issue:`26974`)
-
-
Expand Down
10 changes: 10 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,16 @@ def __init__(
self._dtype = self._dtype.update_dtype(dtype)
return

if not is_list_like(values):
# GH#38433
warn(
"Allowing scalars in the Categorical constructor is deprecated "
"and will raise in a future version. Use `[value]` instead",
FutureWarning,
stacklevel=2,
)
values = [values]

# null_mask indicates missing values we want to exclude from inference.
# This means: only missing values in list-likes (not arrays/ndframes).
null_mask = np.array(False)
Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/arrays/categorical/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@


class TestCategoricalConstructors:
def test_categorical_scalar_deprecated(self):
    """A bare scalar passed as ``values`` must trigger a ``FutureWarning``."""
    # GH#38433
    allowed = ["A", "B"]
    with tm.assert_produces_warning(FutureWarning):
        Categorical("A", categories=allowed)

def test_validate_ordered(self):
# see gh-14058
exp_msg = "'ordered' must either be 'True' or 'False'"
Expand Down Expand Up @@ -202,13 +207,13 @@ def test_constructor(self):
assert len(cat.codes) == 1
assert cat.codes[0] == 0

# Scalars should be converted to lists
cat = Categorical(1)
with tm.assert_produces_warning(FutureWarning):
# GH#38433
cat = Categorical(1)
assert len(cat.categories) == 1
assert cat.categories[0] == 1
assert len(cat.codes) == 1
assert cat.codes[0] == 0

# two arrays
# - when the first is an integer dtype and the second is not
# - when the resulting codes are all -1/NaN
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/extension/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def test_cast_category_to_extension_dtype(self, expected):
)
def test_consistent_casting(self, dtype, expected):
# GH 28448
result = Categorical("2015-01-01").astype(dtype)
result = Categorical(["2015-01-01"]).astype(dtype)
assert result == expected


Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def test_replace_mixed_types_with_string(self):
@pytest.mark.parametrize(
"categorical, numeric",
[
(pd.Categorical("A", categories=["A", "B"]), [1]),
(pd.Categorical(["A"], categories=["A", "B"]), [1]),
(pd.Categorical(("A",), categories=["A", "B"]), [1]),
(pd.Categorical(("A", "B"), categories=["A", "B"]), [1, 2]),
],
Expand Down
8 changes: 4 additions & 4 deletions pandas/tests/test_algos.py
Original file line number Diff line number Diff line change
Expand Up @@ -908,17 +908,17 @@ def test_categorical_from_codes(self):
# GH 16639
vals = np.array([0, 1, 2, 0])
cats = ["a", "b", "c"]
Sd = Series(Categorical(1).from_codes(vals, cats))
St = Series(Categorical(1).from_codes(np.array([0, 1]), cats))
Sd = Series(Categorical([1]).from_codes(vals, cats))
St = Series(Categorical([1]).from_codes(np.array([0, 1]), cats))
expected = np.array([True, True, False, True])
result = algos.isin(Sd, St)
tm.assert_numpy_array_equal(expected, result)

def test_categorical_isin(self):
vals = np.array([0, 1, 2, 0])
cats = ["a", "b", "c"]
cat = Categorical(1).from_codes(vals, cats)
other = Categorical(1).from_codes(np.array([0, 1]), cats)
cat = Categorical([1]).from_codes(vals, cats)
other = Categorical([1]).from_codes(np.array([0, 1]), cats)

expected = np.array([True, True, False, True])
result = algos.isin(cat, other)
Expand Down