Skip to content

Commit

Permalink
Merge pull request #8937 from JanSchulz/cat_unique2
Browse files Browse the repository at this point in the history
Categorical: let unique only return used categories
  • Loading branch information
jreback committed Nov 30, 2014
2 parents e759d99 + acc549d commit 8290a4d
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 5 deletions.
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v0.15.2.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ API changes

- Bug in concat of Series with ``category`` dtype, which was coercing to ``object``. (:issue:`8641`)

- Bug in unique of Series with ``category`` dtype, which returned all categories regardless
of whether they were "used" or not (see :issue:`8559` for the discussion).

- ``Series.all`` and ``Series.any`` now support the ``level`` and ``skipna`` parameters. ``Series.all``, ``Series.any``, ``Index.all``, and ``Index.any`` no longer support the ``out`` and ``keepdims`` parameters, which existed for compatibility with ndarray. Various index types no longer support the ``all`` and ``any`` aggregation functions and will now raise ``TypeError``. (:issue:`8302`):

.. ipython:: python
Expand Down
9 changes: 7 additions & 2 deletions pandas/core/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1326,13 +1326,18 @@ def unique(self):
"""
Return the unique values.
This includes all categories, even if one or more is unused.
Unused categories are NOT returned.
Returns
-------
unique values : array
"""
return np.asarray(self.categories)
unique_codes = np.unique(self.codes)
# for compatibility with normal unique, which has nan last
if unique_codes[0] == -1:
unique_codes[0:-1] = unique_codes[1:]
unique_codes[-1] = -1
return take_1d(self.categories.values, unique_codes)

def equals(self, other):
"""
Expand Down
12 changes: 9 additions & 3 deletions pandas/tests/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,11 +769,17 @@ def test_min_max(self):
self.assertEqual(_max, 1)

def test_unique(self):
cat = Categorical(["a","b","c","d"])
exp = np.asarray(["a","b","c","d"])
cat = Categorical(["a","b"])
exp = np.asarray(["a","b"])
res = cat.unique()
self.assert_numpy_array_equal(res, exp)
self.assertEqual(type(res), type(exp))
cat = Categorical(["a","b","a","a"], categories=["a","b","c"])
res = cat.unique()
self.assert_numpy_array_equal(res, exp)
cat = Categorical(["a","b","a", np.nan], categories=["a","b","c"])
res = cat.unique()
exp = np.asarray(["a","b", np.nan], dtype=object)
self.assert_numpy_array_equal(res, exp)

def test_mode(self):
s = Categorical([1,1,2,4,5,5,5], categories=[5,4,3,2,1], ordered=True)
Expand Down

0 comments on commit 8290a4d

Please sign in to comment.