Backport PR #25182: BUG: Fix Series.is_unique with single occurrence of NaN (#25223)
meeseeksmachine authored and jreback committed Feb 8, 2019
1 parent c460a92 commit 7f44027
Showing 8 changed files with 51 additions and 44 deletions.
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.2.rst
@@ -87,7 +87,7 @@ Bug Fixes

**Other**

-
- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`)
-
-

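For context, here is a minimal sketch (not part of this commit) of the behaviour the whatsnew entry describes, assuming a pandas build that includes this backport; before the fix the first check reported False:

import numpy as np
import pandas as pd

# Every entry occurs exactly once, including the single NaN, so the Series is unique.
s = pd.Series([1, 2, np.nan])
print(s.is_unique)                            # True with this fix; False before it
# A repeated NaN really is a duplicate and still reports False.
print(pd.Series([np.nan, np.nan]).is_unique)  # False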
2 changes: 1 addition & 1 deletion pandas/core/base.py
@@ -1345,7 +1345,7 @@ def is_unique(self):
        -------
        is_unique : boolean
        """
        return self.nunique() == len(self)
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic(self):
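The one-line change above works because nunique() excludes missing values by default, so the old comparison was off by one whenever exactly one NaN was present. A rough illustration of the counts involved (a sketch, not taken from the commit):

import numpy as np
import pandas as pd

s = pd.Series(['a', 'b', np.nan])
# Default nunique() drops the NaN, so 2 != len(s) made is_unique report False.
print(s.nunique())              # 2
# Counting NaN as a distinct value restores the equality that is_unique relies on.
print(s.nunique(dropna=False))  # 3 == len(s), so is_unique is True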
21 changes: 21 additions & 0 deletions pandas/tests/indexes/common.py
@@ -905,3 +905,24 @@ def test_astype_category(self, copy, name, ordered):
        result = index.astype('category', copy=copy)
        expected = CategoricalIndex(index.values, name=name)
        tm.assert_index_equal(result, expected)

    def test_is_unique(self):
        # initialize a unique index
        index = self.create_index().drop_duplicates()
        assert index.is_unique is True

        # empty index should be unique
        index_empty = index[:0]
        assert index_empty.is_unique is True

        # test basic dupes
        index_dup = index.insert(0, index[0])
        assert index_dup.is_unique is False

        # single NA should be unique
        index_na = index.insert(0, np.nan)
        assert index_na.is_unique is True

        # multiple NA should not be unique
        index_na_dup = index_na.insert(0, np.nan)
        assert index_na_dup.is_unique is False
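This shared test runs once per Index subclass via self.create_index(); the following standalone sketch repeats the same checks against a plain float index (illustrative only, not part of the test suite):

import numpy as np
import pandas as pd

index = pd.Index([1.0, 2.0, 3.0])
assert index.is_unique is True                        # unique values
assert index[:0].is_unique is True                    # empty index stays unique
assert index.insert(0, index[0]).is_unique is False   # ordinary duplicate
# With this fix a single NaN no longer breaks uniqueness, while a repeated NaN still does.
index_na = index.insert(0, np.nan)
assert index_na.is_unique is True
assert index_na.insert(0, np.nan).is_unique is False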
19 changes: 4 additions & 15 deletions pandas/tests/indexes/interval/test_interval.py
@@ -242,12 +242,10 @@ def test_take(self, closed):
            [0, 0, 1], [1, 1, 2], closed=closed)
        tm.assert_index_equal(result, expected)

    def test_unique(self, closed):
        # unique non-overlapping
        idx = IntervalIndex.from_tuples(
            [(0, 1), (2, 3), (4, 5)], closed=closed)
        assert idx.is_unique is True

    def test_is_unique_interval(self, closed):
        """
        Interval specific tests for is_unique in addition to base class tests
        """
        # unique overlapping - distinct endpoints
        idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
        assert idx.is_unique is True
@@ -261,15 +259,6 @@ def test_unique(self, closed):
        idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
        assert idx.is_unique is True

        # duplicate
        idx = IntervalIndex.from_tuples(
            [(0, 1), (0, 1), (2, 3)], closed=closed)
        assert idx.is_unique is False

        # empty
        idx = IntervalIndex([], closed=closed)
        assert idx.is_unique is True

    def test_monotonic(self, closed):
        # increasing non-overlapping
        idx = IntervalIndex.from_tuples(
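The duplicate and empty cases removed here are now covered by the shared base-class test; what remains interval-specific is that overlap alone never makes an IntervalIndex non-unique. A quick sketch of that distinction (illustrative, not from the commit):

import pandas as pd

# Overlapping but distinct intervals are still unique ...
overlapping = pd.IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed='right')
print(overlapping.is_unique)   # True
# ... only literally identical intervals count as duplicates.
duplicated = pd.IntervalIndex.from_tuples([(0, 1), (0, 1), (2, 3)], closed='right')
print(duplicated.is_unique)    # False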
12 changes: 12 additions & 0 deletions pandas/tests/indexes/multi/test_duplicates.py
@@ -143,6 +143,18 @@ def test_has_duplicates(idx, idx_dup):
    assert mi.is_unique is False
    assert mi.has_duplicates is True

    # single instance of NaN
    mi_nan = MultiIndex(levels=[['a', 'b'], [0, 1]],
                        codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]])
    assert mi_nan.is_unique is True
    assert mi_nan.has_duplicates is False

    # multiple instances of NaN
    mi_nan_dup = MultiIndex(levels=[['a', 'b'], [0, 1]],
                            codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]])
    assert mi_nan_dup.is_unique is False
    assert mi_nan_dup.has_duplicates is True


def test_has_duplicates_from_tuples():
    # GH 9075
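In the new MultiIndex cases above, a code of -1 at a level marks that position as missing, so the constructed index begins with a (NaN, NaN) entry. A small sketch of what the constructor produces (not part of the commit):

import pandas as pd

mi_nan = pd.MultiIndex(levels=[['a', 'b'], [0, 1]],
                       codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]])
print(mi_nan[0])         # (nan, nan): the -1 codes translate to missing values
print(mi_nan.is_unique)  # True, because the NaN tuple occurs only once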
12 changes: 0 additions & 12 deletions pandas/tests/indexes/period/test_indexing.py
@@ -441,18 +441,6 @@ def test_is_monotonic_decreasing(self):
        assert idx_dec1.is_monotonic_decreasing is True
        assert idx.is_monotonic_decreasing is False

    def test_is_unique(self):
        # GH 17717
        p0 = pd.Period('2017-09-01')
        p1 = pd.Period('2017-09-02')
        p2 = pd.Period('2017-09-03')

        idx0 = pd.PeriodIndex([p0, p1, p2])
        assert idx0.is_unique is True

        idx1 = pd.PeriodIndex([p1, p1, p2])
        assert idx1.is_unique is False

    def test_contains(self):
        # GH 17717
        p0 = pd.Period('2017-09-01')
9 changes: 0 additions & 9 deletions pandas/tests/indexes/test_category.py
@@ -611,15 +611,6 @@ def test_is_monotonic(self, data, non_lexsorted_data):
        assert c.is_monotonic_increasing is True
        assert c.is_monotonic_decreasing is False

    @pytest.mark.parametrize('values, expected', [
        ([1, 2, 3], True),
        ([1, 3, 1], False),
        (list('abc'), True),
        (list('aba'), False)])
    def test_is_unique(self, values, expected):
        ci = CategoricalIndex(values)
        assert ci.is_unique is expected

    def test_has_duplicates(self):

        idx = CategoricalIndex([0, 0, 0], name='foo')
18 changes: 12 additions & 6 deletions pandas/tests/series/test_duplicates.py
@@ -59,12 +59,18 @@ def test_unique_data_ownership():
Series(Series(["a", "c", "b"]).unique()).sort_values()


def test_is_unique():
# GH11946
s = Series(np.random.randint(0, 10, size=1000))
assert s.is_unique is False
s = Series(np.arange(1000))
assert s.is_unique is True
@pytest.mark.parametrize('data, expected', [
(np.random.randint(0, 10, size=1000), False),
(np.arange(1000), True),
([], True),
([np.nan], True),
(['foo', 'bar', np.nan], True),
(['foo', 'foo', np.nan], False),
(['foo', 'bar', np.nan, np.nan], False)])
def test_is_unique(data, expected):
# GH11946 / GH25180
s = Series(data)
assert s.is_unique is expected


def test_is_unique_class_ne(capsys):
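A compact way to exercise the NaN-related rows of the new parametrization outside pytest (a sketch under the same assumption as the test, namely a pandas build that contains this fix):

import numpy as np
import pandas as pd

cases = [
    ([np.nan], True),                         # a lone NaN is unique
    (['foo', 'bar', np.nan], True),           # one NaN among otherwise unique values
    (['foo', 'foo', np.nan], False),          # duplicated non-NaN values
    (['foo', 'bar', np.nan, np.nan], False),  # repeated NaN is a duplicate
]
for data, expected in cases:
    assert pd.Series(data).is_unique is expected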
