Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix Series.is_unique with single occurrence of NaN #25182

Merged
merged 3 commits into from
Feb 8, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Bug Fixes

**Other**

-
- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`)
-
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1345,7 +1345,7 @@ def is_unique(self):
-------
is_unique : boolean
"""
return self.nunique() == len(self)
return self.nunique(dropna=False) == len(self)

@property
def is_monotonic(self):
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,3 +913,24 @@ def test_astype_category(self, copy, name, ordered):
result = index.astype('category', copy=copy)
expected = CategoricalIndex(index.values, name=name)
tm.assert_index_equal(result, expected)

def test_is_unique(self):
# initialize a unique index
index = self.create_index().drop_duplicates()
assert index.is_unique is True

# empty index should be unique
index_empty = index[:0]
assert index_empty.is_unique is True

# test basic dupes
index_dup = index.insert(0, index[0])
assert index_dup.is_unique is False

# single NA should be unique
index_na = index.insert(0, np.nan)
assert index_na.is_unique is True

# multiple NA should not be unique
index_na_dup = index_na.insert(0, np.nan)
assert index_na_dup.is_unique is False
19 changes: 4 additions & 15 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,10 @@ def test_take(self, closed):
[0, 0, 1], [1, 1, 2], closed=closed)
tm.assert_index_equal(result, expected)

def test_unique(self, closed):
# unique non-overlapping
idx = IntervalIndex.from_tuples(
[(0, 1), (2, 3), (4, 5)], closed=closed)
assert idx.is_unique is True

def test_is_unique_interval(self, closed):
"""
Interval specific tests for is_unique in addition to base class tests
"""
# unique overlapping - distinct endpoints
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
assert idx.is_unique is True
Expand All @@ -261,15 +259,6 @@ def test_unique(self, closed):
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
assert idx.is_unique is True

# duplicate
idx = IntervalIndex.from_tuples(
[(0, 1), (0, 1), (2, 3)], closed=closed)
assert idx.is_unique is False

# empty
idx = IntervalIndex([], closed=closed)
assert idx.is_unique is True

def test_monotonic(self, closed):
# increasing non-overlapping
idx = IntervalIndex.from_tuples(
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ def test_has_duplicates(idx, idx_dup):
assert mi.is_unique is False
assert mi.has_duplicates is True

# single instance of NaN
mi_nan = MultiIndex(levels=[['a', 'b'], [0, 1]],
codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]])
assert mi_nan.is_unique is True
assert mi_nan.has_duplicates is False

# multiple instances of NaN
mi_nan_dup = MultiIndex(levels=[['a', 'b'], [0, 1]],
codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]])
assert mi_nan_dup.is_unique is False
assert mi_nan_dup.has_duplicates is True


def test_has_duplicates_from_tuples():
# GH 9075
Expand Down
12 changes: 0 additions & 12 deletions pandas/tests/indexes/period/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,18 +441,6 @@ def test_is_monotonic_decreasing(self):
assert idx_dec1.is_monotonic_decreasing is True
assert idx.is_monotonic_decreasing is False

def test_is_unique(self):
# GH 17717
p0 = pd.Period('2017-09-01')
p1 = pd.Period('2017-09-02')
p2 = pd.Period('2017-09-03')

idx0 = pd.PeriodIndex([p0, p1, p2])
assert idx0.is_unique is True

idx1 = pd.PeriodIndex([p1, p1, p2])
assert idx1.is_unique is False

def test_contains(self):
# GH 17717
p0 = pd.Period('2017-09-01')
Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/indexes/test_category.py
Original file line number Diff line number Diff line change
Expand Up @@ -611,15 +611,6 @@ def test_is_monotonic(self, data, non_lexsorted_data):
assert c.is_monotonic_increasing is True
assert c.is_monotonic_decreasing is False

@pytest.mark.parametrize('values, expected', [
([1, 2, 3], True),
([1, 3, 1], False),
(list('abc'), True),
(list('aba'), False)])
def test_is_unique(self, values, expected):
ci = CategoricalIndex(values)
assert ci.is_unique is expected

def test_has_duplicates(self):

idx = CategoricalIndex([0, 0, 0], name='foo')
Expand Down
18 changes: 12 additions & 6 deletions pandas/tests/series/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,18 @@ def test_unique_data_ownership():
Series(Series(["a", "c", "b"]).unique()).sort_values()


@pytest.mark.parametrize('data, expected', [
    (np.random.randint(0, 10, size=1000), False),
    (np.arange(1000), True),
    ([], True),
    ([np.nan], True),
    (['foo', 'bar', np.nan], True),
    (['foo', 'foo', np.nan], False),
    (['foo', 'bar', np.nan, np.nan], False)])
def test_is_unique(data, expected):
    """Series.is_unique should treat a single NaN as unique.

    Covers the original GH11946 cases (large random / arange series) plus
    the GH25180 regression cases: empty input, a lone NaN, and NaN mixed
    with unique and duplicated labels.
    """
    # GH11946 / GH25180
    s = Series(data)
    assert s.is_unique is expected


def test_is_unique_class_ne(capsys):
Expand Down