-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
return empty MultiIndex for symmetrical difference on equal MultiIndexes #16486
Changes from 3 commits
4903b20
743aa47
0200ca0
efb5f86
7eeb626
0dee788
104d8e6
f49bcfc
85b4b55
f2d24a3
4cd39a9
6c67a4a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2320,6 +2320,11 @@ def symmetric_difference(self, other, result_name=None): | |
except TypeError: | ||
pass | ||
|
||
# On equal MultiIndexes the difference is empty. Therefore an empty | ||
# MultiIndex is returned GH13490 | ||
if self.nlevels > 1 and len(the_diff) == 0: | ||
return type(self)([[] for _ in range(self.nlevels)], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That wouldn't work: `self._shallow_copy` fails if `the_diff` is empty. That's why I am returning an empty MI instead. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry I was confusing it with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @jreback what do you think about a special case in @Appender(_index_shared_docs['_shallow_copy'])
def _shallow_copy(self, values=None, **kwargs):
if values is not None:
if 'name' in kwargs:
kwargs['names'] = kwargs.pop('name', None)
# discards freq
kwargs.pop('freq', None)
# this if block is new
if len(values) == 0:
return MultiIndex(levels=[[] for _ in range(self.nlevels)],
labels=[[] for _ in range(self.nlabels)])
return MultiIndex.from_tuples(values, **kwargs)
return self.view() this would "work" but I don't know if "array of length 0" means same structure, but empty. Maybe it's ok. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this change should be done in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
This leaves around some levels unfortunately: In [16]: idx = pd.MultiIndex.from_product([['a', 'b'], ['A', 'B']])
In [17]: idx[0:0]
Out[17]:
MultiIndex(levels=[['a', 'b'], ['A', 'B']],
labels=[[], []])
In [18]: idx.difference(idx) | idx.difference(idx)
Out[18]:
MultiIndex(levels=[[], []],
labels=[[], []]) I believe we want There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok, then I guess you can special case |
||
[[] for _ in range(self.nlevels)]) | ||
attribs = self._get_attributes_dict() | ||
attribs['name'] = result_name | ||
if 'freq' in attribs: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -188,7 +188,6 @@ def test_constructor_ndarray_like(self): | |
# it should be possible to convert any object that satisfies the numpy | ||
# ndarray interface directly into an Index | ||
class ArrayLike(object): | ||
|
||
def __init__(self, array): | ||
self.array = array | ||
|
||
|
@@ -246,7 +245,6 @@ def test_index_ctor_infer_nan_nat(self): | |
[np.timedelta64('nat'), np.nan], | ||
[pd.NaT, np.timedelta64('nat')], | ||
[np.timedelta64('nat'), pd.NaT]]: | ||
|
||
tm.assert_index_equal(Index(data), exp) | ||
tm.assert_index_equal(Index(np.array(data, dtype=object)), exp) | ||
|
||
|
@@ -936,6 +934,14 @@ def test_symmetric_difference(self): | |
assert tm.equalContents(result, expected) | ||
assert result.name == 'new_name' | ||
|
||
def test_symmetric_difference_on_equal_multiindex(self): | ||
# GH13490 | ||
idx1 = MultiIndex.from_tuples(self.tuples) | ||
idx2 = MultiIndex.from_tuples(self.tuples) | ||
result = idx1.symmetric_difference(idx2) | ||
expected = MultiIndex(levels=[[], []], labels=[[], []]) | ||
assert tm.equalContents(result, expected) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. use There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Would the expected value be None then? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why?
In fact, your example is NOT propagating metadata (that's what _shallow_copy does and why you need to use it). add some |
||
|
||
def test_is_numeric(self): | ||
assert not self.dateIndex.is_numeric() | ||
assert not self.strIndex.is_numeric() | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This could use a comment explaining why we need the special case.