-
-
Notifications
You must be signed in to change notification settings - Fork 18.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
API: Change default for Index.union sort #25007
Changes from all commits
aac172c
d4bcc55
45c827c
68b72a6
8716f97
f7056d5
e82cbb1
2a2de25
5c3da74
ce6d1db
52a2f2f
bb848f1
b15dc7e
27b5b16
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2245,18 +2245,34 @@ def _get_reconciled_name_object(self, other): | |
return self._shallow_copy(name=name) | ||
return self | ||
|
||
def union(self, other, sort=True): | ||
def union(self, other, sort=None): | ||
""" | ||
Form the union of two Index objects. | ||
|
||
Parameters | ||
---------- | ||
other : Index or array-like | ||
sort : bool, default True | ||
Sort the resulting index if possible | ||
sort : bool or None, default None | ||
Whether to sort the resulting Index. | ||
|
||
* None : Sort the result, except when | ||
|
||
1. `self` and `other` are equal. | ||
2. `self` or `other` has length 0. | ||
3. Some values in `self` or `other` cannot be compared. | ||
A RuntimeWarning is issued in this case. | ||
|
||
* True : sort the result. A TypeError is raised when the | ||
values cannot be compared. | ||
* False : do not sort the result. | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
.. versionchanged:: 0.24.1 | ||
|
||
Changed the default `sort` to None, matching the | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this being changed? This is certainly not a regression at all. This was the default behavior. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. To be clear: no behaviour is changed. It was indeed the default, and it stays the default. It's only the value that encodes the default that is changed (True -> None), so that True can mean something else (= always sort). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, maybe it should be clearer in the docstring. |
||
behavior of pandas 0.23.4 and earlier. | ||
|
||
Returns | ||
------- | ||
union : Index | ||
|
@@ -2273,10 +2289,16 @@ def union(self, other, sort=True): | |
other = ensure_index(other) | ||
|
||
if len(other) == 0 or self.equals(other): | ||
return self._get_reconciled_name_object(other) | ||
result = self._get_reconciled_name_object(other) | ||
if sort: | ||
result = result.sort_values() | ||
return result | ||
|
||
if len(self) == 0: | ||
return other._get_reconciled_name_object(self) | ||
result = other._get_reconciled_name_object(self) | ||
if sort: | ||
result = result.sort_values() | ||
return result | ||
|
||
# TODO: is_dtype_union_equal is a hack around | ||
# 1. buggy set ops with duplicates (GH #13432) | ||
|
@@ -2319,13 +2341,16 @@ def union(self, other, sort=True): | |
else: | ||
result = lvals | ||
|
||
if sort: | ||
if sort is None: | ||
try: | ||
result = sorting.safe_sort(result) | ||
except TypeError as e: | ||
warnings.warn("{}, sort order is undefined for " | ||
"incomparable objects".format(e), | ||
RuntimeWarning, stacklevel=3) | ||
elif sort: | ||
# raise if not sortable. | ||
result = sorting.safe_sort(result) | ||
|
||
# for subclasses | ||
return self._wrap_setop_result(other, result) | ||
|
@@ -2342,8 +2367,12 @@ def intersection(self, other, sort=False): | |
Parameters | ||
---------- | ||
other : Index or array-like | ||
sort : bool, default False | ||
Sort the resulting index if possible | ||
sort : bool or None, default False | ||
Whether to sort the resulting index. | ||
|
||
* False : do not sort the result. | ||
* True : sort the result. A TypeError is raised when the | ||
values cannot be compared. | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
|
@@ -2367,7 +2396,10 @@ def intersection(self, other, sort=False): | |
other = ensure_index(other) | ||
|
||
if self.equals(other): | ||
return self._get_reconciled_name_object(other) | ||
result = self._get_reconciled_name_object(other) | ||
if sort: | ||
result = result.sort_values() | ||
return result | ||
|
||
if not is_dtype_equal(self.dtype, other.dtype): | ||
this = self.astype('O') | ||
|
@@ -2415,7 +2447,7 @@ def intersection(self, other, sort=False): | |
|
||
return taken | ||
|
||
def difference(self, other, sort=True): | ||
def difference(self, other, sort=None): | ||
""" | ||
Return a new Index with elements from the index that are not in | ||
`other`. | ||
|
@@ -2425,11 +2457,24 @@ def difference(self, other, sort=True): | |
Parameters | ||
---------- | ||
other : Index or array-like | ||
sort : bool, default True | ||
Sort the resulting index if possible | ||
sort : bool or None, default None | ||
Whether to sort the resulting index. By default, the | ||
values are attempted to be sorted, but any TypeError from | ||
incomparable elements is caught by pandas. | ||
|
||
* None : Attempt to sort the result, but catch any TypeErrors | ||
from comparing incomparable elements. | ||
* False : Do not sort the result. | ||
* True : Sort the result, raising a TypeError if any elements | ||
cannot be compared. | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
.. versionchanged:: 0.24.1 | ||
|
||
Added the `None` option, which matches the behavior of | ||
pandas 0.23.4 and earlier. | ||
|
||
Returns | ||
------- | ||
difference : Index | ||
|
@@ -2460,27 +2505,42 @@ def difference(self, other, sort=True): | |
label_diff = np.setdiff1d(np.arange(this.size), indexer, | ||
assume_unique=True) | ||
the_diff = this.values.take(label_diff) | ||
if sort: | ||
if sort is None: | ||
try: | ||
the_diff = sorting.safe_sort(the_diff) | ||
except TypeError: | ||
pass | ||
elif sort: | ||
the_diff = sorting.safe_sort(the_diff) | ||
|
||
return this._shallow_copy(the_diff, name=result_name, freq=None) | ||
|
||
def symmetric_difference(self, other, result_name=None, sort=True): | ||
def symmetric_difference(self, other, result_name=None, sort=None): | ||
""" | ||
Compute the symmetric difference of two Index objects. | ||
|
||
Parameters | ||
---------- | ||
other : Index or array-like | ||
result_name : str | ||
sort : bool, default True | ||
Sort the resulting index if possible | ||
sort : bool or None, default None | ||
Whether to sort the resulting index. By default, the | ||
values are attempted to be sorted, but any TypeError from | ||
incomparable elements is caught by pandas. | ||
|
||
* None : Attempt to sort the result, but catch any TypeErrors | ||
from comparing incomparable elements. | ||
* False : Do not sort the result. | ||
* True : Sort the result, raising a TypeError if any elements | ||
cannot be compared. | ||
|
||
.. versionadded:: 0.24.0 | ||
|
||
.. versionchanged:: 0.24.1 | ||
|
||
Added the `None` option, which matches the behavior of | ||
pandas 0.23.4 and earlier. | ||
|
||
Returns | ||
------- | ||
symmetric_difference : Index | ||
|
@@ -2524,11 +2584,13 @@ def symmetric_difference(self, other, result_name=None, sort=True): | |
right_diff = other.values.take(right_indexer) | ||
|
||
the_diff = _concat._concat_compat([left_diff, right_diff]) | ||
if sort: | ||
if sort is None: | ||
try: | ||
the_diff = sorting.safe_sort(the_diff) | ||
except TypeError: | ||
pass | ||
elif sort: | ||
the_diff = sorting.safe_sort(the_diff) | ||
|
||
attribs = self._get_attributes_dict() | ||
attribs['name'] = result_name | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am -1 on this change. We do NOT do this elsewhere, e.g. `.reindex`, so this is extra useless sorting (basically cases 1 and 2 above). I am not sure of the utility of 3 at all. We cannot guarantee sorting; showing a warning is fine; this has been this way since pandas' inception. I don't see any utility in changing this.