From aac172cefd468f3896ea9019b92fdc00efef6ef4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 11:59:22 -0600 Subject: [PATCH 01/22] [WIP]: API: Change default for Index.union sort Closes https://github.com/pandas-dev/pandas/issues/24959 --- pandas/core/indexes/base.py | 37 +++++++++++++++++++++---- pandas/tests/indexes/test_base.py | 46 +++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 6 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 767da81c5c43a..41430249055f9 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2245,18 +2245,34 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self - def union(self, other, sort=True): + def union(self, other, sort=None): """ Form the union of two Index objects. Parameters ---------- other : Index or array-like - sort : bool, default True - Sort the resulting index if possible + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `self` or `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * True : sort the result. A TypeError is raised when the + values cannot be compared. + * False : do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.0 + + Changed the default `sort` to None, matching the + behavior of pandas 0.23.4 and earlier. 
+ Returns ------- union : Index @@ -2273,10 +2289,16 @@ def union(self, other, sort=True): other = ensure_index(other) if len(other) == 0 or self.equals(other): - return self._get_reconciled_name_object(other) + result = self._get_reconciled_name_object(other) + if sort: + result = result.sort_values() + return result if len(self) == 0: - return other._get_reconciled_name_object(self) + result = other._get_reconciled_name_object(self) + if sort: + result = result.sort_values() + return result # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) @@ -2319,13 +2341,16 @@ def union(self, other, sort=True): else: result = lvals - if sort: + if sort is None: try: result = sorting.safe_sort(result) except TypeError as e: warnings.warn("{}, sort order is undefined for " "incomparable objects".format(e), RuntimeWarning, stacklevel=3) + elif sort: + # raise if not sortable. + result = sorting.safe_sort(result) # for subclasses return self._wrap_setop_result(other, result) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index f3e9d835c7391..c38e956dafcac 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -794,6 +794,52 @@ def test_union(self, sort): tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) + def test_union_sort_other_equal(self): + a = pd.Index([1, 0, 2]) + # default, sort=None + result = a.union(a) + tm.assert_index_equal(result, a) + + # sort=True + result = a.union(a, sort=True) + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(result, expected) + + # sort=False + result = a.union(a, sort=False) + tm.assert_index_equal(result, a) + + def test_union_sort_other_empty(self): + a = pd.Index([1, 0, 2]) + # default, sort=None + tm.assert_index_equal(a.union(a[:0]), a) + tm.assert_index_equal(a[:0].union(a), a) + + # sort=True + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(a.union(a[:0], 
sort=True), expected) + tm.assert_index_equal(a[:0].union(a, sort=True), expected) + + # sort=False + tm.assert_index_equal(a.union(a[:0], sort=False), a) + tm.assert_index_equal(a[:0].union(a, sort=False), a) + + def test_union_sort_other_incomparable(self): + a = pd.Index([1, pd.Timestamp('2000')]) + # default, sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = a.union(a[:1]) + + tm.assert_index_equal(result, a) + + # sort=True + with pytest.raises(TypeError, match='.*'): + a.union(a[:1], sort=True) + + # sort=False + result = a.union(a[:1], sort=False) + tm.assert_index_equal(result, a) + @pytest.mark.parametrize("klass", [ np.array, Series, list]) @pytest.mark.parametrize("sort", [True, False]) From d4bcc55a161c56f485b95fdbcfcff88344df47d4 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 13:27:49 -0600 Subject: [PATCH 02/22] update test --- pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/test_base.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 41430249055f9..dac32b887a596 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2268,7 +2268,7 @@ def union(self, other, sort=None): .. versionadded:: 0.24.0 - .. versionchanged:: 0.24.0 + .. versionchanged:: 0.24.1 Changed the default `sort` to None, matching the behavior of pandas 0.23.4 and earlier. 
diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c38e956dafcac..972d0bad45426 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -856,19 +856,20 @@ def test_union_from_iterables(self, klass, sort): tm.assert_index_equal(result, everything.sort_values()) assert tm.equalContents(result, everything) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, True, False]) def test_union_identity(self, sort): # TODO: replace with fixturesult first = self.strIndex[5:20] union = first.union(first, sort=sort) - assert union is first + # i.e. identity is not preserved when sort is True + assert (union is first) is (not sort) union = first.union([], sort=sort) - assert union is first + assert (union is first) is (not sort) union = Index([]).union(first, sort=sort) - assert union is first + assert (union is first) is (not sort) @pytest.mark.parametrize("first_list", [list('ba'), list()]) @pytest.mark.parametrize("second_list", [list('ab'), list()]) From 45c827cdf59fef7f4cdc229be4a880372a6d0f4b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 20:38:52 -0600 Subject: [PATCH 03/22] fixups --- pandas/_libs/lib.pyx | 5 ++- pandas/core/indexes/multi.py | 26 ++++++++++-- pandas/tests/indexes/multi/test_set_ops.py | 39 ++++++++++++++++++ pandas/tests/indexes/test_base.py | 48 +++++++++------------- 4 files changed, 85 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 4745916eb0ce2..f3ff3f7678e1a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -233,11 +233,14 @@ def fast_unique_multiple(list arrays, sort: bool=True): if val not in table: table[val] = stub uniques.append(val) - if sort: + if sort is None: try: uniques.sort() except Exception: + # TODO: RuntimeWarning? 
pass + elif sort: + uniques.sort() return uniques diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index e4d01a40bd181..bdbb6c812c5c6 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2879,18 +2879,34 @@ def equal_levels(self, other): return False return True - def union(self, other, sort=True): + def union(self, other, sort=None): """ Form the union of two MultiIndex objects Parameters ---------- other : MultiIndex or array / Index of tuples - sort : bool, default True - Sort the resulting MultiIndex if possible + sort : bool or None, default None + Whether to sort the resulting Index. + + * None : Sort the result, except when + + 1. `self` and `other` are equal. + 2. `other` has length 0. + 3. Some values in `self` or `other` cannot be compared. + A RuntimeWarning is issued in this case. + + * True : sort the result. A TypeError is raised when the + values cannot be compared. + * False : do not sort the result. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default `sort` to None, matching the + behavior of pandas 0.23.4 and earlier. + Returns ------- Index @@ -2901,8 +2917,12 @@ def union(self, other, sort=True): other, result_names = self._convert_can_do_setop(other) if len(other) == 0 or self.equals(other): + if sort: + return self.sort_values() return self + # TODO: Index.union returns other when `len(self)` is 0. 
+ uniq_tuples = lib.fast_unique_multiple([self._ndarray_values, other._ndarray_values], sort=sort) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 208d6cf1c639f..09321000eb80d 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -249,3 +249,42 @@ def test_intersection(idx, sort): # tuples = _index.values # result = _index & tuples # assert result.equals(tuples) + + +@pytest.mark.parametrize('slice_', [slice(None), slice(0)]) +def test_union_sort_other_empty(slice_): + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + # Two cases: + # 1. idx is other + # 2. other is empty + + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + # MultiIndex does not special case empty.union(idx) + # tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + # sort=True + result = idx.union(other, sort=True) + expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(result, expected) + + +def test_union_sort_other_incomparable(): + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']]) + + # default, sort=None + # with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1]) + tm.assert_index_equal(result, idx) + + # sort=True + with pytest.raises(TypeError, match='Cannot compare'): + idx.union(idx[:1], sort=True) + + # sort=False + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 972d0bad45426..c2fdab98977ba 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -794,51 +794,41 @@ def test_union(self, sort): tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) - def test_union_sort_other_equal(self): - a = 
pd.Index([1, 0, 2]) - # default, sort=None - result = a.union(a) - tm.assert_index_equal(result, a) + @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # Two cases: + # 1. idx is other + # 2. other is empty - # sort=True - result = a.union(a, sort=True) - expected = pd.Index([0, 1, 2]) - tm.assert_index_equal(result, expected) + idx = pd.Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) # sort=False - result = a.union(a, sort=False) - tm.assert_index_equal(result, a) - - def test_union_sort_other_empty(self): - a = pd.Index([1, 0, 2]) - # default, sort=None - tm.assert_index_equal(a.union(a[:0]), a) - tm.assert_index_equal(a[:0].union(a), a) + tm.assert_index_equal(idx.union(other, sort=False), idx) # sort=True + result = idx.union(other, sort=True) expected = pd.Index([0, 1, 2]) - tm.assert_index_equal(a.union(a[:0], sort=True), expected) - tm.assert_index_equal(a[:0].union(a, sort=True), expected) - - # sort=False - tm.assert_index_equal(a.union(a[:0], sort=False), a) - tm.assert_index_equal(a[:0].union(a, sort=False), a) + tm.assert_index_equal(result, expected) def test_union_sort_other_incomparable(self): - a = pd.Index([1, pd.Timestamp('2000')]) + idx = pd.Index([1, pd.Timestamp('2000')]) # default, sort=None with tm.assert_produces_warning(RuntimeWarning): - result = a.union(a[:1]) + result = idx.union(idx[:1]) - tm.assert_index_equal(result, a) + tm.assert_index_equal(result, idx) # sort=True with pytest.raises(TypeError, match='.*'): - a.union(a[:1], sort=True) + idx.union(idx[:1], sort=True) # sort=False - result = a.union(a[:1], sort=False) - tm.assert_index_equal(result, a) + result = idx.union(idx[:1], sort=False) + tm.assert_index_equal(result, idx) @pytest.mark.parametrize("klass", [ np.array, Series, list]) From 8716f977433cfc62e6c279deadb5d285806c135c Mon Sep 17 00:00:00 2001 From: 
Tom Augspurger Date: Tue, 29 Jan 2019 20:44:44 -0600 Subject: [PATCH 04/22] multi --- pandas/tests/indexes/multi/test_set_ops.py | 14 ++++++-------- pandas/tests/indexes/test_base.py | 5 ++--- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 09321000eb80d..c0c11ccab2a26 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -253,10 +253,8 @@ def test_intersection(idx, sort): @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) def test_union_sort_other_empty(slice_): + # https://github.com/pandas-dev/pandas/issues/24959 idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) - # Two cases: - # 1. idx is other - # 2. other is empty # default, sort=None other = idx[slice_] @@ -274,17 +272,17 @@ def test_union_sort_other_empty(slice_): def test_union_sort_other_incomparable(): + # https://github.com/pandas-dev/pandas/issues/24959 idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']]) # default, sort=None - # with tm.assert_produces_warning(RuntimeWarning): result = idx.union(idx[:1]) tm.assert_index_equal(result, idx) - # sort=True - with pytest.raises(TypeError, match='Cannot compare'): - idx.union(idx[:1], sort=True) - # sort=False result = idx.union(idx[:1], sort=False) tm.assert_index_equal(result, idx) + + # sort=True + with pytest.raises(TypeError, match='Cannot compare'): + idx.union(idx[:1], sort=True) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4eaad5b31de7d..d6a9b3b769965 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -801,9 +801,7 @@ def test_union(self, sort): @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) def test_union_sort_other_special(self, slice_): - # Two cases: - # 1. idx is other - # 2. 
other is empty + # https://github.com/pandas-dev/pandas/issues/24959 idx = pd.Index([1, 0, 2]) # default, sort=None @@ -820,6 +818,7 @@ def test_union_sort_other_special(self, slice_): tm.assert_index_equal(result, expected) def test_union_sort_other_incomparable(self): + # https://github.com/pandas-dev/pandas/issues/24959 idx = pd.Index([1, pd.Timestamp('2000')]) # default, sort=None with tm.assert_produces_warning(RuntimeWarning): From f7056d5e2d9f66eab73c2ff595712cb46b66e465 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 20:45:59 -0600 Subject: [PATCH 05/22] doc typo --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 6f832bba5ec67..cab15e727fa6e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2937,7 +2937,7 @@ def intersection(self, other, sort=False): Parameters ---------- other : MultiIndex or array / Index of tuples - sort : bool, default True + sort : bool, default False Sort the resulting MultiIndex if possible .. versionadded:: 0.24.0 From e82cbb184373080d641c62ecd9c4aced245d8cf6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 21:03:56 -0600 Subject: [PATCH 06/22] intersection --- pandas/core/indexes/base.py | 13 ++++++++++--- pandas/core/indexes/multi.py | 2 ++ pandas/tests/indexes/multi/test_set_ops.py | 22 +++++++++++++++++++--- pandas/tests/indexes/test_base.py | 11 ++++++++++- 4 files changed, 41 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e0d5d37087055..b2dc46dab620c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2367,8 +2367,12 @@ def intersection(self, other, sort=False): Parameters ---------- other : Index or array-like - sort : bool, default False - Sort the resulting index if possible + sort : bool or None, default False + Whether to sort the resulting index. 
+ + * False : do not sort the result. + * True : sort the result. A TypeError is raised when the + values cannot be compared. .. versionadded:: 0.24.0 @@ -2392,7 +2396,10 @@ def intersection(self, other, sort=False): other = ensure_index(other) if self.equals(other): - return self._get_reconciled_name_object(other) + result = self._get_reconciled_name_object(other) + if sort: + result = result.sort_values() + return result if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index cab15e727fa6e..2115135552df2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2954,6 +2954,8 @@ def intersection(self, other, sort=False): other, result_names = self._convert_can_do_setop(other) if self.equals(other): + if sort: + return self.sort_values() return self self_tuples = self._ndarray_values diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index c0c11ccab2a26..eee649de61714 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -203,10 +203,16 @@ def test_union(idx, sort): # corner case, pass self or empty thing: the_union = idx.union(idx, sort=sort) - assert the_union is idx + if sort: + tm.assert_index_equal(the_union, idx.sort_values()) + else: + assert the_union is idx the_union = idx.union(idx[:0], sort=sort) - assert the_union is idx + if sort: + tm.assert_index_equal(the_union, idx.sort_values()) + else: + assert the_union is idx # won't work in python 3 # tuples = _index.values @@ -238,7 +244,10 @@ def test_intersection(idx, sort): # corner case, pass self the_int = idx.intersection(idx, sort=sort) - assert the_int is idx + if sort: + tm.assert_index_equal(the_int, idx.sort_values()) + else: + assert the_int is idx # empty intersection: disjoint empty = idx[:2].intersection(idx[2:], sort=sort) @@ -251,6 +260,13 @@ def test_intersection(idx, 
sort): # assert result.equals(tuples) +def test_intersect_equal_sort(): + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + + @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) def test_union_sort_other_empty(slice_): # https://github.com/pandas-dev/pandas/issues/24959 diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d6a9b3b769965..cff515604179f 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -695,7 +695,10 @@ def test_intersection(self, sort): # Corner cases inter = first.intersection(first, sort=sort) - assert inter is first + if sort: + tm.assert_index_equal(inter, first.sort_values()) + else: + assert inter is first @pytest.mark.parametrize("index2,keeps_name", [ (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name @@ -770,6 +773,12 @@ def test_intersect_nosort(self): expected = pd.Index(['b', 'a']) tm.assert_index_equal(result, expected) + def test_intersect_equal_sort(self): + idx = pd.Index(['c', 'a', 'b']) + sorted_ = pd.Index(['a', 'b', 'c']) + tm.assert_index_equal(idx.intersection(idx, sort=False), idx) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + @pytest.mark.parametrize("sort", [True, False]) def test_chained_union(self, sort): # Chained unions handles names correctly From 2a2de250f02f8fb67960d03cead0faf1127666b2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 29 Jan 2019 21:13:48 -0600 Subject: [PATCH 07/22] whatsnew --- doc/source/whatsnew/v0.24.1.rst | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 828c35c10e958..19d15b533c61b 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ 
b/doc/source/whatsnew/v0.24.1.rst @@ -15,10 +15,34 @@ Whats New in 0.24.1 (February XX, 2019) These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog including other versions of pandas. +.. _whatsnew_0241.api: + +API Changes +~~~~~~~~~~~ + +Changing the ``sort`` parameter for :meth:`Index.Union` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None``. +The default *behavior* remains the same: The result is sorted, unless + +1. ``self`` and ``other`` are identical +2. ``self`` or ``other`` is empty +3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). + +This allows ``sort=True`` to now mean "always sort" A ``TypeError`` is raised if the values cannot be compared. + +Changed the behavior of :meth:`Index.intersection` with ``sort=True`` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When ``sort=True`` is provided to :meth:`Index.intersection`, the values are always sorted. In 0.24.0, +the values would not be sorted when ``self`` and ``other`` were identical. Pass ``sort=False`` to not +sort the values. + .. _whatsnew_0241.regressions: Fixed Regressions -^^^^^^^^^^^^^^^^^ +~~~~~~~~~~~~~~~~~ - Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`) - Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`) @@ -27,7 +51,7 @@ Fixed Regressions .. _whatsnew_0241.enhancements: Enhancements -^^^^^^^^^^^^ +~~~~~~~~~~~~ .. 
_whatsnew_0241.bug_fixes: From 5c3da746dd6232ebe1a41bfc8a7620d48c43bcc7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 30 Jan 2019 07:50:39 -0600 Subject: [PATCH 08/22] update whatsnew --- doc/source/whatsnew/v0.24.1.rst | 53 +++++++++++++++++++++++++++++++-- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 19d15b533c61b..9e09d02d8d2f8 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -20,7 +20,7 @@ including other versions of pandas. API Changes ~~~~~~~~~~~ -Changing the ``sort`` parameter for :meth:`Index.Union` +Changing the ``sort`` parameter for :meth:`Index.union` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None``. @@ -30,14 +30,61 @@ The default *behavior* remains the same: The result is sorted, unless 2. ``self`` or ``other`` is empty 3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). -This allows ``sort=True`` to now mean "always sort" A ``TypeError`` is raised if the values cannot be compared. +This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised if the values cannot be compared. + +**Behavior in 0.24.0** + +.. ipython:: python + + In [1]: idx = pd.Index(['b', 'a']) + + In [2]: idx.union(idx) # sort=True was the default. + Out[2]: Index(['b', 'a'], dtype='object') + + In [3]: idx.union(idx, sort=True) # result is still not sorted. + Out[3]: Index(['b', 'a'], dtype='object') + +**New Behavior** + +.. ipython:: python + + idx = pd.Index(['b', 'a']) + idx.union(idx) # sort=None is the default. Don't sort identical operands. 
+ + idx.union(idx, sort=True) Changed the behavior of :meth:`Index.intersection` with ``sort=True`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ When ``sort=True`` is provided to :meth:`Index.intersection`, the values are always sorted. In 0.24.0, the values would not be sorted when ``self`` and ``other`` were identical. Pass ``sort=False`` to not -sort the values. +sort the values. This matches the behavior of pandas 0.23.4 and earlier. + +**Behavior in 0.23.4** + +.. ipython:: python + + In [2]: idx = pd.Index(['b', 'a']) + + In [3]: idx.intersection(idx) # sort was not a keyword. + Out[3]: Index(['b', 'a'], dtype='object') + +**Behavior in 0.24.0** + +.. ipython:: python + + In [5]: idx.intersection(idx) # sort=True by default. Don't sort identical. + Out[5]: Index(['b', 'a'], dtype='object') + + In [6]: idx.intersection(idx, sort=True) + Out[6]: Index(['b', 'a'], dtype='object') + +**New Behavior** + +.. ipython:: python + + idx.intersection(idx) # sort=False by default + idx.intersection(idx, sort=True) .. _whatsnew_0241.regressions: From 52a2f2f567ae6d2e1c38fab937ad00fa09a9d541 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 31 Jan 2019 07:54:15 -0600 Subject: [PATCH 09/22] symdiff --- pandas/core/indexes/base.py | 22 ++++++++++++++++++---- pandas/tests/indexes/test_base.py | 17 +++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index b2dc46dab620c..dc7f1da69bd02 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2500,7 +2500,7 @@ def difference(self, other, sort=True): return this._shallow_copy(the_diff, name=result_name, freq=None) - def symmetric_difference(self, other, result_name=None, sort=True): + def symmetric_difference(self, other, result_name=None, sort=None): """ Compute the symmetric difference of two Index objects. 
@@ -2508,11 +2508,23 @@ def symmetric_difference(self, other, result_name=None, sort=True): ---------- other : Index or array-like result_name : str - sort : bool, default True - Sort the resulting index if possible + sort : bool or None, default None. + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + * True : Sort the result, raising a TypeError if any elements + cannot be compared. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Added the `None` option. + Returns ------- symmetric_difference : Index @@ -2556,11 +2568,13 @@ def symmetric_difference(self, other, result_name=None, sort=True): right_diff = other.values.take(right_indexer) the_diff = _concat._concat_compat([left_diff, right_diff]) - if sort: + if sort is None: try: the_diff = sorting.safe_sort(the_diff) except TypeError: pass + elif sort: + the_diff = sorting.safe_sort(the_diff) attribs = self._get_attributes_dict() attribs['name'] = result_name diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index cff515604179f..995a52335ec87 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1099,6 +1099,23 @@ def test_symmetric_difference(self, sort): assert tm.equalContents(result, expected) assert result.name is None + def test_symmetric_difference_incomparable(self): + a = pd.Index([3, pd.Timestamp('2000'), 1]) + b = pd.Index([2, pd.Timestamp('1999'), 1]) + + # sort=None, the default + result = a.symmetric_difference(b) + expected = pd.Index([3, pd.Timestamp('2000'), 2, pd.Timestamp('1999')]) + tm.assert_index_equal(result, expected) + + # sort=False + result = a.symmetric_difference(b, sort=False) + tm.assert_index_equal(result, expected) + + # sort=True, raises + with 
pytest.raises(TypeError, match='Cannot compare'): + a.symmetric_difference(b, sort=True) + @pytest.mark.parametrize("sort", [True, False]) def test_symmetric_difference_mi(self, sort): index1 = MultiIndex.from_tuples(self.tuples) From bb848f19a68ccb3a3a74f366a26dd152d9f4ad12 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 31 Jan 2019 07:56:19 -0600 Subject: [PATCH 10/22] whatsnew --- doc/source/whatsnew/v0.24.1.rst | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 4045417c6f5e9..14482c93fa67e 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -23,7 +23,7 @@ API Changes Changing the ``sort`` parameter for :meth:`Index.union` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None``. +The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`). The default *behavior* remains the same: The result is sorted, unless 1. ``self`` and ``other`` are identical @@ -53,6 +53,9 @@ This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised i idx.union(idx, sort=True) +The same change applies to :meth:`Index.symmetric_difference`, which would previously not +sort the result when ``sort=True`` but the values could not be compared. 
+ Changed the behavior of :meth:`Index.intersection` with ``sort=True`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From b15dc7e8de3af51fc3daa190b9daa53aa4841db5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 31 Jan 2019 07:57:27 -0600 Subject: [PATCH 11/22] doc --- pandas/core/indexes/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dc7f1da69bd02..0fa6b168580b4 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2523,7 +2523,8 @@ def symmetric_difference(self, other, result_name=None, sort=None): .. versionchanged:: 0.24.1 - Added the `None` option. + Added the `None` option, which matches the behavior of + pandas 0.23.4 and earlier. Returns ------- From 27b5b16d95bdb211a489fa32e54f85eed4bbead5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 31 Jan 2019 09:05:42 -0600 Subject: [PATCH 12/22] index multi --- doc/source/whatsnew/v0.24.1.rst | 4 +-- pandas/core/indexes/base.py | 25 ++++++++++++---- pandas/core/indexes/multi.py | 4 ++- pandas/tests/indexes/multi/test_set_ops.py | 35 +++++++++++++++++++++- pandas/tests/indexes/test_base.py | 15 +++++++--- 5 files changed, 70 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 14482c93fa67e..948350df140eb 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -53,8 +53,8 @@ This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised i idx.union(idx, sort=True) -The same change applies to :meth:`Index.symmetric_difference`, which would previously not -sort the result when ``sort=True`` but the values could not be compared. +The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which +would previously not sort the result when ``sort=True`` but the values could not be compared. 
Changed the behavior of :meth:`Index.intersection` with ``sort=True`` ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 0fa6b168580b4..12880ed93cc2a 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2447,7 +2447,7 @@ def intersection(self, other, sort=False): return taken - def difference(self, other, sort=True): + def difference(self, other, sort=None): """ Return a new Index with elements from the index that are not in `other`. @@ -2457,11 +2457,24 @@ def difference(self, other, sort=True): Parameters ---------- other : Index or array-like - sort : bool, default True - Sort the resulting index if possible + sort : bool or None, default None + Whether to sort the resulting index. By default, the + values are attempted to be sorted, but any TypeError from + incomparable elements is caught by pandas. + + * None : Attempt to sort the result, but catch any TypeErrors + from comparing incomparable elements. + * False : Do not sort the result. + * True : Sort the result, raising a TypeError if any elements + cannot be compared. .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Added the `None` option, which matches the behavior of + pandas 0.23.4 and earlier. + Returns ------- difference : Index @@ -2492,11 +2505,13 @@ def difference(self, other, sort=True): label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) the_diff = this.values.take(label_diff) - if sort: + if sort is None: try: the_diff = sorting.safe_sort(the_diff) except TypeError: pass + elif sort: + the_diff = sorting.safe_sort(the_diff) return this._shallow_copy(the_diff, name=result_name, freq=None) @@ -2508,7 +2523,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): ---------- other : Index or array-like result_name : str - sort : bool or None, default None. + sort : bool or None, default None Whether to sort the resulting index. 
By default, the values are attempted to be sorted, but any TypeError from incomparable elements is caught by pandas. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 2115135552df2..32a5a09359019 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2973,7 +2973,7 @@ def intersection(self, other, sort=False): return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) - def difference(self, other, sort=True): + def difference(self, other, sort=None): """ Compute set difference of two MultiIndex objects @@ -2993,6 +2993,8 @@ def difference(self, other, sort=True): other, result_names = self._convert_can_do_setop(other) if len(other) == 0: + if sort: + return self.sort_values() return self if self.equals(other): diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index eee649de61714..6a42e29aa8f5c 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -174,7 +174,10 @@ def test_difference(idx, sort): # name from empty array result = first.difference([], sort=sort) - assert first.equals(result) + if sort: + assert first.sort_values().equals(result) + else: + assert first.equals(result) assert first.names == result.names # name from non-empty array @@ -189,6 +192,36 @@ def test_difference(idx, sort): first.difference([1, 2, 3, 4, 5], sort=sort) +def test_difference_sort_special(): + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + # sort=None, the default + result = idx.difference([]) + tm.assert_index_equal(result, idx) + + result = idx.difference([], sort=True) + expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(result, expected) + + +def test_difference_sort_incomparable(): + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2], + ['a', 'b']]) + + other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4], + ['c', 'd']]) + # 
sort=None, the default + # result = idx.difference(other) + # tm.assert_index_equal(result, idx) + + # sort=False + result = idx.difference(other) + tm.assert_index_equal(result, idx) + + # sort=True, raises + with pytest.raises(TypeError): + idx.difference(other, sort=True) + + @pytest.mark.parametrize("sort", [True, False]) def test_union(idx, sort): piece1 = idx[:5][::-1] diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 995a52335ec87..4e8555cbe1aab 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -3,6 +3,7 @@ from collections import defaultdict from datetime import datetime, timedelta import math +import operator import sys import numpy as np @@ -1099,22 +1100,28 @@ def test_symmetric_difference(self, sort): assert tm.equalContents(result, expected) assert result.name is None - def test_symmetric_difference_incomparable(self): + @pytest.mark.parametrize('opname', ['difference', 'symmetric_difference']) + def test_difference_incomparable(self, opname): a = pd.Index([3, pd.Timestamp('2000'), 1]) b = pd.Index([2, pd.Timestamp('1999'), 1]) + op = operator.methodcaller(opname, b) # sort=None, the default - result = a.symmetric_difference(b) + result = op(a) expected = pd.Index([3, pd.Timestamp('2000'), 2, pd.Timestamp('1999')]) + if opname == 'difference': + expected = expected[:2] tm.assert_index_equal(result, expected) # sort=False - result = a.symmetric_difference(b, sort=False) + op = operator.methodcaller(opname, b, sort=False) + result = op(a) tm.assert_index_equal(result, expected) # sort=True, raises + op = operator.methodcaller(opname, b, sort=True) with pytest.raises(TypeError, match='Cannot compare'): - a.symmetric_difference(b, sort=True) + op(a) @pytest.mark.parametrize("sort", [True, False]) def test_symmetric_difference_mi(self, sort): From 1564d4f8c698cfe3c3c71c2fa5b2248b20d90f65 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Jan 2019 21:02:53 +0100 
Subject: [PATCH 13/22] Change True to None; disallow True --- doc/source/whatsnew/v0.24.1.rst | 66 ++------- pandas/_libs/lib.pyx | 2 - pandas/core/indexes/base.py | 57 +++----- pandas/core/indexes/datetimes.py | 6 +- pandas/core/indexes/interval.py | 2 +- pandas/core/indexes/multi.py | 33 ++--- pandas/core/indexes/range.py | 8 +- pandas/tests/indexes/common.py | 2 +- pandas/tests/indexes/datetimes/test_setops.py | 12 +- .../tests/indexes/interval/test_interval.py | 24 ++-- pandas/tests/indexes/multi/test_set_ops.py | 104 +++++++------- pandas/tests/indexes/period/test_period.py | 2 +- pandas/tests/indexes/period/test_setops.py | 20 +-- pandas/tests/indexes/test_base.py | 127 ++++++++++-------- pandas/tests/indexes/test_range.py | 2 +- .../indexes/timedeltas/test_timedelta.py | 8 +- 16 files changed, 217 insertions(+), 258 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 948350df140eb..ecce8005cfb3a 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -20,74 +20,24 @@ including other versions of pandas. API Changes ~~~~~~~~~~~ -Changing the ``sort`` parameter for :meth:`Index.union` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Changing the ``sort`` parameter for :class:`Index` set operations +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`). -The default *behavior* remains the same: The result is sorted, unless +The default *behavior*, however, remains the same: the result is sorted, unless 1. ``self`` and ``other`` are identical 2. ``self`` or ``other`` is empty 3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). -This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised if the values cannot be compared. - -**Behavior in 0.24.0** - -.. 
ipython:: python - - In [1]: idx = pd.Index(['b', 'a']) - - In [2]: idx.union(idx) # sort=True was the default. - Out[2]: Index(['b', 'a'], dtype='object') - - In [3]: idx.union(idx, sort=True) # result is still not sorted. - Out[32]: Index(['b', 'a'], dtype='object') - -**New Behavior** - -.. ipython:: python - - idx = pd.Index(['b', 'a']) - idx.union(idx) # sort=None is the default. Don't sort identical operands. - - idx.union(idx, sort=True) +This change will allow ``sort=True`` to mean "always sort" in a future release. The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which -would previously not sort the result when ``sort=True`` but the values could not be compared. - -Changed the behavior of :meth:`Index.intersection` with ``sort=True`` -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -When ``sort=True`` is provided to :meth:`Index.intersection`, the values are always sorted. In 0.24.0, -the values would not be sorted when ``self`` and ``other`` were identical. Pass ``sort=False`` to not -sort the values. This matches the behavior of pandas 0.23.4 and earlier. - -**Behavior in 0.23.4** - -.. ipython:: python - - In [2]: idx = pd.Index(['b', 'a']) - - In [3]: idx.intersection(idx) # sort was not a keyword. - Out[3]: Index(['b', 'a'], dtype='object') - -**Behavior in 0.24.0** - -.. ipython:: python - - In [5]: idx.intersection(idx) # sort=True by default. Don't sort identical. - Out[5]: Index(['b', 'a'], dtype='object') - - In [6]: idx.intersection(idx, sort=True) - Out[6]: Index(['b', 'a'], dtype='object') - -**New Behavior** - -.. ipython:: python +would not sort the result when the values could not be compared.
- idx.intersection(idx) # sort=False by default - idx.intersection(idx, sort=True) +For :meth:`Index.intersection` the option of ``sort=True`` is also renamed +to ``sort=None`` (but for :meth:`Index.intersection` it is not the default), as +the result is not sorted when ``self`` and ``other`` were identical. .. _whatsnew_0241.regressions: diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index c9473149d8a84..f8875d60049b1 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -239,8 +239,6 @@ def fast_unique_multiple(list arrays, sort: bool=True): except Exception: # TODO: RuntimeWarning? pass - elif sort: - uniques.sort() return uniques diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 12880ed93cc2a..870dd6165daf5 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2245,6 +2245,11 @@ def _get_reconciled_name_object(self, other): return self._shallow_copy(name=name) return self + def _validate_sort_keyword(self, sort): + if sort not in [None, False]: + raise ValueError("The 'sort' keyword only takes the values of " + "None or False; {0} was passed.".format(sort)) + def union(self, other, sort=None): """ Form the union of two Index objects. @@ -2262,16 +2267,14 @@ def union(self, other, sort=None): 3. Some values in `self` or `other` cannot be compared. A RuntimeWarning is issued in this case. - * True : sort the result. A TypeError is raised when the - values cannot be compared. * False : do not sort the result. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default `sort` to None, matching the - behavior of pandas 0.23.4 and earlier. + Changed the default `sort` from True to None (without + change in behaviour). 
Returns ------- @@ -2285,20 +2288,15 @@ def union(self, other, sort=None): >>> idx1.union(idx2) Int64Index([1, 2, 3, 4, 5, 6], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other = ensure_index(other) if len(other) == 0 or self.equals(other): - result = self._get_reconciled_name_object(other) - if sort: - result = result.sort_values() - return result + return self._get_reconciled_name_object(other) if len(self) == 0: - result = other._get_reconciled_name_object(self) - if sort: - result = result.sort_values() - return result + return other._get_reconciled_name_object(self) # TODO: is_dtype_union_equal is a hack around # 1. buggy set ops with duplicates (GH #13432) @@ -2348,9 +2346,6 @@ def union(self, other, sort=None): warnings.warn("{}, sort order is undefined for " "incomparable objects".format(e), RuntimeWarning, stacklevel=3) - elif sort: - # raise if not sortable. - result = sorting.safe_sort(result) # for subclasses return self._wrap_setop_result(other, result) @@ -2367,12 +2362,12 @@ def intersection(self, other, sort=False): Parameters ---------- other : Index or array-like - sort : bool or None, default False + sort : False or None, default False Whether to sort the resulting index. * False : do not sort the result. - * True : sort the result. A TypeError is raised when the - values cannot be compared. + * None : sort the result, except when `self` and `other` are equal + or when the values cannot be compared. .. 
versionadded:: 0.24.0 @@ -2392,14 +2387,12 @@ def intersection(self, other, sort=False): >>> idx1.intersection(idx2) Int64Index([3, 4], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other = ensure_index(other) if self.equals(other): - result = self._get_reconciled_name_object(other) - if sort: - result = result.sort_values() - return result + return self._get_reconciled_name_object(other) if not is_dtype_equal(self.dtype, other.dtype): this = self.astype('O') @@ -2434,7 +2427,7 @@ def intersection(self, other, sort=False): taken = other.take(indexer) - if sort: + if sort is None: taken = sorting.safe_sort(taken.values) if self.name != other.name: name = None @@ -2457,7 +2450,7 @@ def difference(self, other, sort=None): Parameters ---------- other : Index or array-like - sort : bool or None, default None + sort : False or None, default None Whether to sort the resulting index. By default, the values are attempted to be sorted, but any TypeError from incomparable elements is caught by pandas. @@ -2465,14 +2458,12 @@ def difference(self, other, sort=None): * None : Attempt to sort the result, but catch any TypeErrors from comparing incomparable elements. * False : Do not sort the result. - * True : Sort the result, raising a TypeError if any elements - cannot be compared. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Added the `None` option, which matches the behavior of + Changed `True` to `None`, which matches the behavior of pandas 0.23.4 and earlier. 
Returns @@ -2489,6 +2480,7 @@ def difference(self, other, sort=None): >>> idx1.difference(idx2, sort=False) Int64Index([2, 1], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) if self.equals(other): @@ -2510,8 +2502,6 @@ def difference(self, other, sort=None): the_diff = sorting.safe_sort(the_diff) except TypeError: pass - elif sort: - the_diff = sorting.safe_sort(the_diff) return this._shallow_copy(the_diff, name=result_name, freq=None) @@ -2523,7 +2513,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): ---------- other : Index or array-like result_name : str - sort : bool or None, default None + sort : False or None, default None Whether to sort the resulting index. By default, the values are attempted to be sorted, but any TypeError from incomparable elements is caught by pandas. @@ -2531,14 +2521,12 @@ def symmetric_difference(self, other, result_name=None, sort=None): * None : Attempt to sort the result, but catch any TypeErrors from comparing incomparable elements. * False : Do not sort the result. - * True : Sort the result, raising a TypeError if any elements - cannot be compared. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Added the `None` option, which matches the behavior of + Changed `True` to `None`, which matches the behavior of pandas 0.23.4 and earlier. 
Returns @@ -2564,6 +2552,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): >>> idx1 ^ idx2 Int64Index([1, 5], dtype='int64') """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_name_update = self._convert_can_do_setop(other) if result_name is None: @@ -2589,8 +2578,6 @@ def symmetric_difference(self, other, result_name=None, sort=None): the_diff = sorting.safe_sort(the_diff) except TypeError: pass - elif sort: - the_diff = sorting.safe_sort(the_diff) attribs = self._get_attributes_dict() attribs['name'] = result_name diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index ef941ab87ba12..9c46860eb49d6 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -602,19 +602,21 @@ def intersection(self, other, sort=False): Parameters ---------- other : DatetimeIndex or array-like - sort : bool, default True + sort : False or None, default False Sort the resulting index if possible. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default to ``False`` to match the behaviour + from before 0.24.0. 
Returns ------- y : Index or DatetimeIndex """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) if self.equals(other): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 736de94991181..2c63fe33c57fe 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1093,7 +1093,7 @@ def equals(self, other): def overlaps(self, other): return self._data.overlaps(other) - def _setop(op_name, sort=True): + def _setop(op_name, sort=None): def func(self, other, sort=sort): other = self._as_like_interval_index(other) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 32a5a09359019..76f895a678cd0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2886,7 +2886,7 @@ def union(self, other, sort=None): Parameters ---------- other : MultiIndex or array / Index of tuples - sort : bool or None, default None + sort : False or None, default None Whether to sort the resulting Index. * None : Sort the result, except when @@ -2896,15 +2896,13 @@ def union(self, other, sort=None): 3. Some values in `self` or `other` cannot be compared. A RuntimeWarning is issued in this case. - * True : sort the result. A TypeError is raised when the - values cannot be compared. * False : do not sort the result. .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default `sort` to None, matching the + Changed the default `sort` from True to None, matching the behavior of pandas 0.23.4 and earlier. Returns @@ -2913,12 +2911,11 @@ def union(self, other, sort=None): >>> index.union(index2) """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_names = self._convert_can_do_setop(other) if len(other) == 0 or self.equals(other): - if sort: - return self.sort_values() return self # TODO: Index.union returns other when `len(self)` is 0. 
@@ -2937,32 +2934,32 @@ def intersection(self, other, sort=False): Parameters ---------- other : MultiIndex or array / Index of tuples - sort : bool, default False + sort : False or None, default False Sort the resulting MultiIndex if possible .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default to ``False``, to match + behaviour from before 0.24.0 Returns ------- Index """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_names = self._convert_can_do_setop(other) if self.equals(other): - if sort: - return self.sort_values() return self self_tuples = self._ndarray_values other_tuples = other._ndarray_values uniq_tuples = set(self_tuples) & set(other_tuples) - if sort: + if sort is None: uniq_tuples = sorted(uniq_tuples) if len(uniq_tuples) == 0: @@ -2980,7 +2977,7 @@ def difference(self, other, sort=None): Parameters ---------- other : MultiIndex - sort : bool, default True + sort : False or None, default None Sort the resulting MultiIndex if possible .. 
versionadded:: 0.24.0 @@ -2989,12 +2986,11 @@ def difference(self, other, sort=None): ------- diff : MultiIndex """ + self._validate_sort_keyword(sort) self._assert_can_do_setop(other) other, result_names = self._convert_can_do_setop(other) if len(other) == 0: - if sort: - return self.sort_values() return self if self.equals(other): @@ -3010,8 +3006,13 @@ def difference(self, other, sort=None): label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) difference = this.values.take(label_diff) - if sort: - difference = sorted(difference) + if sort is None: + try: + difference = sorted(difference) + except TypeError as e: + warnings.warn("{}, sort order is undefined for " + "incomparable objects".format(e), + RuntimeWarning, stacklevel=2) if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index e17a6a682af40..5aafe9734b6a0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -350,19 +350,21 @@ def intersection(self, other, sort=False): Parameters ---------- other : Index or array-like - sort : bool, default True + sort : False or None, default False Sort the resulting index if possible .. versionadded:: 0.24.0 .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default to ``False`` to match the behaviour + from before 0.24.0. 
Returns ------- intersection : Index """ + self._validate_sort_keyword(sort) if self.equals(other): return self._get_reconciled_name_object(other) @@ -405,7 +407,7 @@ def intersection(self, other, sort=False): if (self._step < 0 and other._step < 0) is not (new_index._step < 0): new_index = new_index[::-1] - if sort: + if sort is None: new_index = new_index.sort_values() return new_index diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index d72dccadf0ac0..a838779689c44 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -486,7 +486,7 @@ def test_union_base(self): with pytest.raises(TypeError, match=msg): first.union([1, 2, 3]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort): for name, idx in compat.iteritems(self.indices): first = idx[2:] diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index bd37cc815d0f7..19009e45ee83a 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -138,7 +138,7 @@ def test_intersection2(self): @pytest.mark.parametrize("tz", [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, tz, sort): # GH 4690 (with tz) base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx') @@ -187,7 +187,7 @@ def test_intersection(self, tz, sort): for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]: result = base.intersection(rng, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name @@ -212,7 +212,7 @@ def test_intersection_bug_1708(self): assert len(result) == 0 @pytest.mark.parametrize("tz", tz) - @pytest.mark.parametrize("sort", [True, 
False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, tz, sort): rng_dates = ['1/2/2000', '1/3/2000', '1/1/2000', '1/4/2000', '1/5/2000'] @@ -233,11 +233,11 @@ def test_difference(self, tz, sort): (rng2, other2, expected2), (rng3, other3, expected3)]: result_diff = rng.difference(other, sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_diff, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of DatetimeIndex should not preserve frequency @@ -254,7 +254,7 @@ def test_difference_freq(self, sort): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_datetimeindex_diff(self, sort): dti1 = date_range(freq='Q-JAN', start=datetime(1997, 12, 31), periods=100) diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index db69258c1d3d2..f1fd06c9cab6e 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -783,19 +783,19 @@ def test_non_contiguous(self, closed): assert 1.5 not in index - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union(self, closed, sort): index = self.create_index(closed=closed) other = IntervalIndex.from_breaks(range(5, 13), closed=closed) expected = IntervalIndex.from_breaks(range(13), closed=closed) result = index[::-1].union(other, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) result = other[::-1].union(index, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -812,19 +812,19 @@ def 
test_union(self, closed, sort): result = index.union(other, sort=sort) tm.assert_index_equal(result, index) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, closed, sort): index = self.create_index(closed=closed) other = IntervalIndex.from_breaks(range(5, 13), closed=closed) expected = IntervalIndex.from_breaks(range(5, 11), closed=closed) result = index[::-1].intersection(other, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) result = other[::-1].intersection(index, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -842,14 +842,14 @@ def test_intersection(self, closed, sort): result = index.intersection(other, sort=sort) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, closed, sort): index = IntervalIndex.from_arrays([1, 0, 3, 2], [1, 2, 3, 4], closed=closed) result = index.difference(index[:1], sort=sort) expected = index[1:] - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) @@ -864,19 +864,19 @@ def test_difference(self, closed, sort): result = index.difference(other, sort=sort) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(self, closed, sort): index = self.create_index(closed=closed) result = index[1:].symmetric_difference(index[:-1], sort=sort) expected = IntervalIndex([index[0], index[-1]]) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) # GH 19101: empty result, same dtype result = index.symmetric_difference(index, sort=sort) expected = IntervalIndex(np.array([], dtype='int64'), 
closed=closed) - if sort: + if sort is None: tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -888,7 +888,7 @@ def test_symmetric_difference(self, closed, sort): @pytest.mark.parametrize('op_name', [ 'union', 'intersection', 'difference', 'symmetric_difference']) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_set_operation_errors(self, closed, op_name, sort): index = self.create_index(closed=closed) set_op = getattr(index, op_name) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 6a42e29aa8f5c..f6b7d3d057d26 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -9,7 +9,7 @@ @pytest.mark.parametrize("case", [0.5, "xxx"]) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) @pytest.mark.parametrize("method", ["intersection", "union", "difference", "symmetric_difference"]) def test_set_ops_error_cases(idx, case, sort, method): @@ -19,13 +19,13 @@ def test_set_ops_error_cases(idx, case, sort, method): getattr(idx, method)(case, sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_intersection_base(idx, sort): first = idx[:5] second = idx[:3] intersect = first.intersection(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(intersect, second.sort_values()) assert tm.equalContents(intersect, second) @@ -34,7 +34,7 @@ def test_intersection_base(idx, sort): for klass in [np.array, Series, list]] for case in cases: result = first.intersection(case, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, second.sort_values()) assert tm.equalContents(result, second) @@ -43,13 +43,13 @@ def test_intersection_base(idx, sort): first.intersection([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) 
+@pytest.mark.parametrize("sort", [None, False]) def test_union_base(idx, sort): first = idx[3:] second = idx[:5] everything = idx union = first.union(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) @@ -58,7 +58,7 @@ def test_union_base(idx, sort): for klass in [np.array, Series, list]] for case in cases: result = first.union(case, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, everything.sort_values()) assert tm.equalContents(result, everything) @@ -67,13 +67,13 @@ def test_union_base(idx, sort): first.union([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_difference_base(idx, sort): second = idx[4:] answer = idx[:4] result = idx.difference(second, sort=sort) - if sort: + if sort is None: answer = answer.sort_values() assert result.equals(answer) @@ -91,14 +91,14 @@ def test_difference_base(idx, sort): idx.difference([1, 2, 3], sort=sort) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(idx, sort): first = idx[1:] second = idx[:-1] answer = idx[[-1, 0]] result = first.symmetric_difference(second, sort=sort) - if sort: + if sort is None: answer = answer.sort_values() tm.assert_index_equal(result, answer) @@ -121,14 +121,14 @@ def test_empty(idx): assert idx[:0].empty -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_difference(idx, sort): first = idx result = first.difference(idx[-3:], sort=sort) vals = idx[:-3].values - if sort: + if sort is None: vals = sorted(vals) expected = MultiIndex.from_tuples(vals, @@ -174,10 +174,7 @@ def test_difference(idx, sort): # name from empty array result = first.difference([], sort=sort) - if sort: - assert first.sort_values().equals(result) - else: - assert first.equals(result) + assert 
first.equals(result) assert first.names == result.names # name from non-empty array @@ -198,9 +195,10 @@ def test_difference_sort_special(): result = idx.difference([]) tm.assert_index_equal(result, idx) - result = idx.difference([], sort=True) - expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - tm.assert_index_equal(result, expected) + # TODO decide on True behaviour + # result = idx.difference([], sort=True) + # expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + # tm.assert_index_equal(result, expected) def test_difference_sort_incomparable(): @@ -210,42 +208,38 @@ def test_difference_sort_incomparable(): other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4], ['c', 'd']]) # sort=None, the default - # result = idx.difference(other) - # tm.assert_index_equal(result, idx) + with tm.assert_produces_warning(RuntimeWarning): + result = idx.difference(other) + tm.assert_index_equal(result, idx) # sort=False - result = idx.difference(other) + result = idx.difference(other, sort=False) tm.assert_index_equal(result, idx) - # sort=True, raises - with pytest.raises(TypeError): - idx.difference(other, sort=True) + # TODO decide on True behaviour + # # sort=True, raises + # with pytest.raises(TypeError): + # idx.difference(other, sort=True) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_union(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] the_union = piece1.union(piece2, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(the_union, idx.sort_values()) assert tm.equalContents(the_union, idx) # corner case, pass self or empty thing: the_union = idx.union(idx, sort=sort) - if sort: - tm.assert_index_equal(the_union, idx.sort_values()) - else: - assert the_union is idx + assert the_union is idx the_union = idx.union(idx[:0], sort=sort) - if sort: - tm.assert_index_equal(the_union, idx.sort_values()) - else: - assert the_union is idx + assert the_union is idx # won't work 
in python 3 # tuples = _index.values @@ -264,23 +258,20 @@ def test_union(idx, sort): # assert result.equals(result2) -@pytest.mark.parametrize("sort", [True, False]) +@pytest.mark.parametrize("sort", [None, False]) def test_intersection(idx, sort): piece1 = idx[:5][::-1] piece2 = idx[3:] the_int = piece1.intersection(piece2, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(the_int, idx[3:5]) assert tm.equalContents(the_int, idx[3:5]) # corner case, pass self the_int = idx.intersection(idx, sort=sort) - if sort: - tm.assert_index_equal(the_int, idx.sort_values()) - else: - assert the_int is idx + assert the_int is idx # empty intersection: disjoint empty = idx[:2].intersection(idx[2:], sort=sort) @@ -297,7 +288,8 @@ def test_intersect_equal_sort(): idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) tm.assert_index_equal(idx.intersection(idx, sort=False), idx) - tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + # tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) @@ -314,10 +306,11 @@ def test_union_sort_other_empty(slice_): # sort=False tm.assert_index_equal(idx.union(other, sort=False), idx) - # sort=True - result = idx.union(other, sort=True) - expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - tm.assert_index_equal(result, expected) + # TODO decide on True behaviour + # # sort=True + # result = idx.union(other, sort=True) + # expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + # tm.assert_index_equal(result, expected) def test_union_sort_other_incomparable(): @@ -332,6 +325,17 @@ def test_union_sort_other_incomparable(): result = idx.union(idx[:1], sort=False) tm.assert_index_equal(result, idx) - # sort=True - with pytest.raises(TypeError, match='Cannot compare'): - idx.union(idx[:1], sort=True) + # TODO decide on 
True behaviour + # # sort=True + # with pytest.raises(TypeError, match='Cannot compare'): + # idx.union(idx[:1], sort=True) + + +@pytest.mark.parametrize("method", ['union', 'intersection', 'difference', + 'symmetric_difference']) +def test_setops_disallow_true(method): + idx1 = pd.MultiIndex.from_product([['a', 'b'], [1, 2]]) + idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]]) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) \ No newline at end of file diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 464ff7aa5d58d..dc9a32d75d272 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -77,7 +77,7 @@ def test_no_millisecond_field(self): with pytest.raises(AttributeError): DatetimeIndex([]).millisecond - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of Period MUST preserve frequency # but the ability to union results must be preserved diff --git a/pandas/tests/indexes/period/test_setops.py b/pandas/tests/indexes/period/test_setops.py index a97ab47bcda16..bf29edad4841e 100644 --- a/pandas/tests/indexes/period/test_setops.py +++ b/pandas/tests/indexes/period/test_setops.py @@ -38,7 +38,7 @@ def test_join_does_not_recur(self): df.columns[0], df.columns[1]], object) tm.assert_index_equal(res, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # union other1 = pd.period_range('1/1/2000', freq='D', periods=5) @@ -97,11 +97,11 @@ def test_union(self, sort): (rng8, other8, expected8)]: result_union = rng.union(other, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_union, expected) - @pytest.mark.parametrize("sort", [True, False]) + 
@pytest.mark.parametrize("sort", [None, False]) def test_union_misc(self, sort): index = period_range('1/1/2000', '1/20/2000', freq='D') @@ -110,7 +110,7 @@ def test_union_misc(self, sort): # not in order result = _permute(index[:-5]).union(_permute(index[10:]), sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, index) assert tm.equalContents(result, index) @@ -139,7 +139,7 @@ def test_union_dataframe_index(self): exp = pd.period_range('1/1/1980', '1/1/2012', freq='M') tm.assert_index_equal(df.index, exp) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): index = period_range('1/1/2000', '1/20/2000', freq='D') @@ -150,7 +150,7 @@ def test_intersection(self, sort): left = _permute(index[:-5]) right = _permute(index[10:]) result = left.intersection(right, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, index[10:-5]) assert tm.equalContents(result, index[10:-5]) @@ -164,7 +164,7 @@ def test_intersection(self, sort): with pytest.raises(period.IncompatibleFrequency): index.intersection(index3, sort=sort) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_cases(self, sort): base = period_range('6/1/2000', '6/30/2000', freq='D', name='idx') @@ -210,7 +210,7 @@ def test_intersection_cases(self, sort): for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]: result = base.intersection(rng, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name @@ -224,7 +224,7 @@ def test_intersection_cases(self, sort): result = rng.intersection(rng[0:0]) assert len(result) == 0 - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference(self, sort): # diff period_rng = ['1/3/2000', '1/2/2000', '1/1/2000', '1/5/2000', 
@@ -276,6 +276,6 @@ def test_difference(self, sort): (rng6, other6, expected6), (rng7, other7, expected7), ]: result_difference = rng.difference(other, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result_difference, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 4e8555cbe1aab..ee32a9e2e8bf1 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -685,27 +685,24 @@ def test_empty_fancy_raises(self, attr): # np.ndarray only accepts ndarray of int & bool dtypes, so should Index pytest.raises(IndexError, index.__getitem__, empty_farr) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): first = self.strIndex[:20] second = self.strIndex[:10] intersect = first.intersection(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(intersect, second.sort_values()) assert tm.equalContents(intersect, second) # Corner cases inter = first.intersection(first, sort=sort) - if sort: - tm.assert_index_equal(inter, first.sort_values()) - else: - assert inter is first + assert inter is first @pytest.mark.parametrize("index2,keeps_name", [ (Index([3, 4, 5, 6, 7], name="index"), True), # preserve same name (Index([3, 4, 5, 6, 7], name="other"), False), # drop diff names (Index([3, 4, 5, 6, 7]), False)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_name_preservation(self, index2, keeps_name, sort): index1 = Index([1, 2, 3, 4, 5], name='index') expected = Index([3, 4, 5]) @@ -719,7 +716,7 @@ def test_intersection_name_preservation(self, index2, keeps_name, sort): @pytest.mark.parametrize("first_name,second_name,expected_name", [ ('A', 'A', 'A'), ('A', 'B', None), (None, 'B', None)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) 
def test_intersection_name_preservation2(self, first_name, second_name, expected_name, sort): first = self.strIndex[5:20] @@ -732,7 +729,7 @@ def test_intersection_name_preservation2(self, first_name, second_name, @pytest.mark.parametrize("index2,keeps_name", [ (Index([4, 7, 6, 5, 3], name='index'), True), (Index([4, 7, 6, 5, 3], name='other'), False)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_monotonic(self, index2, keeps_name, sort): index1 = Index([5, 3, 2, 4, 1], name='index') expected = Index([5, 3, 4]) @@ -741,25 +738,25 @@ def test_intersection_monotonic(self, index2, keeps_name, sort): expected.name = "index" result = index1.intersection(index2, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) @pytest.mark.parametrize("index2,expected_arr", [ (Index(['B', 'D']), ['B']), (Index(['B', 'D', 'A']), ['A', 'B', 'A'])]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): # non-monotonic non-unique index1 = Index(['A', 'B', 'A', 'C']) expected = Index(expected_arr, dtype='object') result = index1.intersection(index2, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersect_str_dates(self, sort): dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] @@ -774,13 +771,15 @@ def test_intersect_nosort(self): expected = pd.Index(['b', 'a']) tm.assert_index_equal(result, expected) - def test_intersect_equal_sort(self): + def test_intersection_equal_sort(self): idx = pd.Index(['c', 'a', 'b']) sorted_ = pd.Index(['a', 'b', 'c']) tm.assert_index_equal(idx.intersection(idx, sort=False), idx) - 
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + # TODO decide on True behaviour + # tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_chained_union(self, sort): # Chained unions handles names correctly i1 = Index([1, 2], name='i1') @@ -797,7 +796,7 @@ def test_chained_union(self, sort): expected = j1.union(j2, sort=sort).union(j3, sort=sort) tm.assert_index_equal(union, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union(self, sort): # TODO: Replace with fixturesult first = self.strIndex[5:20] @@ -805,7 +804,7 @@ def test_union(self, sort): everything = self.strIndex[:20] union = first.union(second, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) @@ -822,10 +821,11 @@ def test_union_sort_other_special(self, slice_): # sort=False tm.assert_index_equal(idx.union(other, sort=False), idx) - # sort=True - result = idx.union(other, sort=True) - expected = pd.Index([0, 1, 2]) - tm.assert_index_equal(result, expected) + # TODO decide on True behaviour + # # sort=True + # result = idx.union(other, sort=True) + # expected = pd.Index([0, 1, 2]) + # tm.assert_index_equal(result, expected) def test_union_sort_other_incomparable(self): # https://github.com/pandas-dev/pandas/issues/24959 @@ -836,9 +836,14 @@ def test_union_sort_other_incomparable(self): tm.assert_index_equal(result, idx) - # sort=True - with pytest.raises(TypeError, match='.*'): - idx.union(idx[:1], sort=True) + # sort=False + result = idx.union(idx[:1], sort=None) + tm.assert_index_equal(result, idx) + + # TODO decide on True behaviour + # # sort=True + # with pytest.raises(TypeError, match='.*'): + # idx.union(idx[:1], sort=True) # sort=False 
result = idx.union(idx[:1], sort=False) @@ -846,7 +851,7 @@ def test_union_sort_other_incomparable(self): @pytest.mark.parametrize("klass", [ np.array, Series, list]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_from_iterables(self, klass, sort): # GH 10149 # TODO: Replace with fixturesult @@ -856,11 +861,11 @@ def test_union_from_iterables(self, klass, sort): case = klass(second.values) result = first.union(case, sort=sort) - if sort: + if sort is None: tm.assert_index_equal(result, everything.sort_values()) assert tm.equalContents(result, everything) - @pytest.mark.parametrize("sort", [None, True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_identity(self, sort): # TODO: replace with fixturesult first = self.strIndex[5:20] @@ -879,7 +884,7 @@ def test_union_identity(self, sort): @pytest.mark.parametrize("second_list", [list('ab'), list()]) @pytest.mark.parametrize("first_name, second_name, expected_name", [ ('A', 'B', None), (None, 'B', None), ('A', None, None)]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_name_preservation(self, first_list, second_list, first_name, second_name, expected_name, sort): first = Index(first_list, name=first_name) @@ -888,14 +893,14 @@ def test_union_name_preservation(self, first_list, second_list, first_name, vals = set(first_list).union(second_list) - if sort and len(first_list) > 0 and len(second_list) > 0: + if sort is None and len(first_list) > 0 and len(second_list) > 0: expected = Index(sorted(vals), name=expected_name) tm.assert_index_equal(union, expected) else: expected = Index(vals, name=expected_name) assert tm.equalContents(union, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_union_dt_as_obj(self, sort): # TODO: Replace with fixturesult firstCat = self.strIndex.union(self.dateIndex) 
@@ -912,6 +917,15 @@ def test_union_dt_as_obj(self, sort): tm.assert_contains_all(self.strIndex, secondCat) tm.assert_contains_all(self.dateIndex, firstCat) + @pytest.mark.parametrize("method", ['union', 'intersection', 'difference', + 'symmetric_difference']) + def test_setops_disallow_true(self, method): + idx1 = pd.Index(['a', 'b']) + idx2 = pd.Index(['b', 'c']) + + with pytest.raises(ValueError, match="The 'sort' keyword only takes"): + getattr(idx1, method)(idx2, sort=True) + def test_map_identity_mapping(self): # GH 12766 # TODO: replace with fixture @@ -1033,7 +1047,7 @@ def test_append_empty_preserve_name(self, name, expected): @pytest.mark.parametrize("second_name,expected", [ (None, None), ('name', 'name')]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_name_preservation(self, second_name, expected, sort): # TODO: replace with fixturesult first = self.strIndex[5:20] @@ -1051,7 +1065,7 @@ def test_difference_name_preservation(self, second_name, expected, sort): else: assert result.name == expected - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_empty_arg(self, sort): first = self.strIndex[5:20] first.name == 'name' @@ -1060,7 +1074,7 @@ def test_difference_empty_arg(self, sort): assert tm.equalContents(result, first) assert result.name == first.name - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_identity(self, sort): first = self.strIndex[5:20] first.name == 'name' @@ -1069,7 +1083,7 @@ def test_difference_identity(self, sort): assert len(result) == 0 assert result.name == first.name - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_sort(self, sort): first = self.strIndex[5:20] second = self.strIndex[:10] @@ -1077,12 +1091,12 @@ def test_difference_sort(self, sort): result 
= first.difference(second, sort) expected = self.strIndex[10:20] - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference(self, sort): # smoke index1 = Index([5, 2, 3, 4], name='index1') @@ -1091,7 +1105,7 @@ def test_symmetric_difference(self, sort): expected = Index([5, 1]) assert tm.equalContents(result, expected) assert result.name is None - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) @@ -1118,18 +1132,19 @@ def test_difference_incomparable(self, opname): result = op(a) tm.assert_index_equal(result, expected) - # sort=True, raises - op = operator.methodcaller(opname, b, sort=True) - with pytest.raises(TypeError, match='Cannot compare'): - op(a) + # TODO decide on True behaviour + # # sort=True, raises + # op = operator.methodcaller(opname, b, sort=True) + # with pytest.raises(TypeError, match='Cannot compare'): + # op(a) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_mi(self, sort): index1 = MultiIndex.from_tuples(self.tuples) index2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) result = index1.symmetric_difference(index2, sort=sort) expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert tm.equalContents(result, expected) @@ -1137,18 +1152,18 @@ def test_symmetric_difference_mi(self, sort): @pytest.mark.parametrize("index2,expected", [ (Index([0, 1, np.nan]), Index([2.0, 3.0, 0.0])), (Index([0, 1]), Index([np.nan, 2.0, 3.0, 0.0]))]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_missing(self, index2, expected, sort): # GH 13514 change: 
{nan} - {nan} == {} # (GH 6444, sorting of nans, is no longer an issue) index1 = Index([1, np.nan, 2, 3]) result = index1.symmetric_difference(index2, sort=sort) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_non_index(self, sort): index1 = Index([1, 2, 3, 4], name='index1') index2 = np.array([2, 3, 4, 5]) @@ -1162,7 +1177,7 @@ def test_symmetric_difference_non_index(self, sort): assert tm.equalContents(result, expected) assert result.name == 'new_name' - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_type(self, sort): # GH 20040 # If taking difference of a set and itself, it @@ -1173,7 +1188,7 @@ def test_difference_type(self, sort): expected = index.drop(index) tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_difference(self, sort): # GH 20040 # Test that the intersection of an index with an @@ -1676,11 +1691,11 @@ def test_drop_tuple(self, values, to_drop): ('intersection', np.array([(1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')], dtype=[('num', int), ('let', 'a1')]), - True), + None), ('union', np.array([(1, 'A'), (1, 'B'), (1, 'C'), (2, 'A'), (2, 'B'), (2, 'C')], dtype=[('num', int), ('let', 'a1')]), - True) + None) ]) def test_tuple_union_bug(self, method, expected, sort): index1 = Index(np.array([(1, 'A'), (2, 'A'), (1, 'B'), (2, 'B')], @@ -2328,20 +2343,20 @@ def test_unique_na(self): result = idx.unique() tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = self.create_index() first = index[:5] 
second = index[:3] - expected = Index([0, 1, 'a']) if sort else Index([0, 'a', 1]) + expected = Index([0, 1, 'a']) if sort is None else Index([0, 'a', 1]) result = first.intersection(second, sort=sort) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("klass", [ np.array, Series, list]) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection_different_type_base(self, klass, sort): # GH 10149 index = self.create_index() @@ -2351,7 +2366,7 @@ def test_intersection_different_type_base(self, klass, sort): result = first.intersection(klass(second.values), sort=sort) assert tm.equalContents(result, second) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = self.create_index() @@ -2360,7 +2375,7 @@ def test_difference_base(self, sort): result = first.difference(second, sort) expected = Index([0, 'a', 1]) - if sort: + if sort is None: expected = Index(safe_sort(expected)) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_range.py b/pandas/tests/indexes/test_range.py index bbd1e0ccc19b1..96cf83d477376 100644 --- a/pandas/tests/indexes/test_range.py +++ b/pandas/tests/indexes/test_range.py @@ -503,7 +503,7 @@ def test_join_self(self): joined = self.index.join(self.index, how=kind) assert self.index is joined - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, sort): # intersect with Int64Index other = Index(np.arange(1, 6)) diff --git a/pandas/tests/indexes/timedeltas/test_timedelta.py b/pandas/tests/indexes/timedeltas/test_timedelta.py index 547366ec79094..79210705103ab 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta.py @@ -51,7 +51,7 @@ def 
test_fillna_timedelta(self): [pd.Timedelta('1 day'), 'x', pd.Timedelta('3 day')], dtype=object) tm.assert_index_equal(idx.fillna('x'), exp) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: Difference of TimedeltaIndex should not preserve frequency @@ -69,7 +69,7 @@ def test_difference_freq(self, sort): tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) - @pytest.mark.parametrize("sort", [True, False]) + @pytest.mark.parametrize("sort", [None, False]) def test_difference_sort(self, sort): index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days", @@ -80,7 +80,7 @@ def test_difference_sort(self, sort): expected = TimedeltaIndex(["5 days", "0 days"], freq=None) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) @@ -90,7 +90,7 @@ def test_difference_sort(self, sort): idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["1 days", "0 days"], freq=None) - if sort: + if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) From cb54640afacf3b387ad3fcbe2a4a4b2ee7b44ab8 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Jan 2019 21:42:32 +0100 Subject: [PATCH 14/22] fix concat tests --- pandas/core/indexes/api.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/api.py b/pandas/core/indexes/api.py index 684a19c56c92f..6299fc482d0df 100644 --- a/pandas/core/indexes/api.py +++ b/pandas/core/indexes/api.py @@ -112,7 +112,7 @@ def _get_combined_index(indexes, intersect=False, sort=False): elif intersect: index = indexes[0] for other in indexes[1:]: - index = index.intersection(other, sort=sort) + index = index.intersection(other) else: index = _union_indexes(indexes, sort=sort) index = ensure_index(index) From 05a0ed04cac54d3d92b24c36947d85df107170cf Mon Sep 17 00:00:00 2001 
From: Joris Van den Bossche Date: Thu, 31 Jan 2019 21:52:48 +0100 Subject: [PATCH 15/22] pep8 --- pandas/core/indexes/base.py | 2 +- pandas/tests/indexes/multi/test_set_ops.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 870dd6165daf5..d58b597a6e088 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2367,7 +2367,7 @@ def intersection(self, other, sort=False): * False : do not sort the result. * None : sort the result, except when `self` and `other` are equal - or when the values cannot be compared. + or when the values cannot be compared. .. versionadded:: 0.24.0 diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index f6b7d3d057d26..215025fe6b43d 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -338,4 +338,4 @@ def test_setops_disallow_true(method): idx2 = pd.MultiIndex.from_product([['b', 'c'], [1, 2]]) with pytest.raises(ValueError, match="The 'sort' keyword only takes"): - getattr(idx1, method)(idx2, sort=True) \ No newline at end of file + getattr(idx1, method)(idx2, sort=True) From 5e1b831f8efc0cca70779f3556485321da57d250 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Jan 2019 22:20:59 +0100 Subject: [PATCH 16/22] keep inconsistent behaviour for MultiIndex.difference --- pandas/core/indexes/multi.py | 7 +------ pandas/tests/indexes/multi/test_set_ops.py | 5 +++-- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 76f895a678cd0..8caa92f0e844d 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3007,12 +3007,7 @@ def difference(self, other, sort=None): assume_unique=True) difference = this.values.take(label_diff) if sort is None: - try: - difference = sorted(difference) - except TypeError as e: - warnings.warn("{}, 
sort order is undefined for " - "incomparable objects".format(e), - RuntimeWarning, stacklevel=2) + difference = sorted(difference) if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index 215025fe6b43d..dc785c6992bdf 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -208,9 +208,10 @@ def test_difference_sort_incomparable(): other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4], ['c', 'd']]) # sort=None, the default - with tm.assert_produces_warning(RuntimeWarning): + # MultiIndex.difference deviates here from other difference + # implementations in not catching the TypeError + with pytest.raises(TypeError): result = idx.difference(other) - tm.assert_index_equal(result, idx) # sort=False result = idx.difference(other, sort=False) From 32a5966cebbe3c54a46cd6e7610d6cf0c0b140f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Jan 2019 22:35:11 +0100 Subject: [PATCH 17/22] docstring updates --- pandas/core/indexes/base.py | 15 ++++++++------- pandas/core/indexes/multi.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index d58b597a6e088..2fa034670e885 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2273,8 +2273,8 @@ def union(self, other, sort=None): .. versionchanged:: 0.24.1 - Changed the default `sort` from True to None (without - change in behaviour). + Changed the default value from ``True`` to ``None`` + (without change in behaviour). Returns ------- @@ -2373,7 +2373,8 @@ def intersection(self, other, sort=False): .. versionchanged:: 0.24.1 - Changed the default from ``True`` to ``False``. + Changed the default from ``True`` to ``False``, to match + the behaviour of 0.23.4 and earlier. 
Returns ------- @@ -2463,8 +2464,8 @@ def difference(self, other, sort=None): .. versionchanged:: 0.24.1 - Changed `True` to `None`, which matches the behavior of - pandas 0.23.4 and earlier. + Changed the default value from ``True`` to ``None`` + (without change in behaviour). Returns ------- @@ -2526,8 +2527,8 @@ def symmetric_difference(self, other, result_name=None, sort=None): .. versionchanged:: 0.24.1 - Changed `True` to `None`, which matches the behavior of - pandas 0.23.4 and earlier. + Changed the default value from ``True`` to ``None`` + (without change in behaviour). Returns ------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 8caa92f0e844d..14975dbbefa63 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2902,8 +2902,8 @@ def union(self, other, sort=None): .. versionchanged:: 0.24.1 - Changed the default `sort` from True to None, matching the - behavior of pandas 0.23.4 and earlier. + Changed the default value from ``True`` to ``None`` + (without change in behaviour). Returns ------- @@ -2941,7 +2941,7 @@ def intersection(self, other, sort=False): .. versionchanged:: 0.24.1 - Changed the default to ``False``, to match + Changed the default from ``True`` to ``False``, to match behaviour from before 0.24.0 Returns @@ -2982,6 +2982,11 @@ def difference(self, other, sort=None): .. versionadded:: 0.24.0 + .. versionchanged:: 0.24.1 + + Changed the default value from ``True`` to ``None`` + (without change in behaviour). 
+ Returns ------- diff : MultiIndex From d234a1de817ada0483f81fbe8b9bf56519ad92b9 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 31 Jan 2019 22:49:26 +0100 Subject: [PATCH 18/22] fix linting + catch remaining RuntimeWarning --- pandas/tests/indexes/multi/test_set_ops.py | 3 ++- pandas/tests/indexes/test_base.py | 7 ++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index dc785c6992bdf..b2ff5e67a2dba 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -287,9 +287,10 @@ def test_intersection(idx, sort): def test_intersect_equal_sort(): idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) - sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + # sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) tm.assert_index_equal(idx.intersection(idx, sort=False), idx) tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + # TODO decide on True behaviour # tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ee32a9e2e8bf1..ab275ab1b5103 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -830,14 +830,15 @@ def test_union_sort_other_special(self, slice_): def test_union_sort_other_incomparable(self): # https://github.com/pandas-dev/pandas/issues/24959 idx = pd.Index([1, pd.Timestamp('2000')]) - # default, sort=None + # default (sort=None) with tm.assert_produces_warning(RuntimeWarning): result = idx.union(idx[:1]) tm.assert_index_equal(result, idx) - # sort=False - result = idx.union(idx[:1], sort=None) + # sort=None + with tm.assert_produces_warning(RuntimeWarning): + result = idx.union(idx[:1], sort=None) tm.assert_index_equal(result, idx) # TODO decide on True behaviour From 260aba22c0f9eec82359de1b73ff1fc7f278a6ec Mon Sep 17 00:00:00 2001 
From: Joris Van den Bossche Date: Thu, 31 Jan 2019 23:23:49 +0100 Subject: [PATCH 19/22] more linting --- pandas/tests/indexes/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ab275ab1b5103..63cc06b719a0b 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -773,7 +773,7 @@ def test_intersect_nosort(self): def test_intersection_equal_sort(self): idx = pd.Index(['c', 'a', 'b']) - sorted_ = pd.Index(['a', 'b', 'c']) + # sorted_ = pd.Index(['a', 'b', 'c']) tm.assert_index_equal(idx.intersection(idx, sort=False), idx) tm.assert_index_equal(idx.intersection(idx, sort=None), idx) # TODO decide on True behaviour From 41c24f03b43b162ec4258187ff7196b2534b22ab Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 1 Feb 2019 12:56:22 -0600 Subject: [PATCH 20/22] update whatsnew --- doc/source/whatsnew/v0.24.1.rst | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index ecce8005cfb3a..809db385d4a91 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -30,14 +30,20 @@ The default *behavior*, however, remains the same: the result is sorted, unless 2. ``self`` or ``other`` is empty 3. ``self`` or ``other`` contain values that can not be compared (a ``RuntimeWarning`` is raised). -This change will allow to preserve ``sort=True`` to mean "always sort" in a future release. +This change will allow ``sort=True`` to mean "always sort" in a future release. The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which would do not sort the result when the values could not be compared. 
-For :meth:`Index.intersection` the option of ``sort=True`` is also renamed -to ``sort=None`` (but for :meth:`Index.intersection` it is not the default), as -the result is not sorted when ``self`` and ``other`` were identical. +The `sort` option for :meth:`Index.intersection` has changed in three ways. + +1. The default has changed from ``True`` to ``False``, to restore the + pandas 0.23.4 and earlier behavior of not sorting by default. +2. The behavior of ``sort=True`` can now be obtained with ``sort=None``. + This will sort the result only if the values in ``self`` and ``other`` + are not identical. +3. The value ``sort=True`` is no longer allowed. A future version of pandas + will properly support ``sort=True`` meaning "always sort". .. _whatsnew_0241.regressions: From 2e181ac163007d9eefe0dfb2b620b77b6b4a8f54 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 1 Feb 2019 12:56:52 -0600 Subject: [PATCH 21/22] xfail trues --- pandas/tests/indexes/multi/test_set_ops.py | 53 +++++++++++++++++----- pandas/tests/indexes/test_base.py | 48 ++++++++++++++------ 2 files changed, 75 insertions(+), 26 deletions(-) diff --git a/pandas/tests/indexes/multi/test_set_ops.py b/pandas/tests/indexes/multi/test_set_ops.py index b2ff5e67a2dba..41a0e1e59e8a5 100644 --- a/pandas/tests/indexes/multi/test_set_ops.py +++ b/pandas/tests/indexes/multi/test_set_ops.py @@ -190,18 +190,24 @@ def test_difference(idx, sort): def test_difference_sort_special(): + # GH-24959 idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) # sort=None, the default result = idx.difference([]) tm.assert_index_equal(result, idx) + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_special_true(): # TODO decide on True behaviour - # result = idx.difference([], sort=True) - # expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - # tm.assert_index_equal(result, expected) + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + result = idx.difference([], sort=True) + expected = 
pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(result, expected) def test_difference_sort_incomparable(): + # GH-24959 idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2], ['a', 'b']]) @@ -217,10 +223,18 @@ def test_difference_sort_incomparable(): result = idx.difference(other, sort=False) tm.assert_index_equal(result, idx) + +@pytest.mark.xfail(reason="Not implemented.") +def test_difference_sort_incomparable_true(): # TODO decide on True behaviour # # sort=True, raises - # with pytest.raises(TypeError): - # idx.difference(other, sort=True) + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000'), 2], + ['a', 'b']]) + other = pd.MultiIndex.from_product([[3, pd.Timestamp('2000'), 4], + ['c', 'd']]) + + with pytest.raises(TypeError): + idx.difference(other, sort=True) @pytest.mark.parametrize("sort", [None, False]) @@ -286,12 +300,18 @@ def test_intersection(idx, sort): def test_intersect_equal_sort(): + # GH-24959 idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) - # sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) tm.assert_index_equal(idx.intersection(idx, sort=False), idx) tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + +@pytest.mark.xfail(reason="Not implemented.") +def test_intersect_equal_sort_true(): # TODO decide on True behaviour - # tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + sorted_ = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) @@ -308,11 +328,16 @@ def test_union_sort_other_empty(slice_): # sort=False tm.assert_index_equal(idx.union(other, sort=False), idx) + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_empty_sort(slice_): # TODO decide on True behaviour # # sort=True - # result = idx.union(other, sort=True) - # expected = 
pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) - # tm.assert_index_equal(result, expected) + idx = pd.MultiIndex.from_product([[1, 0], ['a', 'b']]) + other = idx[:0] + result = idx.union(other, sort=True) + expected = pd.MultiIndex.from_product([[0, 1], ['a', 'b']]) + tm.assert_index_equal(result, expected) def test_union_sort_other_incomparable(): @@ -327,10 +352,14 @@ def test_union_sort_other_incomparable(): result = idx.union(idx[:1], sort=False) tm.assert_index_equal(result, idx) + +@pytest.mark.xfail(reason="Not implemented.") +def test_union_sort_other_incomparable_sort(): # TODO decide on True behaviour # # sort=True - # with pytest.raises(TypeError, match='Cannot compare'): - # idx.union(idx[:1], sort=True) + idx = pd.MultiIndex.from_product([[1, pd.Timestamp('2000')], ['a', 'b']]) + with pytest.raises(TypeError, match='Cannot compare'): + idx.union(idx[:1], sort=True) @pytest.mark.parametrize("method", ['union', 'intersection', 'difference', diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 63cc06b719a0b..c99007cef90d4 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -773,11 +773,15 @@ def test_intersect_nosort(self): def test_intersection_equal_sort(self): idx = pd.Index(['c', 'a', 'b']) - # sorted_ = pd.Index(['a', 'b', 'c']) tm.assert_index_equal(idx.intersection(idx, sort=False), idx) tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + + @pytest.mark.xfail(reason="Not implemented") + def test_intersection_equal_sort_true(self): # TODO decide on True behaviour - # tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + idx = pd.Index(['c', 'a', 'b']) + sorted_ = pd.Index(['a', 'b', 'c']) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) @pytest.mark.parametrize("sort", [None, False]) def test_chained_union(self, sort): @@ -821,11 +825,18 @@ def test_union_sort_other_special(self, slice_): # sort=False 
tm.assert_index_equal(idx.union(other, sort=False), idx) + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize('slice_', [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): # TODO decide on True behaviour - # # sort=True - # result = idx.union(other, sort=True) - # expected = pd.Index([0, 1, 2]) - # tm.assert_index_equal(result, expected) + # sort=True + idx = pd.Index([1, 0, 2]) + # other is all of idx (slice(None)) or empty (slice(0)) + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = pd.Index([0, 1, 2]) + tm.assert_index_equal(result, expected) def test_union_sort_other_incomparable(self): # https://github.com/pandas-dev/pandas/issues/24959 @@ -841,15 +852,18 @@ def test_union_sort_other_incomparable(self): result = idx.union(idx[:1], sort=None) tm.assert_index_equal(result, idx) - # TODO decide on True behaviour - # # sort=True - # with pytest.raises(TypeError, match='.*'): - # idx.union(idx[:1], sort=True) - # sort=False result = idx.union(idx[:1], sort=False) tm.assert_index_equal(result, idx) + @pytest.mark.xfail(reason="Not implemented") + def test_union_sort_other_incomparable_true(self): + # TODO decide on True behaviour + # sort=True + idx = pd.Index([1, pd.Timestamp('2000')]) + with pytest.raises(TypeError, match='.*'): + idx.union(idx[:1], sort=True) + @pytest.mark.parametrize("klass", [ np.array, Series, list]) @pytest.mark.parametrize("sort", [None, False]) @@ -1133,11 +1147,17 @@ def test_difference_incomparable(self, opname): result = op(a) tm.assert_index_equal(result, expected) + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize('opname', ['difference', 'symmetric_difference']) + def test_difference_incomparable_true(self, opname): # TODO decide on True behaviour # # sort=True, raises - # op = operator.methodcaller(opname, b, sort=True) - # with pytest.raises(TypeError, match='Cannot compare'): - # op(a) + a = pd.Index([3, pd.Timestamp('2000'), 1]) + b = pd.Index([2, pd.Timestamp('1999'), 1]) 
+ op = operator.methodcaller(opname, b, sort=True) + + with pytest.raises(TypeError, match='Cannot compare'): + op(a) @pytest.mark.parametrize("sort", [None, False]) def test_symmetric_difference_mi(self, sort): From 1c5a0376dd93c38ffe71225cec9cea15fd9624f0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 1 Feb 2019 14:06:02 -0600 Subject: [PATCH 22/22] fixup [ci skip] --- doc/source/whatsnew/v0.24.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index 809db385d4a91..8f2584934d13a 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -33,7 +33,7 @@ The default *behavior*, however, remains the same: the result is sorted, unless This change will allow ``sort=True`` to mean "always sort" in a future release. The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which -would do not sort the result when the values could not be compared. +would not sort the result when the values could not be compared. The `sort` option for :meth:`Index.intersection` has changed in three ways.