pandas-dev · TomAugspurger · Jan 29, 2019 · Jan 29, 2019 · Jan 30, 2019 · Jan 30, 2019
diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst
@@ -15,10 +15,84 @@ Whats New in 0.24.1 (February XX, 2019)
 These are the changes in pandas 0.24.1. See :ref:`release` for a full changelog
 including other versions of pandas.
 
+.. _whatsnew_0241.api:
+
+API Changes
+~~~~~~~~~~~
+
+Changing the ``sort`` parameter for :meth:`Index.union`
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The default ``sort`` value for :meth:`Index.union` has changed from ``True`` to ``None`` (:issue:`24959`).
+The default *behavior* remains the same: The result is sorted, unless
+
+1. ``self`` and ``other`` are identical
+2. ``self`` or ``other`` is empty
+3. ``self`` or ``other`` contains values that cannot be compared (a ``RuntimeWarning`` is issued).
+
+This allows ``sort=True`` to now mean "always sort". A ``TypeError`` is raised if the values cannot be compared.
+
+**Behavior in 0.24.0**
+
+.. ipython:: python
+
+   In [1]: idx = pd.Index(['b', 'a'])
+
+   In [2]: idx.union(idx)  # sort=True was the default.
+   Out[2]: Index(['b', 'a'], dtype='object')
+
+   In [3]: idx.union(idx, sort=True)  # result is still not sorted.
+   Out[3]: Index(['b', 'a'], dtype='object')
+
+**New Behavior**
+
+.. ipython:: python
+
+   idx = pd.Index(['b', 'a'])
+   idx.union(idx)  # sort=None is the default. Don't sort identical operands.
+
+   idx.union(idx, sort=True)
+
+The same change applies to :meth:`Index.difference` and :meth:`Index.symmetric_difference`, which
+would previously not sort the result when ``sort=True`` but the values could not be compared.
+
+Changed the behavior of :meth:`Index.intersection` with ``sort=True``
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When ``sort=True`` is provided to :meth:`Index.intersection`, the values are always sorted. In 0.24.0,
+the values would not be sorted when ``self`` and ``other`` were identical. Pass ``sort=False`` to not
+sort the values. This matches the behavior of pandas 0.23.4 and earlier.
+
+**Behavior in 0.23.4**
+
+.. ipython:: python
+
+   In [2]: idx = pd.Index(['b', 'a'])
+
+   In [3]: idx.intersection(idx)  # sort was not a keyword.
+   Out[3]: Index(['b', 'a'], dtype='object')
+
+**Behavior in 0.24.0**
+
+.. ipython:: python
+
+   In [5]: idx.intersection(idx)  # sort=False by default; result is not sorted.
+   Out[5]: Index(['b', 'a'], dtype='object')
+
+   In [6]: idx.intersection(idx, sort=True)
+   Out[6]: Index(['b', 'a'], dtype='object')
+
+**New Behavior**
+
+.. ipython:: python
+
+   idx.intersection(idx)  # sort=False by default
+   idx.intersection(idx, sort=True)
+
 .. _whatsnew_0241.regressions:
 
 Fixed Regressions
-^^^^^^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~
 
 - Bug in :meth:`DataFrame.itertuples` with ``records`` orient raising an ``AttributeError`` when the ``DataFrame`` contained more than 255 columns (:issue:`24939`)
 - Bug in :meth:`DataFrame.itertuples` orient converting integer column names to strings prepended with an underscore (:issue:`24940`)
@@ -28,7 +102,7 @@ Fixed Regressions
 .. _whatsnew_0241.enhancements:
 
 Enhancements
-^^^^^^^^^^^^
+~~~~~~~~~~~~
 
 
 .. _whatsnew_0241.bug_fixes:

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -233,11 +233,14 @@ def fast_unique_multiple(list arrays, sort: bool=True):
             if val not in table:
                 table[val] = stub
                 uniques.append(val)
-    if sort:
+    if sort is None:
         try:
             uniques.sort()
         except Exception:
+            # TODO: RuntimeWarning?
             pass
+    elif sort:
+        uniques.sort()
 
     return uniques
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2245,18 +2245,34 @@ def _get_reconciled_name_object(self, other):
             return self._shallow_copy(name=name)
         return self
 
-    def union(self, other, sort=True):
+    def union(self, other, sort=None):
         """
         Form the union of two Index objects.
 
         Parameters
         ----------
         other : Index or array-like
-        sort : bool, default True
-            Sort the resulting index if possible
+        sort : bool or None, default None
+            Whether to sort the resulting Index.
+
+            * None : Sort the result, except when
+
+              1. `self` and `other` are equal.
+              2. `self` or `other` has length 0.
+              3. Some values in `self` or `other` cannot be compared.
+                 A RuntimeWarning is issued in this case.
+
+            * True : sort the result. A TypeError is raised when the
+              values cannot be compared.
+            * False : do not sort the result.
 
             .. versionadded:: 0.24.0
 
+            .. versionchanged:: 0.24.1
+
+               Changed the default `sort` to None, matching the
+               behavior of pandas 0.23.4 and earlier.
+
         Returns
         -------
         union : Index
@@ -2273,10 +2289,16 @@ def union(self, other, sort=True):
         other = ensure_index(other)
 
         if len(other) == 0 or self.equals(other):
-            return self._get_reconciled_name_object(other)
+            result = self._get_reconciled_name_object(other)
+            if sort:
+                result = result.sort_values()
+            return result
 
         if len(self) == 0:
-            return other._get_reconciled_name_object(self)
+            result = other._get_reconciled_name_object(self)
+            if sort:
+                result = result.sort_values()
+            return result
 
         # TODO: is_dtype_union_equal is a hack around
         # 1. buggy set ops with duplicates (GH #13432)
@@ -2319,13 +2341,16 @@ def union(self, other, sort=True):
             else:
                 result = lvals
 
-            if sort:
+            if sort is None:
                 try:
                     result = sorting.safe_sort(result)
                 except TypeError as e:
                     warnings.warn("{}, sort order is undefined for "
                                   "incomparable objects".format(e),
                                   RuntimeWarning, stacklevel=3)
+            elif sort:
+                # raise if not sortable.
+                result = sorting.safe_sort(result)
 
         # for subclasses
         return self._wrap_setop_result(other, result)
@@ -2342,8 +2367,12 @@ def intersection(self, other, sort=False):
         Parameters
         ----------
         other : Index or array-like
-        sort : bool, default False
-            Sort the resulting index if possible
+        sort : bool or None, default False
+            Whether to sort the resulting index.
+
+            * False : do not sort the result.
+            * True : sort the result. A TypeError is raised when the
+              values cannot be compared.
 
             .. versionadded:: 0.24.0
 
@@ -2367,7 +2396,10 @@ def intersection(self, other, sort=False):
         other = ensure_index(other)
 
         if self.equals(other):
-            return self._get_reconciled_name_object(other)
+            result = self._get_reconciled_name_object(other)
+            if sort:
+                result = result.sort_values()
+            return result
 
         if not is_dtype_equal(self.dtype, other.dtype):
             this = self.astype('O')
@@ -2415,7 +2447,7 @@ def intersection(self, other, sort=False):
 
         return taken
 
-    def difference(self, other, sort=True):
+    def difference(self, other, sort=None):
         """
         Return a new Index with elements from the index that are not in
         `other`.
@@ -2425,11 +2457,24 @@ def difference(self, other, sort=True):
         Parameters
         ----------
         other : Index or array-like
-        sort : bool, default True
-            Sort the resulting index if possible
+        sort : bool or None, default None
+            Whether to sort the resulting index. By default, pandas
+            attempts to sort the values, but any TypeError raised by
+            incomparable elements is caught.
+
+            * None : Attempt to sort the result, but catch any TypeErrors
+              from comparing incomparable elements.
+            * False : Do not sort the result.
+            * True : Sort the result, raising a TypeError if any elements
+              cannot be compared.
 
             .. versionadded:: 0.24.0
 
+            .. versionchanged:: 0.24.1
+
+               Added the `None` option, which matches the behavior of
+               pandas 0.23.4 and earlier.
+
         Returns
         -------
         difference : Index
@@ -2460,27 +2505,42 @@ def difference(self, other, sort=True):
         label_diff = np.setdiff1d(np.arange(this.size), indexer,
                                   assume_unique=True)
         the_diff = this.values.take(label_diff)
-        if sort:
+        if sort is None:
             try:
                 the_diff = sorting.safe_sort(the_diff)
             except TypeError:
                 pass
+        elif sort:
+            the_diff = sorting.safe_sort(the_diff)
 
         return this._shallow_copy(the_diff, name=result_name, freq=None)
 
-    def symmetric_difference(self, other, result_name=None, sort=True):
+    def symmetric_difference(self, other, result_name=None, sort=None):
         """
         Compute the symmetric difference of two Index objects.
 
         Parameters
         ----------
         other : Index or array-like
         result_name : str
-        sort : bool, default True
-            Sort the resulting index if possible
+        sort : bool or None, default None
+            Whether to sort the resulting index. By default, pandas
+            attempts to sort the values, but any TypeError raised by
+            incomparable elements is caught.
+
+            * None : Attempt to sort the result, but catch any TypeErrors
+              from comparing incomparable elements.
+            * False : Do not sort the result.
+            * True : Sort the result, raising a TypeError if any elements
+              cannot be compared.
 
             .. versionadded:: 0.24.0
 
+            .. versionchanged:: 0.24.1
+
+               Added the `None` option, which matches the behavior of
+               pandas 0.23.4 and earlier.
+
         Returns
         -------
         symmetric_difference : Index
@@ -2524,11 +2584,13 @@ def symmetric_difference(self, other, result_name=None, sort=True):
         right_diff = other.values.take(right_indexer)
 
         the_diff = _concat._concat_compat([left_diff, right_diff])
-        if sort:
+        if sort is None:
             try:
                 the_diff = sorting.safe_sort(the_diff)
             except TypeError:
                 pass
+        elif sort:
+            the_diff = sorting.safe_sort(the_diff)
 
         attribs = self._get_attributes_dict()
         attribs['name'] = result_name

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -2879,18 +2879,34 @@ def equal_levels(self, other):
                 return False
         return True
 
-    def union(self, other, sort=True):
+    def union(self, other, sort=None):
         """
         Form the union of two MultiIndex objects
 
         Parameters
         ----------
         other : MultiIndex or array / Index of tuples
-        sort : bool, default True
-            Sort the resulting MultiIndex if possible
+        sort : bool or None, default None
+            Whether to sort the resulting Index.
+
+            * None : Sort the result, except when
+
+              1. `self` and `other` are equal.
+              2. `other` has length 0.
+              3. Some values in `self` or `other` cannot be compared.
+                 A RuntimeWarning is issued in this case.
+
+            * True : sort the result. A TypeError is raised when the
+              values cannot be compared.
+            * False : do not sort the result.
 
             .. versionadded:: 0.24.0
 
+            .. versionchanged:: 0.24.1
+
+               Changed the default `sort` to None, matching the
+               behavior of pandas 0.23.4 and earlier.
+
         Returns
         -------
         Index
@@ -2901,8 +2917,12 @@ def union(self, other, sort=True):
         other, result_names = self._convert_can_do_setop(other)
 
         if len(other) == 0 or self.equals(other):
+            if sort:
+                return self.sort_values()
             return self
 
+        # TODO: Index.union returns other when `len(self)` is 0.
+
         uniq_tuples = lib.fast_unique_multiple([self._ndarray_values,
                                                 other._ndarray_values],
                                                sort=sort)
@@ -2917,7 +2937,7 @@ def intersection(self, other, sort=False):
         Parameters
         ----------
         other : MultiIndex or array / Index of tuples
-        sort : bool, default True
+        sort : bool, default False
             Sort the resulting MultiIndex if possible
 
             .. versionadded:: 0.24.0
@@ -2934,6 +2954,8 @@ def intersection(self, other, sort=False):
         other, result_names = self._convert_can_do_setop(other)
 
         if self.equals(other):
+            if sort:
+                return self.sort_values()
             return self
 
         self_tuples = self._ndarray_values
@@ -2951,7 +2973,7 @@ def intersection(self, other, sort=False):
             return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
                                           names=result_names)
 
-    def difference(self, other, sort=True):
+    def difference(self, other, sort=None):
         """
         Compute set difference of two MultiIndex objects
 
@@ -2971,6 +2993,8 @@ def difference(self, other, sort=True):
         other, result_names = self._convert_can_do_setop(other)
 
         if len(other) == 0:
+            if sort:
+                return self.sort_values()
             return self
 
         if self.equals(other):