pandas-dev · jreback · Sep 11, 2014 · Sep 9, 2014 · immerrr · Sep 11, 2014
diff --git a/doc/source/indexing.rst b/doc/source/indexing.rst
@@ -1616,28 +1616,33 @@ display:
    df
    df['A']
 
+.. _indexing.setops:
 
 Set operations on Index objects
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
+.. warning::
+
+   In 0.15.0, the set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain
+   index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``.
+
 .. _indexing.set_ops:
 
-The three main operations are ``union (|)``, ``intersection (&)``, and ``diff
-(-)``. These can be directly called as instance methods or used via overloaded
-operators:
+The two main operations are ``union (|)`` and ``intersection (&)``.
+These can be directly called as instance methods or used via overloaded
+operators. Difference is provided via the ``.difference()`` method.
 
 .. ipython:: python
 
    a = Index(['c', 'b', 'a'])
    b = Index(['c', 'e', 'd'])
-   a.union(b)
    a | b
    a & b
-   a - b
+   a.difference(b)
 
 Also available is the ``sym_diff (^)`` operation, which returns elements
 that appear in either ``idx1`` or ``idx2`` but not both. This is
-equivalent to the Index created by ``(idx1 - idx2) + (idx2 - idx1)``,
+equivalent to the Index created by ``(idx1.difference(idx2)).union(idx2.difference(idx1))``,
 with duplicates dropped.
 
 .. ipython:: python

diff --git a/doc/source/v0.15.0.txt b/doc/source/v0.15.0.txt
@@ -19,6 +19,7 @@ users upgrade to this version.
   - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring <whatsnew_0150.refactoring>`
   - New datetimelike properties accessor ``.dt`` for Series, see :ref:`Datetimelike Properties <whatsnew_0150.dt>`
   - dropping support for ``PyTables`` less than version 3.0.0, and ``numexpr`` less than version 2.1 (:issue:`7990`)
+  - API change in using ``Index`` set operations, see :ref:`here <whatsnew_0150.index_set_ops>`
 
 - :ref:`Other Enhancements <whatsnew_0150.enhancements>`
 
@@ -343,6 +344,11 @@ API changes
 - ``Series.to_csv()`` now returns a string when ``path=None``, matching the behaviour of
     ``DataFrame.to_csv()`` (:issue:`8215`).
 
+
+.. _whatsnew_0150.index_set_ops:
+
+- The Index set operations ``+`` and ``-`` were deprecated in order to provide these for numeric type operations on certain index types. ``+`` can be replaced by ``.union()`` or ``|``, and ``-`` by ``.difference()``. Further, the method name ``Index.diff()`` is deprecated and can be replaced by ``Index.difference()``.
+
 .. _whatsnew_0150.dt:
 
 .dt accessor

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -711,8 +711,10 @@ def __add__(self, other):
         from pandas.core.index import Index
         from pandas.tseries.offsets import DateOffset
         if isinstance(other, Index):
+            warnings.warn("using '+' to provide set union with Indexes is deprecated, "
+                          "use .union()",FutureWarning)
             return self.union(other)
-        elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
+        if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
             return self._add_delta(other)
         elif com.is_integer(other):
             return self.shift(other)
@@ -723,8 +725,10 @@ def __sub__(self, other):
         from pandas.core.index import Index
         from pandas.tseries.offsets import DateOffset
         if isinstance(other, Index):
-            return self.diff(other)
-        elif isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
+            warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
+                          "use .difference()",FutureWarning)
+            return self.difference(other)
+        if isinstance(other, (DateOffset, datetime.timedelta, np.timedelta64)):
             return self._add_delta(-other)
         elif com.is_integer(other):
             return self.shift(-other)

diff --git a/pandas/core/categorical.py b/pandas/core/categorical.py
@@ -435,7 +435,7 @@ def reorder_levels(self, new_levels, ordered=None):
         """
         new_levels = self._validate_levels(new_levels)
 
-        if len(new_levels) < len(self._levels) or len(self._levels-new_levels):
+        if len(new_levels) < len(self._levels) or len(self._levels.difference(new_levels)):
             raise ValueError('Reordered levels must include all original levels')
         values = self.__array__()
         self._codes = _get_codes_for_values(values, new_levels)
@@ -887,7 +887,7 @@ def __setitem__(self, key, value):
                 raise ValueError("cannot set a Categorical with another, without identical levels")
 
         rvalue = value if com.is_list_like(value) else [value]
-        to_add = Index(rvalue)-self.levels
+        to_add = Index(rvalue).difference(self.levels)
         # no assignments of values not in levels, but it's always ok to set something to np.nan
         if len(to_add) and not isnull(to_add).all():
             raise ValueError("cannot setitem on a Categorical with a new level,"

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -3682,7 +3682,7 @@ def append(self, other, ignore_index=False, verify_integrity=False):
                                 'ignore_index=True')
 
             index = None if other.name is None else [other.name]
-            combined_columns = self.columns.tolist() + ((self.columns | other.index) - self.columns).tolist()
+            combined_columns = self.columns.tolist() + (self.columns | other.index).difference(self.columns).tolist()
             other = other.reindex(combined_columns, copy=False)
             other = DataFrame(other.values.reshape((1, len(other))),
                               index=index, columns=combined_columns).convert_objects()

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -473,7 +473,7 @@ def _set_selection_from_grouper(self):
             ax = self.obj._info_axis
             groupers = [ g.name for g in grp.groupings if g.level is None and g.name is not None and g.name in ax ]
             if len(groupers):
-                self._group_selection = (ax-Index(groupers)).tolist()
+                self._group_selection = ax.difference(Index(groupers)).tolist()
 
     def _set_result_index_ordered(self, result):
         # set the result index on the passed values object

diff --git a/pandas/core/index.py b/pandas/core/index.py
@@ -1128,9 +1128,10 @@ def argsort(self, *args, **kwargs):
 
     def __add__(self, other):
         if isinstance(other, Index):
+            warnings.warn("using '+' to provide set union with Indexes is deprecated, "
+                          "use '|' or .union()",FutureWarning)
             return self.union(other)
-        else:
-            return Index(np.array(self) + other)
+        return Index(np.array(self) + other)
 
     __iadd__ = __add__
     __eq__ = _indexOp('__eq__')
@@ -1141,7 +1142,10 @@ def __add__(self, other):
     __ge__ = _indexOp('__ge__')
 
     def __sub__(self, other):
-        return self.diff(other)
+        if isinstance(other, Index):
+            warnings.warn("using '-' to provide set differences with Indexes is deprecated, "
+                          "use .difference()",FutureWarning)
+        return self.difference(other)
 
     def __and__(self, other):
         return self.intersection(other)
@@ -1273,7 +1277,7 @@ def intersection(self, other):
             taken.name = None
         return taken
 
-    def diff(self, other):
+    def difference(self, other):
         """
         Compute sorted set difference of two Index objects
 
@@ -1289,8 +1293,7 @@ def diff(self, other):
         -----
         One can do either of these and achieve the same result
 
-        >>> index - index2
-        >>> index.diff(index2)
+        >>> index.difference(index2)
         """
 
         if not hasattr(other, '__iter__'):
@@ -1308,6 +1311,8 @@ def diff(self, other):
         theDiff = sorted(set(self) - set(other))
         return Index(theDiff, name=result_name)
 
+    diff = deprecate('diff',difference)
+
     def sym_diff(self, other, result_name=None):
         """
         Compute the sorted symmetric difference of two Index objects.
@@ -1350,7 +1355,7 @@ def sym_diff(self, other, result_name=None):
             other = Index(other)
             result_name = result_name or self.name
 
-        the_diff = sorted(set((self - other) + (other - self)))
+        the_diff = sorted(set((self.difference(other)).union(other.difference(self))))
         return Index(the_diff, name=result_name)
 
     def get_loc(self, key):
@@ -4135,6 +4140,8 @@ def union(self, other):
         Returns
         -------
         Index
+
+        >>> index.union(index2)
         """
         self._assert_can_do_setop(other)
 
@@ -4177,7 +4184,7 @@ def intersection(self, other):
             return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0,
                                           names=result_names)
 
-    def diff(self, other):
+    def difference(self, other):
         """
         Compute sorted set difference of two MultiIndex objects
 

diff --git a/pandas/core/panel.py b/pandas/core/panel.py
@@ -678,9 +678,9 @@ def _combine_frame(self, other, func, axis=0):
                                  self.minor_axis)
 
     def _combine_panel(self, other, func):
-        items = self.items + other.items
-        major = self.major_axis + other.major_axis
-        minor = self.minor_axis + other.minor_axis
+        items = self.items.union(other.items)
+        major = self.major_axis.union(other.major_axis)
+        minor = self.minor_axis.union(other.minor_axis)
 
         # could check that everything's the same size, but forget it
         this = self.reindex(items=items, major=major, minor=minor)

diff --git a/pandas/core/panelnd.py b/pandas/core/panelnd.py
@@ -82,7 +82,7 @@ def _combine_with_constructor(self, other, func):
         # combine labels to form new axes
         new_axes = []
         for a in self._AXIS_ORDERS:
-            new_axes.append(getattr(self, a) + getattr(other, a))
+            new_axes.append(getattr(self, a).union(getattr(other, a)))
 
         # reindex: could check that everything's the same size, but forget it
         d = dict([(a, ax) for a, ax in zip(self._AXIS_ORDERS, new_axes)])

diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -848,7 +848,7 @@ def lreshape(data, groups, dropna=True, label=None):
         keys, values = zip(*groups)
 
     all_cols = list(set.union(*[set(x) for x in values]))
-    id_cols = list(data.columns.diff(all_cols))
+    id_cols = list(data.columns.difference(all_cols))
 
     K = len(values[0])
 

diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -1458,7 +1458,7 @@ def combine(self, other, func, fill_value=nan):
         result : Series
         """
         if isinstance(other, Series):
-            new_index = self.index + other.index
+            new_index = self.index.union(other.index)
             new_name = _maybe_match_name(self, other)
             new_values = pa.empty(len(new_index), dtype=self.dtype)
             for i, idx in enumerate(new_index):
@@ -1484,7 +1484,7 @@ def combine_first(self, other):
         -------
         y : Series
         """
-        new_index = self.index + other.index
+        new_index = self.index.union(other.index)
         this = self.reindex(new_index, copy=False)
         other = other.reindex(new_index, copy=False)
         name = _maybe_match_name(self, other)

diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py
@@ -970,7 +970,7 @@ def append_to_multiple(self, d, value, selector, data_columns=None,
                 remain_values.extend(v)
         if remain_key is not None:
             ordered = value.axes[axis]
-            ordd = ordered - Index(remain_values)
+            ordd = ordered.difference(Index(remain_values))
             ordd = sorted(ordered.get_indexer(ordd))
             d[remain_key] = ordered.take(ordd)
 
@@ -3245,7 +3245,7 @@ def get_blk_items(mgr, blocks):
                 data_columns, min_itemsize)
             if len(data_columns):
                 mgr = block_obj.reindex_axis(
-                    Index(axis_labels) - Index(data_columns),
+                    Index(axis_labels).difference(Index(data_columns)),
                     axis=axis
                 )._data
 
@@ -3362,7 +3362,7 @@ def process_filter(field, filt):
                             # if we have a multi-index, then need to include
                             # the levels
                             if self.is_multi_index:
-                                filt = filt + Index(self.levels)
+                                filt = filt.union(Index(self.levels))
 
                             takers = op(axis_values, filt)
                             return obj.ix._getitem_axis(takers,

diff --git a/pandas/io/tests/test_pytables.py b/pandas/io/tests/test_pytables.py
@@ -2320,7 +2320,7 @@ def test_remove_startstop(self):
             n = store.remove('wp5', start=16, stop=-16)
             self.assertTrue(n == 120-32)
             result = store.select('wp5')
-            expected = wp.reindex(major_axis=wp.major_axis[:16//4]+wp.major_axis[-16//4:])
+            expected = wp.reindex(major_axis=wp.major_axis[:16//4].union(wp.major_axis[-16//4:]))
             assert_panel_equal(result, expected)
 
             _maybe_remove(store, 'wp6')
@@ -2339,7 +2339,7 @@ def test_remove_startstop(self):
             n = store.remove('wp7', where=[crit], stop=80)
             self.assertTrue(n == 28)
             result = store.select('wp7')
-            expected = wp.reindex(major_axis=wp.major_axis-wp.major_axis[np.arange(0,20,3)])
+            expected = wp.reindex(major_axis=wp.major_axis.difference(wp.major_axis[np.arange(0,20,3)]))
             assert_panel_equal(result, expected)
 
     def test_remove_crit(self):
@@ -2357,7 +2357,7 @@ def test_remove_crit(self):
             self.assertTrue(n == 36)
 
             result = store.select('wp3')
-            expected = wp.reindex(major_axis=wp.major_axis - date4)
+            expected = wp.reindex(major_axis=wp.major_axis.difference(date4))
             assert_panel_equal(result, expected)
 
             # upper half
@@ -2385,23 +2385,23 @@ def test_remove_crit(self):
             crit1 = Term('major_axis=date1')
             store.remove('wp2', where=[crit1])
             result = store.select('wp2')
-            expected = wp.reindex(major_axis=wp.major_axis - date1)
+            expected = wp.reindex(major_axis=wp.major_axis.difference(date1))
             assert_panel_equal(result, expected)
 
             date2 = wp.major_axis[5]
             crit2 = Term('major_axis=date2')
             store.remove('wp2', where=[crit2])
             result = store['wp2']
             expected = wp.reindex(
-                major_axis=wp.major_axis - date1 - Index([date2]))
+                major_axis=wp.major_axis.difference(date1).difference(Index([date2])))
             assert_panel_equal(result, expected)
 
             date3 = [wp.major_axis[7], wp.major_axis[9]]
             crit3 = Term('major_axis=date3')
             store.remove('wp2', where=[crit3])
             result = store['wp2']
             expected = wp.reindex(
-                major_axis=wp.major_axis - date1 - Index([date2]) - Index(date3))
+                major_axis=wp.major_axis.difference(date1).difference(Index([date2])).difference(Index(date3)))
             assert_panel_equal(result, expected)
 
             # corners

diff --git a/pandas/sparse/panel.py b/pandas/sparse/panel.py
@@ -427,9 +427,9 @@ def _new_like(self, new_frames):
                            default_kind=self.default_kind)
 
     def _combinePanel(self, other, func):
-        items = self.items + other.items
-        major = self.major_axis + other.major_axis
-        minor = self.minor_axis + other.minor_axis
+        items = self.items.union(other.items)
+        major = self.major_axis.union(other.major_axis)
+        minor = self.minor_axis.union(other.minor_axis)
 
         # could check that everything's the same size, but forget it