CLN: Index.append() refactoring (#16236)

pandas-dev · Aug 22, 2017 · 2f00159 · 2f00159
1 parent a4c4ede
commit 2f00159
Show file tree

Hide file tree

Showing 6 changed files with 63 additions and 67 deletions.
diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -19,7 +19,7 @@
     _TD_DTYPE)
 from pandas.core.dtypes.generic import (
     ABCDatetimeIndex, ABCTimedeltaIndex,
-    ABCPeriodIndex)
+    ABCPeriodIndex, ABCRangeIndex)
 
 
 def get_dtype_kinds(l):
@@ -41,6 +41,8 @@ def get_dtype_kinds(l):
             typ = 'category'
         elif is_sparse(arr):
             typ = 'sparse'
+        elif isinstance(arr, ABCRangeIndex):
+            typ = 'range'
         elif is_datetimetz(arr):
             # if to_concat contains different tz,
             # the result must be object dtype
@@ -559,3 +561,47 @@ def convert_sparse(x, axis):
         # coerce to object if needed
         result = result.astype('object')
     return result
+
+
+def _concat_rangeindex_same_dtype(indexes):
+    """
+    Concatenates multiple RangeIndex instances. All members of "indexes" must
+    be of type RangeIndex; result will be RangeIndex if possible, Int64Index
+    otherwise. E.g.:
+    indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
+    indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
+    """
+
+    start = step = next = None
+
+    for obj in indexes:
+        if not len(obj):
+            continue
+
+        if start is None:
+            # This is set by the first non-empty index
+            start = obj._start
+            if step is None and len(obj) > 1:
+                step = obj._step
+        elif step is None:
+            # First non-empty index had only one element
+            if obj._start == start:
+                return _concat_index_asobject(indexes)
+            step = obj._start - start
+
+        non_consecutive = ((step != obj._step and len(obj) > 1) or
+                           (next is not None and obj._start != next))
+        if non_consecutive:
+            # Int64Index._concat_same_dtype([ix.astype(int) for ix in indexes])
+            # would be preferred... but it currently resorts to
+            # _concat_index_asobject anyway.
+            return _concat_index_asobject(indexes)
+
+        if step is not None:
+            next = obj[-1] + step
+
+    if start is None:
+        start = obj._start
+        step = obj._step
+    stop = obj._stop if next is None else next
+    return indexes[0].__class__(start, stop, step)
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1745,18 +1745,17 @@ def append(self, other):
         names = set([obj.name for obj in to_concat])
         name = None if len(names) > 1 else self.name
 
-        if self.is_categorical():
-            # if calling index is category, don't check dtype of others
-            from pandas.core.indexes.category import CategoricalIndex
-            return CategoricalIndex._append_same_dtype(self, to_concat, name)
+        return self._concat(to_concat, name)
+
+    def _concat(self, to_concat, name):
 
         typs = _concat.get_dtype_kinds(to_concat)
 
         if len(typs) == 1:
-            return self._append_same_dtype(to_concat, name=name)
+            return self._concat_same_dtype(to_concat, name=name)
         return _concat._concat_index_asobject(to_concat, name=name)
 
-    def _append_same_dtype(self, to_concat, name):
+    def _concat_same_dtype(self, to_concat, name):
         """
         Concatenate to_concat which has the same class
         """

diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
@@ -633,7 +633,11 @@ def insert(self, loc, item):
         codes = np.concatenate((codes[:loc], code, codes[loc:]))
         return self._create_from_codes(codes)
 
-    def _append_same_dtype(self, to_concat, name):
+    def _concat(self, to_concat, name):
+        # if calling index is category, don't check dtype of others
+        return CategoricalIndex._concat_same_dtype(self, to_concat, name)
+
+    def _concat_same_dtype(self, to_concat, name):
         """
         Concatenate to_concat which has the same class
         ValueError if other is not in the categories

diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py
@@ -837,7 +837,7 @@ def summary(self, name=None):
         result = result.replace("'", "")
         return result
 
-    def _append_same_dtype(self, to_concat, name):
+    def _concat_same_dtype(self, to_concat, name):
         """
         Concatenate to_concat which has the same class
         """

diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py
@@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg):
             raise ValueError(error_msg)
         return other
 
-    def _append_same_dtype(self, to_concat, name):
+    def _concat_same_dtype(self, to_concat, name):
         """
         assert that we all have the same .closed
         we allow a 0-len index here as well
@@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name):
             msg = ('can only append two IntervalIndex objects '
                    'that are closed on the same side')
             raise ValueError(msg)
-        return super(IntervalIndex, self)._append_same_dtype(to_concat, name)
+        return super(IntervalIndex, self)._concat_same_dtype(to_concat, name)
 
     @Appender(_index_shared_docs['take'] % _index_doc_kwargs)
     def take(self, indices, axis=0, allow_fill=True,

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -14,6 +14,7 @@
 from pandas.compat.numpy import function as nv
 from pandas.core.indexes.base import Index, _index_shared_docs
 from pandas.util._decorators import Appender, cache_readonly
+import pandas.core.dtypes.concat as _concat
 import pandas.core.indexes.base as ibase
 
 from pandas.core.indexes.numeric import Int64Index
@@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False,
         return super(RangeIndex, self).join(other, how, level, return_indexers,
                                             sort)
 
-    def append(self, other):
-        """
-        Append a collection of Index options together
-
-        Parameters
-        ----------
-        other : Index or list/tuple of indices
-
-        Returns
-        -------
-        appended : RangeIndex if all indexes are consecutive RangeIndexes,
-                   otherwise Int64Index or Index
-        """
-
-        to_concat = [self]
-
-        if isinstance(other, (list, tuple)):
-            to_concat = to_concat + list(other)
-        else:
-            to_concat.append(other)
-
-        if not all([isinstance(i, RangeIndex) for i in to_concat]):
-            return super(RangeIndex, self).append(other)
-
-        start = step = next = None
-
-        for obj in to_concat:
-            if not len(obj):
-                continue
-
-            if start is None:
-                # This is set by the first non-empty index
-                start = obj._start
-                if step is None and len(obj) > 1:
-                    step = obj._step
-            elif step is None:
-                # First non-empty index had only one element
-                if obj._start == start:
-                    return super(RangeIndex, self).append(other)
-                step = obj._start - start
-
-            non_consecutive = ((step != obj._step and len(obj) > 1) or
-                               (next is not None and obj._start != next))
-            if non_consecutive:
-                return super(RangeIndex, self).append(other)
-
-            if step is not None:
-                next = obj[-1] + step
-
-        if start is None:
-            start = obj._start
-            step = obj._step
-        stop = obj._stop if next is None else next
-        names = set([obj.name for obj in to_concat])
-        name = None if len(names) > 1 else self.name
-        return RangeIndex(start, stop, step, name=name)
+    def _concat_same_dtype(self, indexes, name):
+        return _concat._concat_rangeindex_same_dtype(indexes).rename(name)
 
     def __len__(self):
         """