From 8cc872f24f5ec6e1f286b6d2afe47be0526ec74d Mon Sep 17 00:00:00 2001 From: Pietro Battiston Date: Tue, 22 Aug 2017 10:11:10 +0200 Subject: [PATCH] CLN: Index.append() refactoring (#16236) --- pandas/core/dtypes/concat.py | 48 ++++++++++++++++++++++- pandas/core/indexes/base.py | 11 +++--- pandas/core/indexes/category.py | 6 ++- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/interval.py | 4 +- pandas/core/indexes/range.py | 59 ++--------------------------- 6 files changed, 63 insertions(+), 67 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index 292d5f608d4cb2..0ce45eea119ed2 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -19,7 +19,7 @@ _TD_DTYPE) from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCTimedeltaIndex, - ABCPeriodIndex) + ABCPeriodIndex, ABCRangeIndex) def get_dtype_kinds(l): @@ -41,6 +41,8 @@ def get_dtype_kinds(l): typ = 'category' elif is_sparse(arr): typ = 'sparse' + elif isinstance(arr, ABCRangeIndex): + typ = 'range' elif is_datetimetz(arr): # if to_concat contains different tz, # the result must be object dtype @@ -559,3 +561,47 @@ def convert_sparse(x, axis): # coerce to object if needed result = result.astype('object') return result + + +def _concat_rangeindex_same_dtype(indexes): + """ + Concatenates multiple RangeIndex instances. All members of "indexes" must + be of type RangeIndex; result will be RangeIndex if possible, Int64Index + otherwise. E.g.: + indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6) + indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5]) + """ + + start = step = next = None + + for obj in indexes: + if not len(obj): + continue + + if start is None: + # This is set by the first non-empty index + start = obj._start + if step is None and len(obj) > 1: + step = obj._step + elif step is None: + # First non-empty index had only one element + if obj._start == start: + return _concat_index_asobject(indexes) + step = obj._start - start + + non_consecutive = ((step != obj._step and len(obj) > 1) or + (next is not None and obj._start != next)) + if non_consecutive: + # Int64Index._append_same_dtype([ix.astype(int) for ix in indexes]) + # would be preferred... but it currently resorts to + # _concat_index_asobject anyway. + return _concat_index_asobject(indexes) + + if step is not None: + next = obj[-1] + step + + if start is None: + start = obj._start + step = obj._step + stop = obj._stop if next is None else next + return indexes[0].__class__(start, stop, step) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index de6221987a59aa..a21e6df3ffc93d 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -1745,18 +1745,17 @@ def append(self, other): names = set([obj.name for obj in to_concat]) name = None if len(names) > 1 else self.name - if self.is_categorical(): - # if calling index is category, don't check dtype of others - from pandas.core.indexes.category import CategoricalIndex - return CategoricalIndex._append_same_dtype(self, to_concat, name) + return self._concat(to_concat, name) + + def _concat(self, to_concat, name): typs = _concat.get_dtype_kinds(to_concat) if len(typs) == 1: - return self._append_same_dtype(to_concat, name=name) + return self._concat_same_dtype(to_concat, name=name) return _concat._concat_index_asobject(to_concat, name=name) - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index ac4698b570d172..f22407308e0944 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -633,7 +633,11 @@ def insert(self, loc, item): codes = np.concatenate((codes[:loc], code, codes[loc:])) return self._create_from_codes(codes) - def _append_same_dtype(self, to_concat, name): + def _concat(self, to_concat, name): + # if calling index is category, don't check dtype of others + return CategoricalIndex._concat_same_dtype(self, to_concat, name) + + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class ValueError if other is not in the categories diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 845c71b6c41d8b..c3232627fce74c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -837,7 +837,7 @@ def summary(self, name=None): result = result.replace("'", "") return result - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ Concatenate to_concat which has the same class """ diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index e90378184e3f3e..e0ed6c7ea35c0c 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg): raise ValueError(error_msg) return other - def _append_same_dtype(self, to_concat, name): + def _concat_same_dtype(self, to_concat, name): """ assert that we all have the same .closed we allow a 0-len index here as well @@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name): msg = ('can only append two IntervalIndex objects ' 'that are closed on the same side') raise ValueError(msg) - return super(IntervalIndex, self)._append_same_dtype(to_concat, name) + return super(IntervalIndex, self)._concat_same_dtype(to_concat, name) @Appender(_index_shared_docs['take'] % _index_doc_kwargs) def take(self, indices, axis=0, allow_fill=True, diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index ac4cc6986cace2..82412d3a7ef57a 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -14,6 +14,7 @@ from pandas.compat.numpy import function as nv from pandas.core.indexes.base import Index, _index_shared_docs from pandas.util._decorators import Appender, cache_readonly +import pandas.core.dtypes.concat as _concat import pandas.core.indexes.base as ibase from pandas.core.indexes.numeric import Int64Index @@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False, return super(RangeIndex, self).join(other, how, level, return_indexers, sort) - def append(self, other): - """ - Append a collection of Index options together - - Parameters - ---------- - other : Index or list/tuple of indices - - Returns - ------- - appended : RangeIndex if all indexes are consecutive RangeIndexes, - otherwise Int64Index or Index - """ - - to_concat = [self] - - if isinstance(other, (list, tuple)): - to_concat = to_concat + list(other) - else: - to_concat.append(other) - - if not all([isinstance(i, RangeIndex) for i in to_concat]): - return super(RangeIndex, self).append(other) - - start = step = next = None - - for obj in to_concat: - if not len(obj): - continue - - if start is None: - # This is set by the first non-empty index - start = obj._start - if step is None and len(obj) > 1: - step = obj._step - elif step is None: - # First non-empty index had only one element - if obj._start == start: - return super(RangeIndex, self).append(other) - step = obj._start - start - - non_consecutive = ((step != obj._step and len(obj) > 1) or - (next is not None and obj._start != next)) - if non_consecutive: - return super(RangeIndex, self).append(other) - - if step is not None: - next = obj[-1] + step - - if start is None: - start = obj._start - step = obj._step - stop = obj._stop if next is None else next - names = set([obj.name for obj in to_concat]) - name = None if len(names) > 1 else self.name - return RangeIndex(start, stop, step, name=name) + def _concat_same_dtype(self, indexes, name): + return _concat._concat_rangeindex_same_dtype(indexes).rename(name) def __len__(self): """