Skip to content

Commit

Permalink
CLN: Index.append() refactoring (#16236)
Browse files Browse the repository at this point in the history
  • Loading branch information
toobaz authored and jorisvandenbossche committed Aug 22, 2017
1 parent a4c4ede commit 2f00159
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 67 deletions.
48 changes: 47 additions & 1 deletion pandas/core/dtypes/concat.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
_TD_DTYPE)
from pandas.core.dtypes.generic import (
ABCDatetimeIndex, ABCTimedeltaIndex,
ABCPeriodIndex)
ABCPeriodIndex, ABCRangeIndex)


def get_dtype_kinds(l):
Expand All @@ -41,6 +41,8 @@ def get_dtype_kinds(l):
typ = 'category'
elif is_sparse(arr):
typ = 'sparse'
elif isinstance(arr, ABCRangeIndex):
typ = 'range'
elif is_datetimetz(arr):
# if to_concat contains different tz,
# the result must be object dtype
Expand Down Expand Up @@ -559,3 +561,47 @@ def convert_sparse(x, axis):
# coerce to object if needed
result = result.astype('object')
return result


def _concat_rangeindex_same_dtype(indexes):
"""
Concatenates multiple RangeIndex instances. All members of "indexes" must
be of type RangeIndex; result will be RangeIndex if possible, Int64Index
otherwise. E.g.:
indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Int64Index([0,1,2,4,5])
"""

start = step = next = None

for obj in indexes:
if not len(obj):
continue

if start is None:
# This is set by the first non-empty index
start = obj._start
if step is None and len(obj) > 1:
step = obj._step
elif step is None:
# First non-empty index had only one element
if obj._start == start:
return _concat_index_asobject(indexes)
step = obj._start - start

non_consecutive = ((step != obj._step and len(obj) > 1) or
(next is not None and obj._start != next))
if non_consecutive:
# Int64Index._append_same_dtype([ix.astype(int) for ix in indexes])
# would be preferred... but it currently resorts to
# _concat_index_asobject anyway.
return _concat_index_asobject(indexes)

if step is not None:
next = obj[-1] + step

if start is None:
start = obj._start
step = obj._step
stop = obj._stop if next is None else next
return indexes[0].__class__(start, stop, step)
11 changes: 5 additions & 6 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1745,18 +1745,17 @@ def append(self, other):
names = set([obj.name for obj in to_concat])
name = None if len(names) > 1 else self.name

if self.is_categorical():
# if calling index is category, don't check dtype of others
from pandas.core.indexes.category import CategoricalIndex
return CategoricalIndex._append_same_dtype(self, to_concat, name)
return self._concat(to_concat, name)

def _concat(self, to_concat, name):

typs = _concat.get_dtype_kinds(to_concat)

if len(typs) == 1:
return self._append_same_dtype(to_concat, name=name)
return self._concat_same_dtype(to_concat, name=name)
return _concat._concat_index_asobject(to_concat, name=name)

def _append_same_dtype(self, to_concat, name):
def _concat_same_dtype(self, to_concat, name):
"""
Concatenate to_concat which has the same class
"""
Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/category.py
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,11 @@ def insert(self, loc, item):
codes = np.concatenate((codes[:loc], code, codes[loc:]))
return self._create_from_codes(codes)

def _append_same_dtype(self, to_concat, name):
def _concat(self, to_concat, name):
    """
    Concatenate to_concat by delegating straight to
    CategoricalIndex._concat_same_dtype, passing this index as "self".
    """
    # The calling index is categorical, so the dtypes of the other
    # members of to_concat are deliberately not checked here.
    same_dtype_concat = CategoricalIndex._concat_same_dtype
    return same_dtype_concat(self, to_concat, name)

def _concat_same_dtype(self, to_concat, name):
"""
Concatenate to_concat which has the same class
ValueError if other is not in the categories
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ def summary(self, name=None):
result = result.replace("'", "")
return result

def _append_same_dtype(self, to_concat, name):
def _concat_same_dtype(self, to_concat, name):
"""
Concatenate to_concat which has the same class
"""
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,7 +876,7 @@ def _as_like_interval_index(self, other, error_msg):
raise ValueError(error_msg)
return other

def _append_same_dtype(self, to_concat, name):
def _concat_same_dtype(self, to_concat, name):
"""
assert that we all have the same .closed
we allow a 0-len index here as well
Expand All @@ -885,7 +885,7 @@ def _append_same_dtype(self, to_concat, name):
msg = ('can only append two IntervalIndex objects '
'that are closed on the same side')
raise ValueError(msg)
return super(IntervalIndex, self)._append_same_dtype(to_concat, name)
return super(IntervalIndex, self)._concat_same_dtype(to_concat, name)

@Appender(_index_shared_docs['take'] % _index_doc_kwargs)
def take(self, indices, axis=0, allow_fill=True,
Expand Down
59 changes: 3 additions & 56 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from pandas.compat.numpy import function as nv
from pandas.core.indexes.base import Index, _index_shared_docs
from pandas.util._decorators import Appender, cache_readonly
import pandas.core.dtypes.concat as _concat
import pandas.core.indexes.base as ibase

from pandas.core.indexes.numeric import Int64Index
Expand Down Expand Up @@ -447,62 +448,8 @@ def join(self, other, how='left', level=None, return_indexers=False,
return super(RangeIndex, self).join(other, how, level, return_indexers,
sort)

def append(self, other):
    """
    Append a collection of Index objects together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : RangeIndex if all indexes are consecutive RangeIndexes,
        otherwise Int64Index or Index
    """

    to_concat = [self]

    if isinstance(other, (list, tuple)):
        to_concat = to_concat + list(other)
    else:
        to_concat.append(other)

    # Anything that is not purely RangeIndex goes through the generic path
    if not all(isinstance(i, RangeIndex) for i in to_concat):
        return super(RangeIndex, self).append(other)

    # "next_" rather than "next", so that the builtin is not shadowed
    start = step = next_ = None
    last_non_empty = None

    for obj in to_concat:
        if not len(obj):
            continue
        last_non_empty = obj

        if start is None:
            # This is set by the first non-empty index
            start = obj._start
            if len(obj) > 1:
                step = obj._step
        elif step is None:
            # First non-empty index had only one element
            if obj._start == start:
                return super(RangeIndex, self).append(other)
            step = obj._start - start

        non_consecutive = ((step != obj._step and len(obj) > 1) or
                           (next_ is not None and obj._start != next_))
        if non_consecutive:
            return super(RangeIndex, self).append(other)

        if step is not None:
            next_ = obj[-1] + step

    # The result keeps self.name only when every index shares one name
    names = set([obj.name for obj in to_concat])
    name = None if len(names) > 1 else self.name

    if next_ is None:
        # "next_" is only left unset when no step could be established:
        # either every index is empty, or the only non-empty one holds a
        # single element. Rebuild that index from its own attributes; do
        # not read them from whichever index happens to come last, since
        # that one may be empty and describe an unrelated range.
        if last_non_empty is None:
            template = to_concat[-1]
        else:
            template = last_non_empty
        return RangeIndex(template._start, template._stop, template._step,
                          name=name)
    return RangeIndex(start, next_, step, name=name)
def _concat_same_dtype(self, indexes, name):
    """
    Concatenate indexes with _concat._concat_rangeindex_same_dtype and
    return the outcome renamed to "name".
    """
    combined = _concat._concat_rangeindex_same_dtype(indexes)
    return combined.rename(name)

def __len__(self):
"""
Expand Down

0 comments on commit 2f00159

Please sign in to comment.