Merge pull request #7891 from jreback/index
CLN/INT: remove Index as a sub-class of NDArray
jreback committed Aug 7, 2014
2 parents 83ed483 + 8d3cb3f commit c7bfb4e
Showing 51 changed files with 1,391 additions and 758 deletions.
2 changes: 2 additions & 0 deletions doc/source/api.rst
@@ -1104,6 +1104,8 @@ Modifying and Computations
Index.order
Index.reindex
Index.repeat
Index.take
Index.putmask
Index.set_names
Index.unique
Index.nunique
8 changes: 7 additions & 1 deletion doc/source/indexing.rst
@@ -52,6 +52,12 @@ indexing.
should be avoided. See :ref:`Returning a View versus Copy
<indexing.view_versus_copy>`

.. warning::

   In 0.15.0, ``Index`` has been internally refactored to no longer sub-class ``ndarray``;
   it instead sub-classes ``PandasObject``, like the rest of the pandas objects. This should be
   a transparent change with only very limited API implications (see the
   :ref:`Internal Refactoring <whatsnew_0150.refactoring>`).
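
One visible consequence (a minimal sketch, using only the public ``pd.Index`` API) is that an
``Index`` no longer passes ``isinstance`` checks against ``ndarray``, while the underlying data
remains reachable via ``.values``:

.. code-block:: python

   import numpy as np
   import pandas as pd

   idx = pd.Index([1, 2, 3])
   isinstance(idx, np.ndarray)         # False in 0.15.0 (True in earlier versions)
   isinstance(idx.values, np.ndarray)  # True -- the data itself is still an ndarray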

See the :ref:`cookbook<cookbook.selection>` for some advanced strategies

Different Choices for Indexing (``loc``, ``iloc``, and ``ix``)
@@ -2175,7 +2181,7 @@ you can specify ``inplace=True`` to have the data change in place.
.. versionadded:: 0.15.0

``set_names``, ``set_levels``, and ``set_labels`` also take an optional
``level`` argument

.. ipython:: python
23 changes: 21 additions & 2 deletions doc/source/v0.15.0.txt
@@ -10,6 +10,7 @@ users upgrade to this version.
- Highlights include:

  - The ``Categorical`` type was integrated as a first-class pandas type, see :ref:`here <whatsnew_0150.cat>`
  - Internal refactoring of the ``Index`` class to no longer sub-class ``ndarray``, see :ref:`Internal Refactoring <whatsnew_0150.refactoring>`

- :ref:`Other Enhancements <whatsnew_0150.enhancements>`

@@ -25,6 +26,12 @@ users upgrade to this version.

- :ref:`Bug Fixes <whatsnew_0150.bug_fixes>`

.. warning::

   In 0.15.0, ``Index`` has been internally refactored to no longer sub-class ``ndarray``;
   it instead sub-classes ``PandasObject``, like the rest of the pandas objects. This change
   allows very easy sub-classing and creation of new index types. It should be a transparent
   change with only very limited API implications (see the
   :ref:`Internal Refactoring <whatsnew_0150.refactoring>`).

.. _whatsnew_0150.api:

API changes
@@ -155,6 +162,18 @@ previously results in ``Exception`` or ``TypeError`` (:issue:`7812`)
didx
didx.tz_localize(None)

.. _whatsnew_0150.refactoring:

Internal Refactoring
~~~~~~~~~~~~~~~~~~~~

In 0.15.0, ``Index`` has been internally refactored to no longer sub-class ``ndarray``;
it instead sub-classes ``PandasObject``, like the rest of the pandas objects. This change
allows very easy sub-classing and creation of new index types. It should be a transparent
change with only very limited API implications (:issue:`5080`, :issue:`7439`, :issue:`7796`)

- You may need to unpickle pickles created with pandas versions < 0.15.0 using ``pd.read_pickle``
  rather than ``pickle.load``; see the :ref:`pickle docs <io.pickle>` and the sketch below.
- When plotting with a ``PeriodIndex``, the ``matplotlib`` internal axes will now be arrays of
  ``Period`` rather than a ``PeriodIndex`` (this is similar to how a ``DatetimeIndex`` now
  passes arrays of ``datetime``).
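
For example (a hedged sketch; the file name is hypothetical), a pickle written by an earlier
pandas should be read back through ``pd.read_pickle``, which applies the compat machinery,
rather than ``pickle.load``:

.. code-block:: python

   import pandas as pd

   # 'old_index.pkl' was written with pandas < 0.15.0
   obj = pd.read_pickle('old_index.pkl')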

.. _whatsnew_0150.cat:

Categoricals in Series/DataFrame
@@ -278,7 +297,7 @@ Performance
~~~~~~~~~~~

- Performance improvements in ``DatetimeIndex.__iter__`` to allow faster iteration (:issue:`7683`)

- Performance improvements in ``Period`` creation (and ``PeriodIndex`` setitem) (:issue:`5155`)



@@ -386,7 +405,7 @@ Bug Fixes
- Bug in ``GroupBy.filter()`` where fast path vs. slow path made the filter
  return a non-scalar value that appeared valid but wasn't (:issue:`7870`).
- Bug in ``date_range()``/``DatetimeIndex()`` when the timezone was inferred from input dates yet incorrect
  times were returned when crossing DST boundaries (:issue:`7835`, :issue:`7901`).



85 changes: 45 additions & 40 deletions pandas/compat/pickle_compat.py
@@ -5,29 +5,32 @@
import pandas
import copy
import pickle as pkl
from pandas import compat, Index
from pandas.compat import u, string_types
from pandas.core.series import Series, TimeSeries
from pandas.sparse.series import SparseSeries, SparseTimeSeries


def load_reduce(self):
    stack = self.stack
    args = stack.pop()
    func = stack[-1]

    if type(args[0]) is type:
        n = args[0].__name__
        if n == u('DeprecatedSeries') or n == u('DeprecatedTimeSeries'):
            stack[-1] = object.__new__(Series)
            return
        elif (n == u('DeprecatedSparseSeries') or
              n == u('DeprecatedSparseTimeSeries')):
            stack[-1] = object.__new__(SparseSeries)
            return

    try:
        stack[-1] = func(*args)
        return
    except Exception as e:

        # if we have a deprecated function
        # try to replace and try again

        if '_reconstruct: First argument must be a sub-type of ndarray' in str(e):
            try:
                cls = args[0]
                stack[-1] = object.__new__(cls)
                return
            except:
                pass

        # try to reencode the arguments
        if getattr(self, 'encoding', None) is not None:
@@ -57,6 +60,35 @@ class Unpickler(pkl.Unpickler):
Unpickler.dispatch = copy.copy(Unpickler.dispatch)
Unpickler.dispatch[pkl.REDUCE[0]] = load_reduce

def load_newobj(self):
    args = self.stack.pop()
    cls = self.stack[-1]

    # compat
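    # (Index no longer sub-classes ndarray, so ndarray-style __new__ args
    #  stored by older pickles cannot simply be passed through)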
    if issubclass(cls, Index):
        obj = object.__new__(cls)
    else:
        obj = cls.__new__(cls, *args)

    self.stack[-1] = obj
Unpickler.dispatch[pkl.NEWOBJ[0]] = load_newobj

# py3 compat
def load_newobj_ex(self):
    kwargs = self.stack.pop()
    args = self.stack.pop()
    cls = self.stack.pop()

    # compat
    if issubclass(cls, Index):
        obj = object.__new__(cls)
    else:
        obj = cls.__new__(cls, *args, **kwargs)
    self.append(obj)

try:
    Unpickler.dispatch[pkl.NEWOBJ_EX[0]] = load_newobj_ex
except:
    pass

def load(fh, encoding=None, compat=False, is_verbose=False):
    """load a pickle, with a provided encoding
@@ -74,11 +106,6 @@ def load(fh, encoding=None, compat=False, is_verbose=False):
"""

try:
if compat:
pandas.core.series.Series = DeprecatedSeries
pandas.core.series.TimeSeries = DeprecatedTimeSeries
pandas.sparse.series.SparseSeries = DeprecatedSparseSeries
pandas.sparse.series.SparseTimeSeries = DeprecatedSparseTimeSeries
fh.seek(0)
if encoding is not None:
up = Unpickler(fh, encoding=encoding)
@@ -89,25 +116,3 @@ def load(fh, encoding=None, compat=False, is_verbose=False):
        return up.load()
    except:
        raise
    finally:
        if compat:
            pandas.core.series.Series = Series
            pandas.core.series.TimeSeries = TimeSeries
            pandas.sparse.series.SparseSeries = SparseSeries
            pandas.sparse.series.SparseTimeSeries = SparseTimeSeries


class DeprecatedSeries(np.ndarray, Series):
    pass


class DeprecatedTimeSeries(DeprecatedSeries):
    pass


class DeprecatedSparseSeries(DeprecatedSeries):
    pass


class DeprecatedSparseTimeSeries(DeprecatedSparseSeries):
    pass
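
A minimal usage sketch of the compat loader (the file name is hypothetical; the encoding
argument is only needed when reading py2-era pickles under py3):

    from pandas.compat import pickle_compat as pc

    with open('legacy_series.pkl', 'rb') as fh:
        obj = pc.load(fh, compat=True)
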
135 changes: 131 additions & 4 deletions pandas/core/base.py
@@ -8,7 +8,7 @@
from pandas.core import common as com
import pandas.core.nanops as nanops
import pandas.tslib as tslib
from pandas.util.decorators import Appender, cache_readonly

class StringMixin(object):

@@ -205,6 +205,19 @@ def __unicode__(self):
                                quote_strings=True)
        return "%s(%s, dtype='%s')" % (type(self).__name__, prepr, self.dtype)

def _unbox(func):
    @Appender(func.__doc__)
    def f(self, *args, **kwargs):
        result = func(self.values, *args, **kwargs)
        from pandas.core.index import Index
        if isinstance(result, (np.ndarray, com.ABCSeries, Index)) and result.ndim == 0:
            # return NumPy type
            return result.dtype.type(result.item())
        else:  # pragma: no cover
            return result
    f.__name__ = func.__name__
    return f
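
# A hedged illustration (not part of this diff): with the unboxing applied to
# the reductions below, e.g. pd.Index([True, False]).any() returns a NumPy
# scalar (numpy.bool_) rather than a 0-dim array.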

class IndexOpsMixin(object):
    """ common ops mixin to support a unified interface / docs for Series / Index """

@@ -238,6 +251,64 @@ def _wrap_access_object(self, obj):

        return obj

    # ndarray compatibility
    __array_priority__ = 1000

    def transpose(self):
        """ return the transpose, which is by definition self """
        return self

    T = property(transpose, doc="return the transpose, which is by definition self")

    @property
    def shape(self):
        """ return a tuple of the shape of the underlying data """
        return self._data.shape

    @property
    def ndim(self):
        """ return the number of dimensions of the underlying data, by definition 1 """
        return 1

    def item(self):
        """ return the first element of the underlying data as a python scalar """
        return self.values.item()

    @property
    def data(self):
        """ return the data pointer of the underlying data """
        return self.values.data

    @property
    def itemsize(self):
        """ return the size of the dtype of the item of the underlying data """
        return self.values.itemsize

    @property
    def nbytes(self):
        """ return the number of bytes in the underlying data """
        return self.values.nbytes

    @property
    def strides(self):
        """ return the strides of the underlying data """
        return self.values.strides

    @property
    def size(self):
        """ return the number of elements in the underlying data """
        return self.values.size

    @property
    def flags(self):
        """ return the ndarray.flags for the underlying data """
        return self.values.flags

    @property
    def base(self):
        """ return the base object if the memory of the underlying data is shared """
        return self.values.base

    def max(self):
        """ The maximum value of the object """
        return nanops.nanmax(self.values)
@@ -340,6 +411,20 @@ def factorize(self, sort=False, na_sentinel=-1):
        from pandas.core.algorithms import factorize
        return factorize(self, sort=sort, na_sentinel=na_sentinel)

    def searchsorted(self, key, side='left'):
        """ np.ndarray searchsorted compat """

        ### FIXME in GH7447
        #### needs coercion on the key (DatetimeIndex does already)
        #### needs tests/doc-string
        return self.values.searchsorted(key, side=side)
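
    # A hedged usage sketch (illustrative values, not part of this diff):
    #   Index([1, 3, 5]).searchsorted(4) -> 2, the insertion point that keeps
    #   the underlying values sorted, mirroring ndarray.searchsorted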

    #----------------------------------------------------------------------
    # unbox reductions

    all = _unbox(np.ndarray.all)
    any = _unbox(np.ndarray.any)

# facilitate the properties on the wrapped ops
def _field_accessor(name, docstring=None):
    op_accessor = '_{0}'.format(name)
@@ -431,13 +516,17 @@ def asobject(self):

    def tolist(self):
        """
        return a list of the underlying data
        """
        return list(self.asobject)

    def min(self, axis=None):
        """
        return the minimum value of the Index

        See also
        --------
        numpy.ndarray.min
        """
        try:
            i8 = self.asi8
@@ -456,9 +545,30 @@ def min(self, axis=None):
        except ValueError:
            return self._na_value

    def argmin(self, axis=None):
        """
        return an ndarray of the minimum argument indexer

        See also
        --------
        numpy.ndarray.argmin
        """

        ##### FIXME: need some tests (what to do if all NaT?)
        i8 = self.asi8
        if self.hasnans:
            mask = i8 == tslib.iNaT
            i8 = i8.copy()
            # mask NaT with the largest int64 so it is never picked as the minimum
            i8[mask] = np.iinfo('int64').max
        return i8.argmin()
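
    # A hedged illustration (not part of this diff): NaT is masked to the
    # largest int64, so e.g.
    #   DatetimeIndex(['NaT', '2014-01-02', '2014-01-01']).argmin() -> 2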

    def max(self, axis=None):
        """
        return the maximum value of the Index

        See also
        --------
        numpy.ndarray.max
        """
        try:
            i8 = self.asi8
@@ -477,6 +587,23 @@ def max(self, axis=None):
        except ValueError:
            return self._na_value

    def argmax(self, axis=None):
        """
        return an ndarray of the maximum argument indexer

        See also
        --------
        numpy.ndarray.argmax
        """

        #### FIXME: need some tests (what to do if all NaT?)
        i8 = self.asi8
        if self.hasnans:
            mask = i8 == tslib.iNaT
            i8 = i8.copy()
            # mask NaT with 0 so it is not picked as the maximum
            i8[mask] = 0
        return i8.argmax()
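
    # A hedged illustration (not part of this diff): NaT is masked to 0,
    # so e.g.
    #   DatetimeIndex(['NaT', '2014-01-02', '2014-01-01']).argmax() -> 1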

    @property
    def _formatter_func(self):
        """
