From 41f09d899c4eaa726f0f0f7ffbc55d924a5dcab7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sat, 3 Feb 2018 14:13:57 -0600 Subject: [PATCH 01/36] REF/Clean: Internal / External values --- doc/source/internals.rst | 15 +++++ pandas/core/base.py | 48 +++++++++++--- pandas/core/dtypes/concat.py | 15 +++-- pandas/core/indexes/base.py | 65 ++++++++++++------- pandas/core/indexes/category.py | 25 +++++-- pandas/core/indexes/datetimelike.py | 2 +- pandas/core/indexes/datetimes.py | 9 +++ pandas/core/indexes/multi.py | 38 ++++++----- pandas/core/indexes/numeric.py | 2 +- pandas/core/indexes/period.py | 42 +++++++----- pandas/core/series.py | 4 +- pandas/io/formats/format.py | 2 +- pandas/io/pytables.py | 2 +- pandas/plotting/_converter.py | 6 +- pandas/tests/indexes/common.py | 6 +- .../tests/indexes/period/test_construction.py | 4 +- pandas/tests/indexes/period/test_period.py | 6 +- pandas/tests/indexes/period/test_tools.py | 2 +- pandas/tests/test_base.py | 65 ++++++++++++++++++- 19 files changed, 265 insertions(+), 93 deletions(-) diff --git a/doc/source/internals.rst b/doc/source/internals.rst index ee4df879d9478..29aaed318b802 100644 --- a/doc/source/internals.rst +++ b/doc/source/internals.rst @@ -89,6 +89,21 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but if you compute the levels and labels yourself, please be careful. +Values +~~~~~~ + +Pandas extends NumPy's type system in a few places, so we have multiple notions of "values" floating around. +For 1-D containers (``Index`` classes and ``Series``) we have the following convention: + +* ``cls._ndarray_values`` is *always* an ``ndarray`` +* ``cls._values`` refers to the "best possible" array. This could be an ``ndarray``, ``ExtensionArray``, or + an ``Index`` subclass (note: we're in the process of removing the index subclasses here so that it's + always an ``ndarray`` or ``ExtensionArray``). + +So, for example, ``Series[category]._values`` is a ``Categorical``, while ``Series[category]._ndarray_values`` is +the underlying ndarray. + + ..
_ref-subclassing-pandas: Subclassing pandas Data Structures diff --git a/pandas/core/base.py b/pandas/core/base.py index d5b204dba063e..52b1f82e8824d 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -7,7 +7,8 @@ import numpy as np from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass +from pandas.core.dtypes.generic import ( + ABCDataFrame, ABCSeries, ABCIndexClass, ABCDatetimeIndex) from pandas.core.dtypes.common import ( is_object_dtype, is_list_like, @@ -706,7 +707,7 @@ def transpose(self, *args, **kwargs): @property def shape(self): """ return a tuple of the shape of the underlying data """ - return self._values.shape + return self._ndarray_values.shape @property def ndim(self): @@ -734,22 +735,22 @@ def data(self): @property def itemsize(self): """ return the size of the dtype of the item of the underlying data """ - return self._values.itemsize + return self._ndarray_values.itemsize @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self._values.nbytes + return self._ndarray_values.nbytes @property def strides(self): """ return the strides of the underlying data """ - return self._values.strides + return self._ndarray_values.strides @property def size(self): """ return the number of elements in the underlying data """ - return self._values.size + return self._ndarray_values.size @property def flags(self): @@ -763,9 +764,34 @@ def base(self): """ return self.values.base + @property + def _ndarray_values(self): + """The data as an ndarray. See '_values' for more.""" + # type: () -> np.ndarray + return self.values + @property def _values(self): - """ the internal implementation """ + # type: () -> Union[ExtensionArray, Index] + # TODO: remove index types as they become is extension arrays + """ The best array representation. + + This is an ndarray, ExtensionArray, or Index subclass. This differs + from '._ndarray_values', which always returns an ndarray. It may differ + from the public '.values' + + index | values | _values + ----------------- | -------------- -| ---------- + CategoricalIndex | Categorical | Categorical + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] + PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) + IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) + + See Also + -------- + values + _ndarray_values + """ return self.values @property @@ -816,7 +842,7 @@ def tolist(self): if is_datetimelike(self): return [com._maybe_box_datetimelike(x) for x in self._values] else: - return self._values.tolist() + return self._ndarray_values.tolist() def __iter__(self): """ @@ -973,8 +999,12 @@ def value_counts(self, normalize=False, sort=True, ascending=False, @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) def unique(self): values = self._values - + if isinstance(values, ABCDatetimeIndex): + values = values._ndarray_values + # TODO: Make unique part of the ExtensionArray interface. + # else, this could be surprising. 
if hasattr(values, 'unique'): + result = values.unique() else: from pandas.core.algorithms import unique1d diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index ddecbe85087d8..a49a2680e4daa 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -480,7 +480,7 @@ def _concat_datetimetz(to_concat, name=None): def _concat_index_same_dtype(indexes, klass=None): klass = klass if klass is not None else indexes[0].__class__ - return klass(np.concatenate([x._values for x in indexes])) + return klass(np.concatenate([x._ndarray_values for x in indexes])) def _concat_index_asobject(to_concat, name=None): @@ -498,9 +498,16 @@ def _concat_index_asobject(to_concat, name=None): attribs = self._get_attributes_dict() attribs['name'] = name - to_concat = [x._values if isinstance(x, Index) else x - for x in to_concat] - return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) + arrays = [] + for x in to_concat: + if is_categorical_dtype(x): + arrays.append(np.asarray(x, dtype=object)) + elif isinstance(x, Index): + arrays.append(x._values) + else: + arrays.append(x) + + return self._shallow_copy_with_infer(np.concatenate(arrays), **attribs) def _concat_sparse(to_concat, axis=0, typs=None): diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 1e1bb0d49b3df..450e0f47ef6ff 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -392,7 +392,7 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs): values = np.array(values, copy=False) if is_object_dtype(values): values = cls(values, name=name, dtype=dtype, - **kwargs)._values + **kwargs)._ndarray_values result = object.__new__(cls) result._data = values @@ -644,7 +644,7 @@ def ravel(self, order='C'): -------- numpy.ndarray.ravel """ - return self._values.ravel(order=order) + return self._ndarray_values.ravel(order=order) # construction helpers @classmethod @@ -1577,7 +1577,7 @@ def _constructor(self): @cache_readonly def _engine(self): # property, for now, slow to look up - return self._engine_type(lambda: self._values, len(self)) + return self._engine_type(lambda: self._ndarray_values, len(self)) def _validate_index_level(self, level): """ @@ -2208,27 +2208,37 @@ def union(self, other): other = other.astype('O') return this.union(other) + if is_categorical_dtype(self): + lvals = self.values + else: + lvals = self._ndarray_values + + if is_categorical_dtype(other): + rvals = other.values + else: + rvals = other._ndarray_values + if self.is_monotonic and other.is_monotonic: try: - result = self._outer_indexer(self._values, other._values)[0] + result = self._outer_indexer(lvals, rvals)[0] except TypeError: # incomparable objects - result = list(self._values) + result = list(lvals) # worth making this faster? 
a very unusual case - value_set = set(self._values) - result.extend([x for x in other._values if x not in value_set]) + value_set = set(lvals) + result.extend([x for x in rvals if x not in value_set]) else: indexer = self.get_indexer(other) indexer, = (indexer == -1).nonzero() if len(indexer) > 0: - other_diff = algos.take_nd(other._values, indexer, + other_diff = algos.take_nd(rvals, indexer, allow_fill=False) - result = _concat._concat_compat((self._values, other_diff)) + result = _concat._concat_compat((lvals, other_diff)) try: - self._values[0] < other_diff[0] + lvals[0] < other_diff[0] except TypeError as e: warnings.warn("%s, sort order is undefined for " "incomparable objects" % e, RuntimeWarning, @@ -2240,7 +2250,7 @@ def union(self, other): result.sort() else: - result = self._values + result = lvals try: result = np.sort(result) @@ -2293,18 +2303,21 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._values, other._values)[0] + result = self._inner_indexer(self._ndarray_values, + other._ndarray_values)[0] return self._wrap_union_result(other, result) except TypeError: pass try: - indexer = Index(other._values).get_indexer(self._values) + indexer = Index(other._ndarray_values).get_indexer( + self._ndarray_values) indexer = indexer.take((indexer != -1).nonzero()[0]) except Exception: # duplicates indexer = algos.unique1d( - Index(other._values).get_indexer_non_unique(self._values)[0]) + Index(other._ndarray_values).get_indexer_non_unique( + self._ndarray_values)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) @@ -2680,7 +2693,7 @@ def get_indexer(self, target, method=None, limit=None, tolerance=None): raise ValueError('limit argument only valid if doing pad, ' 'backfill or nearest reindexing') - indexer = self._engine.get_indexer(target._values) + indexer = self._engine.get_indexer(target._ndarray_values) return _ensure_platform_int(indexer) @@ -2696,12 +2709,13 @@ def _get_fill_indexer(self, target, method, limit=None, tolerance=None): if self.is_monotonic_increasing and target.is_monotonic_increasing: method = (self._engine.get_pad_indexer if method == 'pad' else self._engine.get_backfill_indexer) - indexer = method(target._values, limit) + indexer = method(target._ndarray_values, limit) else: indexer = self._get_fill_indexer_searchsorted(target, method, limit) if tolerance is not None: - indexer = self._filter_indexer_tolerance(target._values, indexer, + indexer = self._filter_indexer_tolerance(target._ndarray_values, + indexer, tolerance) return indexer @@ -2792,7 +2806,7 @@ def get_indexer_non_unique(self, target): self = Index(self.asi8) tgt_values = target.asi8 else: - tgt_values = target._values + tgt_values = target._ndarray_values indexer, missing = self._engine.get_indexer_non_unique(tgt_values) return _ensure_platform_int(indexer), missing @@ -3227,16 +3241,17 @@ def _join_multi(self, other, how, return_indexers=True): def _join_non_unique(self, other, how='left', return_indexers=False): from pandas.core.reshape.merge import _get_join_indexers - left_idx, right_idx = _get_join_indexers([self._values], - [other._values], how=how, + left_idx, right_idx = _get_join_indexers([self._ndarray_values], + [other._ndarray_values], + how=how, sort=True) left_idx = _ensure_platform_int(left_idx) right_idx = _ensure_platform_int(right_idx) - join_index = np.asarray(self._values.take(left_idx)) + join_index = np.asarray(self._ndarray_values.take(left_idx)) mask = left_idx == -1 - 
np.putmask(join_index, mask, other._values.take(right_idx)) + np.putmask(join_index, mask, other._ndarray_values.take(right_idx)) join_index = self._wrap_joined_index(join_index, other) @@ -3383,8 +3398,8 @@ def _join_monotonic(self, other, how='left', return_indexers=False): else: return ret_index - sv = self._values - ov = other._values + sv = self._ndarray_values + ov = other._ndarray_values if self.is_unique and other.is_unique: # We can perform much better than the general case @@ -3736,7 +3751,7 @@ def insert(self, loc, item): item = self._na_value _self = np.asarray(self) - item = self._coerce_scalar_to_index(item)._values + item = self._coerce_scalar_to_index(item)._ndarray_values idx = np.concatenate((_self[:loc], item, _self[loc:])) return self._shallow_copy_with_infer(idx) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 2c7be2b21f959..5b01f7d2cbe95 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -227,7 +227,7 @@ def _is_dtype_compat(self, other): """ if is_categorical_dtype(other): if isinstance(other, CategoricalIndex): - other = other._values + other = other.values if not other.is_dtype_equal(self): raise TypeError("categories must match existing categories " "when appending") @@ -293,6 +293,23 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data + @property + def _values(self): + return self._data + + @property + def _ndarray_values(self): + return self._data.codes + + @property + def itemsize(self): + return self.values.itemsize + + @property + def nbytes(self): + """ return the number of bytes in the underlying data """ + return self.values.nbytes + def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() @@ -386,8 +403,8 @@ def is_monotonic_decreasing(self): def unique(self, level=None): if level is not None: self._validate_index_level(level) - result = base.IndexOpsMixin.unique(self) - # CategoricalIndex._shallow_copy uses keeps original categories + result = self.values.unique() + # CategoricalIndex._shallow_copy keeps original categories # and ordered if not otherwise specified return self._shallow_copy(result, categories=result.categories, ordered=result.ordered) @@ -762,7 +779,7 @@ def _evaluate_compare(self, other): def _delegate_method(self, name, *args, **kwargs): """ method delegation to the ._values """ - method = getattr(self._values, name) + method = getattr(self.values, name) if 'inplace' in kwargs: raise ValueError("cannot use inplace with CategoricalIndex") res = method(*args, **kwargs) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 8e77c7a7fa48c..94500a58edd4c 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -389,7 +389,7 @@ def sort_values(self, return_indexer=False, ascending=True): sorted_index = self.take(_as) return sorted_index, _as else: - sorted_values = np.sort(self._values) + sorted_values = np.sort(self._ndarray_values) attribs = self._get_attributes_dict() freq = attribs['freq'] diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index e09fa87477122..c32d7ce930a7c 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -678,6 +678,15 @@ def _assert_tzawareness_compat(self, other): raise TypeError('Cannot compare tz-naive and tz-aware ' 'datetime-like objects') + @property + def _values(self): + # tz-naive -> ndarray + # tz-aware -> 
DatetimeIndex + if self.tz is not None: + return self + else: + return self.values + @property def tzinfo(self): """ diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 510f7245cebd8..1478012aa9dbe 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -799,9 +799,11 @@ def values(self): box = hasattr(lev, '_box_values') # Try to minimize boxing. if box and len(lev) > len(lab): - taken = lev._box_values(algos.take_1d(lev._values, lab)) + taken = lev._box_values(algos.take_1d(lev._values, + lab)) elif box: - taken = algos.take_1d(lev._box_values(lev._values), lab, + taken = algos.take_1d(lev._box_values(lev._ndarray_values), + lab, fill_value=_get_na_value(lev.dtype.type)) else: taken = algos.take_1d(np.asarray(lev._values), lab) @@ -1317,7 +1319,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): - tuples = tuples._values + tuples = tuples._ndarray_values arrays = list(lib.tuples_to_object_array(tuples).T) elif isinstance(tuples, list): @@ -2410,7 +2412,7 @@ def convert_indexer(start, stop, step, indexer=indexer, labels=labels): mapper = Series(indexer) indexer = labels.take(_ensure_platform_int(indexer)) result = Series(Index(indexer).isin(r).nonzero()[0]) - m = result.map(mapper)._values + m = result.map(mapper)._ndarray_values else: m = np.zeros(len(labels), dtype=bool) @@ -2569,7 +2571,7 @@ def _update_indexer(idxr, indexer=indexer): else: from .numeric import Int64Index # no matches we are done - return Int64Index([])._values + return Int64Index([])._ndarray_values elif com.is_null_slice(k): # empty slice @@ -2589,8 +2591,8 @@ def _update_indexer(idxr, indexer=indexer): # empty indexer if indexer is None: - return Int64Index([])._values - return indexer._values + return Int64Index([])._ndarray_values + return indexer._ndarray_values def truncate(self, before=None, after=None): """ @@ -2639,7 +2641,7 @@ def equals(self, other): if not isinstance(other, MultiIndex): other_vals = com._values_from_object(_ensure_index(other)) - return array_equivalent(self._values, other_vals) + return array_equivalent(self._ndarray_values, other_vals) if self.nlevels != other.nlevels: return False @@ -2650,13 +2652,15 @@ def equals(self, other): for i in range(self.nlevels): slabels = self.labels[i] slabels = slabels[slabels != -1] - svalues = algos.take_nd(np.asarray(self.levels[i]._values), - slabels, allow_fill=False) + svalues = algos.take_nd( + np.asarray(self.levels[i]._values), + slabels, allow_fill=False) olabels = other.labels[i] olabels = olabels[olabels != -1] - ovalues = algos.take_nd(np.asarray(other.levels[i]._values), - olabels, allow_fill=False) + ovalues = algos.take_nd( + np.asarray(other.levels[i]._values), + olabels, allow_fill=False) # since we use NaT both datetime64 and timedelta64 # we can have a situation where a level is typed say @@ -2704,7 +2708,8 @@ def union(self, other): if len(other) == 0 or self.equals(other): return self - uniq_tuples = lib.fast_unique_multiple([self._values, other._values]) + uniq_tuples = lib.fast_unique_multiple([self._ndarray_values, + other._ndarray_values]) return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) @@ -2726,8 +2731,8 @@ def intersection(self, other): if self.equals(other): return self - self_tuples = self._values - other_tuples = other._values + self_tuples = self._ndarray_values + other_tuples = other._ndarray_values uniq_tuples = 
sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: return MultiIndex(levels=[[]] * self.nlevels, @@ -2756,7 +2761,8 @@ def difference(self, other): labels=[[]] * self.nlevels, names=result_names, verify_integrity=False) - difference = sorted(set(self._values) - set(other._values)) + difference = sorted(set(self._ndarray_values) - + set(other._ndarray_values)) if len(difference) == 0: return MultiIndex(levels=[[]] * self.nlevels, diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index b02aee0495d8c..a4558116bfa63 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -378,7 +378,7 @@ def equals(self, other): if (not is_dtype_equal(self.dtype, other.dtype) or self.shape != other.shape): return False - left, right = self._values, other._values + left, right = self._ndarray_values, other._ndarray_values return ((left == right) | (self._isnan & other._isnan)).all() except (TypeError, ValueError): return False diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 1f8542ed5ee60..c8b7d6063e378 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -54,7 +54,7 @@ def _field_accessor(name, alias, docstring=None): def f(self): base, mult = _gfc(self.freq) - result = get_period_field_arr(alias, self._values, base) + result = get_period_field_arr(alias, self._ndarray_values, base) return Index(result, name=self.name) f.__name__ = name f.__doc__ = docstring @@ -82,7 +82,7 @@ def _period_index_cmp(opname, cls, nat_result=False): def wrapper(self, other): if isinstance(other, Period): - func = getattr(self._values, opname) + func = getattr(self._ndarray_values, opname) other_base, _ = _gfc(other.freq) if other.freq != self.freq: msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) @@ -94,7 +94,8 @@ def wrapper(self, other): msg = _DIFFERENT_FREQ_INDEX.format(self.freqstr, other.freqstr) raise IncompatibleFrequency(msg) - result = getattr(self._values, opname)(other._values) + op = getattr(self._ndarray_values, opname) + result = op(other._ndarray_values) mask = self._isnan | other._isnan if mask.any(): @@ -102,11 +103,11 @@ def wrapper(self, other): return result elif other is tslib.NaT: - result = np.empty(len(self._values), dtype=bool) + result = np.empty(len(self._ndarray_values), dtype=bool) result.fill(nat_result) else: other = Period(other, freq=self.freq) - func = getattr(self._values, opname) + func = getattr(self._ndarray_values, opname) result = func(other.ordinal) if self.hasnans: @@ -275,11 +276,11 @@ def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None, if isinstance(data, PeriodIndex): if freq is None or freq == data.freq: # no freq change freq = data.freq - data = data._values + data = data._ndarray_values else: base1, _ = _gfc(data.freq) base2, _ = _gfc(freq) - data = period.period_asfreq_arr(data._values, + data = period.period_asfreq_arr(data._ndarray_values, base1, base2, 1) return cls._simple_new(data, name=name, freq=freq) @@ -374,7 +375,7 @@ def _shallow_copy(self, values=None, freq=None, **kwargs): if freq is None: freq = self.freq if values is None: - values = self._values + values = self._ndarray_values return super(PeriodIndex, self)._shallow_copy(values=values, freq=freq, **kwargs) @@ -407,7 +408,7 @@ def __contains__(self, key): @property def asi8(self): - return self._values.view('i8') + return self._ndarray_values.view('i8') @cache_readonly def _int64index(self): @@ -419,6 +420,12 @@ def values(self): @property def 
_values(self): + # TODO: return PeriodArray + return self.values + + @property + def _ndarray_values(self): + # Ordinals return self._data def __array__(self, dtype=None): @@ -489,13 +496,15 @@ def asof_locs(self, where, mask): if isinstance(where_idx, DatetimeIndex): where_idx = PeriodIndex(where_idx.values, freq=self.freq) - locs = self._values[mask].searchsorted(where_idx._values, side='right') + locs = self._ndarray_values[mask].searchsorted( + where_idx._ndarray_values, side='right') locs = np.where(locs > 0, locs - 1, 0) result = np.arange(len(self))[mask].take(locs) first = mask.argmax() - result[(locs == 0) & (where_idx._values < self._values[first])] = -1 + result[(locs == 0) & (where_idx._ndarray_values < + self._ndarray_values[first])] = -1 return result @@ -523,7 +532,8 @@ def searchsorted(self, value, side='left', sorter=None): elif isinstance(value, compat.string_types): value = Period(value, freq=self.freq).ordinal - return self._values.searchsorted(value, side=side, sorter=sorter) + return self._ndarray_values.searchsorted(value, side=side, + sorter=sorter) @property def is_all_dates(self): @@ -664,7 +674,7 @@ def to_timestamp(self, freq=None, how='start'): base, mult = _gfc(freq) new_data = self.asfreq(freq, how) - new_data = period.periodarr_to_dt64arr(new_data._values, base) + new_data = period.periodarr_to_dt64arr(new_data._ndarray_values, base) return DatetimeIndex(new_data, freq='infer', name=self.name) def _maybe_convert_timedelta(self, other): @@ -744,7 +754,7 @@ def shift(self, n): ------- shifted : PeriodIndex """ - values = self._values + n * self.freq.n + values = self._ndarray_values + n * self.freq.n if self.hasnans: values[self._isnan] = tslib.iNaT return self._shallow_copy(values=values) @@ -775,7 +785,7 @@ def get_value(self, series, key): grp = resolution.Resolution.get_freq_group(reso) freqn = resolution.get_freq_group(self.freq) - vals = self._values + vals = self._ndarray_values # if our data is higher resolution than requested key, slice if grp < freqn: @@ -786,7 +796,7 @@ def get_value(self, series, key): if ord2 < vals[0] or ord1 > vals[-1]: raise KeyError(key) - pos = np.searchsorted(self._values, [ord1, ord2]) + pos = np.searchsorted(self._ndarray_values, [ord1, ord2]) key = slice(pos[0], pos[1] + 1) return series[key] elif grp == freqn: diff --git a/pandas/core/series.py b/pandas/core/series.py index e4b8979d6393a..b0ad76d12f1d9 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1303,7 +1303,9 @@ def unique(self): # to return an object array of tz-aware Timestamps # TODO: it must return DatetimeArray with tz in pandas 2.0 - result = result.astype(object).values + # XXX: This surely will have issues around DST boundaries. 
+ result = (DatetimeIndex(result, tz='UTC').tz_convert(self.dtype.tz) + .astype(object).values) return result diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 269c81b380b5e..bbeb9e162452d 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1884,7 +1884,7 @@ def _format(x): vals = self.values if isinstance(vals, Index): - vals = vals._values + vals = vals._ndarray_values elif isinstance(vals, ABCSparseArray): vals = vals.values diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 0d833807602e1..2437b7d396e84 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -4430,7 +4430,7 @@ def _convert_index(index, encoding=None, format_type=None): elif isinstance(index, (Int64Index, PeriodIndex)): atom = _tables().Int64Col() # avoid to store ndarray of Period objects - return IndexCol(index._values, 'integer', atom, + return IndexCol(index._ndarray_values, 'integer', atom, freq=getattr(index, 'freq', None), index_name=index_name) diff --git a/pandas/plotting/_converter.py b/pandas/plotting/_converter.py index 07163615c6ba4..9ca06475290e4 100644 --- a/pandas/plotting/_converter.py +++ b/pandas/plotting/_converter.py @@ -249,11 +249,11 @@ def _convert_1d(values, units, axis): is_float(values)): return get_datevalue(values, axis.freq) if isinstance(values, PeriodIndex): - return values.asfreq(axis.freq)._values + return values.asfreq(axis.freq)._ndarray_values if isinstance(values, Index): return values.map(lambda x: get_datevalue(x, axis.freq)) if is_period_arraylike(values): - return PeriodIndex(values, freq=axis.freq)._values + return PeriodIndex(values, freq=axis.freq)._ndarray_values if isinstance(values, (list, tuple, np.ndarray, Index)): return [get_datevalue(x, axis.freq) for x in values] return values @@ -642,7 +642,7 @@ def _daily_finder(vmin, vmax, freq): info = np.zeros(span, dtype=[('val', np.int64), ('maj', bool), ('min', bool), ('fmt', '|S20')]) - info['val'][:] = dates_._values + info['val'][:] = dates_._ndarray_values info['fmt'][:] = '' info['maj'][[0, -1]] = True # .. 
and set some shortcuts diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 8948c5f79900d..2d8d70aa2ac84 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -314,7 +314,8 @@ def test_ensure_copied_data(self): # .values an object array of Period, thus copied result = index_type(ordinal=index.asi8, copy=False, **init_kwargs) - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') elif isinstance(index, IntervalIndex): # checked in test_interval.py @@ -323,7 +324,8 @@ def test_ensure_copied_data(self): result = index_type(index.values, copy=False, **init_kwargs) tm.assert_numpy_array_equal(index.values, result.values, check_same='same') - tm.assert_numpy_array_equal(index._values, result._values, + tm.assert_numpy_array_equal(index._ndarray_values, + result._ndarray_values, check_same='same') def test_copy_and_deepcopy(self, indices): diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 639a9272c3808..eca80d17b1dc3 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -119,8 +119,8 @@ def test_constructor_fromarraylike(self): tm.assert_index_equal(PeriodIndex(idx.values), idx) tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - pytest.raises(ValueError, PeriodIndex, idx._values) - pytest.raises(ValueError, PeriodIndex, list(idx._values)) + pytest.raises(ValueError, PeriodIndex, idx._ndarray_values) + pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values)) pytest.raises(TypeError, PeriodIndex, data=Period('2007', freq='A')) diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index 6fc7fa5486f82..e3b1256fa0584 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -205,7 +205,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01', pd.NaT], freq='M') @@ -213,7 +213,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(['2011-01-01', pd.NaT], freq='D') @@ -222,7 +222,7 @@ def test_values(self): tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.get_values(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) - tm.assert_numpy_array_equal(idx._values, exp) + tm.assert_numpy_array_equal(idx._ndarray_values, exp) def test_period_index_length(self): pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009') diff --git a/pandas/tests/indexes/period/test_tools.py b/pandas/tests/indexes/period/test_tools.py index 0e72cadb5d494..f5a62371ae799 100644 --- a/pandas/tests/indexes/period/test_tools.py +++ b/pandas/tests/indexes/period/test_tools.py @@ -22,7 +22,7 @@ class TestPeriodRepresentation(object): def _check_freq(self, freq, base_date): rng = PeriodIndex(start=base_date, periods=10, freq=freq) exp = np.arange(10, dtype=np.int64) - 
tm.assert_numpy_array_equal(rng._values, exp) + tm.assert_numpy_array_equal(rng.asi8, exp) def test_annual(self): diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index df2547fc7b0da..5a67aa3f989ae 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -338,8 +338,9 @@ def test_ops(self): if not isinstance(o, PeriodIndex): expected = getattr(o.values, op)() else: - expected = pd.Period(ordinal=getattr(o._values, op)(), - freq=o.freq) + expected = pd.Period( + ordinal=getattr(o._ndarray_values, op)(), + freq=o.freq) try: assert result == expected except TypeError: @@ -450,7 +451,7 @@ def test_value_counts_unique_nunique_null(self): for orig in self.objs: o = orig.copy() klass = type(o) - values = o._values + values = o._ndarray_values if not self._allow_na_ops(o): continue @@ -1175,3 +1176,61 @@ def test_iter_box(self): assert isinstance(res, pd.Period) assert res.freq == 'M' assert res == exp + + +@pytest.mark.parametrize('arr, expected', [ + (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), + (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), + pd.DatetimeIndex(['2017'], tz='US/Eastern')), +]) +def test_unique_datetime_index(arr, expected): + result = arr.unique() + + if isinstance(expected, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + if isinstance(expected, pd.Series): + tm.assert_series_equal(result, expected) + if isinstance(expected, pd.DatetimeIndex): + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('arr, expected', [ + (pd.Series(pd.DatetimeIndex(['2017', '2017'])), + np.array(['2017'], dtype='M8[ns]')), + (pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')), + np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)), +]) +def test_unique_datetime_series(arr, expected): + result = arr.unique() + + if isinstance(expected, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + if isinstance(expected, pd.Series): + tm.assert_series_equal(result, expected) + if isinstance(expected, pd.DatetimeIndex): + tm.assert_index_equal(result, expected) + + +@pytest.mark.parametrize('array, expected_type', [ + (np.array([0, 1]), np.ndarray), + (np.array(['a', 'b']), np.ndarray), + (pd.Categorical(['a', 'b']), pd.Categorical), + (pd.DatetimeIndex(['2017', '2018']), np.ndarray), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray), + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex), +]) +def test_values_consistent(array, expected_type): + l_values = pd.Series(array)._values + r_values = pd.Index(array)._values + assert type(l_values) is expected_type + assert type(l_values) is type(r_values) + + if isinstance(l_values, np.ndarray): + tm.assert_numpy_array_equal(l_values, r_values) + elif isinstance(l_values, pd.Index): + tm.assert_index_equal(l_values, r_values) + elif pd.api.types.is_categorical(l_values): + tm.assert_categorical_equal(l_values, r_values) + else: + raise TypeError("Unexpected type {}".format(type(l_values))) From 29cfd7c22dd0b5b67c44144f1520f0bce8bf0e74 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 6 Feb 2018 14:34:22 -0600 Subject: [PATCH 02/36] Move to index base --- pandas/core/base.py | 24 ------------------------ pandas/core/indexes/base.py | 24 ++++++++++++++++++++++++ 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 52b1f82e8824d..ab4c969810c93 100644 --- a/pandas/core/base.py +++ 
b/pandas/core/base.py @@ -770,30 +770,6 @@ def _ndarray_values(self): # type: () -> np.ndarray return self.values - @property - def _values(self): - # type: () -> Union[ExtensionArray, Index] - # TODO: remove index types as they become is extension arrays - """ The best array representation. - - This is an ndarray, ExtensionArray, or Index subclass. This differs - from '._ndarray_values', which always returns an ndarray. It may differ - from the public '.values' - - index | values | _values - ----------------- | -------------- -| ---------- - CategoricalIndex | Categorical | Categorical - DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] - PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) - IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) - - See Also - -------- - values - _ndarray_values - """ - return self.values - @property def empty(self): return not self.size diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 450e0f47ef6ff..d84c4dcb58f83 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -574,6 +574,30 @@ def values(self): """ return the underlying data as an ndarray """ return self._data.view(np.ndarray) + @property + def _values(self): + # type: () -> Union[ExtensionArray, Index] + # TODO: remove index types as they become is extension arrays + """The best array representation. + + This is an ndarray, ExtensionArray, or Index subclass. This differs + from '._ndarray_values', which always returns an ndarray. It may differ + from the public '.values' + + index | values | _values + ----------------- | -------------- -| ---------- + CategoricalIndex | Categorical | Categorical + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] + PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) + IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) + + See Also + -------- + values + _ndarray_values + """ + return self.values + def get_values(self): """ return the underlying data as an ndarray """ return self.values From 3185f4e08fdde6736a02edb52da2647cae8d599c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 13:17:40 -0600 Subject: [PATCH 03/36] Cleanup unique handling --- pandas/core/base.py | 4 +--- pandas/core/indexes/datetimes.py | 12 ++++++++++++ pandas/core/series.py | 4 +--- 3 files changed, 14 insertions(+), 6 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index ab4c969810c93..7a8b5f9b608c7 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -975,10 +975,8 @@ def value_counts(self, normalize=False, sort=True, ascending=False, @Appender(_shared_docs['unique'] % _indexops_doc_kwargs) def unique(self): values = self._values - if isinstance(values, ABCDatetimeIndex): - values = values._ndarray_values + # TODO: Make unique part of the ExtensionArray interface. - # else, this could be surprising. if hasattr(values, 'unique'): result = values.unique() diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index c32d7ce930a7c..d749f8aec50cd 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1095,6 +1095,18 @@ def snap(self, freq='S'): # we know it conforms; skip check return DatetimeIndex(snapped, freq=freq, verify_integrity=False) + def unique(self, level=None): + # Override here since IndexOpsMixin.unique uses self._values.unique + # For DatetimeIndex with TZ, that's a DatetimeIndex -> recursion error + # So we extract the tz-naive DatetimeIndex, unique that, and wrap the + # result with out TZ. 
+ if self.tz is not None: + naive = type(self)(self._ndarray_values, copy=False) + else: + naive = self + result = super(DatetimeIndex, naive).unique(level=level) + return self._simple_new(result, name=self.name, tz=self.tz, freq=self.freq) + def union(self, other): """ Specialized union for DatetimeIndex objects. If combine diff --git a/pandas/core/series.py b/pandas/core/series.py index b0ad76d12f1d9..e4b8979d6393a 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -1303,9 +1303,7 @@ def unique(self): # to return an object array of tz-aware Timestamps # TODO: it must return DatetimeArray with tz in pandas 2.0 - # XXX: This surely will have issues around DST boundaries. - result = (DatetimeIndex(result, tz='UTC').tz_convert(self.dtype.tz) - .astype(object).values) + result = result.astype(object).values return result From 476f75d3b8cf07fb9965a1fa96dcdf932a01bde8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 14:29:02 -0600 Subject: [PATCH 04/36] Simplify object concat --- pandas/core/dtypes/concat.py | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index a49a2680e4daa..d6b55d03ebccd 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -493,20 +493,11 @@ def _concat_index_asobject(to_concat, name=None): to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] - from pandas import Index self = to_concat[0] attribs = self._get_attributes_dict() attribs['name'] = name - arrays = [] - for x in to_concat: - if is_categorical_dtype(x): - arrays.append(np.asarray(x, dtype=object)) - elif isinstance(x, Index): - arrays.append(x._values) - else: - arrays.append(x) - + arrays = [np.array(x, copy=False, dtype=object) for x in to_concat] return self._shallow_copy_with_infer(np.concatenate(arrays), **attribs) From b15ee5a000003e42bf65389308c7277b6461fd05 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 14:38:58 -0600 Subject: [PATCH 05/36] Use values for intersection I think eventually we'll want to use ndarray_values for this, but it'll require a bit more work to support. Currently, using ndarray_values causes occasional failures on categorical.
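To make the categorical failure mode mentioned above concrete, here is a small illustrative sketch using only public pandas API (it is not code from this series): a categorical's integer codes are only meaningful relative to its own categories, so two indexes holding equal values can carry different codes, and matching on the codes alone pairs up the wrong elements.

    import pandas as pd

    a = pd.CategoricalIndex(['a', 'b'], categories=['a', 'b'])
    b = pd.CategoricalIndex(['a', 'b'], categories=['b', 'a'])

    # The values agree ...
    assert list(a) == list(b)            # both ['a', 'b']

    # ... but the codes do not, so an indexer built purely on the codes
    # (what _ndarray_values exposes for categoricals) would mis-align them.
    print(list(a.codes), list(b.codes))  # [0, 1] vs [1, 0]

Working from _values (the Categorical itself) keeps the categories in the comparison, which is why this commit steps back to _values for the intersection path.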
--- pandas/core/indexes/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index dd4c8ac2e86a3..70c0c822fb5e8 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2347,8 +2347,7 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._ndarray_values, - other._ndarray_values)[0] + result = self._inner_indexer(self._values, other._values)[0] return self._wrap_union_result(other, result) except TypeError: pass From 659073f8a67e513267048d467da715c60d885c51 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 7 Feb 2018 15:14:32 -0600 Subject: [PATCH 06/36] hmm --- pandas/core/indexes/base.py | 22 +++++++++++++++++++++- pandas/core/indexes/category.py | 17 +++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 70c0c822fb5e8..260016661a735 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2310,6 +2310,24 @@ def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None return self.__class__(result, name=name) + def _ensure_join(self, values): + """Ensure that the 'values' are ready for our join indexer. + + The default join indexers are object, so this just returns 'values'. + This is called before calling those. + + + Parameters + ---------- + values : array-like + + Returns + ------- + values : ndarray + Expected to have the correct type for self.inner_indexer + """ + return values + def intersection(self, other): """ Form the intersection of two Index objects. @@ -2347,7 +2365,9 @@ def intersection(self, other): if self.is_monotonic and other.is_monotonic: try: - result = self._inner_indexer(self._values, other._values)[0] + lvals = self._ensure_join(self._ndarray_values) + rvals = self._ensure_join(other._ndarray_values) + result = self._inner_indexer(lvals, rvals)[0] return self._wrap_union_result(other, result) except TypeError: pass diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5b01f7d2cbe95..48cdd28911487 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,5 +1,6 @@ import numpy as np from pandas._libs import index as libindex +from pandas._libs import join as libjoin from pandas import compat from pandas.compat.numpy import function as nv @@ -8,6 +9,8 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, _ensure_platform_int, + _ensure_int32, + _ensure_int64, is_list_like, is_interval_dtype, is_scalar) @@ -214,6 +217,14 @@ def _shallow_copy(self, values=None, categories=None, ordered=None, values=values, categories=categories, ordered=ordered, **kwargs) + @cache_readonly + def _inner_indexer(self): + if self.codes.dtype.itemsize <= 4: + # int8, int16, int32 + return libjoin.inner_join_indexer_int32 + else: + return libjoin.inner_join_indexer_int64 + def _is_dtype_compat(self, other): """ *this is an internal non-public method* @@ -787,6 +798,12 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) + def _ensure_join(self, values): + if self.codes.dtype.itemsize <= 4: + return _ensure_int32(values) + else: + return _ensure_int64(values) + @classmethod def _add_accessors(cls): """ add in Categorical accessor methods """ From 9b8d2a51857a4d8c78ce09c6e54097ab9eddbb08 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 
Feb 2018 08:54:19 -0600 Subject: [PATCH 07/36] Additional testing --- pandas/tests/test_base.py | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 5a67aa3f989ae..0dbced114ce51 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1211,16 +1211,17 @@ def test_unique_datetime_series(arr, expected): tm.assert_index_equal(result, expected) -@pytest.mark.parametrize('array, expected_type', [ - (np.array([0, 1]), np.ndarray), - (np.array(['a', 'b']), np.ndarray), - (pd.Categorical(['a', 'b']), pd.Categorical), - (pd.DatetimeIndex(['2017', '2018']), np.ndarray), - (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray), - (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray), - (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex), +@pytest.mark.parametrize('array, expected_type, dtype', [ + (np.array([0, 1]), np.ndarray, 'int64'), + (np.array(['a', 'b']), np.ndarray, 'object'), + (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), + (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), + (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, + 'datetime64[ns, US/Central]'), ]) -def test_values_consistent(array, expected_type): +def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values r_values = pd.Index(array)._values assert type(l_values) is expected_type @@ -1234,3 +1235,13 @@ def test_values_consistent(array, expected_type): tm.assert_categorical_equal(l_values, r_values) else: raise TypeError("Unexpected type {}".format(type(l_values))) + + assert l_values.dtype == dtype + assert r_values.dtype == dtype + + +def test_values_periodindex(): + arr = pd.period_range("2017", periods=4, freq='D') + result = arr._values + expected = np.array(arr.astype(object)) + tm.assert_numpy_array_equal(result, expected) From 9fbac2959dc34f64133b44fa8274189abcc07655 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 13:55:34 -0600 Subject: [PATCH 08/36] More tests --- pandas/tests/test_base.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 0dbced114ce51..94449663b580b 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1216,10 +1216,11 @@ def test_unique_datetime_series(arr, expected): (np.array(['a', 'b']), np.ndarray, 'object'), (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), - (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), - (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), (pd.DatetimeIndex(['2017', '2018'], tz="US/Central"), pd.DatetimeIndex, 'datetime64[ns, US/Central]'), + (pd.TimedeltaIndex([10**10]), np.ndarray, 'm8[ns]'), + (pd.PeriodIndex([2018, 2019], freq='A'), np.ndarray, 'object'), + (pd.IntervalIndex.from_breaks([0, 1, 2]), np.ndarray, 'object'), ]) def test_values_consistent(array, expected_type, dtype): l_values = pd.Series(array)._values @@ -1245,3 +1246,24 @@ def test_values_periodindex(): result = arr._values expected = np.array(arr.astype(object)) tm.assert_numpy_array_equal(result, expected) + + +@pytest.mark.parametrize('array, expected', [ + (np.array([0, 1]), np.array([0, 1])), + 
(np.array(['0', '1']), np.array(['0', '1'], dtype=object)), + (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')), + (pd.DatetimeIndex(['2017-01-01T00:00:00']), + np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), + (pd.DatetimeIndex(['2017-01-01T00:00:00'], tz="US/Eastern"), + np.array(['2017-01-01T05:00:00'], dtype='M8[ns]')), + (pd.TimedeltaIndex([10**10]), np.array([10**10], dtype='m8[ns]')), + pytest.mark.xfail(reason='PeriodArray not implemented')(( + pd.PeriodIndex(['2017', '2018'], freq='D'), + np.array([17167, 17532]), + )), +]) +def test_ndarray_values(array, expected): + l_values = pd.Series(array)._ndarray_values + r_values = pd.Index(array)._ndarray_values + tm.assert_numpy_array_equal(l_values, r_values) + tm.assert_numpy_array_equal(l_values, expected) From 55305dc197cf7444aa50eab3ba426d5b7244672a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 14:29:08 -0600 Subject: [PATCH 09/36] ndarray_values --- pandas/core/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/base.py b/pandas/core/base.py index 62f237e253c96..dd950a7b8ff00 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -772,6 +772,11 @@ def base(self): def _ndarray_values(self): """The data as an ndarray. See '_values' for more.""" # type: () -> np.ndarray + from pandas.core.dtypes.common import is_categorical_dtype + + if is_categorical_dtype(self): + return self._values.codes + return self.values @property From 0e637086e1e89ed7c580e5b731b030d524431a34 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 15:01:28 -0600 Subject: [PATCH 10/36] API: Default ExtensionArray.astype (cherry picked from commit 943a915562b72bed147c857de927afa0daf31c1a) (cherry picked from commit fbf0a0672380e210d3cb3c527fa8045a204d81be) --- pandas/core/arrays/base.py | 30 +++++++++++++++++ pandas/tests/extension_arrays/test_common.py | 34 ++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 pandas/tests/extension_arrays/test_common.py diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 1556b653819a6..8c3d033dffba7 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,6 @@ """An interface for extending pandas with custom arrays.""" +import numpy as np + from pandas.errors import AbstractMethodError _not_implemented_message = "{} does not implement {}." @@ -138,6 +140,34 @@ def nbytes(self): # ------------------------------------------------------------------------ # Additional Methods # ------------------------------------------------------------------------ + def astype(self, dtype, copy=True): + """Cast to a NumPy array with 'dtype'. + + The default implementation only allows casting to 'object' dtype. + + Parameters + ---------- + dtype : str or dtype + Typecode or data-type to which the array is cast. + copy : bool, default True + Whether to copy the data, even if not necessary. If False, + a copy is made only if the old dtype does not match the + new dtype. + + Returns + ------- + array : ndarray + NumPy ndarray with 'dtype' for its dtype. + """ + np_dtype = np.dtype(dtype) + + if np_dtype != 'object': + msg = ("{} can only be coerced to 'object' dtype, " + "not '{}'.").format(type(self).__name__, dtype) + raise ValueError(msg) + + return np.array(self, dtype=np_dtype, copy=copy) + def isna(self): # type: () -> np.ndarray """Boolean NumPy array indicating if each value is missing. 
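For authors of third-party extension arrays, the intended division of labour with this default astype is roughly the following sketch. MyFloatArray and its float64 storage are hypothetical, invented purely for illustration; the only behaviour taken from this patch is that the base-class astype handles casting to object dtype and raises for anything else, so a subclass overrides it only where it can do better.

    import numpy as np
    from pandas.core.arrays import ExtensionArray

    class MyFloatArray(ExtensionArray):        # hypothetical subclass
        def __init__(self, data):
            self.data = np.asarray(data, dtype='float64')

        def __array__(self, dtype=None):
            # Lets np.array(self, dtype=object) in the default astype work.
            return self.data

        def astype(self, dtype, copy=True):
            if np.dtype(dtype) == np.dtype(object):
                # Defer to the object-dtype default added in this patch.
                return super(MyFloatArray, self).astype(dtype, copy=copy)
            # Otherwise cast the backing ndarray directly.
            return self.data.astype(dtype, copy=copy)

    arr = MyFloatArray([1.0, 2.0])
    arr.astype('float32')   # fast path through the backing ndarray
    arr.astype(object)      # default path, returns an object ndarray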
diff --git a/pandas/tests/extension_arrays/test_common.py b/pandas/tests/extension_arrays/test_common.py new file mode 100644 index 0000000000000..7feb7fdf09ec6 --- /dev/null +++ b/pandas/tests/extension_arrays/test_common.py @@ -0,0 +1,34 @@ +import numpy as np + +import pandas.util.testing as tm +from pandas.core.arrays import ExtensionArray + + +class DummyArray(ExtensionArray): + + def __init__(self, data): + self.data = data + + def __array__(self, dtype): + return self.data + + +def test_astype(): + arr = DummyArray(np.array([1, 2, 3])) + expected = np.array([1, 2, 3], dtype=object) + + result = arr.astype(object) + tm.assert_numpy_array_equal(result, expected) + + result = arr.astype('object') + tm.assert_numpy_array_equal(result, expected) + + +def test_astype_raises(): + arr = DummyArray(np.array([1, 2, 3])) + + xpr = ("DummyArray can only be coerced to 'object' dtype, not " + "''") + + with tm.assert_raises_regex(ValueError, xpr): + arr.astype(int) From fbbbc8a08b9bfe66cbe06621795163d65dbd3c77 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 15:22:43 -0600 Subject: [PATCH 11/36] Simplify concat_as_object --- pandas/core/dtypes/concat.py | 10 +++++++--- pandas/tests/indexes/test_category.py | 8 ++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index d6b55d03ebccd..b36dc03bbc82b 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -488,8 +488,11 @@ def _concat_index_asobject(to_concat, name=None): concat all inputs as object. DatetimeIndex, TimedeltaIndex and PeriodIndex are converted to object dtype before concatenation """ + from pandas import Index + from pandas.core.arrays import ExtensionArray - klasses = ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex + klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex, + ExtensionArray) to_concat = [x.astype(object) if isinstance(x, klasses) else x for x in to_concat] @@ -497,8 +500,9 @@ def _concat_index_asobject(to_concat, name=None): attribs = self._get_attributes_dict() attribs['name'] = name - arrays = [np.array(x, copy=False, dtype=object) for x in to_concat] - return self._shallow_copy_with_infer(np.concatenate(arrays), **attribs) + to_concat = [x._values if isinstance(x, Index) else x + for x in to_concat] + return self._shallow_copy_with_infer(np.concatenate(to_concat), **attribs) def _concat_sparse(to_concat, axis=0, typs=None): diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index c2e40c79f8914..e9fddfde90348 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -353,6 +353,14 @@ def test_append(self): expected = Index(list('caaabbca')) tm.assert_index_equal(result, expected, exact=True) + def test_append_to_another(self): + # hits _concat_index_asobject + fst = Index(['a', 'b']) + snd = CategoricalIndex(['d', 'e']) + result = fst.append(snd) + expected = Index(['a', 'b', 'd', 'e']) + tm.assert_index_equal(result, expected) + def test_insert(self): ci = self.create_index() From 46a0a49352a1242077e616056f802b0ce35eb8d9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 15:46:36 -0600 Subject: [PATCH 12/36] Py2 compat (cherry picked from commit b20e12cae68dd86ff51597464045656763d369f7) --- pandas/tests/extension_arrays/test_common.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension_arrays/test_common.py 
b/pandas/tests/extension_arrays/test_common.py index 7feb7fdf09ec6..f19754482b04f 100644 --- a/pandas/tests/extension_arrays/test_common.py +++ b/pandas/tests/extension_arrays/test_common.py @@ -27,8 +27,10 @@ def test_astype(): def test_astype_raises(): arr = DummyArray(np.array([1, 2, 3])) + # type int for py2 + # class int for py3 xpr = ("DummyArray can only be coerced to 'object' dtype, not " - "''") + "'<.* 'int'>'") with tm.assert_raises_regex(ValueError, xpr): arr.astype(int) From 2c4445a365d19979b400295ce6a7c671396cb0da Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 16:30:11 -0600 Subject: [PATCH 13/36] Set-ops ugliness --- pandas/core/indexes/base.py | 52 +++++++++++++-------------------- pandas/core/indexes/category.py | 6 ---- 2 files changed, 21 insertions(+), 37 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 260016661a735..3ce3ecce1c140 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -31,12 +31,14 @@ is_object_dtype, is_categorical_dtype, is_interval_dtype, + is_period_dtype, is_bool, is_bool_dtype, is_signed_integer_dtype, is_unsigned_integer_dtype, is_integer_dtype, is_float_dtype, is_datetime64_any_dtype, + is_datetime64tz_dtype, is_timedelta64_dtype, needs_i8_conversion, is_iterator, is_list_like, @@ -2252,15 +2254,15 @@ def union(self, other): other = other.astype('O') return this.union(other) - if is_categorical_dtype(self): - lvals = self.values - else: + # TODO: setops-refactor, clean all this up + if is_period_dtype(self) or is_datetime64tz_dtype(self): lvals = self._ndarray_values - - if is_categorical_dtype(other): - rvals = other.values else: + lvals = self._values + if is_period_dtype(other) or is_datetime64tz_dtype(other): rvals = other._ndarray_values + else: + rvals = other._values if self.is_monotonic and other.is_monotonic: try: @@ -2310,24 +2312,6 @@ def _wrap_union_result(self, other, result): name = self.name if self.name == other.name else None return self.__class__(result, name=name) - def _ensure_join(self, values): - """Ensure that the 'values' are ready for our join indexer. - - The default join indexers are object, so this just returns 'values'. - This is called before calling those. - - - Parameters - ---------- - values : array-like - - Returns - ------- - values : ndarray - Expected to have the correct type for self.inner_indexer - """ - return values - def intersection(self, other): """ Form the intersection of two Index objects. 
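For orientation, the user-facing contract that this lvals/rvals branching in union (mirrored in intersection just below) is meant to preserve looks roughly like the following usage sketch; it uses public API only and is not a test from this patch:

    import pandas as pd

    left = pd.date_range('2018-01-01', periods=3, tz='US/Central')
    right = pd.date_range('2018-01-02', periods=3, tz='US/Central')

    # However the values are matched internally (period ordinals, UTC
    # timestamps, or plain object values), the results should keep their
    # index type and dtype.
    left.union(right)         # tz-aware DatetimeIndex
    left.intersection(right)  # tz-aware DatetimeIndex

    p = pd.period_range('2018-01', periods=3, freq='M')
    q = pd.period_range('2018-02', periods=3, freq='M')
    p.union(q)                # PeriodIndex
    p.intersection(q)         # PeriodIndex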
@@ -2363,24 +2347,30 @@ def intersection(self, other): other = other.astype('O') return this.intersection(other) + # TODO: setops-refactor, clean all this up + if is_period_dtype(self): + lvals = self._ndarray_values + else: + lvals = self._values + if is_period_dtype(other): + rvals = other._ndarray_values + else: + rvals = other._values + if self.is_monotonic and other.is_monotonic: try: - lvals = self._ensure_join(self._ndarray_values) - rvals = self._ensure_join(other._ndarray_values) result = self._inner_indexer(lvals, rvals)[0] return self._wrap_union_result(other, result) except TypeError: pass try: - indexer = Index(other._ndarray_values).get_indexer( - self._ndarray_values) + indexer = Index(rvals).get_indexer(lvals) indexer = indexer.take((indexer != -1).nonzero()[0]) except Exception: - # duplicates + # duplicateters indexer = algos.unique1d( - Index(other._ndarray_values).get_indexer_non_unique( - self._ndarray_values)[0]) + Index(rvals).get_indexer_non_unique(lvals)[0]) indexer = indexer[indexer != -1] taken = other.take(indexer) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 4381b35f6cb86..93ed2507cb829 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -805,12 +805,6 @@ def _delegate_method(self, name, *args, **kwargs): return res return CategoricalIndex(res, name=self.name) - def _ensure_join(self, values): - if self.codes.dtype.itemsize <= 4: - return _ensure_int32(values) - else: - return _ensure_int64(values) - @classmethod def _add_accessors(cls): """ add in Categorical accessor methods """ From 5612cda29f77b5865df92bb97c6e7a2abde6bcb6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 16:46:02 -0600 Subject: [PATCH 14/36] better docstrings --- pandas/core/base.py | 9 ++++++++- pandas/core/indexes/base.py | 27 ++++++++++++++++++--------- 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index dd950a7b8ff00..744d448b16682 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -770,7 +770,14 @@ def base(self): @property def _ndarray_values(self): - """The data as an ndarray. See '_values' for more.""" + """The data as an ndarray, possibly losing information. + + The expectation is that this is cheap to compute. + + - categorical -> codes + + See '_values' for more. + """ # type: () -> np.ndarray from pandas.core.dtypes.common import is_categorical_dtype diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3ce3ecce1c140..afefa5de2477e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -603,15 +603,24 @@ def _values(self): """The best array representation. This is an ndarray, ExtensionArray, or Index subclass. This differs - from '._ndarray_values', which always returns an ndarray. It may differ - from the public '.values' - - index | values | _values - ----------------- | -------------- -| ---------- - CategoricalIndex | Categorical | Categorical - DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] - PeriodIndex | ndarray[Period] | ndarray[Pd] (soon PeriodArray) - IntervalIndex | ndarray[IV] | ndarray[IV] (soon IntervalArray) + from ``_ndarray_values``, which always returns an ndarray. + + Both ``_values`` and ``_ndarray_values`` are consistent between + ``Series`` and ``Index``. + + It may differ from the public '.values' method. 
+ + index | values | _values | _ndarray_values | + ----------------- | -------------- -| ----------- | --------------- | + CategoricalIndex | Categorical | Categorical | codes | + DatetimeIndex[tz] | ndarray[M8ns] | DTI[tz] | datetime@UTC | + + In the near-future, we'll implement two more. + + index | values | _values | _ndarray_values | + ----------------- | --------------- | ----------- | --------------- | + PeriodIndex | ndarray[object] | PeriodArray | ordinals | + IntervalIndex | ndarray[object] | IVArray | ndarray[object] | See Also -------- From b012c1967b6de548b999514fe4b560ba9b7ee635 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 17:03:15 -0600 Subject: [PATCH 15/36] tolist --- pandas/core/base.py | 3 +++ pandas/core/indexes/base.py | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 744d448b16682..f3b0fb9dbe142 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -14,6 +14,7 @@ is_list_like, is_scalar, is_datetimelike, + is_categorical_dtype, is_extension_type) from pandas.util._validators import validate_bool_kwarg @@ -833,6 +834,8 @@ def tolist(self): if is_datetimelike(self): return [com._maybe_box_datetimelike(x) for x in self._values] + elif is_categorical_dtype(self): + return self.values.tolist() else: return self._ndarray_values.tolist() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index afefa5de2477e..9eb0ac1276280 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -617,10 +617,10 @@ def _values(self): In the near-future, we'll implement two more. - index | values | _values | _ndarray_values | - ----------------- | --------------- | ----------- | --------------- | - PeriodIndex | ndarray[object] | PeriodArray | ordinals | - IntervalIndex | ndarray[object] | IVArray | ndarray[object] | + index | values | _values | ndarray_values | + ----------------- | --------------- | ----------- | -------------- | + PeriodIndex | ndarray[object] | PeriodArray + IntervalIndex | IntervalArray | ndarray[Interval] See Also -------- From d49e6aa649a0b02ce612b9d18b663668ade6485a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 8 Feb 2018 17:05:46 -0600 Subject: [PATCH 16/36] linting --- pandas/core/indexes/datetimes.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 788005531efe1..22ce690b3d420 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1105,7 +1105,8 @@ def unique(self, level=None): else: naive = self result = super(DatetimeIndex, naive).unique(level=level) - return self._simple_new(result, name=self.name, tz=self.tz, freq=self.freq) + return self._simple_new(result, name=self.name, tz=self.tz, + freq=self.freq) def union(self, other): """ From d7d31eecc1411f9d68755bd86f80b2a97a34776e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 08:21:51 -0600 Subject: [PATCH 17/36] Moved dtypes (cherry picked from commit d1362271bca8a7b183f3241e5c2f040c422118b8) --- pandas/tests/dtypes/test_dtypes.py | 32 +------------------- pandas/tests/extension_arrays/test_common.py | 29 ++++++++++++++++++ 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/pandas/tests/dtypes/test_dtypes.py b/pandas/tests/dtypes/test_dtypes.py index eca4dd4cf2106..d800a7b92b559 100644 --- a/pandas/tests/dtypes/test_dtypes.py +++ b/pandas/tests/dtypes/test_dtypes.py @@ -10,14 +10,12 @@ Series, Categorical, 
CategoricalIndex, IntervalIndex, date_range) from pandas.compat import string_types -from pandas.core.arrays import ExtensionArray from pandas.core.dtypes.dtypes import ( DatetimeTZDtype, PeriodDtype, - IntervalDtype, CategoricalDtype, ExtensionDtype) + IntervalDtype, CategoricalDtype) from pandas.core.dtypes.common import ( is_categorical_dtype, is_categorical, is_datetime64tz_dtype, is_datetimetz, - is_extension_array_dtype, is_period_dtype, is_period, is_dtype_equal, is_datetime64_ns_dtype, is_datetime64_dtype, is_interval_dtype, @@ -744,31 +742,3 @@ def test_categorical_categories(self): tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) c1 = CategoricalDtype(CategoricalIndex(['a', 'b'])) tm.assert_index_equal(c1.categories, pd.Index(['a', 'b'])) - - -class DummyArray(ExtensionArray): - pass - - -class DummyDtype(ExtensionDtype): - pass - - -class TestExtensionArrayDtype(object): - - @pytest.mark.parametrize('values', [ - pd.Categorical([]), - pd.Categorical([]).dtype, - pd.Series(pd.Categorical([])), - DummyDtype(), - DummyArray(), - ]) - def test_is_extension_array_dtype(self, values): - assert is_extension_array_dtype(values) - - @pytest.mark.parametrize('values', [ - np.array([]), - pd.Series(np.array([])), - ]) - def test_is_not_extension_array_dtype(self, values): - assert not is_extension_array_dtype(values) diff --git a/pandas/tests/extension_arrays/test_common.py b/pandas/tests/extension_arrays/test_common.py index f19754482b04f..1fc4526aff951 100644 --- a/pandas/tests/extension_arrays/test_common.py +++ b/pandas/tests/extension_arrays/test_common.py @@ -1,7 +1,15 @@ import numpy as np +import pytest +import pandas as pd import pandas.util.testing as tm from pandas.core.arrays import ExtensionArray +from pandas.core.dtypes.common import is_extension_array_dtype +from pandas.core.dtypes.dtypes import ExtensionDtype + + +class DummyDtype(ExtensionDtype): + pass class DummyArray(ExtensionArray): @@ -13,7 +21,28 @@ def __array__(self, dtype): return self.data +class TestExtensionArrayDtype(object): + + @pytest.mark.parametrize('values', [ + pd.Categorical([]), + pd.Categorical([]).dtype, + pd.Series(pd.Categorical([])), + DummyDtype(), + DummyArray(np.array([1, 2])), + ]) + def test_is_extension_array_dtype(self, values): + assert is_extension_array_dtype(values) + + @pytest.mark.parametrize('values', [ + np.array([]), + pd.Series(np.array([])), + ]) + def test_is_not_extension_array_dtype(self, values): + assert not is_extension_array_dtype(values) + + def test_astype(): + arr = DummyArray(np.array([1, 2, 3])) expected = np.array([1, 2, 3], dtype=object) From 7b89f1b3dc80c23d02c8b57c9c5d94cd491082c8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 08:36:44 -0600 Subject: [PATCH 18/36] clean --- pandas/tests/extension_arrays/test_common.py | 65 -------------------- 1 file changed, 65 deletions(-) delete mode 100644 pandas/tests/extension_arrays/test_common.py diff --git a/pandas/tests/extension_arrays/test_common.py b/pandas/tests/extension_arrays/test_common.py deleted file mode 100644 index 1fc4526aff951..0000000000000 --- a/pandas/tests/extension_arrays/test_common.py +++ /dev/null @@ -1,65 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -import pandas.util.testing as tm -from pandas.core.arrays import ExtensionArray -from pandas.core.dtypes.common import is_extension_array_dtype -from pandas.core.dtypes.dtypes import ExtensionDtype - - -class DummyDtype(ExtensionDtype): - pass - - -class DummyArray(ExtensionArray): - - def 
__init__(self, data):
-        self.data = data
-
-    def __array__(self, dtype):
-        return self.data
-
-
-class TestExtensionArrayDtype(object):
-
-    @pytest.mark.parametrize('values', [
-        pd.Categorical([]),
-        pd.Categorical([]).dtype,
-        pd.Series(pd.Categorical([])),
-        DummyDtype(),
-        DummyArray(np.array([1, 2])),
-    ])
-    def test_is_extension_array_dtype(self, values):
-        assert is_extension_array_dtype(values)
-
-    @pytest.mark.parametrize('values', [
-        np.array([]),
-        pd.Series(np.array([])),
-    ])
-    def test_is_not_extension_array_dtype(self, values):
-        assert not is_extension_array_dtype(values)
-
-
-def test_astype():
-
-    arr = DummyArray(np.array([1, 2, 3]))
-    expected = np.array([1, 2, 3], dtype=object)
-
-    result = arr.astype(object)
-    tm.assert_numpy_array_equal(result, expected)
-
-    result = arr.astype('object')
-    tm.assert_numpy_array_equal(result, expected)
-
-
-def test_astype_raises():
-    arr = DummyArray(np.array([1, 2, 3]))
-
-    # type int for py2
-    # class int for py3
-    xpr = ("DummyArray can only be coerced to 'object' dtype, not "
-           "'<.* 'int'>'")
-
-    with tm.assert_raises_regex(ValueError, xpr):
-        arr.astype(int)

From b0dbffd72376d88bfc1dd8d4d89c890978686d4e Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 9 Feb 2018 09:34:39 -0600
Subject: [PATCH 19/36] cleanup

---
 pandas/core/indexes/base.py     | 10 +++---
 pandas/core/indexes/category.py |  2 +-
 pandas/core/indexes/multi.py    |  2 +-
 pandas/tests/test_base.py       | 56 ++++++++++++++++++++++++++++-----
 4 files changed, 56 insertions(+), 14 deletions(-)

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 9eb0ac1276280..d8b4a65a91ecc 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -613,14 +613,14 @@ def _values(self):
         index             | values          | _values     | _ndarray_values |
         ----------------- | -------------- -| ----------- | --------------- |
         CategoricalIndex  | Categorical     | Categorical | codes           |
-        DatetimeIndex[tz] | ndarray[M8ns]   | DTI[tz]     | datetime@UTC    |
+        DatetimeIndex[tz] | ndarray[M8ns]   | DTI[tz]     | ndarray[M8ns]   |
 
         In the near-future, we'll implement two more.
 
-        index             | values          | _values     | ndarray_values |
-        ----------------- | --------------- | ----------- | -------------- |
-        PeriodIndex       | ndarray[object] | PeriodArray
-        IntervalIndex     | IntervalArray   | ndarray[Interval]
+        index             | values          | _values     | _ndarray_values |
+        ----------------- | --------------- | ----------- | --------------- |
+        PeriodIndex       | ndarray[object] | PeriodArray | ndarray[int]    |
+        IntervalIndex     | ndarray[object] | IVArray     | ndarray[object] |
 
         See Also
         --------
diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 93ed2507cb829..166832cbe6bb1 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -797,7 +797,7 @@ def _evaluate_compare(self, other):
 
     def _delegate_method(self, name, *args, **kwargs):
         """ method delegation to the ._values """
-        method = getattr(self.values, name)
+        method = getattr(self._values, name)
         if 'inplace' in kwargs:
             raise ValueError("cannot use inplace with CategoricalIndex")
         res = method(*args, **kwargs)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 1478012aa9dbe..a257a1ba26128 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -799,7 +799,7 @@ def values(self):
             box = hasattr(lev, '_box_values')
             # Try to minimize boxing.
if box and len(lev) > len(lab): - taken = lev._box_values(algos.take_1d(lev._values, + taken = lev._box_values(algos.take_1d(lev._ndarray_values, lab)) elif box: taken = algos.take_1d(lev._box_values(lev._ndarray_values), diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 94449663b580b..66ec2d37c680e 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1241,13 +1241,6 @@ def test_values_consistent(array, expected_type, dtype): assert r_values.dtype == dtype -def test_values_periodindex(): - arr = pd.period_range("2017", periods=4, freq='D') - result = arr._values - expected = np.array(arr.astype(object)) - tm.assert_numpy_array_equal(result, expected) - - @pytest.mark.parametrize('array, expected', [ (np.array([0, 1]), np.array([0, 1])), (np.array(['0', '1']), np.array(['0', '1'], dtype=object)), @@ -1267,3 +1260,52 @@ def test_ndarray_values(array, expected): r_values = pd.Index(array)._ndarray_values tm.assert_numpy_array_equal(l_values, r_values) tm.assert_numpy_array_equal(l_values, expected) + + +def test_values_multiindex_datetimesindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + +def test_values_multiindex_datetimesindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) From 66b936f00b72e3152df807e6e5913f1111084cef Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 09:42:37 -0600 Subject: [PATCH 20/36] NumPy compat --- pandas/tests/test_base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 66ec2d37c680e..e649667e3dda1 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1196,7 +1196,7 @@ def test_unique_datetime_index(arr, expected): @pytest.mark.parametrize('arr, expected', [ (pd.Series(pd.DatetimeIndex(['2017', '2017'])), - np.array(['2017'], dtype='M8[ns]')), + np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), (pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')), np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)), ]) From 32ee0eff6893bd02ed1469330054b0c37914306e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:10:15 -0600 Subject: [PATCH 21/36] Use base _values for CategoricalIndex --- pandas/core/indexes/category.py | 4 ---- 1 file changed, 4 deletions(-) diff 
--git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py
index 166832cbe6bb1..f03f8571121f0 100644
--- a/pandas/core/indexes/category.py
+++ b/pandas/core/indexes/category.py
@@ -304,10 +304,6 @@ def values(self):
         """ return the underlying data, which is a Categorical """
         return self._data
 
-    @property
-    def _values(self):
-        return self._data
-
     @property
     def _ndarray_values(self):
         return self._data.codes

From a9882e23defc47272f941932c4ce53af9b5ba0e6 Mon Sep 17 00:00:00 2001
From: Tom Augspurger
Date: Fri, 9 Feb 2018 10:10:34 -0600
Subject: [PATCH 22/36] Update dev docs

---
 doc/source/internals.rst | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/doc/source/internals.rst b/doc/source/internals.rst
index 29aaed318b802..957f82fd9eba7 100644
--- a/doc/source/internals.rst
+++ b/doc/source/internals.rst
@@ -92,16 +92,20 @@ if you compute the levels and labels yourself, please be careful.
 Values
 ~~~~~~
 
-Pandas extends NumPy's type system in a few places, so we have multiple notions of "values" floating around.
-For 1-D containers (``Index`` classes and ``Series``) we have the following convention:
-
-* ``cls._ndarray_values`` is *always* and ``ndarray``
-* ``cls._values`` refers is the "best possible" array. This could be an ``ndarray``, ``ExtensionArray``, or
-  in ``Index`` subclass (note: we're in the process of removing the index subclasses here so that it's
-  always an ``ndarray`` or ``ExtensionArray``).
-
-So, for example, ``Series[category]._values`` is a ``Categorical``, while ``Series[category]._ndarray_values`` is
-the underlying ndarray.
+Pandas extends NumPy's type system with custom types, like ``Categorical`` or
+datetimes with a timezone, so we have multiple notions of "values". For 1-D
+containers (``Index`` classes and ``Series``) we have the following convention:
+
+* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally,
+  ``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``,
+  this returns the codes, not the array of objects.
+* ``cls._values`` refers to the "best possible" array. This could be an
+  ``ndarray``, ``ExtensionArray``, or an ``Index`` subclass (note: we're in the
+  process of removing the index subclasses here so that it's always an
+  ``ndarray`` or ``ExtensionArray``).
+
+So, for example, ``Series[category]._values`` is a ``Categorical``, while
+``Series[category]._ndarray_values`` is the underlying codes.
 
 
 
 .. 
_ref-subclassing-pandas: From 242562108b099b4e7a205541ee15b9272dcb5265 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 10:59:22 -0600 Subject: [PATCH 23/36] cleanup --- pandas/core/dtypes/cast.py | 2 +- pandas/core/indexes/category.py | 13 +------------ pandas/core/indexes/multi.py | 8 +++----- pandas/core/indexes/period.py | 5 ----- 4 files changed, 5 insertions(+), 23 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b2816343fc8eb..55919fb2bea0d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -927,7 +927,7 @@ def try_timedelta(v): # will try first with a string & object conversion from pandas import to_timedelta try: - return to_timedelta(v)._values.reshape(shape) + return to_timedelta(v)._ndarray_values.reshape(shape) except Exception: return v.reshape(shape) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index f03f8571121f0..5aa940499a368 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -1,6 +1,5 @@ import numpy as np from pandas._libs import index as libindex -from pandas._libs import join as libjoin from pandas import compat from pandas.compat.numpy import function as nv @@ -9,8 +8,6 @@ from pandas.core.dtypes.common import ( is_categorical_dtype, _ensure_platform_int, - _ensure_int32, - _ensure_int64, is_list_like, is_interval_dtype, is_scalar) @@ -217,14 +214,6 @@ def _shallow_copy(self, values=None, categories=None, ordered=None, values=values, categories=categories, ordered=ordered, **kwargs) - @cache_readonly - def _inner_indexer(self): - if self.codes.dtype.itemsize <= 4: - # int8, int16, int32 - return libjoin.inner_join_indexer_int32 - else: - return libjoin.inner_join_indexer_int64 - def _is_dtype_compat(self, other): """ *this is an internal non-public method* @@ -238,7 +227,7 @@ def _is_dtype_compat(self, other): """ if is_categorical_dtype(other): if isinstance(other, CategoricalIndex): - other = other.values + other = other._values if not other.is_dtype_equal(self): raise TypeError("categories must match existing categories " "when appending") diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a257a1ba26128..907bbb2e8762e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2507,6 +2507,7 @@ def get_locs(self, seq): MultiIndex.slice_locs : Get slice location given start label(s) and end label(s). 
""" + from .numeric import Int64Index # must be lexsorted to at least as many levels true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s] @@ -2532,7 +2533,6 @@ def _convert_to_indexer(r): "that is not the same length as the " "index") r = r.nonzero()[0] - from .numeric import Int64Index return Int64Index(r) def _update_indexer(idxr, indexer=indexer): @@ -2569,7 +2569,6 @@ def _update_indexer(idxr, indexer=indexer): if indexers is not None: indexer = _update_indexer(indexers, indexer=indexer) else: - from .numeric import Int64Index # no matches we are done return Int64Index([])._ndarray_values @@ -2652,9 +2651,8 @@ def equals(self, other): for i in range(self.nlevels): slabels = self.labels[i] slabels = slabels[slabels != -1] - svalues = algos.take_nd( - np.asarray(self.levels[i]._values), - slabels, allow_fill=False) + svalues = algos.take_nd(np.asarray(self.levels[i]._values), + slabels, allow_fill=False) olabels = other.labels[i] olabels = olabels[olabels != -1] diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index c8b7d6063e378..e90d3827fe84e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -418,11 +418,6 @@ def _int64index(self): def values(self): return self.astype(object).values - @property - def _values(self): - # TODO: return PeriodArray - return self.values - @property def _ndarray_values(self): # Ordinals From 170d0c7959a54276fff730b002195f46ec64de63 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:33:49 -0600 Subject: [PATCH 24/36] Linting --- pandas/core/base.py | 3 +-- pandas/tests/test_base.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f3b0fb9dbe142..01dba132e00c5 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -7,8 +7,7 @@ import numpy as np from pandas.core.dtypes.missing import isna -from pandas.core.dtypes.generic import ( - ABCDataFrame, ABCSeries, ABCIndexClass, ABCDatetimeIndex) +from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass from pandas.core.dtypes.common import ( is_object_dtype, is_list_like, diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index e649667e3dda1..31fa278f906f5 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1262,7 +1262,7 @@ def test_ndarray_values(array, expected): tm.assert_numpy_array_equal(l_values, expected) -def test_values_multiindex_datetimesindex(): +def test_values_multiindex_datetimeindex(): # Test to ensure we hit the boxing / nobox part of MI.values ints = np.arange(10**18, 10**18 + 5) naive = pd.DatetimeIndex(ints) @@ -1287,7 +1287,7 @@ def test_values_multiindex_datetimesindex(): tm.assert_index_equal(inner, aware[:2]) -def test_values_multiindex_datetimesindex(): +def test_values_multiindex_periodindex(): # Test to ensure we hit the boxing / nobox part of MI.values ints = np.arange(2007, 2012) pidx = pd.PeriodIndex(ints, freq='D') From 402620f3ca75d14dd203f809226ec528113ae54c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 9 Feb 2018 13:35:24 -0600 Subject: [PATCH 25/36] Precision in tests --- pandas/tests/test_base.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 31fa278f906f5..ce1e3d492741d 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1212,7 +1212,7 @@ def test_unique_datetime_series(arr, expected): @pytest.mark.parametrize('array, expected_type, 
dtype', [ - (np.array([0, 1]), np.ndarray, 'int64'), + (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'), (np.array(['a', 'b']), np.ndarray, 'object'), (pd.Categorical(['a', 'b']), pd.Categorical, 'category'), (pd.DatetimeIndex(['2017', '2018']), np.ndarray, 'datetime64[ns]'), @@ -1242,7 +1242,7 @@ def test_values_consistent(array, expected_type, dtype): @pytest.mark.parametrize('array, expected', [ - (np.array([0, 1]), np.array([0, 1])), + (np.array([0, 1], dtype=np.int64), np.array([0, 1], dtype=np.int64)), (np.array(['0', '1']), np.array(['0', '1'], dtype=object)), (pd.Categorical(['a', 'a']), np.array([0, 0], dtype='int8')), (pd.DatetimeIndex(['2017-01-01T00:00:00']), From 815d202e96e910a64a292f6815737447ffdc1847 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Feb 2018 14:13:50 -0600 Subject: [PATCH 26/36] Push _ndarray_values to ExtensionArray Now IndexOpsMixin._ndarray_values will dispatch all the way down to the EA. Subclasses like Categorical can override it as they see fit. --- pandas/core/arrays/base.py | 12 ++++++++++++ pandas/core/arrays/categorical.py | 4 ++++ pandas/core/base.py | 15 ++++++--------- pandas/core/dtypes/common.py | 2 +- pandas/core/indexes/category.py | 4 ---- 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 553e1e0ac2066..e618dc6b69b2d 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -266,3 +266,15 @@ def _can_hold_na(self): Setting this to false will optimize some operations like fillna. """ return True + + @property + def _ndarray_values(self): + # type: () -> np.ndarray + """Internal pandas method for lossy conversion to a NumPy ndarray. + + This method is not part of the pandas interface. + + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. + """ + return np.array(self) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 62c6a6b16cbe9..8d2cf9d2b2f92 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -410,6 +410,10 @@ def dtype(self): """The :class:`~pandas.api.types.CategoricalDtype` for this instance""" return self._dtype + @property + def _ndarray_values(self): + return self.codes + @property def _constructor(self): return Categorical diff --git a/pandas/core/base.py b/pandas/core/base.py index 01dba132e00c5..0e70e3eb64fcb 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -14,7 +14,8 @@ is_scalar, is_datetimelike, is_categorical_dtype, - is_extension_type) + is_extension_type, + is_extension_array_dtype) from pandas.util._validators import validate_bool_kwarg @@ -772,18 +773,14 @@ def base(self): def _ndarray_values(self): """The data as an ndarray, possibly losing information. - The expectation is that this is cheap to compute. + The expectation is that this is cheap to compute, and is primarily + used for interacting with our indexers. - categorical -> codes - - See '_values' for more. 
""" # type: () -> np.ndarray - from pandas.core.dtypes.common import is_categorical_dtype - - if is_categorical_dtype(self): - return self._values.codes - + if is_extension_array_dtype(self): + return self.values._ndarray_values return self.values @property diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index c66e7fcfc6978..c2b71bc316fe8 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -1709,7 +1709,7 @@ def is_extension_array_dtype(arr_or_dtype): from pandas.core.arrays import ExtensionArray # we want to unpack series, anything else? - if isinstance(arr_or_dtype, ABCSeries): + if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)): arr_or_dtype = arr_or_dtype._values return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray)) diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 5aa940499a368..d71b7ea774f52 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -293,10 +293,6 @@ def values(self): """ return the underlying data, which is a Categorical """ return self._data - @property - def _ndarray_values(self): - return self._data.codes - @property def itemsize(self): return self.values.itemsize From a727b217f42e959f9ebb355e911f3ec641db0b49 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Feb 2018 14:27:46 -0600 Subject: [PATCH 27/36] Clean up tolist --- pandas/core/base.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 0e70e3eb64fcb..0b4c03d6b4b25 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -830,10 +830,8 @@ def tolist(self): if is_datetimelike(self): return [com._maybe_box_datetimelike(x) for x in self._values] - elif is_categorical_dtype(self): - return self.values.tolist() else: - return self._ndarray_values.tolist() + return self._values.tolist() def __iter__(self): """ From f368c29d6a45832f95181a8a6e8b7411d87763c7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Sun, 11 Feb 2018 14:33:46 -0600 Subject: [PATCH 28/36] Move test locations --- .../tests/indexes/datetimes/test_datetime.py | 15 ++++ pandas/tests/indexes/test_multi.py | 48 +++++++++++ pandas/tests/test_base.py | 82 ------------------- 3 files changed, 63 insertions(+), 82 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index a75ace2933b71..e9176e749564e 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -469,3 +469,18 @@ def test_factorize_dst(self): arr, res = obj.factorize() tm.assert_numpy_array_equal(arr, np.arange(12, dtype=np.intp)) tm.assert_index_equal(res, idx) + + @pytest.mark.parametrize('arr, expected', [ + (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), + (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), + pd.DatetimeIndex(['2017'], tz='US/Eastern')), + ]) + def test_unique(self, arr, expected): + result = arr.unique() + + if isinstance(expected, np.ndarray): + tm.assert_numpy_array_equal(result, expected) + if isinstance(expected, pd.Series): + tm.assert_series_equal(result, expected) + if isinstance(expected, pd.DatetimeIndex): + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index e59456b8a2d5e..97370b279245c 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -962,6 +962,54 @@ def 
test_values_boxed(self): # Check that code branches for boxed values produce identical results tm.assert_numpy_array_equal(result.values[:4], result[:4].values) + def test_values_multiindex_datetimeindex(self): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(10**18, 10**18 + 5) + naive = pd.DatetimeIndex(ints) + aware = pd.DatetimeIndex(ints, tz='US/Central') + + idx = pd.MultiIndex.from_arrays([naive, aware]) + result = idx.values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.DatetimeIndex([x[0] for x in result]) + tm.assert_index_equal(outer, naive[:2]) + + inner = pd.DatetimeIndex([x[1] for x in result]) + tm.assert_index_equal(inner, aware[:2]) + + + def test_values_multiindex_periodindex(): + # Test to ensure we hit the boxing / nobox part of MI.values + ints = np.arange(2007, 2012) + pidx = pd.PeriodIndex(ints, freq='D') + + idx = pd.MultiIndex.from_arrays([ints, pidx]) + result = idx.values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints)) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx) + + # n_lev > n_lab + result = idx[:2].values + + outer = pd.Int64Index([x[0] for x in result]) + tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) + + inner = pd.PeriodIndex([x[1] for x in result]) + tm.assert_index_equal(inner, pidx[:2]) + def test_append(self): result = self.index[:3].append(self.index[3:]) assert result.equals(self.index) diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index ce1e3d492741d..4b5ad336139b0 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1178,39 +1178,6 @@ def test_iter_box(self): assert res == exp -@pytest.mark.parametrize('arr, expected', [ - (pd.DatetimeIndex(['2017', '2017']), pd.DatetimeIndex(['2017'])), - (pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern'), - pd.DatetimeIndex(['2017'], tz='US/Eastern')), -]) -def test_unique_datetime_index(arr, expected): - result = arr.unique() - - if isinstance(expected, np.ndarray): - tm.assert_numpy_array_equal(result, expected) - if isinstance(expected, pd.Series): - tm.assert_series_equal(result, expected) - if isinstance(expected, pd.DatetimeIndex): - tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize('arr, expected', [ - (pd.Series(pd.DatetimeIndex(['2017', '2017'])), - np.array(['2017-01-01T00:00:00'], dtype='M8[ns]')), - (pd.Series(pd.DatetimeIndex(['2017', '2017'], tz='US/Eastern')), - np.array([pd.Timestamp('2017', tz="US/Eastern")], dtype=object)), -]) -def test_unique_datetime_series(arr, expected): - result = arr.unique() - - if isinstance(expected, np.ndarray): - tm.assert_numpy_array_equal(result, expected) - if isinstance(expected, pd.Series): - tm.assert_series_equal(result, expected) - if isinstance(expected, pd.DatetimeIndex): - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize('array, expected_type, dtype', [ (np.array([0, 1], dtype=np.int64), np.ndarray, 'int64'), (np.array(['a', 'b']), np.ndarray, 'object'), @@ -1260,52 +1227,3 @@ def test_ndarray_values(array, expected): r_values = pd.Index(array)._ndarray_values tm.assert_numpy_array_equal(l_values, r_values) tm.assert_numpy_array_equal(l_values, expected) - - -def test_values_multiindex_datetimeindex(): - # Test to ensure we hit the boxing / nobox 
part of MI.values - ints = np.arange(10**18, 10**18 + 5) - naive = pd.DatetimeIndex(ints) - aware = pd.DatetimeIndex(ints, tz='US/Central') - - idx = pd.MultiIndex.from_arrays([naive, aware]) - result = idx.values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.DatetimeIndex([x[0] for x in result]) - tm.assert_index_equal(outer, naive[:2]) - - inner = pd.DatetimeIndex([x[1] for x in result]) - tm.assert_index_equal(inner, aware[:2]) - - -def test_values_multiindex_periodindex(): - # Test to ensure we hit the boxing / nobox part of MI.values - ints = np.arange(2007, 2012) - pidx = pd.PeriodIndex(ints, freq='D') - - idx = pd.MultiIndex.from_arrays([ints, pidx]) - result = idx.values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints)) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx) - - # n_lev > n_lab - result = idx[:2].values - - outer = pd.Int64Index([x[0] for x in result]) - tm.assert_index_equal(outer, pd.Int64Index(ints[:2])) - - inner = pd.PeriodIndex([x[1] for x in result]) - tm.assert_index_equal(inner, pidx[:2]) From d74c5c96040882378e3598e0df27e59aff57de51 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 06:33:05 -0600 Subject: [PATCH 29/36] Fixed test --- pandas/tests/indexes/test_multi.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index 97370b279245c..cd6a5c761d0c2 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -986,8 +986,7 @@ def test_values_multiindex_datetimeindex(self): inner = pd.DatetimeIndex([x[1] for x in result]) tm.assert_index_equal(inner, aware[:2]) - - def test_values_multiindex_periodindex(): + def test_values_multiindex_periodindex(self): # Test to ensure we hit the boxing / nobox part of MI.values ints = np.arange(2007, 2012) pidx = pd.PeriodIndex(ints, freq='D') From 8104ee5d8a887454fec6869eb1f4e63fe74d72e6 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 08:40:56 -0600 Subject: [PATCH 30/36] REF: Update per comments --- pandas/core/base.py | 2 +- pandas/core/dtypes/concat.py | 2 +- pandas/core/indexes/category.py | 6 +----- pandas/core/indexes/multi.py | 2 +- pandas/io/formats/format.py | 2 +- pandas/tests/indexes/datetimes/test_datetime.py | 8 +------- 6 files changed, 6 insertions(+), 16 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 0b4c03d6b4b25..8081e20faaeb3 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -745,7 +745,7 @@ def itemsize(self): @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self._ndarray_values.nbytes + return self.values.nbytes @property def strides(self): diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py index b36dc03bbc82b..d306d0d78f1f4 100644 --- a/pandas/core/dtypes/concat.py +++ b/pandas/core/dtypes/concat.py @@ -480,7 +480,7 @@ def _concat_datetimetz(to_concat, name=None): def _concat_index_same_dtype(indexes, klass=None): klass = klass if klass is not None else indexes[0].__class__ - return klass(np.concatenate([x._ndarray_values for x in indexes])) + return klass(np.concatenate([x._values for x in indexes])) def _concat_index_asobject(to_concat, name=None): diff 
--git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d71b7ea774f52..7d4a864b465e8 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -295,13 +295,9 @@ def values(self): @property def itemsize(self): + # Size of the items in categories, not codes. return self.values.itemsize - @property - def nbytes(self): - """ return the number of bytes in the underlying data """ - return self.values.nbytes - def get_values(self): """ return the underlying data as an ndarray """ return self._data.get_values() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 907bbb2e8762e..94dbd8b884e47 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -1319,7 +1319,7 @@ def from_tuples(cls, tuples, sortorder=None, names=None): arrays = [[]] * len(names) elif isinstance(tuples, (np.ndarray, Index)): if isinstance(tuples, Index): - tuples = tuples._ndarray_values + tuples = tuples._values arrays = list(lib.tuples_to_object_array(tuples).T) elif isinstance(tuples, list): diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index d590499faa65e..621641747f376 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1897,7 +1897,7 @@ def _format(x): vals = self.values if isinstance(vals, Index): - vals = vals._ndarray_values + vals = vals._values elif isinstance(vals, ABCSparseArray): vals = vals.values diff --git a/pandas/tests/indexes/datetimes/test_datetime.py b/pandas/tests/indexes/datetimes/test_datetime.py index e9176e749564e..05678b0c8dd45 100644 --- a/pandas/tests/indexes/datetimes/test_datetime.py +++ b/pandas/tests/indexes/datetimes/test_datetime.py @@ -477,10 +477,4 @@ def test_factorize_dst(self): ]) def test_unique(self, arr, expected): result = arr.unique() - - if isinstance(expected, np.ndarray): - tm.assert_numpy_array_equal(result, expected) - if isinstance(expected, pd.Series): - tm.assert_series_equal(result, expected) - if isinstance(expected, pd.DatetimeIndex): - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected) From f8e29b918f7b4cc306ff7b18efa549e17aedbbe9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 09:53:55 -0600 Subject: [PATCH 31/36] lint --- pandas/core/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index 8081e20faaeb3..cf48b419b7df1 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -13,7 +13,6 @@ is_list_like, is_scalar, is_datetimelike, - is_categorical_dtype, is_extension_type, is_extension_array_dtype) From 0cd9faa5b42df01c96a8dddb7f7a73cea32d0a91 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 13:04:50 -0600 Subject: [PATCH 32/36] REF: Use _values for size and shape --- pandas/core/base.py | 4 ++-- pandas/core/indexes/datetimes.py | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index cf48b419b7df1..f6f1ba982e1d9 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -711,7 +711,7 @@ def transpose(self, *args, **kwargs): @property def shape(self): """ return a tuple of the shape of the underlying data """ - return self._ndarray_values.shape + return self._values.shape @property def ndim(self): @@ -754,7 +754,7 @@ def strides(self): @property def size(self): """ return the number of elements in the underlying data """ - return self._ndarray_values.size + return self._values.size @property def flags(self): diff --git 
a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 22ce690b3d420..689610af7603f 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -694,6 +694,20 @@ def tzinfo(self): """ return self.tz + @property + def size(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.size + + @property + def shape(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.shape + @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil""" From 8fcdb7040345e1d0017367695354d9c858c71e09 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 13:09:13 -0600 Subject: [PATCH 33/36] PERF: Implement size, shape for IntervalIndex --- pandas/core/indexes/interval.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 3bf783b5a2faa..d431ea1e51e31 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -680,6 +680,16 @@ def length(self): 'e.g. Intervals with string endpoints') raise TypeError(msg) + @property + def size(self): + # Avoid materializing self.values + return self.left.size + + @property + def shape(self): + # Avoid materializing self.values + return self.left.shape + def __len__(self): return len(self.left) From 34a6a22e2255eb11e5c6b6c5478350fb84ce656e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 13:11:00 -0600 Subject: [PATCH 34/36] PERF: Avoid materializing values for PeriodIndex shape, size --- pandas/core/indexes/period.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index e90d3827fe84e..8f2d7d382a16e 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -477,6 +477,16 @@ def _to_embed(self, keep_tz=False, dtype=None): return self.astype(object).values + @property + def size(self): + # Avoid materializing self._values + return self._ndarray_values.size + + @property + def shape(self): + # Avoid materializing self._values + return self._ndarray_values.shape + @property def _formatter_func(self): return lambda x: "'%s'" % x From d6e8051d1ebab7cf99bd7ac23eea348d0e3a0d4c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Feb 2018 20:55:19 -0600 Subject: [PATCH 35/36] Cleanup --- pandas/core/base.py | 3 +-- pandas/core/indexes/base.py | 6 +++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index f6f1ba982e1d9..0ca029ffd4c25 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -744,7 +744,7 @@ def itemsize(self): @property def nbytes(self): """ return the number of bytes in the underlying data """ - return self.values.nbytes + return self._values.nbytes @property def strides(self): @@ -988,7 +988,6 @@ def value_counts(self, normalize=False, sort=True, ascending=False, def unique(self): values = self._values - # TODO: Make unique part of the ExtensionArray interface. 
if hasattr(values, 'unique'): result = values.unique() diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index a09a4c59a819a..be7c1624936bf 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -599,7 +599,7 @@ def values(self): @property def _values(self): # type: () -> Union[ExtensionArray, Index] - # TODO: remove index types as they become is extension arrays + # TODO(EA): remove index types as they become extension arrays """The best array representation. This is an ndarray, ExtensionArray, or Index subclass. This differs @@ -2264,7 +2264,7 @@ def union(self, other): other = other.astype('O') return this.union(other) - # TODO: setops-refactor, clean all this up + # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self) or is_datetime64tz_dtype(self): lvals = self._ndarray_values else: @@ -2357,7 +2357,7 @@ def intersection(self, other): other = other.astype('O') return this.intersection(other) - # TODO: setops-refactor, clean all this up + # TODO(EA): setops-refactor, clean all this up if is_period_dtype(self): lvals = self._ndarray_values else: From 3af8a21ea0e13ba5fc73db464f6e327552c71b0e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 13 Feb 2018 05:54:27 -0600 Subject: [PATCH 36/36] Override nbytes --- pandas/core/indexes/datetimes.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index 689610af7603f..cc9ce1f3fd5eb 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -708,6 +708,13 @@ def shape(self): # for TZ-aware return self._ndarray_values.shape + @property + def nbytes(self): + # TODO: Remove this when we have a DatetimeTZArray + # Necessary to avoid recursion error since DTI._values is a DTI + # for TZ-aware + return self._ndarray_values.nbytes + @cache_readonly def _timezone(self): """ Comparable timezone both for pytz / dateutil"""
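
A short illustration of the convention this series settles on, for readers following along. This snippet is not part of the patches; it is a sketch against a pandas build from this era, poking at private attributes (``_values`` / ``_ndarray_values``) that are internal and subject to change.

    import pandas as pd

    cat = pd.Series(['a', 'b', 'a'], dtype='category')
    cat.values            # public values: a Categorical
    cat._values           # the same Categorical, the "best possible" array
    cat._ndarray_values   # plain ndarray of integer codes, e.g. array([0, 1, 0], dtype=int8)

    dti = pd.DatetimeIndex(['2017', '2018'], tz='US/Central')
    dti._values           # the tz-aware DatetimeIndex itself (no DatetimeTZArray yet)
    dti._ndarray_values   # plain datetime64[ns] ndarray (UTC-based)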