pandas-dev · TomAugspurger · Feb 13, 2018 · Feb 3, 2018 · Feb 6, 2018 · Feb 7, 2018
diff --git a/doc/source/internals.rst b/doc/source/internals.rst
@@ -89,6 +89,25 @@ not check (or care) whether the levels themselves are sorted. Fortunately, the
 constructors ``from_tuples`` and ``from_arrays`` ensure that this is true, but
 if you compute the levels and labels yourself, please be careful.
 
+Values
+~~~~~~
+
+Pandas extends NumPy's type system with custom types, like ``Categorical`` or
+datetimes with a timezone, so we have multiple notions of "values". For 1-D
+containers (``Index`` classes and ``Series``) we have the following convention:
+
+* ``cls._ndarray_values`` is *always* a NumPy ``ndarray``. Ideally,
+  ``_ndarray_values`` is cheap to compute. For example, for a ``Categorical``,
+  this returns the codes, not the array of objects.
+* ``cls._values`` refers to the "best possible" array. This could be an
+  ``ndarray``, ``ExtensionArray``, or an ``Index`` subclass (note: we're in the
+  process of removing the index subclasses here so that it's always an
+  ``ndarray`` or ``ExtensionArray``).
+
+So, for example, ``Series[category]._values`` is a ``Categorical``, while
+``Series[category]._ndarray_values`` is the underlying codes.
+
+
 .. _ref-subclassing-pandas:
 
 Subclassing pandas Data Structures

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -266,3 +266,15 @@ def _can_hold_na(self):
         Setting this to false will optimize some operations like fillna.
         """
         return True
+
+    @property
+    def _ndarray_values(self):
+        # type: () -> np.ndarray
+        """Internal pandas method for lossy conversion to a NumPy ndarray.
+
+        This method is not part of the pandas interface.
+
+        The expectation is that this is cheap to compute, and is primarily
+        used for interacting with our indexers.
+        """
+        return np.array(self)
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -410,6 +410,10 @@ def dtype(self):
         """The :class:`~pandas.api.types.CategoricalDtype` for this instance"""
         return self._dtype
 
+    @property
+    def _ndarray_values(self):
+        return self.codes
+
     @property
     def _constructor(self):
         return Categorical

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -13,7 +13,9 @@
     is_list_like,
     is_scalar,
     is_datetimelike,
-    is_extension_type)
+    is_categorical_dtype,
+    is_extension_type,
+    is_extension_array_dtype)
 
 from pandas.util._validators import validate_bool_kwarg
 
@@ -710,7 +712,7 @@ def transpose(self, *args, **kwargs):
     @property
     def shape(self):
         """ return a tuple of the shape of the underlying data """
-        return self._values.shape
+        return self._ndarray_values.shape
 
     @property
     def ndim(self):
@@ -738,22 +740,22 @@ def data(self):
     @property
     def itemsize(self):
         """ return the size of the dtype of the item of the underlying data """
-        return self._values.itemsize
+        return self._ndarray_values.itemsize
 
     @property
     def nbytes(self):
         """ return the number of bytes in the underlying data """
-        return self._values.nbytes
+        return self._ndarray_values.nbytes
 
     @property
     def strides(self):
         """ return the strides of the underlying data """
-        return self._values.strides
+        return self._ndarray_values.strides
 
     @property
     def size(self):
         """ return the number of elements in the underlying data """
-        return self._values.size
+        return self._ndarray_values.size
 
     @property
     def flags(self):
@@ -768,8 +770,17 @@ def base(self):
         return self.values.base
 
     @property
-    def _values(self):
-        """ the internal implementation """
+    def _ndarray_values(self):
+        """The data as an ndarray, possibly losing information.
+
+        The expectation is that this is cheap to compute, and is primarily
+        used for interacting with our indexers.
+
+        - categorical -> codes
+        """
+        # type: () -> np.ndarray
+        if is_extension_array_dtype(self):
+            return self.values._ndarray_values
         return self.values
 
     @property
@@ -978,7 +989,9 @@ def value_counts(self, normalize=False, sort=True, ascending=False,
     def unique(self):
         values = self._values
 
+        # TODO: Make unique part of the ExtensionArray interface.
         if hasattr(values, 'unique'):
+
             result = values.unique()
         else:
             from pandas.core.algorithms import unique1d

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -927,7 +927,7 @@ def try_timedelta(v):
         # will try first with a string & object conversion
         from pandas import to_timedelta
         try:
-            return to_timedelta(v)._values.reshape(shape)
+            return to_timedelta(v)._ndarray_values.reshape(shape)
         except Exception:
             return v.reshape(shape)
 

diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py
@@ -1709,7 +1709,7 @@ def is_extension_array_dtype(arr_or_dtype):
     from pandas.core.arrays import ExtensionArray
 
     # we want to unpack series, anything else?
-    if isinstance(arr_or_dtype, ABCSeries):
+    if isinstance(arr_or_dtype, (ABCIndexClass, ABCSeries)):
         arr_or_dtype = arr_or_dtype._values
     return isinstance(arr_or_dtype, (ExtensionDtype, ExtensionArray))
 

diff --git a/pandas/core/dtypes/concat.py b/pandas/core/dtypes/concat.py
@@ -480,20 +480,22 @@ def _concat_datetimetz(to_concat, name=None):
 
 def _concat_index_same_dtype(indexes, klass=None):
     klass = klass if klass is not None else indexes[0].__class__
-    return klass(np.concatenate([x._values for x in indexes]))
+    return klass(np.concatenate([x._ndarray_values for x in indexes]))
 
 
 def _concat_index_asobject(to_concat, name=None):
     """
     concat all inputs as object. DatetimeIndex, TimedeltaIndex and
     PeriodIndex are converted to object dtype before concatenation
     """
+    from pandas import Index
+    from pandas.core.arrays import ExtensionArray
 
-    klasses = ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex
+    klasses = (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex,
+               ExtensionArray)
     to_concat = [x.astype(object) if isinstance(x, klasses) else x
                  for x in to_concat]
 
-    from pandas import Index
     self = to_concat[0]
     attribs = self._get_attributes_dict()
     attribs['name'] = name