ENH: Support operators for ExtensionArray #20889

Closed
wants to merge 3 commits
21 changes: 21 additions & 0 deletions pandas/core/arrays/base.py
@@ -53,6 +53,13 @@ class ExtensionArray(object):
* factorize / _values_for_factorize
* argsort / _values_for_argsort

For logical operators, the default is to return a Series of booleans.
However, if the underlying ExtensionDtype overrides the logical
operators, then the implementer may want to have an ExtensionArray
subclass contain the result. This can be done by changing the property
_logical_result from its default value of None to the _from_sequence
Member

Why is this property needed? Can't we simply detect whether the result is a boolean numpy array or an ExtensionArray?

Contributor

Can you explain this use-case a bit more? I think we will certainly want Series <compare> Series to always be an ndarray of booleans.

Member

I can't speak for the author, but my assumption was that this has to do with some of the spaghetti-code in ops._bool_method_SERIES, where sometimes a bool-dtype is returned and other times an int-dtype is returned (and datetimelike are currently all broken, see #19972, #19759). Straightening out this mess independently of EA implementations is part of the plan referred to above.

Contributor Author

Actually, in my use case, I need the boolean operators to return an object that represents the relation. I'm using pandas on top of two different libraries (that are functionally the same) where the operators (x <= y), (x >= y) and (x == y) are not booleans, but objects representing the relations.
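A minimal sketch of that use case, with hypothetical names that are not part of this PR: comparisons yield relation objects instead of booleans, and the array opts in to keeping them wrapped by pointing _logical_result at its own _from_sequence.

from pandas.api.extensions import ExtensionArray

class Relation(object):
    # hypothetical scalar: records a comparison instead of evaluating it
    def __init__(self, left, right, op_name):
        self.left, self.right, self.op_name = left, right, op_name

class RelationArray(ExtensionArray):
    # dtype, __len__, __getitem__, _from_sequence, ... omitted for brevity
    pass

# opt in: logical/comparison results are re-wrapped via _from_sequence
RelationArray._logical_result = RelationArray._from_sequence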

method of the ExtensionArray subclass.

This class does not inherit from 'abc.ABCMeta' for performance reasons.
Methods and properties required by the interface raise
``pandas.errors.AbstractMethodError`` and no ``register`` method is
@@ -567,6 +574,9 @@ def copy(self, deep=False):
"""
raise AbstractMethodError(self)

# See documentation above
_logical_result = None

# ------------------------------------------------------------------------
# Block-related methods
# ------------------------------------------------------------------------
@@ -610,3 +620,14 @@ def _ndarray_values(self):
used for interacting with our indexers.
"""
return np.array(self)

# ------------------------------------------------------------------------
# Utilities for use by subclasses
# ------------------------------------------------------------------------
def is_sequence_of_dtype(self, seq):
Member

For what is this needed?

Contributor

Indeed, this is expected to always be true. If it isn't, I'd recommend making a superclass that has all the scalar types, like I do in https://github.com/ContinuumIO/cyberpandas/blob/c66bbecaf5193bd284a0fddfde65395d119aad41/cyberpandas/ip_array.py#L22
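A rough sketch of the superclass approach being suggested (hypothetical names, loosely following the linked cyberpandas code): give every scalar representation a common base class and point the dtype's type attribute at it, so a single isinstance check covers all variants.

class IPScalarBase(object):
    # hypothetical common base for every scalar form the array can return
    pass

class IPv4Scalar(IPScalarBase):
    def __init__(self, value):
        self.value = value

class IPv6Scalar(IPScalarBase):
    def __init__(self, value):
        self.value = value

# the ExtensionDtype would then declare:  type = IPScalarBase
# so isinstance(item, dtype.type) is True for both scalar variants.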

Contributor Author

Let's suppose that you don't implement the ExtensionArray operators as methods of the subclass of ExtensionArray, but instead let the underlying ExtensionDtype handle the operators for you (this is what I used for Decimal). Some operators will return a sequence whose elements are all of the ExtensionDtype; some operators (e.g., logical ones) will not. So internally it's useful to have a test for whether a sequence contains objects of the corresponding ExtensionDtype, so that you can return an ExtensionArray as the result; otherwise you just let things get coerced based on the types in the sequence.
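A hedged sketch of how that check could be used by a subclass that delegates operators to its scalar type (the method name _apply_op is illustrative and not part of this diff):

import numpy as np

def _apply_op(self, other, op):
    # assumes the scalar type (e.g. decimal.Decimal) already implements op
    result = [op(a, b) for a, b in zip(self, other)]
    if self.is_sequence_of_dtype(result):
        # every element is a scalar of this dtype -> re-wrap as an ExtensionArray
        return self._from_sequence(result)
    # comparisons return plain bools; let numpy coerce those instead
    return np.asarray(result)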

"""
Given a sequence, determine whether all members have the appropriate
type for this instance of an ExtensionArray
"""
thistype = self.dtype.type
return all(isinstance(i, thistype) for i in seq)
22 changes: 16 additions & 6 deletions pandas/core/indexes/base.py
@@ -3081,13 +3081,23 @@ def get_value(self, series, key):
# if we have something that is Index-like, then
# use this, e.g. DatetimeIndex
s = getattr(series, '_values', None)
if isinstance(s, (ExtensionArray, Index)) and is_scalar(key):
try:
return s[key]
except (IndexError, ValueError):
if is_scalar(key):
if isinstance(s, Index):
try:
return s[key]
except (IndexError, ValueError):

# invalid type as an indexer
pass
# invalid type as an indexer
pass
elif isinstance(s, ExtensionArray):
try:
# This should call the ExtensionArray __getitem__
iloc = self.get_loc(key)
return s[iloc]
except (IndexError, ValueError):

# invalid type as an indexer
pass

s = com._values_from_object(series)
k = com._values_from_object(key)
96 changes: 95 additions & 1 deletion pandas/core/ops.py
@@ -6,6 +6,7 @@
# necessary to enforce truediv in Python 2.X
from __future__ import division
import operator
import inspect

import numpy as np
import pandas as pd
@@ -30,7 +31,7 @@
is_bool_dtype,
is_list_like,
is_scalar,
_ensure_object)
_ensure_object, is_extension_array_dtype)
from pandas.core.dtypes.cast import (
maybe_upcast_putmask, find_common_type,
construct_1d_object_array_from_listlike)
@@ -990,6 +991,93 @@ def _construct_divmod_result(left, result, index, name, dtype):
)


def dispatch_to_extension_op(left, right, op_name=None, is_logical=False):
Member

the dispatch_to_index_op uses op instead of op_name. Is there a reason for this difference? (and I mean in the actual implementation here it assumes a method name, and not an operator function that can be called)

Contributor Author

The reason for the difference is as follows. The way I implemented this, we first look to see if the operator is defined for the ExtensionArray subclass. If not, then we use the implementation of the operator on the underlying ExtensionDtype. So if you pass op, you get the operator bound to a specific class. If you have op_name, then we can translate to either the ExtensionArray subclass implementation or the ExtensionDtype implementation.
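In other words, the lookup goes roughly like this (simplified sketch; left, rvalues and op_name are assumed to be the Series, the unboxed right-hand values, and the method name such as '__add__'):

method = getattr(left.values, op_name, None)     # op defined on the ExtensionArray subclass?
if method is not None:
    res = method(rvalues)                        # the subclass implements the op itself
else:
    # otherwise defer to the scalar ExtensionDtype objects, element by element
    res = [getattr(a, op_name)(b) for a, b in zip(left.values, rvalues)]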

Member

If the op is not defined on ExtensionArray, calling it directly (op(left_values, right_values)) will raise a TypeError that you can catch (which you already do), so I don't really see the difference

"""
Assume that left is a Series backed by an ExtensionArray,
apply the operator defined by op_name.
"""

method = getattr(left.values, op_name, None)
deflen = len(left)
excons = type(left.values)._from_sequence
exclass = type(left.values)
testseq = left.values
Contributor

I'm having trouble understanding these names. (method makes sense).

Contributor Author

excons is the constructor used to build a result from a sequence whose elements are all of the ExtensionDtype (so "ex" for "Extension" and "cons" for "constructor"). exclass is the underlying class of the ExtensionArray subclass.
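Spelled out for a Series backed by a hypothetical DecimalArray-style subclass (illustrative only):

exclass = type(left.values)        # DecimalArray, the ExtensionArray subclass
excons = exclass._from_sequence    # rebuilds a DecimalArray from a list of Decimals
deflen = len(left)                 # broadcast length used when right is a scalar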


if is_logical:
if exclass._logical_result is not None:
excons = exclass._logical_result
else:
excons = None # Indicates boolean

# The idea here is as follows. First we see if the op is
# defined in the ExtensionArray subclass, and returns a
# result that is not NotImplemented. If so, we use that
# result. If that fails, then we try an
# element by element operator, invoking the operator
# on each element

# First see if the extension array object supports the op
res = NotImplemented
if method is not None and inspect.ismethod(method):
rvalues = right
if is_extension_array_dtype(right) and isinstance(right, ABCSeries):
rvalues = right.values
try:
res = method(rvalues)
except TypeError:
pass
except Exception as e:
raise e
Member

Why not do op(left.values, right/right.values)? What does this manual checking/trying do that the former does not?

Contributor Author

See above. In the code above, I'm testing to see if the ExtensionArray subclass has the operator defined. op and method are the same, as method is computed as the operator on left.values. I could change the name of the variable from method to op to make this clearer.


def convert_values(parm):
if is_extension_array_dtype(parm):
ovalues = parm.values
elif is_list_like(parm):
ovalues = parm
else:  # Assume it's an object
ovalues = [parm] * deflen
return ovalues

if res is NotImplemented:
Contributor

Could you explain this fallback a bit more?

If the EA doesn't define ops, then I'm perfectly fine with raising NotImplementedError at the end.

Contributor Author

See above. The idea here is that either the EA defines the ops, or the ExtensionDtype defines the ops.

The idea here is that if you know that your underlying ExtensionDtype already has the ops defined, you don't have to implement each of the ops at the ExtensionArray level.

I used DecimalArray as an example. The operators are already defined for Decimal, so there is no reason to implement them again for an array of Decimals.
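For instance, a quick sketch with the standard decimal module (not the literal test code from this PR):

from decimal import Decimal

left = [Decimal('1.5'), Decimal('2.5')]
right = [Decimal('0.5'), Decimal('0.5')]
# Decimal already implements the arithmetic operators, so the element-wise
# fallback can just call them and collect the results:
results = [a + b for a, b in zip(left, right)]   # [Decimal('2.0'), Decimal('3.0')]
# DecimalArray._from_sequence(results) would then re-wrap them as an array.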

# Try it on each element. Support operation to another
# ExtensionArray, or something that is list like, or
# a single object. This allows a result of an operator
# to be an object or any type
lvalues = convert_values(left)
rvalues = convert_values(right)

# Get the method for each object.
def callfunc(a, b):
f = getattr(a, op_name, None)
if f is not None:
return f(b)
else:
return NotImplemented
res = [callfunc(a, b) for (a, b) in zip(lvalues, rvalues)]

# We can't use (NotImplemented in res) because the
# results might be objects that have overridden __eq__
if any(isinstance(r, type(NotImplemented)) for r in res):
msg = "invalid operation {opn} between {one} and {two}"
raise TypeError(msg.format(opn=op_name,
one=type(lvalues),
two=type(rvalues)))

# At this point we have the result
# always return a full value series here
res_values = com._values_from_object(res)
if excons is not None:
if testseq.is_sequence_of_dtype(res_values):
# Convert to the ExtensionArray type if each result is of that
# type. If _logical_result was not None, this will then use
# the function set there to return an appropriate result
res_values = excons(res_values)

res_name = get_op_result_name(left, right)
return left._constructor(res_values, index=left.index,
name=res_name)


def _arith_method_SERIES(cls, op, special):
"""
Wrapper function for Series arithmetic operations, to avoid
Expand Down Expand Up @@ -1058,6 +1146,9 @@ def wrapper(left, right):
raise TypeError("{typ} cannot perform the operation "
"{op}".format(typ=type(left).__name__, op=str_rep))

elif is_extension_array_dtype(left):
return dispatch_to_extension_op(left, right, op_name)

lvalues = left.values
rvalues = right
if isinstance(rvalues, ABCSeries):
Expand Down Expand Up @@ -1208,6 +1299,9 @@ def wrapper(self, other, axis=None):
return self._constructor(res_values, index=self.index,
name=res_name)

elif is_extension_array_dtype(self):
return dispatch_to_extension_op(self, other, op_name, True)

elif isinstance(other, ABCSeries):
# By this point we have checked that self._indexed_same(other)
res_values = na_op(self.values, other.values)
48 changes: 40 additions & 8 deletions pandas/core/series.py
@@ -2185,18 +2185,34 @@ def _binop(self, other, func, level=None, fill_value=None):

this_vals, other_vals = ops.fill_binop(this.values, other.values,
fill_value)

with np.errstate(all='ignore'):
result = func(this_vals, other_vals)
name = ops.get_op_result_name(self, other)

if is_extension_array_dtype(this) or is_extension_array_dtype(other):
try:
result = func(this_vals, other_vals)
except TypeError:
result = NotImplemented
except Exception as e:
raise e

if result is NotImplemented:
result = [func(a, b) for a, b in zip(this_vals, other_vals)]
if is_extension_array_dtype(this):
excons = type(this_vals)._from_sequence
else:
excons = type(other_vals)._from_sequence
result = excons(result)
else:
with np.errstate(all='ignore'):
result = func(this_vals, other_vals)
result = self._constructor(result, index=new_index, name=name)
result = result.__finalize__(self)
if name is None:
# When name is None, __finalize__ overwrites current name
result.name = None
return result

def combine(self, other, func, fill_value=np.nan):
def combine(self, other, func, fill_value=None):
"""
Perform elementwise binary operation on two Series using given function
with optional fill value when an index is missing from one Series or
@@ -2208,6 +2224,9 @@ def combine(self, other, func, fill_value=np.nan):
func : function
Function that takes two scalars as inputs and return a scalar
fill_value : scalar value
The default is to use np.nan, unless self is backed by an
ExtensionArray, in which case the ExtensionArray's na_value is used.

Returns
-------
@@ -2227,20 +2246,33 @@
Series.combine_first : Combine Series values, choosing the calling
Series's values first
"""
self_is_ext = is_extension_array_dtype(self)
if fill_value is None:
if self_is_ext:
fill_value = self.dtype.na_value
else:
fill_value = np.nan
if isinstance(other, Series):
new_index = self.index.union(other.index)
new_name = ops.get_op_result_name(self, other)
new_values = np.empty(len(new_index), dtype=self.dtype)
new_values = []
for i, idx in enumerate(new_index):
lv = self.get(idx, fill_value)
rv = other.get(idx, fill_value)
with np.errstate(all='ignore'):
new_values[i] = func(lv, rv)
new_values.append(func(lv, rv))
else:
new_index = self.index
with np.errstate(all='ignore'):
new_values = func(self._values, other)
if not self_is_ext:
with np.errstate(all='ignore'):
new_values = func(self._values, other)
else:
new_values = [func(lv, other) for lv in self._values]
new_name = self.name

if (self_is_ext and self.values.is_sequence_of_dtype(new_values)):
new_values = self._values._from_sequence(new_values)

return self._constructor(new_values, index=new_index, name=new_name)

def combine_first(self, other):
12 changes: 12 additions & 0 deletions pandas/tests/extension/base/getitem.py
@@ -117,6 +117,18 @@ def test_getitem_slice(self, data):
result = data[slice(1)] # scalar
assert isinstance(result, type(data))

def test_get(self, data):
# GH 20882
s = pd.Series(data, index=[2 * i for i in range(len(data))])
assert s.get(4) == s.iloc[2]

result = s.get([4, 6])
expected = s.iloc[[2, 3]]
self.assert_series_equal(result, expected)

s = pd.Series(data[:6], index=list('abcdef'))
assert s.get('c') == s.iloc[2]

def test_take_sequence(self, data):
result = pd.Series(data)[[0, 1, 3]]
assert result.iloc[0] == data[0]
13 changes: 13 additions & 0 deletions pandas/tests/extension/category/test_categorical.py
@@ -2,6 +2,9 @@

import pytest
import numpy as np
import pandas as pd

import pandas.util.testing as tm

from pandas.api.types import CategoricalDtype
from pandas import Categorical
@@ -157,3 +160,13 @@ def test_value_counts(self, all_data, dropna):

class TestCasting(base.BaseCastingTests):
pass


def test_combine():
orig_data1 = make_data()
orig_data2 = make_data()
s1 = pd.Series(Categorical(orig_data1, ordered=True))
s2 = pd.Series(Categorical(orig_data2, ordered=True))
result = s1.combine(s2, lambda x1, x2: x1 <= x2)
expected = pd.Series([a <= b for (a, b) in zip(orig_data1, orig_data2)])
tm.assert_series_equal(result, expected)