Use align_method in comp_method_FRAME (#22880)

Closes gh-20090
pandas-dev · Oct 13, 2018 · e96c691 · e96c691
1 parent 241bde1
commit e96c691
Show file tree

Hide file tree

Showing 7 changed files with 123 additions and 175 deletions.
diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt
@@ -510,6 +510,88 @@ Previous Behavior:
         0
     0 NaT
 
+.. _whatsnew_0240.api.dataframe_cmp_broadcasting:
+
+DataFrame Comparison Operations Broadcasting Changes
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Previously, the broadcasting behavior of :class:`DataFrame` comparison
+operations (``==``, ``!=``, ...) was inconsistent with the behavior of
+arithmetic operations (``+``, ``-``, ...).  The behavior of the comparison
+operations has been changed to match the arithmetic operations in these cases.
+(:issue:`22880`)
+
+The affected cases are:
+
+- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`).
+- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`).
+- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`).
+
+Previous Behavior:
+
+.. code-block:: ipython
+
+   In [3]: arr = np.arange(6).reshape(3, 2)
+   In [4]: df = pd.DataFrame(arr)
+
+   In [5]: df == arr[[0], :]
+      ...: # comparison previously broadcast where arithmetic would raise
+   Out[5]:
+          0      1
+   0   True   True
+   1  False  False
+   2  False  False
+   In [6]: df + arr[[0], :]
+   ...
+   ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)
+
+   In [7]: df == (1, 2)
+      ...: # length matches number of columns;
+      ...: # comparison previously raised where arithmetic would broadcast
+   ...
+   ValueError: Invalid broadcasting comparison [(1, 2)] with block values
+   In [8]: df + (1, 2)
+   Out[8]:
+      0  1
+   0  1  3
+   1  3  5
+   2  5  7
+
+   In [9]: df == (1, 2, 3)
+      ...: # length matches number of rows
+      ...: # comparison previously broadcast where arithmetic would raise
+   Out[9]:
+          0      1
+   0  False   True
+   1   True  False
+   2  False  False
+   In [10]: df + (1, 2, 3)
+   ...
+   ValueError: Unable to coerce to Series, length must be 2: given 3
+
+*Current Behavior*:
+
+.. ipython:: python
+   :okexcept:
+
+   arr = np.arange(6).reshape(3, 2)
+   df = pd.DataFrame(arr)
+
+.. ipython:: python
+   # Comparison operations and arithmetic operations both broadcast.
+   df == arr[[0], :]
+   df + arr[[0], :]
+
+.. ipython:: python
+   # Comparison operations and arithmetic operations both broadcast.
+   df == (1, 2)
+   df + (1, 2)
+
+.. ipython:: python
+   :okexcept:
+   # Comparison operations and arithmetic operations both raise ValueError.
+   df == (1, 2, 3)
+   df + (1, 2, 3)
+
 
 .. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -4948,13 +4948,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
         return ops.dispatch_to_series(left, right, func, axis="columns")
 
     def _combine_const(self, other, func, errors='raise', try_cast=True):
-        if lib.is_scalar(other) or np.ndim(other) == 0:
-            return ops.dispatch_to_series(self, other, func)
-
-        new_data = self._data.eval(func=func, other=other,
-                                   errors=errors,
-                                   try_cast=try_cast)
-        return self._constructor(new_data)
+        assert lib.is_scalar(other) or np.ndim(other) == 0
+        return ops.dispatch_to_series(self, other, func)
 
     def combine(self, other, func, fill_value=None, overwrite=True):
         """

diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -1313,145 +1313,6 @@ def shift(self, periods, axis=0, mgr=None):
 
         return [self.make_block(new_values)]
 
-    def eval(self, func, other, errors='raise', try_cast=False, mgr=None):
-        """
-        evaluate the block; return result block from the result
-
-        Parameters
-        ----------
-        func  : how to combine self, other
-        other : a ndarray/object
-        errors : str, {'raise', 'ignore'}, default 'raise'
-            - ``raise`` : allow exceptions to be raised
-            - ``ignore`` : suppress exceptions. On error return original object
-
-        try_cast : try casting the results to the input type
-
-        Returns
-        -------
-        a new block, the result of the func
-        """
-        orig_other = other
-        values = self.values
-
-        other = getattr(other, 'values', other)
-
-        # make sure that we can broadcast
-        is_transposed = False
-        if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
-            if values.ndim != other.ndim:
-                is_transposed = True
-            else:
-                if values.shape == other.shape[::-1]:
-                    is_transposed = True
-                elif values.shape[0] == other.shape[-1]:
-                    is_transposed = True
-                else:
-                    # this is a broadcast error heree
-                    raise ValueError(
-                        "cannot broadcast shape [{t_shape}] with "
-                        "block values [{oth_shape}]".format(
-                            t_shape=values.T.shape, oth_shape=other.shape))
-
-        transf = (lambda x: x.T) if is_transposed else (lambda x: x)
-
-        # coerce/transpose the args if needed
-        try:
-            values, values_mask, other, other_mask = self._try_coerce_args(
-                transf(values), other)
-        except TypeError:
-            block = self.coerce_to_target_dtype(orig_other)
-            return block.eval(func, orig_other,
-                              errors=errors,
-                              try_cast=try_cast, mgr=mgr)
-
-        # get the result, may need to transpose the other
-        def get_result(other):
-
-            # avoid numpy warning of comparisons again None
-            if other is None:
-                result = not func.__name__ == 'eq'
-
-            # avoid numpy warning of elementwise comparisons to object
-            elif is_numeric_v_string_like(values, other):
-                result = False
-
-            # avoid numpy warning of elementwise comparisons
-            elif func.__name__ == 'eq':
-                if is_list_like(other) and not isinstance(other, np.ndarray):
-                    other = np.asarray(other)
-
-                    # if we can broadcast, then ok
-                    if values.shape[-1] != other.shape[-1]:
-                        return False
-                result = func(values, other)
-            else:
-                result = func(values, other)
-
-            # mask if needed
-            if isinstance(values_mask, np.ndarray) and values_mask.any():
-                result = result.astype('float64', copy=False)
-                result[values_mask] = np.nan
-            if other_mask is True:
-                result = result.astype('float64', copy=False)
-                result[:] = np.nan
-            elif isinstance(other_mask, np.ndarray) and other_mask.any():
-                result = result.astype('float64', copy=False)
-                result[other_mask.ravel()] = np.nan
-
-            return result
-
-        # error handler if we have an issue operating with the function
-        def handle_error():
-
-            if errors == 'raise':
-                # The 'detail' variable is defined in outer scope.
-                raise TypeError(
-                    'Could not operate {other!r} with block values '
-                    '{detail!s}'.format(other=other, detail=detail))  # noqa
-            else:
-                # return the values
-                result = np.empty(values.shape, dtype='O')
-                result.fill(np.nan)
-                return result
-
-        # get the result
-        try:
-            with np.errstate(all='ignore'):
-                result = get_result(other)
-
-        # if we have an invalid shape/broadcast error
-        # GH4576, so raise instead of allowing to pass through
-        except ValueError as detail:
-            raise
-        except Exception as detail:
-            result = handle_error()
-
-        # technically a broadcast error in numpy can 'work' by returning a
-        # boolean False
-        if not isinstance(result, np.ndarray):
-            if not isinstance(result, np.ndarray):
-
-                # differentiate between an invalid ndarray-ndarray comparison
-                # and an invalid type comparison
-                if isinstance(values, np.ndarray) and is_list_like(other):
-                    raise ValueError(
-                        'Invalid broadcasting comparison [{other!r}] with '
-                        'block values'.format(other=other))
-
-                raise TypeError('Could not compare [{other!r}] '
-                                'with block values'.format(other=other))
-
-        # transpose if needed
-        result = transf(result)
-
-        # try to cast if requested
-        if try_cast:
-            result = self._try_cast_result(result)
-
-        result = _block_shape(result, ndim=self.ndim)
-        return [self.make_block(result)]
-
     def where(self, other, cond, align=True, errors='raise',
               try_cast=False, axis=0, transpose=False, mgr=None):
         """

diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -373,9 +373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
                 align_keys = ['new', 'mask']
             else:
                 align_keys = ['mask']
-        elif f == 'eval':
-            align_copy = False
-            align_keys = ['other']
         elif f == 'fillna':
             # fillna internally does putmask, maybe it's better to do this
             # at mgr, not block level?
@@ -511,9 +508,6 @@ def isna(self, func, **kwargs):
     def where(self, **kwargs):
         return self.apply('where', **kwargs)
 
-    def eval(self, **kwargs):
-        return self.apply('eval', **kwargs)
-
     def quantile(self, **kwargs):
         return self.reduction('quantile', **kwargs)
 

diff --git a/pandas/core/ops.py b/pandas/core/ops.py
@@ -1934,6 +1934,9 @@ def _comp_method_FRAME(cls, func, special):
 
     @Appender('Wrapper for comparison method {name}'.format(name=op_name))
     def f(self, other):
+
+        other = _align_method_FRAME(self, other, axis=None)
+
         if isinstance(other, ABCDataFrame):
             # Another DataFrame
             if not self._indexed_same(other):

diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py
@@ -48,15 +48,20 @@ def test_mixed_comparison(self):
         assert result.all().all()
 
     def test_df_boolean_comparison_error(self):
-        # GH 4576
-        # boolean comparisons with a tuple/list give unexpected results
+        # GH#4576, GH#22880
+        # comparing DataFrame against list/tuple with len(obj) matching
+        #  len(df.columns) is supported as of GH#22880
         df = pd.DataFrame(np.arange(6).reshape((3, 2)))
 
-        # not shape compatible
-        with pytest.raises(ValueError):
-            df == (2, 2)
-        with pytest.raises(ValueError):
-            df == [2, 2]
+        expected = pd.DataFrame([[False, False],
+                                 [True, False],
+                                 [False, False]])
+
+        result = df == (2, 2)
+        tm.assert_frame_equal(result, expected)
+
+        result = df == [2, 2]
+        tm.assert_frame_equal(result, expected)
 
     def test_df_float_none_comparison(self):
         df = pd.DataFrame(np.random.randn(8, 3), index=range(8),

diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py
@@ -752,8 +752,9 @@ def test_comp(func):
             result = func(df1, df2)
             tm.assert_numpy_array_equal(result.values,
                                         func(df1.values, df2.values))
+
             with tm.assert_raises_regex(ValueError,
-                                        'Wrong number of dimensions'):
+                                        'dim must be <= 2'):
                 func(df1, ndim_5)
 
             result2 = func(self.simple, row)
@@ -804,42 +805,49 @@ def test_boolean_comparison(self):
         result = df.values > b
         assert_numpy_array_equal(result, expected.values)
 
-        result = df > l
-        assert_frame_equal(result, expected)
+        msg1d = 'Unable to coerce to Series, length must be 2: given 3'
+        msg2d = 'Unable to coerce to DataFrame, shape must be'
+        msg2db = 'operands could not be broadcast together with shapes'
+        with tm.assert_raises_regex(ValueError, msg1d):
+            # wrong shape
+            df > l
 
-        result = df > tup
-        assert_frame_equal(result, expected)
+        with tm.assert_raises_regex(ValueError, msg1d):
+            # wrong shape
+            result = df > tup
 
+        # broadcasts like ndarray (GH#23000)
         result = df > b_r
         assert_frame_equal(result, expected)
 
         result = df.values > b_r
         assert_numpy_array_equal(result, expected.values)
 
-        with pytest.raises(ValueError):
+        with tm.assert_raises_regex(ValueError, msg2d):
             df > b_c
 
-        with pytest.raises(ValueError):
+        with tm.assert_raises_regex(ValueError, msg2db):
             df.values > b_c
 
         # ==
         expected = DataFrame([[False, False], [True, False], [False, False]])
         result = df == b
         assert_frame_equal(result, expected)
 
-        result = df == l
-        assert_frame_equal(result, expected)
+        with tm.assert_raises_regex(ValueError, msg1d):
+            result = df == l
 
-        result = df == tup
-        assert_frame_equal(result, expected)
+        with tm.assert_raises_regex(ValueError, msg1d):
+            result = df == tup
 
+        # broadcasts like ndarray (GH#23000)
         result = df == b_r
         assert_frame_equal(result, expected)
 
         result = df.values == b_r
         assert_numpy_array_equal(result, expected.values)
 
-        with pytest.raises(ValueError):
+        with tm.assert_raises_regex(ValueError, msg2d):
             df == b_c
 
         assert df.values.shape != b_c.shape
@@ -850,11 +858,11 @@ def test_boolean_comparison(self):
         expected.index = df.index
         expected.columns = df.columns
 
-        result = df == l
-        assert_frame_equal(result, expected)
+        with tm.assert_raises_regex(ValueError, msg1d):
+            result = df == l
 
-        result = df == tup
-        assert_frame_equal(result, expected)
+        with tm.assert_raises_regex(ValueError, msg1d):
+            result = df == tup
 
     def test_combine_generic(self):
         df1 = self.frame