diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 69ca0d7358066c..012a5051353974 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -511,6 +511,88 @@ Previous Behavior: 0 0 NaT +.. _whatsnew_0240.api.dataframe_cmp_broadcasting: + +DataFrame Comparison Operations Broadcasting Changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the broadcasting behavior of :class:`DataFrame` comparison +operations (``==``, ``!=``, ...) was inconsistent with the behavior of +arithmetic operations (``+``, ``-``, ...). The behavior of the comparison +operations has been changed to match the arithmetic operations in these cases. +(:issue:`22880`) + +The affected cases are: + +- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`). +- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`). +- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`). + +Previous Behavior: + +.. code-block:: ipython + + In [3]: arr = np.arange(6).reshape(3, 2) + In [4]: df = pd.DataFrame(arr) + + In [5]: df == arr[[0], :] + ...: # comparison previously broadcast where arithmetic would raise + Out[5]: + 0 1 + 0 True True + 1 False False + 2 False False + In [6]: df + arr[[0], :] + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + + In [7]: df == (1, 2) + ...: # length matches number of columns; + ...: # comparison previously raised where arithmetic would broadcast + ... 
+ ValueError: Invalid broadcasting comparison [(1, 2)] with block values + In [8]: df + (1, 2) + Out[8]: + 0 1 + 0 1 3 + 1 3 5 + 2 5 7 + + In [9]: df == (1, 2, 3) + ...: # length matches number of rows + ...: # comparison previously broadcast where arithmetic would raise + Out[9]: + 0 1 + 0 False True + 1 True False + 2 False False + In [10]: df + (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + +*Current Behavior*: + +.. ipython:: python + :okexcept: + + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + +.. ipython:: python + # Comparison operations and arithmetic operations both broadcast. + df == arr[[0], :] + df + arr[[0], :] + +.. ipython:: python + # Comparison operations and arithmetic operations both broadcast. + df == (1, 2) + df + (1, 2) + +.. ipython:: python + :okexcept: + # Comparison operations and arithmetic operations both raise ValueError. + df == (1, 2, 3) + df + (1, 2, 3) + .. _whatsnew_0240.api.dataframe_arithmetic_broadcasting: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index e9be7a3e9afb80..d612e96ec0db2f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4948,13 +4948,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): return ops.dispatch_to_series(left, right, func, axis="columns") def _combine_const(self, other, func, errors='raise', try_cast=True): - if lib.is_scalar(other) or np.ndim(other) == 0: - return ops.dispatch_to_series(self, other, func) - - new_data = self._data.eval(func=func, other=other, - errors=errors, - try_cast=try_cast) - return self._constructor(new_data) + assert lib.is_scalar(other) or np.ndim(other) == 0 + return ops.dispatch_to_series(self, other, func) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 93930fd844b950..290de0539db830 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py 
@@ -1313,145 +1313,6 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block(new_values)] - def eval(self, func, other, errors='raise', try_cast=False, mgr=None): - """ - evaluate the block; return result block from the result - - Parameters - ---------- - func : how to combine self, other - other : a ndarray/object - errors : str, {'raise', 'ignore'}, default 'raise' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. On error return original object - - try_cast : try casting the results to the input type - - Returns - ------- - a new block, the result of the func - """ - orig_other = other - values = self.values - - other = getattr(other, 'values', other) - - # make sure that we can broadcast - is_transposed = False - if hasattr(other, 'ndim') and hasattr(values, 'ndim'): - if values.ndim != other.ndim: - is_transposed = True - else: - if values.shape == other.shape[::-1]: - is_transposed = True - elif values.shape[0] == other.shape[-1]: - is_transposed = True - else: - # this is a broadcast error heree - raise ValueError( - "cannot broadcast shape [{t_shape}] with " - "block values [{oth_shape}]".format( - t_shape=values.T.shape, oth_shape=other.shape)) - - transf = (lambda x: x.T) if is_transposed else (lambda x: x) - - # coerce/transpose the args if needed - try: - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) - except TypeError: - block = self.coerce_to_target_dtype(orig_other) - return block.eval(func, orig_other, - errors=errors, - try_cast=try_cast, mgr=mgr) - - # get the result, may need to transpose the other - def get_result(other): - - # avoid numpy warning of comparisons again None - if other is None: - result = not func.__name__ == 'eq' - - # avoid numpy warning of elementwise comparisons to object - elif is_numeric_v_string_like(values, other): - result = False - - # avoid numpy warning of elementwise comparisons - elif func.__name__ == 'eq': - if 
is_list_like(other) and not isinstance(other, np.ndarray): - other = np.asarray(other) - - # if we can broadcast, then ok - if values.shape[-1] != other.shape[-1]: - return False - result = func(values, other) - else: - result = func(values, other) - - # mask if needed - if isinstance(values_mask, np.ndarray) and values_mask.any(): - result = result.astype('float64', copy=False) - result[values_mask] = np.nan - if other_mask is True: - result = result.astype('float64', copy=False) - result[:] = np.nan - elif isinstance(other_mask, np.ndarray) and other_mask.any(): - result = result.astype('float64', copy=False) - result[other_mask.ravel()] = np.nan - - return result - - # error handler if we have an issue operating with the function - def handle_error(): - - if errors == 'raise': - # The 'detail' variable is defined in outer scope. - raise TypeError( - 'Could not operate {other!r} with block values ' - '{detail!s}'.format(other=other, detail=detail)) # noqa - else: - # return the values - result = np.empty(values.shape, dtype='O') - result.fill(np.nan) - return result - - # get the result - try: - with np.errstate(all='ignore'): - result = get_result(other) - - # if we have an invalid shape/broadcast error - # GH4576, so raise instead of allowing to pass through - except ValueError as detail: - raise - except Exception as detail: - result = handle_error() - - # technically a broadcast error in numpy can 'work' by returning a - # boolean False - if not isinstance(result, np.ndarray): - if not isinstance(result, np.ndarray): - - # differentiate between an invalid ndarray-ndarray comparison - # and an invalid type comparison - if isinstance(values, np.ndarray) and is_list_like(other): - raise ValueError( - 'Invalid broadcasting comparison [{other!r}] with ' - 'block values'.format(other=other)) - - raise TypeError('Could not compare [{other!r}] ' - 'with block values'.format(other=other)) - - # transpose if needed - result = transf(result) - - # try to cast if 
requested - if try_cast: - result = self._try_cast_result(result) - - result = _block_shape(result, ndim=self.ndim) - return [self.make_block(result)] - def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2f29f1ae2509fc..1cbc09b4ca51a1 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -373,9 +373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, align_keys = ['new', 'mask'] else: align_keys = ['mask'] - elif f == 'eval': - align_copy = False - align_keys = ['other'] elif f == 'fillna': # fillna internally does putmask, maybe it's better to do this # at mgr, not block level? @@ -511,9 +508,6 @@ def isna(self, func, **kwargs): def where(self, **kwargs): return self.apply('where', **kwargs) - def eval(self, **kwargs): - return self.apply('eval', **kwargs) - def quantile(self, **kwargs): return self.reduction('quantile', **kwargs) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 20559bca9caedf..e894c763ebe03c 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1934,6 +1934,9 @@ def _comp_method_FRAME(cls, func, special): @Appender('Wrapper for comparison method {name}'.format(name=op_name)) def f(self, other): + + other = _align_method_FRAME(self, other, axis=None) + if isinstance(other, ABCDataFrame): # Another DataFrame if not self._indexed_same(other): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index d0eb7cd35b2682..8156c5ea671c2b 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -48,15 +48,20 @@ def test_mixed_comparison(self): assert result.all().all() def test_df_boolean_comparison_error(self): - # GH 4576 - # boolean comparisons with a tuple/list give unexpected results + # GH#4576, GH#22880 + # comparing DataFrame against list/tuple with 
len(obj) matching + # len(df.columns) is supported as of GH#22880 df = pd.DataFrame(np.arange(6).reshape((3, 2))) - # not shape compatible - with pytest.raises(ValueError): - df == (2, 2) - with pytest.raises(ValueError): - df == [2, 2] + expected = pd.DataFrame([[False, False], + [True, False], + [False, False]]) + + result = df == (2, 2) + tm.assert_frame_equal(result, expected) + + result = df == [2, 2] + tm.assert_frame_equal(result, expected) def test_df_float_none_comparison(self): df = pd.DataFrame(np.random.randn(8, 3), index=range(8), diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 433b0f09e13bc1..9c0ef259ab6868 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -752,8 +752,9 @@ def test_comp(func): result = func(df1, df2) tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) + with tm.assert_raises_regex(ValueError, - 'Wrong number of dimensions'): + 'dim must be <= 2'): func(df1, ndim_5) result2 = func(self.simple, row) @@ -804,22 +805,28 @@ def test_boolean_comparison(self): result = df.values > b assert_numpy_array_equal(result, expected.values) - result = df > l - assert_frame_equal(result, expected) + msg1d = 'Unable to coerce to Series, length must be 2: given 3' + msg2d = 'Unable to coerce to DataFrame, shape must be' + msg2db = 'operands could not be broadcast together with shapes' + with tm.assert_raises_regex(ValueError, msg1d): + # wrong shape + df > l - result = df > tup - assert_frame_equal(result, expected) + with tm.assert_raises_regex(ValueError, msg1d): + # wrong shape + result = df > tup + # broadcasts like ndarray (GH#23000) result = df > b_r assert_frame_equal(result, expected) result = df.values > b_r assert_numpy_array_equal(result, expected.values) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg2d): df > b_c - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg2db): 
df.values > b_c # == @@ -827,19 +834,20 @@ def test_boolean_comparison(self): result = df == b assert_frame_equal(result, expected) - result = df == l - assert_frame_equal(result, expected) + with tm.assert_raises_regex(ValueError, msg1d): + result = df == l - result = df == tup - assert_frame_equal(result, expected) + with tm.assert_raises_regex(ValueError, msg1d): + result = df == tup + # broadcasts like ndarray (GH#23000) result = df == b_r assert_frame_equal(result, expected) result = df.values == b_r assert_numpy_array_equal(result, expected.values) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg2d): df == b_c assert df.values.shape != b_c.shape @@ -850,11 +858,11 @@ def test_boolean_comparison(self): expected.index = df.index expected.columns = df.columns - result = df == l - assert_frame_equal(result, expected) + with tm.assert_raises_regex(ValueError, msg1d): + result = df == l - result = df == tup - assert_frame_equal(result, expected) + with tm.assert_raises_regex(ValueError, msg1d): + result = df == tup def test_combine_generic(self): df1 = self.frame