diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 95eecba5b5ef6..5d7f45b92b75d 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -510,88 +510,6 @@ Previous Behavior: 0 0 NaT -.. _whatsnew_0240.api.dataframe_cmp_broadcasting: - -DataFrame Comparison Operations Broadcasting Changes -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Previously, the broadcasting behavior of :class:`DataFrame` comparison -operations (``==``, ``!=``, ...) was inconsistent with the behavior of -arithmetic operations (``+``, ``-``, ...). The behavior of the comparison -operations has been changed to match the arithmetic operations in these cases. -(:issue:`22880`) - -The affected cases are: - -- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`). -- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`. -- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`). - -Previous Behavior: - -.. code-block:: ipython - - In [3]: arr = np.arange(6).reshape(3, 2) - In [4]: df = pd.DataFrame(arr) - - In [5]: df == arr[[0], :] - ...: # comparison previously broadcast where arithmetic would raise - Out[5]: - 0 1 - 0 True True - 1 False False - 2 False False - In [6]: df + arr[[0], :] - ... - ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) - - In [7]: df == (1, 2) - ...: # length matches number of columns; - ...: # comparison previously raised where arithmetic would broadcast - ... 
- ValueError: Invalid broadcasting comparison [(1, 2)] with block values - In [8]: df + (1, 2) - Out[8]: - 0 1 - 0 1 3 - 1 3 5 - 2 5 7 - - In [9]: df == (1, 2, 3) - ...: # length matches number of rows - ...: # comparison previously broadcast where arithmetic would raise - Out[9]: - 0 1 - 0 False True - 1 True False - 2 False False - In [10]: df + (1, 2, 3) - ... - ValueError: Unable to coerce to Series, length must be 2: given 3 - -*Current Behavior*: - -.. ipython:: python - :okexcept: - - arr = np.arange(6).reshape(3, 2) - df = pd.DataFrame(arr) - -.. ipython:: python - # Comparison operations and arithmetic operations both broadcast. - df == arr[[0], :] - df + arr[[0], :] - -.. ipython:: python - # Comparison operations and arithmetic operations both broadcast. - df == (1, 2) - df + (1, 2) - -.. ipython:: python - :okexcept: - # Comparison operations and arithmetic opeartions both raise ValueError. - df == (1, 2, 3) - df + (1, 2, 3) - .. _whatsnew_0240.api.dataframe_arithmetic_broadcasting: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d612e96ec0db2..e9be7a3e9afb8 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4948,8 +4948,13 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): return ops.dispatch_to_series(left, right, func, axis="columns") def _combine_const(self, other, func, errors='raise', try_cast=True): - assert lib.is_scalar(other) or np.ndim(other) == 0 - return ops.dispatch_to_series(self, other, func) + if lib.is_scalar(other) or np.ndim(other) == 0: + return ops.dispatch_to_series(self, other, func) + + new_data = self._data.eval(func=func, other=other, + errors=errors, + try_cast=try_cast) + return self._constructor(new_data) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 290de0539db83..93930fd844b95 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ 
-1313,6 +1313,145 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block(new_values)] + def eval(self, func, other, errors='raise', try_cast=False, mgr=None): + """ + evaluate the block; return result block from the result + + Parameters + ---------- + func : how to combine self, other + other : a ndarray/object + errors : str, {'raise', 'ignore'}, default 'raise' + - ``raise`` : allow exceptions to be raised + - ``ignore`` : suppress exceptions. On error return original object + + try_cast : try casting the results to the input type + + Returns + ------- + a new block, the result of the func + """ + orig_other = other + values = self.values + + other = getattr(other, 'values', other) + + # make sure that we can broadcast + is_transposed = False + if hasattr(other, 'ndim') and hasattr(values, 'ndim'): + if values.ndim != other.ndim: + is_transposed = True + else: + if values.shape == other.shape[::-1]: + is_transposed = True + elif values.shape[0] == other.shape[-1]: + is_transposed = True + else: + # this is a broadcast error here + raise ValueError( + "cannot broadcast shape [{t_shape}] with " + "block values [{oth_shape}]".format( + t_shape=values.T.shape, oth_shape=other.shape)) + + transf = (lambda x: x.T) if is_transposed else (lambda x: x) + + # coerce/transpose the args if needed + try: + values, values_mask, other, other_mask = self._try_coerce_args( + transf(values), other) + except TypeError: + block = self.coerce_to_target_dtype(orig_other) + return block.eval(func, orig_other, + errors=errors, + try_cast=try_cast, mgr=mgr) + + # get the result, may need to transpose the other + def get_result(other): + + # avoid numpy warning of comparisons against None + if other is None: + result = not func.__name__ == 'eq' + + # avoid numpy warning of elementwise comparisons to object + elif is_numeric_v_string_like(values, other): + result = False + + # avoid numpy warning of elementwise comparisons + elif func.__name__ == 'eq': + if 
is_list_like(other) and not isinstance(other, np.ndarray): + other = np.asarray(other) + + # if we can broadcast, then ok + if values.shape[-1] != other.shape[-1]: + return False + result = func(values, other) + else: + result = func(values, other) + + # mask if needed + if isinstance(values_mask, np.ndarray) and values_mask.any(): + result = result.astype('float64', copy=False) + result[values_mask] = np.nan + if other_mask is True: + result = result.astype('float64', copy=False) + result[:] = np.nan + elif isinstance(other_mask, np.ndarray) and other_mask.any(): + result = result.astype('float64', copy=False) + result[other_mask.ravel()] = np.nan + + return result + + # error handler if we have an issue operating with the function + def handle_error(): + + if errors == 'raise': + # The 'detail' variable is defined in outer scope. + raise TypeError( + 'Could not operate {other!r} with block values ' + '{detail!s}'.format(other=other, detail=detail)) # noqa + else: + # return the values + result = np.empty(values.shape, dtype='O') + result.fill(np.nan) + return result + + # get the result + try: + with np.errstate(all='ignore'): + result = get_result(other) + + # if we have an invalid shape/broadcast error + # GH4576, so raise instead of allowing to pass through + except ValueError as detail: + raise + except Exception as detail: + result = handle_error() + + # technically a broadcast error in numpy can 'work' by returning a + # boolean False + if not isinstance(result, np.ndarray): + if not isinstance(result, np.ndarray): + + # differentiate between an invalid ndarray-ndarray comparison + # and an invalid type comparison + if isinstance(values, np.ndarray) and is_list_like(other): + raise ValueError( + 'Invalid broadcasting comparison [{other!r}] with ' + 'block values'.format(other=other)) + + raise TypeError('Could not compare [{other!r}] ' + 'with block values'.format(other=other)) + + # transpose if needed + result = transf(result) + + # try to cast if 
requested + if try_cast: + result = self._try_cast_result(result) + + result = _block_shape(result, ndim=self.ndim) + return [self.make_block(result)] + def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 1cbc09b4ca51a..2f29f1ae2509f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -373,6 +373,9 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, align_keys = ['new', 'mask'] else: align_keys = ['mask'] + elif f == 'eval': + align_copy = False + align_keys = ['other'] elif f == 'fillna': # fillna internally does putmask, maybe it's better to do this # at mgr, not block level? @@ -508,6 +511,9 @@ def isna(self, func, **kwargs): def where(self, **kwargs): return self.apply('where', **kwargs) + def eval(self, **kwargs): + return self.apply('eval', **kwargs) + def quantile(self, **kwargs): return self.reduction('quantile', **kwargs) diff --git a/pandas/core/ops.py b/pandas/core/ops.py index e894c763ebe03..20559bca9caed 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1934,9 +1934,6 @@ def _comp_method_FRAME(cls, func, special): @Appender('Wrapper for comparison method {name}'.format(name=op_name)) def f(self, other): - - other = _align_method_FRAME(self, other, axis=None) - if isinstance(other, ABCDataFrame): # Another DataFrame if not self._indexed_same(other): diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 8156c5ea671c2..d0eb7cd35b268 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -48,20 +48,15 @@ def test_mixed_comparison(self): assert result.all().all() def test_df_boolean_comparison_error(self): - # GH#4576, GH#22880 - # comparing DataFrame against list/tuple with len(obj) matching - # len(df.columns) is supported as of GH#22800 + # GH 4576 + # 
boolean comparisons with a tuple/list give unexpected results df = pd.DataFrame(np.arange(6).reshape((3, 2))) - expected = pd.DataFrame([[False, False], - [True, False], - [False, False]]) - - result = df == (2, 2) - tm.assert_frame_equal(result, expected) - - result = df == [2, 2] - tm.assert_frame_equal(result, expected) + # not shape compatible + with pytest.raises(ValueError): + df == (2, 2) + with pytest.raises(ValueError): + df == [2, 2] def test_df_float_none_comparison(self): df = pd.DataFrame(np.random.randn(8, 3), index=range(8), diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 9c0ef259ab686..433b0f09e13bc 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -752,9 +752,8 @@ def test_comp(func): result = func(df1, df2) tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) - with tm.assert_raises_regex(ValueError, - 'dim must be <= 2'): + 'Wrong number of dimensions'): func(df1, ndim_5) result2 = func(self.simple, row) @@ -805,28 +804,22 @@ def test_boolean_comparison(self): result = df.values > b assert_numpy_array_equal(result, expected.values) - msg1d = 'Unable to coerce to Series, length must be 2: given 3' - msg2d = 'Unable to coerce to DataFrame, shape must be' - msg2db = 'operands could not be broadcast together with shapes' - with tm.assert_raises_regex(ValueError, msg1d): - # wrong shape - df > l + result = df > l + assert_frame_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg1d): - # wrong shape - result = df > tup + result = df > tup + assert_frame_equal(result, expected) - # broadcasts like ndarray (GH#23000) result = df > b_r assert_frame_equal(result, expected) result = df.values > b_r assert_numpy_array_equal(result, expected.values) - with tm.assert_raises_regex(ValueError, msg2d): + with pytest.raises(ValueError): df > b_c - with tm.assert_raises_regex(ValueError, msg2db): + with pytest.raises(ValueError): 
df.values > b_c # == @@ -834,20 +827,19 @@ def test_boolean_comparison(self): result = df == b assert_frame_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg1d): - result = df == l + result = df == l + assert_frame_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg1d): - result = df == tup + result = df == tup + assert_frame_equal(result, expected) - # broadcasts like ndarray (GH#23000) result = df == b_r assert_frame_equal(result, expected) result = df.values == b_r assert_numpy_array_equal(result, expected.values) - with tm.assert_raises_regex(ValueError, msg2d): + with pytest.raises(ValueError): df == b_c assert df.values.shape != b_c.shape @@ -858,11 +850,11 @@ def test_boolean_comparison(self): expected.index = df.index expected.columns = df.columns - with tm.assert_raises_regex(ValueError, msg1d): - result = df == l + result = df == l + assert_frame_equal(result, expected) - with tm.assert_raises_regex(ValueError, msg1d): - result = df == tup + result = df == tup + assert_frame_equal(result, expected) def test_combine_generic(self): df1 = self.frame