From b6e3ed99fcee1024250001d9e026af0847c427f6 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 28 Sep 2018 11:38:27 -0700 Subject: [PATCH 01/11] Use align_method in comp_method_FRAME --- pandas/core/frame.py | 9 ++------- pandas/core/ops.py | 3 +++ pandas/tests/frame/test_operators.py | 3 ++- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d5b273f37a3a2..33e3121a5abb0 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4923,13 +4923,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True): return self._constructor(new_data) def _combine_const(self, other, func, errors='raise', try_cast=True): - if lib.is_scalar(other) or np.ndim(other) == 0: - return ops.dispatch_to_series(self, other, func) - - new_data = self._data.eval(func=func, other=other, - errors=errors, - try_cast=try_cast) - return self._constructor(new_data) + assert lib.is_scalar(other) or np.ndim(other) == 0 + return ops.dispatch_to_series(self, other, func) def combine(self, other, func, fill_value=None, overwrite=True): """ diff --git a/pandas/core/ops.py b/pandas/core/ops.py index 70fe7de0a973e..a58d2ab034f55 100644 --- a/pandas/core/ops.py +++ b/pandas/core/ops.py @@ -1864,6 +1864,9 @@ def _comp_method_FRAME(cls, func, special): @Appender('Wrapper for comparison method {name}'.format(name=op_name)) def f(self, other): + + other = _align_method_FRAME(self, other, axis=None) + if isinstance(other, ABCDataFrame): # Another DataFrame if not self._indexed_same(other): diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 97c94e1134cc8..f001738be3294 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -731,8 +731,9 @@ def test_comp(func): result = func(df1, df2) tm.assert_numpy_array_equal(result.values, func(df1.values, df2.values)) + with tm.assert_raises_regex(ValueError, - 'Wrong number of dimensions'): + 'dim must 
be <= 2'): func(df1, ndim_5) result2 = func(self.simple, row) From aa9ca8fc27f4caeb4a3ea16525ff28644d091e08 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 3 Oct 2018 14:06:28 -0700 Subject: [PATCH 02/11] Remove no-longer-needed eval methods --- pandas/core/internals/blocks.py | 139 ------------------------------ pandas/core/internals/managers.py | 6 -- 2 files changed, 145 deletions(-) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 0e57dd33b1c4e..ff352bbd6e40e 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1313,145 +1313,6 @@ def shift(self, periods, axis=0, mgr=None): return [self.make_block(new_values)] - def eval(self, func, other, errors='raise', try_cast=False, mgr=None): - """ - evaluate the block; return result block from the result - - Parameters - ---------- - func : how to combine self, other - other : a ndarray/object - errors : str, {'raise', 'ignore'}, default 'raise' - - ``raise`` : allow exceptions to be raised - - ``ignore`` : suppress exceptions. 
On error return original object - - try_cast : try casting the results to the input type - - Returns - ------- - a new block, the result of the func - """ - orig_other = other - values = self.values - - other = getattr(other, 'values', other) - - # make sure that we can broadcast - is_transposed = False - if hasattr(other, 'ndim') and hasattr(values, 'ndim'): - if values.ndim != other.ndim: - is_transposed = True - else: - if values.shape == other.shape[::-1]: - is_transposed = True - elif values.shape[0] == other.shape[-1]: - is_transposed = True - else: - # this is a broadcast error heree - raise ValueError( - "cannot broadcast shape [{t_shape}] with " - "block values [{oth_shape}]".format( - t_shape=values.T.shape, oth_shape=other.shape)) - - transf = (lambda x: x.T) if is_transposed else (lambda x: x) - - # coerce/transpose the args if needed - try: - values, values_mask, other, other_mask = self._try_coerce_args( - transf(values), other) - except TypeError: - block = self.coerce_to_target_dtype(orig_other) - return block.eval(func, orig_other, - errors=errors, - try_cast=try_cast, mgr=mgr) - - # get the result, may need to transpose the other - def get_result(other): - - # avoid numpy warning of comparisons again None - if other is None: - result = not func.__name__ == 'eq' - - # avoid numpy warning of elementwise comparisons to object - elif is_numeric_v_string_like(values, other): - result = False - - # avoid numpy warning of elementwise comparisons - elif func.__name__ == 'eq': - if is_list_like(other) and not isinstance(other, np.ndarray): - other = np.asarray(other) - - # if we can broadcast, then ok - if values.shape[-1] != other.shape[-1]: - return False - result = func(values, other) - else: - result = func(values, other) - - # mask if needed - if isinstance(values_mask, np.ndarray) and values_mask.any(): - result = result.astype('float64', copy=False) - result[values_mask] = np.nan - if other_mask is True: - result = result.astype('float64', 
copy=False) - result[:] = np.nan - elif isinstance(other_mask, np.ndarray) and other_mask.any(): - result = result.astype('float64', copy=False) - result[other_mask.ravel()] = np.nan - - return result - - # error handler if we have an issue operating with the function - def handle_error(): - - if errors == 'raise': - # The 'detail' variable is defined in outer scope. - raise TypeError( - 'Could not operate {other!r} with block values ' - '{detail!s}'.format(other=other, detail=detail)) # noqa - else: - # return the values - result = np.empty(values.shape, dtype='O') - result.fill(np.nan) - return result - - # get the result - try: - with np.errstate(all='ignore'): - result = get_result(other) - - # if we have an invalid shape/broadcast error - # GH4576, so raise instead of allowing to pass through - except ValueError as detail: - raise - except Exception as detail: - result = handle_error() - - # technically a broadcast error in numpy can 'work' by returning a - # boolean False - if not isinstance(result, np.ndarray): - if not isinstance(result, np.ndarray): - - # differentiate between an invalid ndarray-ndarray comparison - # and an invalid type comparison - if isinstance(values, np.ndarray) and is_list_like(other): - raise ValueError( - 'Invalid broadcasting comparison [{other!r}] with ' - 'block values'.format(other=other)) - - raise TypeError('Could not compare [{other!r}] ' - 'with block values'.format(other=other)) - - # transpose if needed - result = transf(result) - - # try to cast if requested - if try_cast: - result = self._try_cast_result(result) - - result = _block_shape(result, ndim=self.ndim) - return [self.make_block(result)] - def where(self, other, cond, align=True, errors='raise', try_cast=False, axis=0, transpose=False, mgr=None): """ diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2f29f1ae2509f..1cbc09b4ca51a 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -373,9 
+373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False, align_keys = ['new', 'mask'] else: align_keys = ['mask'] - elif f == 'eval': - align_copy = False - align_keys = ['other'] elif f == 'fillna': # fillna internally does putmask, maybe it's better to do this # at mgr, not block level? @@ -511,9 +508,6 @@ def isna(self, func, **kwargs): def where(self, **kwargs): return self.apply('where', **kwargs) - def eval(self, **kwargs): - return self.apply('eval', **kwargs) - def quantile(self, **kwargs): return self.reduction('quantile', **kwargs) From 92513ca41dd8896f3d66357ed90fe1511afac463 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Wed, 3 Oct 2018 14:32:35 -0700 Subject: [PATCH 03/11] Update tests to reflect consistent behavior --- pandas/tests/frame/test_arithmetic.py | 16 +++++++----- pandas/tests/frame/test_operators.py | 35 +++++++++++++++------------ 2 files changed, 29 insertions(+), 22 deletions(-) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 2eb11c3a2e2f7..d43ff1655eaac 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -48,15 +48,19 @@ def test_mixed_comparison(self): assert result.all().all() def test_df_boolean_comparison_error(self): - # GH#4576 + # GH#4576, GH#22880 # boolean comparisons with a tuple/list give unexpected results df = pd.DataFrame(np.arange(6).reshape((3, 2))) - # not shape compatible - with pytest.raises(ValueError): - df == (2, 2) - with pytest.raises(ValueError): - df == [2, 2] + expected = pd.DataFrame([[False, False], + [True, False], + [False, False]]) + + result = df == (2, 2) + tm.assert_frame_equal(result, expected) + + result = df == [2, 2] + tm.assert_frame_equal(result, expected) def test_df_float_none_comparison(self): df = pd.DataFrame(np.random.randn(8, 3), index=range(8), diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index f001738be3294..410f24cab134a 
100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -784,14 +784,17 @@ def test_boolean_comparison(self): result = df.values > b assert_numpy_array_equal(result, expected.values) - result = df > l - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + # wrong shape + df > l - result = df > tup - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + # wrong shape + result = df > tup - result = df > b_r - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + # wrong shape + result = df > b_r result = df.values > b_r assert_numpy_array_equal(result, expected.values) @@ -807,14 +810,14 @@ def test_boolean_comparison(self): result = df == b assert_frame_equal(result, expected) - result = df == l - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + result = df == l - result = df == tup - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + result = df == tup - result = df == b_r - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + result = df == b_r result = df.values == b_r assert_numpy_array_equal(result, expected.values) @@ -830,11 +833,11 @@ def test_boolean_comparison(self): expected.index = df.index expected.columns = df.columns - result = df == l - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + result = df == l - result = df == tup - assert_frame_equal(result, expected) + with pytest.raises(ValueError): + result = df == tup def test_combine_generic(self): df1 = self.frame From 10903337ffa187c748d8ccc4630ca1f594720327 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Oct 2018 10:37:46 -0700 Subject: [PATCH 04/11] whatsnew note --- doc/source/whatsnew/v0.24.0.txt | 64 +++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index f246ebad3aa2c..b38e622dad907 100644 --- 
a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -487,6 +487,70 @@ Previous Behavior: 0 NaT +.. _whatsnew_0240.api.dataframe_cmp_broadcasting: + +DataFrame Comparison Operations Broadcasting Changes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Previously, the broadcasting behavior of :class:`DataFrame` comparison +operations (``==``, ``!=``, ...) was inconsistent with the behavior of +arithmetic operations (``+``, ``-``, ...). The behavior of the comparison +operations has been changed to match the arithmetic operations in these cases. + +The affected cases are: operating against a 2-dimensional ``np.ndarray`` with +either 1 row or 1 column, a list or tuple with the same length matching the +number of rows in the :class:`DataFrame`, and a list or tuple with the +length matching the number of columns in the :class:`DataFrame`. + +Previous Behavior: + +.. code-block:: ipython + + In [3]: arr = np.arange(6).reshape(3, 2) + In [4]: df = pd.DataFrame(arr) + + In [5]: df == arr[[0], :] # comparison used to broadcast where arithmetic would raise + Out[5]: + 0 1 + 0 True True + 1 False False + 2 False False + In [6]: df + arr[[0], :] + ... + ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) + + In [7]: df == (1, 2) # length matches number of columns; comparison used to raise where arithmetic would broadcast + ... + ValueError: Invalid broadcasting comparison [(1, 2)] with block values + In [8]: df + (1, 2) + Out[8]: + 0 1 + 0 1 3 + 1 3 5 + 2 5 7 + + In [9]: df == (1, 2, 3) # length matches number of rows; comparison used to broadcast where arithmetic would raise + Out[9]: + 0 1 + 0 False True + 1 True False + 2 False False + In [10]: df + (1, 2, 3) + ... + ValueError: Unable to coerce to Series, length must be 2: given 3 + +*Current Behavior*: + +.. 
ipython:: python + In [3]: arr = np.arange(6).reshape(3, 2) + In [4]: df = pd.DataFrame(arr) + In [5]: df == arr[[0], :] # raises just like the next arithmetic operation + In [6]: df + arr[[0], :] + In [7]: df == (1, 2) # broadcasts just like the next arithmetic operation + In [8]: df + (1, 2) + In [9]: df == (1, 2, 3) # raises just like the next arithmetic operation + In [10]: df + (1, 2, 3) + + .. _whatsnew_0240.api.extension: ExtensionType Changes From fb5b16ef35a78bbad56244dd202d1883b63fc042 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 5 Oct 2018 10:38:50 -0700 Subject: [PATCH 05/11] GH ref --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index b38e622dad907..fcca16688a3a8 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -495,6 +495,7 @@ Previously, the broadcasting behavior of :class:`DataFrame` comparison operations (``==``, ``!=``, ...) was inconsistent with the behavior of arithmetic operations (``+``, ``-``, ...). The behavior of the comparison operations has been changed to match the arithmetic operations in these cases. 
+(:issue:`22880`) The affected cases are: operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column, a list or tuple with the same length matching the From 73cda15e71c3e60774bc28f94551a88c7b7fb373 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Sun, 7 Oct 2018 17:14:55 -0700 Subject: [PATCH 06/11] Adjust tested behavior following GH#23000 --- doc/source/whatsnew/v0.24.0.txt | 1 - pandas/tests/frame/test_operators.py | 9 ++++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index e0614f09459cb..73896545a8542 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -567,7 +567,6 @@ Previous Behavior: In [3]: arr = np.arange(6).reshape(3, 2) In [4]: df = pd.DataFrame(arr) - In [5]: df + arr[[0], :] # 1 row, 2 columns ... ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 78ea130070b31..02195eb659c0f 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -813,9 +813,8 @@ def test_boolean_comparison(self): # wrong shape result = df > tup - with pytest.raises(ValueError): - # wrong shape - result = df > b_r + result = df > b_r # broadcasts like ndarray (GH#23000) + assert_frame_equal(result, expected) result = df.values > b_r assert_numpy_array_equal(result, expected.values) @@ -837,8 +836,8 @@ def test_boolean_comparison(self): with pytest.raises(ValueError): result = df == tup - with pytest.raises(ValueError): - result = df == b_r + result = df == b_r # broadcasts like ndarray (GH#23000) + assert_frame_equal(result, expected) result = df.values == b_r assert_numpy_array_equal(result, expected.values) From 87df9c4d3ada51e675b97a35429e7171c9d4f9c2 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Mon, 8 Oct 2018 15:55:06 -0700 Subject: [PATCH 07/11] Address Toms comments --- 
doc/source/whatsnew/v0.24.0.txt | 36 +++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 2b5af733edad3..528824ae1184b 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -497,10 +497,10 @@ arithmetic operations (``+``, ``-``, ...). The behavior of the comparison operations has been changed to match the arithmetic operations in these cases. (:issue:`22880`) -The affected cases are: operating against a 2-dimensional ``np.ndarray`` with -either 1 row or 1 column, a list or tuple with the same length matching the -number of rows in the :class:`DataFrame`, and a list or tuple with the -length matching the number of columns in the :class:`DataFrame`. +The affected cases are: + - operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column + - a list or tuple with the same length matching the number of rows in the :class:`DataFrame` + - a list or tuple with thlength matching the number of columns in the :class:`DataFrame`. Previous Behavior: @@ -509,7 +509,8 @@ Previous Behavior: In [3]: arr = np.arange(6).reshape(3, 2) In [4]: df = pd.DataFrame(arr) - In [5]: df == arr[[0], :] # comparison used to broadcast where arithmetic would raise + In [5]: df == arr[[0], :] + ...: # comparison previously broadcast where arithmetic would raise Out[5]: 0 1 0 True True @@ -519,7 +520,9 @@ Previous Behavior: ... ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2) - In [7]: df == (1, 2) # length matches number of columns; comparison used to raise where arithmetic would broadcast + In [7]: df == (1, 2) + ...: # length matches number of columns; + ...: # comparison previously raised where arithmetic would broadcast ... 
ValueError: Invalid broadcasting comparison [(1, 2)] with block values In [8]: df + (1, 2) @@ -529,7 +532,9 @@ Previous Behavior: 1 3 5 2 5 7 - In [9]: df == (1, 2, 3) # length matches number of rows; comparison used to broadcast where arithmetic would raise + In [9]: df == (1, 2, 3) + ...: # length matches number of rows + ...: # comparison previously broadcast where arithmetic would raise Out[9]: 0 1 0 False True @@ -542,15 +547,16 @@ Previous Behavior: *Current Behavior*: .. ipython:: python + :okexcept: - In [3]: arr = np.arange(6).reshape(3, 2) - In [4]: df = pd.DataFrame(arr) - In [5]: df == arr[[0], :] # raises just like the next arithmetic operation - In [6]: df + arr[[0], :] - In [7]: df == (1, 2) # broadcasts just like the next arithmetic operation - In [8]: df + (1, 2) - In [9]: df == (1, 2, 3) # raises just like the next arithmetic operation - In [10]: df + (1, 2, 3) + arr = np.arange(6).reshape(3, 2) + df = pd.DataFrame(arr) + df == arr[[0], :] # raises just like the next arithmetic operation + df + arr[[0], :] + df == (1, 2) # broadcasts just like the next arithmetic operation + df + (1, 2) + df == (1, 2, 3) # raises just like the next arithmetic operation + df + (1, 2, 3) .. _whatsnew_0240.api.dataframe_arithmetic_broadcasting: From b31633628f53f342c16dc618e0e0b3ee337a04f5 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Tue, 9 Oct 2018 08:06:37 -0700 Subject: [PATCH 08/11] Comment formatting --- doc/source/whatsnew/v0.24.0.txt | 21 ++++++++++++++++----- pandas/tests/frame/test_operators.py | 6 ++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 528824ae1184b..3edd91af7d9b6 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -499,8 +499,8 @@ operations has been changed to match the arithmetic operations in these cases. 
The affected cases are: - operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column - - a list or tuple with the same length matching the number of rows in the :class:`DataFrame` - - a list or tuple with thlength matching the number of columns in the :class:`DataFrame`. + - a list or tuple with length matching the number of rows in the :class:`DataFrame` + - a list or tuple with length matching the number of columns in the :class:`DataFrame`. Previous Behavior: @@ -551,11 +551,22 @@ Previous Behavior: arr = np.arange(6).reshape(3, 2) df = pd.DataFrame(arr) - df == arr[[0], :] # raises just like the next arithmetic operation + +.. ipython:: python + :okexcept: + # comparison and arithmetic both raise + df == arr[[0], :] df + arr[[0], :] - df == (1, 2) # broadcasts just like the next arithmetic operation + +.. ipython:: python + # comparison and arithmetic broadcast the same way + df == (1, 2) df + (1, 2) - df == (1, 2, 3) # raises just like the next arithmetic operation + +.. 
ipython:: python + :okexcept: + # comparison and arithmetic both raise + df == (1, 2, 3) df + (1, 2, 3) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 02195eb659c0f..7c3b12fb2ee43 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -813,7 +813,8 @@ def test_boolean_comparison(self): # wrong shape result = df > tup - result = df > b_r # broadcasts like ndarray (GH#23000) + # broadcasts like ndarray (GH#23000) + result = df > b_r assert_frame_equal(result, expected) result = df.values > b_r @@ -836,7 +837,8 @@ def test_boolean_comparison(self): with pytest.raises(ValueError): result = df == tup - result = df == b_r # broadcasts like ndarray (GH#23000) + # broadcasts like ndarray (GH#23000) + result = df == b_r assert_frame_equal(result, expected) result = df.values == b_r From 1d298577545a698b92d32996a278fede3d16666a Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 12 Oct 2018 12:11:56 -0700 Subject: [PATCH 09/11] update whatsnew --- doc/source/whatsnew/v0.24.0.txt | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index 3edd91af7d9b6..c473b8e77420e 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -498,9 +498,10 @@ operations has been changed to match the arithmetic operations in these cases. (:issue:`22880`) The affected cases are: - - operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column - - a list or tuple with length matching the number of rows in the :class:`DataFrame` - - a list or tuple with length matching the number of columns in the :class:`DataFrame`. + +- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`). 
+- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`). +- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`). Previous Behavior: @@ -553,8 +554,7 @@ Previous Behavior: df = pd.DataFrame(arr) .. ipython:: python - :okexcept: - # comparison and arithmetic both raise + # comparison and arithmetic both broadcast df == arr[[0], :] df + arr[[0], :] From 46a4e952ab205e4bbb7e0feeebcc7c6d00ef7218 Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 12 Oct 2018 15:37:07 -0700 Subject: [PATCH 10/11] fixup whatsnew, check specific exception messages --- doc/source/whatsnew/v0.24.0.txt | 6 +++--- pandas/tests/frame/test_arithmetic.py | 3 ++- pandas/tests/frame/test_operators.py | 23 ++++++++++++++--------- 3 files changed, 19 insertions(+), 13 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index bbb1a6e21560d..6532c9f7f8a96 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -577,18 +577,18 @@ Previous Behavior: df = pd.DataFrame(arr) .. ipython:: python - # comparison and arithmetic both broadcast + # Comparison operations and arithmetic operations both broadcast. df == arr[[0], :] df + arr[[0], :] .. ipython:: python - # comparison and arithmetic broadcast the same way + # Comparison operations and arithmetic operations both broadcast. df == (1, 2) df + (1, 2) .. ipython:: python :okexcept: - # comparison and arithmetic both raise + # Comparison operations and arithmetic operations both raise ValueError. 
df == (1, 2, 3) df + (1, 2, 3) diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 28958e46e1217..8156c5ea671c2 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -49,7 +49,8 @@ def test_mixed_comparison(self): def test_df_boolean_comparison_error(self): # GH#4576, GH#22880 - # boolean comparisons with a tuple/list give unexpected results + # comparing DataFrame against list/tuple with len(obj) matching + # len(df.columns) is supported as of GH#22880 df = pd.DataFrame(np.arange(6).reshape((3, 2))) expected = pd.DataFrame([[False, False], diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 7c3b12fb2ee43..409f604c20e50 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -805,11 +805,16 @@ def test_boolean_comparison(self): result = df.values > b assert_numpy_array_equal(result, expected.values) - with pytest.raises(ValueError): + msg1d = 'Unable to coerce to Series, length must be 2: given 3' + msg2d = (r'Unable to coerce to DataFrame, ' + r'shape must be \(3, 2\): given \(2, 1\)') + msg2db = (r"operands could not be broadcast together " + r"with shapes \(3,2\) \(2,1\)") + with tm.assert_raises_regex(ValueError, msg1d): # wrong shape df > l - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg1d): # wrong shape result = df > tup @@ -820,10 +825,10 @@ def test_boolean_comparison(self): result = df.values > b_r assert_numpy_array_equal(result, expected.values) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg2d): df > b_c - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg2db): df.values > b_c # == @@ -831,10 +836,10 @@ def test_boolean_comparison(self): result = df == b assert_frame_equal(result, expected) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg1d): result = df == l - 
with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg1d): result = df == tup # broadcasts like ndarray (GH#23000) @@ -844,7 +849,7 @@ def test_boolean_comparison(self): result = df.values == b_r assert_numpy_array_equal(result, expected.values) - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg2d): df == b_c assert df.values.shape != b_c.shape @@ -855,10 +860,10 @@ def test_boolean_comparison(self): expected.index = df.index expected.columns = df.columns - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg1d): result = df == l - with pytest.raises(ValueError): + with tm.assert_raises_regex(ValueError, msg1d): result = df == tup def test_combine_generic(self): From 826f2c741d1624f9cdc8d2052c0a9cb681bf374b Mon Sep 17 00:00:00 2001 From: Brock Mendel Date: Fri, 12 Oct 2018 17:07:22 -0700 Subject: [PATCH 11/11] check messages compat for 32bit windows --- pandas/tests/frame/test_operators.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 409f604c20e50..9c0ef259ab686 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -806,10 +806,8 @@ def test_boolean_comparison(self): assert_numpy_array_equal(result, expected.values) msg1d = 'Unable to coerce to Series, length must be 2: given 3' - msg2d = (r'Unable to coerce to DataFrame, ' - r'shape must be \(3, 2\): given \(2, 1\)') - msg2db = (r"operands could not be broadcast together " - r"with shapes \(3,2\) \(2,1\)") + msg2d = 'Unable to coerce to DataFrame, shape must be' + msg2db = 'operands could not be broadcast together with shapes' with tm.assert_raises_regex(ValueError, msg1d): # wrong shape df > l