Skip to content

Commit

Permalink
Use align_method in comp_method_FRAME (pandas-dev#23132)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel authored and tm9k1 committed Nov 19, 2018
1 parent 08f9427 commit b2c6217
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 175 deletions.
82 changes: 82 additions & 0 deletions doc/source/whatsnew/v0.24.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,88 @@ Previous Behavior:
0
0 NaT

.. _whatsnew_0240.api.dataframe_cmp_broadcasting:

DataFrame Comparison Operations Broadcasting Changes
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Previously, the broadcasting behavior of :class:`DataFrame` comparison
operations (``==``, ``!=``, ...) was inconsistent with the behavior of
arithmetic operations (``+``, ``-``, ...). The behavior of the comparison
operations has been changed to match the arithmetic operations in these cases.
(:issue:`22880`)

The affected cases are:

- operating against a 2-dimensional ``np.ndarray`` with either 1 row or 1 column will now broadcast the same way a ``np.ndarray`` would (:issue:`23000`).
- a list or tuple with length matching the number of rows in the :class:`DataFrame` will now raise ``ValueError`` instead of operating column-by-column (:issue:`22880`).
- a list or tuple with length matching the number of columns in the :class:`DataFrame` will now operate row-by-row instead of raising ``ValueError`` (:issue:`22880`).

*Previous Behavior*:

.. code-block:: ipython

In [3]: arr = np.arange(6).reshape(3, 2)
In [4]: df = pd.DataFrame(arr)

In [5]: df == arr[[0], :]
...: # comparison previously broadcast where arithmetic would raise
Out[5]:
0 1
0 True True
1 False False
2 False False
In [6]: df + arr[[0], :]
...
ValueError: Unable to coerce to DataFrame, shape must be (3, 2): given (1, 2)

In [7]: df == (1, 2)
...: # length matches number of columns;
...: # comparison previously raised where arithmetic would broadcast
...
ValueError: Invalid broadcasting comparison [(1, 2)] with block values
In [8]: df + (1, 2)
Out[8]:
0 1
0 1 3
1 3 5
2 5 7

In [9]: df == (1, 2, 3)
...: # length matches number of rows
...: # comparison previously broadcast where arithmetic would raise
Out[9]:
0 1
0 False True
1 True False
2 False False
In [10]: df + (1, 2, 3)
...
ValueError: Unable to coerce to Series, length must be 2: given 3

*Current Behavior*:

.. ipython:: python
:okexcept:

arr = np.arange(6).reshape(3, 2)
df = pd.DataFrame(arr)

.. ipython:: python
# Comparison operations and arithmetic operations both broadcast.
df == arr[[0], :]
df + arr[[0], :]

.. ipython:: python
# Comparison operations and arithmetic operations both broadcast.
df == (1, 2)
df + (1, 2)

.. ipython:: python
:okexcept:
# Comparison operations and arithmetic operations both raise ValueError.
df == (1, 2, 3)
df + (1, 2, 3)


.. _whatsnew_0240.api.dataframe_arithmetic_broadcasting:

Expand Down
9 changes: 2 additions & 7 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4979,13 +4979,8 @@ def _combine_match_columns(self, other, func, level=None, try_cast=True):
return ops.dispatch_to_series(left, right, func, axis="columns")

def _combine_const(self, other, func, errors='raise', try_cast=True):
if lib.is_scalar(other) or np.ndim(other) == 0:
return ops.dispatch_to_series(self, other, func)

new_data = self._data.eval(func=func, other=other,
errors=errors,
try_cast=try_cast)
return self._constructor(new_data)
assert lib.is_scalar(other) or np.ndim(other) == 0
return ops.dispatch_to_series(self, other, func)

def combine(self, other, func, fill_value=None, overwrite=True):
"""
Expand Down
139 changes: 0 additions & 139 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1318,145 +1318,6 @@ def shift(self, periods, axis=0, mgr=None):

return [self.make_block(new_values)]

def eval(self, func, other, errors='raise', try_cast=False, mgr=None):
"""
evaluate the block; return result block from the result
Parameters
----------
func : how to combine self, other
other : a ndarray/object
errors : str, {'raise', 'ignore'}, default 'raise'
- ``raise`` : allow exceptions to be raised
- ``ignore`` : suppress exceptions. On error return original object
try_cast : try casting the results to the input type
Returns
-------
a new block, the result of the func
"""
# NOTE(review): two details of the code below differ from the docstring:
#   * with errors='ignore', a failing ``func`` produces an object-dtype
#     array filled with NaN (see handle_error), not the original values;
#   * the return value is a one-element list wrapping the new block.
# ValueError raised while applying ``func`` is always re-raised, whatever
# ``errors`` is.  ``mgr`` is only forwarded to the recursive retry call.

# keep the caller's object for the dtype-coercion retry further down
orig_other = other
values = self.values

# unwrap objects exposing ``.values``; anything else is used as-is
other = getattr(other, 'values', other)

# make sure that we can broadcast
# (the shape checks only run when both operands expose ``ndim``)
is_transposed = False
if hasattr(other, 'ndim') and hasattr(values, 'ndim'):
if values.ndim != other.ndim:
is_transposed = True
else:
if values.shape == other.shape[::-1]:
is_transposed = True
elif values.shape[0] == other.shape[-1]:
is_transposed = True
else:
# this is a broadcast error here
raise ValueError(
"cannot broadcast shape [{t_shape}] with "
"block values [{oth_shape}]".format(
t_shape=values.T.shape, oth_shape=other.shape))

# the same callable transposes the input here and the result at the end
transf = (lambda x: x.T) if is_transposed else (lambda x: x)

# coerce/transpose the args if needed
try:
values, values_mask, other, other_mask = self._try_coerce_args(
transf(values), other)
except TypeError:
# this block cannot hold ``other``: coerce the block to a dtype that
# can, then retry on the coerced block with the original ``other``
block = self.coerce_to_target_dtype(orig_other)
return block.eval(func, orig_other,
errors=errors,
try_cast=try_cast, mgr=mgr)

# get the result, may need to transpose the other
def get_result(other):

# avoid numpy warning of comparisons against None
# (scalar result: False when func is 'eq', True for any other func)
if other is None:
result = not func.__name__ == 'eq'

# avoid numpy warning of elementwise comparisons to object
elif is_numeric_v_string_like(values, other):
result = False

# avoid numpy warning of elementwise comparisons
elif func.__name__ == 'eq':
if is_list_like(other) and not isinstance(other, np.ndarray):
other = np.asarray(other)

# if we can broadcast, then ok
# (otherwise return a bare False, which the caller detects below
# as a non-ndarray result)
if values.shape[-1] != other.shape[-1]:
return False
result = func(values, other)
else:
result = func(values, other)

# mask if needed
# (masked positions become NaN, so the result is cast to float64 first)
if isinstance(values_mask, np.ndarray) and values_mask.any():
result = result.astype('float64', copy=False)
result[values_mask] = np.nan
if other_mask is True:
result = result.astype('float64', copy=False)
result[:] = np.nan
elif isinstance(other_mask, np.ndarray) and other_mask.any():
result = result.astype('float64', copy=False)
result[other_mask.ravel()] = np.nan

return result

# error handler if we have an issue operating with the function
def handle_error():

if errors == 'raise':
# The 'detail' variable is defined in outer scope.
# (it is the exception bound by the ``except ... as detail`` clause
# from which this helper is called)
raise TypeError(
'Could not operate {other!r} with block values '
'{detail!s}'.format(other=other, detail=detail)) # noqa
else:
# return the values
# (an object-dtype array of NaN with the same shape as ``values``)
result = np.empty(values.shape, dtype='O')
result.fill(np.nan)
return result

# get the result
try:
with np.errstate(all='ignore'):
result = get_result(other)

# if we have an invalid shape/broadcast error
# GH4576, so raise instead of allowing to pass through
except ValueError as detail:
raise
except Exception as detail:
result = handle_error()

# technically a broadcast error in numpy can 'work' by returning a
# boolean False
# NOTE(review): the same isinstance test appears twice in a row below;
# the second occurrence is redundant.
if not isinstance(result, np.ndarray):
if not isinstance(result, np.ndarray):

# differentiate between an invalid ndarray-ndarray comparison
# and an invalid type comparison
if isinstance(values, np.ndarray) and is_list_like(other):
raise ValueError(
'Invalid broadcasting comparison [{other!r}] with '
'block values'.format(other=other))

raise TypeError('Could not compare [{other!r}] '
'with block values'.format(other=other))

# transpose if needed
result = transf(result)

# try to cast if requested
if try_cast:
result = self._try_cast_result(result)

result = _block_shape(result, ndim=self.ndim)
return [self.make_block(result)]

def where(self, other, cond, align=True, errors='raise',
try_cast=False, axis=0, transpose=False, mgr=None):
"""
Expand Down
6 changes: 0 additions & 6 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,9 +373,6 @@ def apply(self, f, axes=None, filter=None, do_integrity_check=False,
align_keys = ['new', 'mask']
else:
align_keys = ['mask']
elif f == 'eval':
align_copy = False
align_keys = ['other']
elif f == 'fillna':
# fillna internally does putmask, maybe it's better to do this
# at mgr, not block level?
Expand Down Expand Up @@ -511,9 +508,6 @@ def isna(self, func, **kwargs):
def where(self, **kwargs):
return self.apply('where', **kwargs)

def eval(self, **kwargs):
return self.apply('eval', **kwargs)

def quantile(self, **kwargs):
return self.reduction('quantile', **kwargs)

Expand Down
3 changes: 3 additions & 0 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1923,6 +1923,9 @@ def _comp_method_FRAME(cls, func, special):

@Appender('Wrapper for comparison method {name}'.format(name=op_name))
def f(self, other):

other = _align_method_FRAME(self, other, axis=None)

if isinstance(other, ABCDataFrame):
# Another DataFrame
if not self._indexed_same(other):
Expand Down
19 changes: 12 additions & 7 deletions pandas/tests/frame/test_arithmetic.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,20 @@ def test_mixed_comparison(self):
assert result.all().all()

def test_df_boolean_comparison_error(self):
# GH 4576
# boolean comparisons with a tuple/list give unexpected results
# GH#4576, GH#22880
# comparing DataFrame against list/tuple with len(obj) matching
# len(df.columns) is supported as of GH#22880
df = pd.DataFrame(np.arange(6).reshape((3, 2)))

# not shape compatible
with pytest.raises(ValueError):
df == (2, 2)
with pytest.raises(ValueError):
df == [2, 2]
expected = pd.DataFrame([[False, False],
[True, False],
[False, False]])

result = df == (2, 2)
tm.assert_frame_equal(result, expected)

result = df == [2, 2]
tm.assert_frame_equal(result, expected)

def test_df_float_none_comparison(self):
df = pd.DataFrame(np.random.randn(8, 3), index=range(8),
Expand Down
40 changes: 24 additions & 16 deletions pandas/tests/frame/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -752,8 +752,9 @@ def test_comp(func):
result = func(df1, df2)
tm.assert_numpy_array_equal(result.values,
func(df1.values, df2.values))

with tm.assert_raises_regex(ValueError,
'Wrong number of dimensions'):
'dim must be <= 2'):
func(df1, ndim_5)

result2 = func(self.simple, row)
Expand Down Expand Up @@ -804,42 +805,49 @@ def test_boolean_comparison(self):
result = df.values > b
assert_numpy_array_equal(result, expected.values)

result = df > lst
assert_frame_equal(result, expected)
msg1d = 'Unable to coerce to Series, length must be 2: given 3'
msg2d = 'Unable to coerce to DataFrame, shape must be'
msg2db = 'operands could not be broadcast together with shapes'
with tm.assert_raises_regex(ValueError, msg1d):
# wrong shape
df > lst

result = df > tup
assert_frame_equal(result, expected)
with tm.assert_raises_regex(ValueError, msg1d):
# wrong shape
result = df > tup

# broadcasts like ndarray (GH#23000)
result = df > b_r
assert_frame_equal(result, expected)

result = df.values > b_r
assert_numpy_array_equal(result, expected.values)

with pytest.raises(ValueError):
with tm.assert_raises_regex(ValueError, msg2d):
df > b_c

with pytest.raises(ValueError):
with tm.assert_raises_regex(ValueError, msg2db):
df.values > b_c

# ==
expected = DataFrame([[False, False], [True, False], [False, False]])
result = df == b
assert_frame_equal(result, expected)

result = df == lst
assert_frame_equal(result, expected)
with tm.assert_raises_regex(ValueError, msg1d):
result = df == lst

result = df == tup
assert_frame_equal(result, expected)
with tm.assert_raises_regex(ValueError, msg1d):
result = df == tup

# broadcasts like ndarray (GH#23000)
result = df == b_r
assert_frame_equal(result, expected)

result = df.values == b_r
assert_numpy_array_equal(result, expected.values)

with pytest.raises(ValueError):
with tm.assert_raises_regex(ValueError, msg2d):
df == b_c

assert df.values.shape != b_c.shape
Expand All @@ -850,11 +858,11 @@ def test_boolean_comparison(self):
expected.index = df.index
expected.columns = df.columns

result = df == lst
assert_frame_equal(result, expected)
with tm.assert_raises_regex(ValueError, msg1d):
result = df == lst

result = df == tup
assert_frame_equal(result, expected)
with tm.assert_raises_regex(ValueError, msg1d):
result = df == tup

def test_combine_generic(self):
df1 = self.frame
Expand Down

0 comments on commit b2c6217

Please sign in to comment.