Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify bool ops closures, improve tests #19795

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.23.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -651,6 +651,7 @@ Other API Changes
- :func:`Series.to_csv` now accepts a ``compression`` argument that works in the same way as the ``compression`` argument in :func:`DataFrame.to_csv` (:issue:`18958`)
- Set operations (union, difference...) on :class:`IntervalIndex` with incompatible index types will now raise a ``TypeError`` rather than a ``ValueError`` (:issue:`19329`)
- :class:`DateOffset` objects render more simply, e.g. ``<DateOffset: days=1>`` instead of ``<DateOffset: kwds={'days': 1}>`` (:issue:`19403`)
- Boolean operations ``&, |, ^`` between a ``Series`` and an ``Index`` will no longer raise ``TypeError`` (:issue:`19792`)
- ``Categorical.fillna`` now validates its ``value`` and ``method`` keyword arguments. It now raises when both or none are specified, matching the behavior of :meth:`Series.fillna` (:issue:`19682`)
- :func:`Series.str.replace` now takes an optional `regex` keyword which, when set to ``False``, uses literal string replacement rather than regex replacement (:issue:`16808`)

Expand Down
58 changes: 32 additions & 26 deletions pandas/core/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,23 +1241,22 @@ def _bool_method_SERIES(cls, op, special):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
name = _get_op_name(op, special)

def na_op(x, y):
try:
result = op(x, y)
except TypeError:
if isinstance(y, list):
y = construct_1d_object_array_from_listlike(y)

if isinstance(y, (np.ndarray, ABCSeries)):
if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)):
result = op(x, y) # when would this be hit?
else:
x = _ensure_object(x)
y = _ensure_object(y)
result = libops.vec_binop(x, y, op)
assert not isinstance(y, (list, ABCSeries, pd.Index))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add tuple here, or assert is_list_like and not np.ndarray

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe just remove the assertion and make the else into an is_scalar and make an else that raises an AssertionError

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As mentioned above, I’m not inclined to change any behavior until the docs/tests are fleshed out.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then let's do that first (in this PR or a pre-cursor)

if isinstance(y, np.ndarray):
# bool-bool dtype operations should be OK, should not get here
assert not (is_bool_dtype(x) and is_bool_dtype(y))
x = _ensure_object(x)
y = _ensure_object(y)
result = libops.vec_binop(x, y, op)
else:
# let null fall thru
assert is_scalar(y)
if not isna(y):
y = bool(y)
try:
Expand All @@ -1278,32 +1277,39 @@ def wrapper(self, other):

self, other = _align_method_SERIES(self, other, align_asobject=True)

res_name = get_op_result_name(self, other)

if isinstance(other, ABCDataFrame):
# Defer to DataFrame implementation; fail early
return NotImplemented

elif isinstance(other, ABCSeries):
name = get_op_result_name(self, other)
elif isinstance(other, (ABCSeries, pd.Index)):
is_other_int_dtype = is_integer_dtype(other.dtype)
other = fill_int(other) if is_other_int_dtype else fill_bool(other)

filler = (fill_int if is_self_int_dtype and is_other_int_dtype
else fill_bool)

res_values = na_op(self.values, other.values)
unfilled = self._constructor(res_values,
index=self.index, name=name)
return filler(unfilled)
ovalues = other.values

else:
# scalars, list, tuple, np.array
filler = (fill_int if is_self_int_dtype and
is_integer_dtype(np.asarray(other)) else fill_bool)

res_values = na_op(self.values, other)
unfilled = self._constructor(res_values, index=self.index)
return filler(unfilled).__finalize__(self)
is_other_int_dtype = is_integer_dtype(np.asarray(other))
if is_list_like(other) and not isinstance(other, np.ndarray):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

then thismust be a scalar, put this in another elsif rather than trying to catch the list-like case (ndarray, list, tuple) AND scalar case here

# TODO: Can we do this before the is_integer_dtype check?
# could the is_integer_dtype check be checking the wrong
# thing? e.g. other = [[0, 1], [2, 3], [4, 5]]?
other = construct_1d_object_array_from_listlike(other)
ovalues = other

filler = (fill_int if is_self_int_dtype and is_other_int_dtype
else fill_bool)

res_values = na_op(self.values, ovalues)
unfilled = self._constructor(res_values, index=self.index,
name=res_name)
result = filler(unfilled)
if not isinstance(other, ABCSeries):
result = result.__finalize__(self)
return result

wrapper.__name__ = name
return wrapper


Expand Down
192 changes: 117 additions & 75 deletions pandas/tests/series/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,46 +178,47 @@ def test_comparison_tuples(self):
expected = Series([True, False])
assert_series_equal(result, expected)

def test_comparison_operators_with_nas(self):
@pytest.mark.parametrize('op', ['lt', 'le', 'gt', 'ge', 'eq', 'ne'])
def test_comparison_operators_with_nas(self, op):
ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
ser[::2] = np.nan

# test that comparisons work
ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne']
for op in ops:
val = ser[5]
val = ser[5]

f = getattr(operator, op)
result = f(ser, val)
f = getattr(operator, op)
result = f(ser, val)

expected = f(ser.dropna(), val).reindex(ser.index)
expected = f(ser.dropna(), val).reindex(ser.index)

if op == 'ne':
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)
if op == 'ne':
expected = expected.fillna(True).astype(bool)
else:
expected = expected.fillna(False).astype(bool)

assert_series_equal(result, expected)
assert_series_equal(result, expected)

# fffffffuuuuuuuuuuuu
# result = f(val, s)
# expected = f(val, s.dropna()).reindex(s.index)
# assert_series_equal(result, expected)
# fffffffuuuuuuuuuuuu
# result = f(val, s)
# expected = f(val, s.dropna()).reindex(s.index)
# assert_series_equal(result, expected)

# boolean &, |, ^ should work with object arrays and propagate NAs
@pytest.mark.parametrize('bool_op', ['and_', 'or_', 'xor'])
def test_bool_operators_with_nas(self, bool_op):
# boolean &, |, ^ should work with object arrays and propagate NAs

ops = ['and_', 'or_', 'xor']
mask = ser.isna()
for bool_op in ops:
func = getattr(operator, bool_op)
ser = Series(bdate_range('1/1/2000', periods=10), dtype=object)
ser[::2] = np.nan

filled = ser.fillna(ser[0])
func = getattr(operator, bool_op)
result = func(ser < ser[9], ser > ser[3])

result = func(ser < ser[9], ser > ser[3])
mask = ser.isna()
filled = ser.fillna(ser[0])
expected = func(filled < filled[9], filled > filled[3])
expected[mask] = False

expected = func(filled < filled[9], filled > filled[3])
expected[mask] = False
assert_series_equal(result, expected)
assert_series_equal(result, expected)

def test_comparison_object_numeric_nas(self):
ser = Series(np.random.randn(10), dtype=object)
Expand Down Expand Up @@ -371,10 +372,10 @@ def test_comparison_different_length(self):
b = Series([2, 3, 4])
pytest.raises(ValueError, a.__eq__, b)

def test_comparison_label_based(self):
def test_bool_opslabel_based(self):

# GH 4947
# comparisons should be label based
# boolean ops should be label based

a = Series([True, False, True], list('bca'))
b = Series([False, True, False], list('abc'))
Expand Down Expand Up @@ -1398,45 +1399,73 @@ def test_ops_datetimelike_align(self):

def test_operators_bitwise(self):
# GH 9016: support bitwise op for integer types
index = list('bca')
s_0123 = Series(range(4), dtype='int64')
s_1111 = Series([1] * 4, dtype='int8')

s_tft = Series([True, False, True], index=index)
s_fff = Series([False, False, False], index=index)
s_tff = Series([True, False, False], index=index)
s_empty = Series([])
# We cannot wrap s111.astype(np.int32) with pd.Index because it
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you exercise both lhs and rhs are scalars (with a Series/Index) (True, False) and also None/np.nan

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i c you might have these cases below, if you do make add _scalars in the test name (and see if you can fully exercise them)

# will case to int64
res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
expected = Series([1, 1, 3, 3], dtype='int32')
assert_series_equal(res, expected)

# TODO: unused
# s_0101 = Series([0, 1, 0, 1])
def test_operators_bitwise_invalid(self):
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype='int64')
s_1111 = Series([1] * 4, dtype='int8')

with pytest.raises(TypeError):
s_1111 & 'a'
with pytest.raises(TypeError):
s_1111 & ['a', 'b', 'c', 'd']
with pytest.raises(TypeError):
s_0123 & 3.14
with pytest.raises(TypeError):
s_0123 & [0.1, 4, 3.14, 2]

def test_operators_bitwise_int_series_with_float_series(self):
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype='int64')
s_ftft = Series([False, True, False, True])
result = s_0123 & Series([0.1, 4, -3.14, 2])
assert_series_equal(result, s_ftft)

@pytest.mark.parametrize('box', [np.array, pd.Index, pd.Series])
def test_operators_bitwise_with_int_arraylike(self, box):
# GH#9016: support bitwise op for integer types
# GH#19795 allow for operating with Index
s_0123 = Series(range(4), dtype='int64')
s_3333 = Series([3] * 4)
s_4444 = Series([4] * 4)

res = s_tft & s_empty
expected = s_fff
res = s_0123 & box(s_3333)
expected = Series(range(4), dtype='int64')
assert_series_equal(res, expected)

res = s_tft | s_empty
expected = s_tft
res = s_0123 | box(s_4444)
expected = Series(range(4, 8), dtype='int64')
assert_series_equal(res, expected)

res = s_0123 & s_3333
expected = Series(range(4), dtype='int64')
s_1111 = Series([1] * 4, dtype='int8')
res = s_0123 & box(s_1111)
expected = Series([0, 1, 0, 1], dtype='int64')
assert_series_equal(res, expected)

res = s_0123 | s_4444
expected = Series(range(4, 8), dtype='int64')
assert_series_equal(res, expected)
def test_operators_bitwise_with_bool(self):
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype='int64')

s_a0b1c0 = Series([1], list('b'))
assert_series_equal(s_0123 & False, Series([False] * 4))
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
assert_series_equal(s_0123 & [False], Series([False] * 4))
assert_series_equal(s_0123 & (False), Series([False] * 4))

res = s_tft & s_a0b1c0
expected = s_tff.reindex(list('abc'))
assert_series_equal(res, expected)
def test_operators_bitwise_with_integers(self):
# GH#9016: support bitwise op for integer types
index = list('bca')

res = s_tft | s_a0b1c0
expected = s_tft.reindex(list('abc'))
assert_series_equal(res, expected)
s_tft = Series([True, False, True], index=index)
s_fff = Series([False, False, False], index=index)
s_0123 = Series(range(4), dtype='int64')

n0 = 0
res = s_tft & n0
Expand All @@ -1456,20 +1485,34 @@ def test_operators_bitwise(self):
expected = Series([0, 1, 0, 1])
assert_series_equal(res, expected)

s_1111 = Series([1] * 4, dtype='int8')
res = s_0123 & s_1111
expected = Series([0, 1, 0, 1], dtype='int64')
def test_operators_bitwise_with_reindexing(self):
# ops where reindeing needs to be done, so operating with a Series
# makes sense but with an Index or array does not
# GH#9016: support bitwise op for integer types
index = list('bca')

s_tft = Series([True, False, True], index=index)
s_fff = Series([False, False, False], index=index)
s_tff = Series([True, False, False], index=index)
s_empty = Series([])
s_a0b1c0 = Series([1], list('b'))
s_0123 = Series(range(4), dtype='int64')

res = s_tft & s_empty
expected = s_fff
assert_series_equal(res, expected)

res = s_0123.astype(np.int16) | s_1111.astype(np.int32)
expected = Series([1, 1, 3, 3], dtype='int32')
res = s_tft | s_empty
expected = s_tft
assert_series_equal(res, expected)

pytest.raises(TypeError, lambda: s_1111 & 'a')
pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd'])
pytest.raises(TypeError, lambda: s_0123 & np.NaN)
pytest.raises(TypeError, lambda: s_0123 & 3.14)
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2])
res = s_tft & s_a0b1c0
expected = s_tff.reindex(list('abc'))
assert_series_equal(res, expected)

res = s_tft | s_a0b1c0
expected = s_tft.reindex(list('abc'))
assert_series_equal(res, expected)

# s_0123 will be all false now because of reindexing like s_tft
if compat.PY3:
Expand All @@ -1491,16 +1534,15 @@ def test_operators_bitwise(self):
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c'])
assert_series_equal(s_0123 & s_tft, exp)

assert_series_equal(s_0123 & False, Series([False] * 4))
assert_series_equal(s_0123 ^ False, Series([False, True, True, True]))
assert_series_equal(s_0123 & [False], Series([False] * 4))
assert_series_equal(s_0123 & (False), Series([False] * 4))
assert_series_equal(s_0123 & Series([False, np.NaN, False, False]),
Series([False] * 4))
def test_operators_bitwise_with_nans(self):
# with NaNs present op(series, series) works but op(series, index)
# is not implemented
# GH#9016: support bitwise op for integer types
s_0123 = Series(range(4), dtype='int64')
result = s_0123 & Series([False, np.NaN, False, False])
assert_series_equal(result, Series([False] * 4))

s_ftft = Series([False, True, False, True])
assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft)

s_abNd = Series(['a', 'b', np.NaN, 'd'])
res = s_0123 & s_abNd
expected = s_ftft
Expand All @@ -1509,17 +1551,16 @@ def test_operators_bitwise(self):
def test_scalar_na_cmp_corners(self):
s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10])

def tester(a, b):
return a & b

pytest.raises(TypeError, tester, s, datetime(2005, 1, 1))
with pytest.raises(TypeError):
s & datetime(2005, 1, 1)

s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)])
s[::2] = np.nan

expected = Series(True, index=s.index)
expected[::2] = False
assert_series_equal(tester(s, list(s)), expected)
result = s & list(s)
assert_series_equal(result, expected)

d = DataFrame({'A': s})
# TODO: Fix this exception - needs to be fixed! (see GH5035)
Expand All @@ -1530,7 +1571,8 @@ def tester(a, b):
# https://github.com/pandas-dev/pandas/issues/5284

pytest.raises(ValueError, lambda: d.__and__(s, axis='columns'))
pytest.raises(ValueError, tester, s, d)
with pytest.raises(ValueError):
s & d

# this is wrong as its not a boolean result
# result = d.__and__(s,axis='index')
Expand Down