-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Simplify bool ops closures, improve tests #19795
Changes from all commits
619d2bb
7cf9a7a
f5b262b
c03cc55
235f969
9aa41ec
e721b64
a591078
8243ec8
f19a596
f9ff20b
0d99c94
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1241,23 +1241,22 @@ def _bool_method_SERIES(cls, op, special): | |
Wrapper function for Series arithmetic operations, to avoid | ||
code duplication. | ||
""" | ||
name = _get_op_name(op, special) | ||
|
||
def na_op(x, y): | ||
try: | ||
result = op(x, y) | ||
except TypeError: | ||
if isinstance(y, list): | ||
y = construct_1d_object_array_from_listlike(y) | ||
|
||
if isinstance(y, (np.ndarray, ABCSeries)): | ||
if (is_bool_dtype(x.dtype) and is_bool_dtype(y.dtype)): | ||
result = op(x, y) # when would this be hit? | ||
else: | ||
x = _ensure_object(x) | ||
y = _ensure_object(y) | ||
result = libops.vec_binop(x, y, op) | ||
assert not isinstance(y, (list, ABCSeries, pd.Index)) | ||
if isinstance(y, np.ndarray): | ||
# bool-bool dtype operations should be OK, should not get here | ||
assert not (is_bool_dtype(x) and is_bool_dtype(y)) | ||
x = _ensure_object(x) | ||
y = _ensure_object(y) | ||
result = libops.vec_binop(x, y, op) | ||
else: | ||
# let null fall thru | ||
assert is_scalar(y) | ||
if not isna(y): | ||
y = bool(y) | ||
try: | ||
|
@@ -1278,32 +1277,39 @@ def wrapper(self, other): | |
|
||
self, other = _align_method_SERIES(self, other, align_asobject=True) | ||
|
||
res_name = get_op_result_name(self, other) | ||
|
||
if isinstance(other, ABCDataFrame): | ||
# Defer to DataFrame implementation; fail early | ||
return NotImplemented | ||
|
||
elif isinstance(other, ABCSeries): | ||
name = get_op_result_name(self, other) | ||
elif isinstance(other, (ABCSeries, pd.Index)): | ||
is_other_int_dtype = is_integer_dtype(other.dtype) | ||
other = fill_int(other) if is_other_int_dtype else fill_bool(other) | ||
|
||
filler = (fill_int if is_self_int_dtype and is_other_int_dtype | ||
else fill_bool) | ||
|
||
res_values = na_op(self.values, other.values) | ||
unfilled = self._constructor(res_values, | ||
index=self.index, name=name) | ||
return filler(unfilled) | ||
ovalues = other.values | ||
|
||
else: | ||
# scalars, list, tuple, np.array | ||
filler = (fill_int if is_self_int_dtype and | ||
is_integer_dtype(np.asarray(other)) else fill_bool) | ||
|
||
res_values = na_op(self.values, other) | ||
unfilled = self._constructor(res_values, index=self.index) | ||
return filler(unfilled).__finalize__(self) | ||
is_other_int_dtype = is_integer_dtype(np.asarray(other)) | ||
if is_list_like(other) and not isinstance(other, np.ndarray): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then thismust be a scalar, put this in another elsif rather than trying to catch the list-like case (ndarray, list, tuple) AND scalar case here |
||
# TODO: Can we do this before the is_integer_dtype check? | ||
# could the is_integer_dtype check be checking the wrong | ||
# thing? e.g. other = [[0, 1], [2, 3], [4, 5]]? | ||
other = construct_1d_object_array_from_listlike(other) | ||
ovalues = other | ||
|
||
filler = (fill_int if is_self_int_dtype and is_other_int_dtype | ||
else fill_bool) | ||
|
||
res_values = na_op(self.values, ovalues) | ||
unfilled = self._constructor(res_values, index=self.index, | ||
name=res_name) | ||
result = filler(unfilled) | ||
if not isinstance(other, ABCSeries): | ||
result = result.__finalize__(self) | ||
return result | ||
|
||
wrapper.__name__ = name | ||
return wrapper | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -178,46 +178,47 @@ def test_comparison_tuples(self): | |
expected = Series([True, False]) | ||
assert_series_equal(result, expected) | ||
|
||
def test_comparison_operators_with_nas(self): | ||
@pytest.mark.parametrize('op', ['lt', 'le', 'gt', 'ge', 'eq', 'ne']) | ||
def test_comparison_operators_with_nas(self, op): | ||
ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) | ||
ser[::2] = np.nan | ||
|
||
# test that comparisons work | ||
ops = ['lt', 'le', 'gt', 'ge', 'eq', 'ne'] | ||
for op in ops: | ||
val = ser[5] | ||
val = ser[5] | ||
|
||
f = getattr(operator, op) | ||
result = f(ser, val) | ||
f = getattr(operator, op) | ||
result = f(ser, val) | ||
|
||
expected = f(ser.dropna(), val).reindex(ser.index) | ||
expected = f(ser.dropna(), val).reindex(ser.index) | ||
|
||
if op == 'ne': | ||
expected = expected.fillna(True).astype(bool) | ||
else: | ||
expected = expected.fillna(False).astype(bool) | ||
if op == 'ne': | ||
expected = expected.fillna(True).astype(bool) | ||
else: | ||
expected = expected.fillna(False).astype(bool) | ||
|
||
assert_series_equal(result, expected) | ||
assert_series_equal(result, expected) | ||
|
||
# fffffffuuuuuuuuuuuu | ||
# result = f(val, s) | ||
# expected = f(val, s.dropna()).reindex(s.index) | ||
# assert_series_equal(result, expected) | ||
# fffffffuuuuuuuuuuuu | ||
# result = f(val, s) | ||
# expected = f(val, s.dropna()).reindex(s.index) | ||
# assert_series_equal(result, expected) | ||
|
||
# boolean &, |, ^ should work with object arrays and propagate NAs | ||
@pytest.mark.parametrize('bool_op', ['and_', 'or_', 'xor']) | ||
def test_bool_operators_with_nas(self, bool_op): | ||
# boolean &, |, ^ should work with object arrays and propagate NAs | ||
|
||
ops = ['and_', 'or_', 'xor'] | ||
mask = ser.isna() | ||
for bool_op in ops: | ||
func = getattr(operator, bool_op) | ||
ser = Series(bdate_range('1/1/2000', periods=10), dtype=object) | ||
ser[::2] = np.nan | ||
|
||
filled = ser.fillna(ser[0]) | ||
func = getattr(operator, bool_op) | ||
result = func(ser < ser[9], ser > ser[3]) | ||
|
||
result = func(ser < ser[9], ser > ser[3]) | ||
mask = ser.isna() | ||
filled = ser.fillna(ser[0]) | ||
expected = func(filled < filled[9], filled > filled[3]) | ||
expected[mask] = False | ||
|
||
expected = func(filled < filled[9], filled > filled[3]) | ||
expected[mask] = False | ||
assert_series_equal(result, expected) | ||
assert_series_equal(result, expected) | ||
|
||
def test_comparison_object_numeric_nas(self): | ||
ser = Series(np.random.randn(10), dtype=object) | ||
|
@@ -371,10 +372,10 @@ def test_comparison_different_length(self): | |
b = Series([2, 3, 4]) | ||
pytest.raises(ValueError, a.__eq__, b) | ||
|
||
def test_comparison_label_based(self): | ||
def test_bool_opslabel_based(self): | ||
|
||
# GH 4947 | ||
# comparisons should be label based | ||
# boolean ops should be label based | ||
|
||
a = Series([True, False, True], list('bca')) | ||
b = Series([False, True, False], list('abc')) | ||
|
@@ -1398,45 +1399,73 @@ def test_ops_datetimelike_align(self): | |
|
||
def test_operators_bitwise(self): | ||
# GH 9016: support bitwise op for integer types | ||
index = list('bca') | ||
s_0123 = Series(range(4), dtype='int64') | ||
s_1111 = Series([1] * 4, dtype='int8') | ||
|
||
s_tft = Series([True, False, True], index=index) | ||
s_fff = Series([False, False, False], index=index) | ||
s_tff = Series([True, False, False], index=index) | ||
s_empty = Series([]) | ||
# We cannot wrap s111.astype(np.int32) with pd.Index because it | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you exercise both lhs and rhs are scalars (with a Series/Index) (True, False) and also There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i c you might have these cases below, if you do make add _scalars in the test name (and see if you can fully exercise them) |
||
# will case to int64 | ||
res = s_0123.astype(np.int16) | s_1111.astype(np.int32) | ||
expected = Series([1, 1, 3, 3], dtype='int32') | ||
assert_series_equal(res, expected) | ||
|
||
# TODO: unused | ||
# s_0101 = Series([0, 1, 0, 1]) | ||
def test_operators_bitwise_invalid(self): | ||
# GH#9016: support bitwise op for integer types | ||
s_0123 = Series(range(4), dtype='int64') | ||
s_1111 = Series([1] * 4, dtype='int8') | ||
|
||
with pytest.raises(TypeError): | ||
s_1111 & 'a' | ||
with pytest.raises(TypeError): | ||
s_1111 & ['a', 'b', 'c', 'd'] | ||
with pytest.raises(TypeError): | ||
s_0123 & 3.14 | ||
with pytest.raises(TypeError): | ||
s_0123 & [0.1, 4, 3.14, 2] | ||
|
||
def test_operators_bitwise_int_series_with_float_series(self): | ||
# GH#9016: support bitwise op for integer types | ||
s_0123 = Series(range(4), dtype='int64') | ||
s_ftft = Series([False, True, False, True]) | ||
result = s_0123 & Series([0.1, 4, -3.14, 2]) | ||
assert_series_equal(result, s_ftft) | ||
|
||
@pytest.mark.parametrize('box', [np.array, pd.Index, pd.Series]) | ||
def test_operators_bitwise_with_int_arraylike(self, box): | ||
# GH#9016: support bitwise op for integer types | ||
# GH#19795 allow for operating with Index | ||
s_0123 = Series(range(4), dtype='int64') | ||
s_3333 = Series([3] * 4) | ||
s_4444 = Series([4] * 4) | ||
|
||
res = s_tft & s_empty | ||
expected = s_fff | ||
res = s_0123 & box(s_3333) | ||
expected = Series(range(4), dtype='int64') | ||
assert_series_equal(res, expected) | ||
|
||
res = s_tft | s_empty | ||
expected = s_tft | ||
res = s_0123 | box(s_4444) | ||
expected = Series(range(4, 8), dtype='int64') | ||
assert_series_equal(res, expected) | ||
|
||
res = s_0123 & s_3333 | ||
expected = Series(range(4), dtype='int64') | ||
s_1111 = Series([1] * 4, dtype='int8') | ||
res = s_0123 & box(s_1111) | ||
expected = Series([0, 1, 0, 1], dtype='int64') | ||
assert_series_equal(res, expected) | ||
|
||
res = s_0123 | s_4444 | ||
expected = Series(range(4, 8), dtype='int64') | ||
assert_series_equal(res, expected) | ||
def test_operators_bitwise_with_bool(self): | ||
# GH#9016: support bitwise op for integer types | ||
s_0123 = Series(range(4), dtype='int64') | ||
|
||
s_a0b1c0 = Series([1], list('b')) | ||
assert_series_equal(s_0123 & False, Series([False] * 4)) | ||
assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) | ||
assert_series_equal(s_0123 & [False], Series([False] * 4)) | ||
assert_series_equal(s_0123 & (False), Series([False] * 4)) | ||
|
||
res = s_tft & s_a0b1c0 | ||
expected = s_tff.reindex(list('abc')) | ||
assert_series_equal(res, expected) | ||
def test_operators_bitwise_with_integers(self): | ||
# GH#9016: support bitwise op for integer types | ||
index = list('bca') | ||
|
||
res = s_tft | s_a0b1c0 | ||
expected = s_tft.reindex(list('abc')) | ||
assert_series_equal(res, expected) | ||
s_tft = Series([True, False, True], index=index) | ||
s_fff = Series([False, False, False], index=index) | ||
s_0123 = Series(range(4), dtype='int64') | ||
|
||
n0 = 0 | ||
res = s_tft & n0 | ||
|
@@ -1456,20 +1485,34 @@ def test_operators_bitwise(self): | |
expected = Series([0, 1, 0, 1]) | ||
assert_series_equal(res, expected) | ||
|
||
s_1111 = Series([1] * 4, dtype='int8') | ||
res = s_0123 & s_1111 | ||
expected = Series([0, 1, 0, 1], dtype='int64') | ||
def test_operators_bitwise_with_reindexing(self): | ||
# ops where reindeing needs to be done, so operating with a Series | ||
# makes sense but with an Index or array does not | ||
# GH#9016: support bitwise op for integer types | ||
index = list('bca') | ||
|
||
s_tft = Series([True, False, True], index=index) | ||
s_fff = Series([False, False, False], index=index) | ||
s_tff = Series([True, False, False], index=index) | ||
s_empty = Series([]) | ||
s_a0b1c0 = Series([1], list('b')) | ||
s_0123 = Series(range(4), dtype='int64') | ||
|
||
res = s_tft & s_empty | ||
expected = s_fff | ||
assert_series_equal(res, expected) | ||
|
||
res = s_0123.astype(np.int16) | s_1111.astype(np.int32) | ||
expected = Series([1, 1, 3, 3], dtype='int32') | ||
res = s_tft | s_empty | ||
expected = s_tft | ||
assert_series_equal(res, expected) | ||
|
||
pytest.raises(TypeError, lambda: s_1111 & 'a') | ||
pytest.raises(TypeError, lambda: s_1111 & ['a', 'b', 'c', 'd']) | ||
pytest.raises(TypeError, lambda: s_0123 & np.NaN) | ||
pytest.raises(TypeError, lambda: s_0123 & 3.14) | ||
pytest.raises(TypeError, lambda: s_0123 & [0.1, 4, 3.14, 2]) | ||
res = s_tft & s_a0b1c0 | ||
expected = s_tff.reindex(list('abc')) | ||
assert_series_equal(res, expected) | ||
|
||
res = s_tft | s_a0b1c0 | ||
expected = s_tft.reindex(list('abc')) | ||
assert_series_equal(res, expected) | ||
|
||
# s_0123 will be all false now because of reindexing like s_tft | ||
if compat.PY3: | ||
|
@@ -1491,16 +1534,15 @@ def test_operators_bitwise(self): | |
exp = Series([False] * 7, index=[0, 1, 2, 3, 'a', 'b', 'c']) | ||
assert_series_equal(s_0123 & s_tft, exp) | ||
|
||
assert_series_equal(s_0123 & False, Series([False] * 4)) | ||
assert_series_equal(s_0123 ^ False, Series([False, True, True, True])) | ||
assert_series_equal(s_0123 & [False], Series([False] * 4)) | ||
assert_series_equal(s_0123 & (False), Series([False] * 4)) | ||
assert_series_equal(s_0123 & Series([False, np.NaN, False, False]), | ||
Series([False] * 4)) | ||
def test_operators_bitwise_with_nans(self): | ||
# with NaNs present op(series, series) works but op(series, index) | ||
# is not implemented | ||
# GH#9016: support bitwise op for integer types | ||
s_0123 = Series(range(4), dtype='int64') | ||
result = s_0123 & Series([False, np.NaN, False, False]) | ||
assert_series_equal(result, Series([False] * 4)) | ||
|
||
s_ftft = Series([False, True, False, True]) | ||
assert_series_equal(s_0123 & Series([0.1, 4, -3.14, 2]), s_ftft) | ||
|
||
s_abNd = Series(['a', 'b', np.NaN, 'd']) | ||
res = s_0123 & s_abNd | ||
expected = s_ftft | ||
|
@@ -1509,17 +1551,16 @@ def test_operators_bitwise(self): | |
def test_scalar_na_cmp_corners(self): | ||
s = Series([2, 3, 4, 5, 6, 7, 8, 9, 10]) | ||
|
||
def tester(a, b): | ||
return a & b | ||
|
||
pytest.raises(TypeError, tester, s, datetime(2005, 1, 1)) | ||
with pytest.raises(TypeError): | ||
s & datetime(2005, 1, 1) | ||
|
||
s = Series([2, 3, 4, 5, 6, 7, 8, 9, datetime(2005, 1, 1)]) | ||
s[::2] = np.nan | ||
|
||
expected = Series(True, index=s.index) | ||
expected[::2] = False | ||
assert_series_equal(tester(s, list(s)), expected) | ||
result = s & list(s) | ||
assert_series_equal(result, expected) | ||
|
||
d = DataFrame({'A': s}) | ||
# TODO: Fix this exception - needs to be fixed! (see GH5035) | ||
|
@@ -1530,7 +1571,8 @@ def tester(a, b): | |
# https://github.com/pandas-dev/pandas/issues/5284 | ||
|
||
pytest.raises(ValueError, lambda: d.__and__(s, axis='columns')) | ||
pytest.raises(ValueError, tester, s, d) | ||
with pytest.raises(ValueError): | ||
s & d | ||
|
||
# this is wrong as its not a boolean result | ||
# result = d.__and__(s,axis='index') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add tuple here, or assert
is_list_like
and notnp.ndarray
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
or maybe just remove the assertion and make the else into an
is_scalar
and make an else that raises an AssertionErrorThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As mentioned above, I’m not inclined to change any behavior until the docs/tests are fleshed out.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
then let's do that first (in this PR or a pre-cursor)