Skip to content

Commit

Permalink
ENH: improved handling of NAs in binary ops with object Series, GH #737
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Feb 5, 2012
1 parent 7187159 commit 9b07b42
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 5 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ pandas 0.7.0
#657)
- Implement array interface on Panel so that ufuncs work (re: #740)
- Add ``sort`` option to ``DataFrame.join`` (GH #731)
- Improved handling of NAs (propagation) in binary operations with
dtype=object arrays (GH #737)

**API Changes**

Expand Down
29 changes: 24 additions & 5 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,27 +43,46 @@ def _arith_method(op, name):
Wrapper function for Series arithmetic operations, to avoid
code duplication.
"""
def na_op(x, y):
    """
    Apply ``op`` to ``x`` and ``y``, propagating missing values when the
    plain vectorized call fails.

    ``op(x, y)`` is tried first. If it raises ``TypeError`` (for example
    an object array that mixes strings with NaN), ``op`` is re-applied
    only to the positions where the array operand(s) are not null, and
    every other position of the result is set to NaN.

    Parameters
    ----------
    x, y : ndarray or scalar
        Operands, in the order they are passed to ``op``.

    Returns
    -------
    result : ndarray, or whatever ``op(x, y)`` returns on the fast path

    Raises
    ------
    TypeError
        Re-raised unchanged when neither operand is an ndarray, since
        there is nothing to mask in that case.
    """
    try:
        result = op(x, y)
    except TypeError:
        x_is_array = isinstance(x, np.ndarray)
        y_is_array = isinstance(y, np.ndarray)

        if x_is_array and y_is_array:
            # A position is usable only when both sides are non-null
            mask = notnull(x) & notnull(y)
            template = x
            left, right = x[mask], y[mask]
        elif x_is_array:
            mask = notnull(x)
            template = x
            left, right = x[mask], y
        elif y_is_array:
            mask = notnull(y)
            template = y
            left, right = x, y[mask]
        else:
            # No array operand: masking cannot help, surface the real error
            raise

        result = np.empty(len(template), dtype=template.dtype)
        result[mask] = op(left, right)

        if not mask.all():
            # np.empty leaves the skipped slots uninitialized (None for
            # object dtype, arbitrary memory otherwise); mark them as
            # missing explicitly so NAs really propagate.
            if result.dtype.kind not in 'fcO':
                # dtype cannot represent NaN; fall back to object
                result = result.astype(object)
            result[~mask] = np.nan

    return result

# Binary-operation closure returned by the enclosing _arith_method; it
# dispatches on the type of ``other`` (Series, DataFrame, anything else).
#
# NOTE(review): this span is a rendered diff without +/- markers. Each
# ``op(...)`` statement below is immediately followed by a ``na_op(...)``
# statement doing the same job; the ``op`` lines appear to be the removed
# (pre-change) version and the ``na_op`` lines their replacements, which
# matches the "24 additions & 5 deletions" count shown for series.py.
def wrapper(self, other):
# Imported inside the function rather than at module level -- presumably
# to avoid a circular import with pandas.core.frame; TODO confirm.
from pandas.core.frame import DataFrame

if isinstance(other, Series):
# Identical indexes: operate directly on the underlying value arrays
if self.index.equals(other.index):
name = _maybe_match_name(self, other)
return Series(op(self.values, other.values), index=self.index,
name=name)
return Series(na_op(self.values, other.values),
index=self.index, name=name)

# Differing indexes: outer-join align both sides first, then operate on
# the aligned value arrays; the result carries the aligned index
this_reindexed, other_reindexed = self.align(other, join='outer',
copy=False)
arr = op(this_reindexed.values, other_reindexed.values)
arr = na_op(this_reindexed.values, other_reindexed.values)

name = _maybe_match_name(self, other)
return Series(arr, index=this_reindexed.index, name=name)
elif isinstance(other, DataFrame):
# Not handled here; returning NotImplemented lets Python try the
# reflected operation on ``other``
return NotImplemented
else:
# scalars
# (any ``other`` that is neither a Series nor a DataFrame lands here)
return Series(op(self.values, other), index=self.index,
name=self.name)
return Series(na_op(self.values, other),
index=self.index, name=self.name)
return wrapper

def _radd_compat(left, right):
Expand Down
18 changes: 18 additions & 0 deletions pandas/tests/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -1135,6 +1135,24 @@ def test_operators_empty_int_corner(self):
# expected = (self.ts >= -0.5) & (self.ts <= 0.5)
# assert_series_equal(selector, expected)

def test_operators_na_handling(self):
    """Binary ops on object-dtype Series must propagate missing values."""
    from decimal import Decimal
    from datetime import date

    # Series + shifted Series: the first slot of the shifted operand is
    # missing, so the first slot of the sum must be null as well
    decimals = Series([Decimal('1.3'), Decimal('2.3')],
                      index=[date(2012,1,1), date(2012,1,2)])
    shifted_sum = decimals + decimals.shift(1)
    self.assert_(isnull(shifted_sum[0]))

    strings = Series(['foo', 'bar', 'baz', np.nan])

    # scalar on the left-hand side
    prefixed = 'prefix_' + strings
    assert_series_equal(
        prefixed,
        Series(['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan]))

    # scalar on the right-hand side
    suffixed = strings + '_suffix'
    assert_series_equal(
        suffixed,
        Series(['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan]))

def test_idxmin(self):
# test idxmin
# _check_stat_op approach can not be used here because of isnull check.
Expand Down

0 comments on commit 9b07b42

Please sign in to comment.