Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

use boolean indexing via getitem to trigger masking; add inplace keyword to where #2230

Merged
merged 5 commits into from
Nov 13, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 15 additions & 19 deletions pandas/core/frame.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -1775,9 +1775,8 @@ def __getitem__(self, key):
elif isinstance(self.columns, MultiIndex):
return self._getitem_multilevel(key)
elif isinstance(key, DataFrame):
values = key.values
if values.dtype == bool:
return self.values[values]
if key.values.dtype == bool:
return self.where(key)
else:
raise ValueError('Cannot index using non-boolean DataFrame')
else:
Expand Down Expand Up @@ -1871,11 +1870,6 @@ def __setitem__(self, key, value):
# support boolean setting with DataFrame input, e.g.
# df[df > df2] = 0
if isinstance(key, DataFrame):
if not (key.index.equals(self.index) and
key.columns.equals(self.columns)):
raise PandasError('Can only index with like-indexed '
'DataFrame objects')

self._boolean_set(key, value)
elif isinstance(key, (np.ndarray, list)):
return self._set_item_multiple(key, value)
Expand All @@ -1884,18 +1878,13 @@ def __setitem__(self, key, value):
self._set_item(key, value)

def _boolean_set(self, key, value):
mask = key.values
if mask.dtype != np.bool_:
if key.values.dtype != np.bool_:
raise ValueError('Must pass DataFrame with boolean values only')

if self._is_mixed_type:
raise ValueError('Cannot do boolean setting on mixed-type frame')

if isinstance(value, DataFrame):
assert(value._indexed_same(self))
np.putmask(self.values, mask, value.values)
else:
self.values[mask] = value
self.where(key, value, inplace=True)

def _set_item_multiple(self, keys, value):
if isinstance(value, DataFrame):
Expand Down Expand Up @@ -4878,7 +4867,7 @@ def combineMult(self, other):
"""
return self.mul(other, fill_value=1.)

def where(self, cond, other):
def where(self, cond, other=NA, inplace=False):
"""
Return a DataFrame with the same shape as self and whose corresponding
entries are from self where cond is True and otherwise are from other.
Expand All @@ -4893,6 +4882,9 @@ def where(self, cond, other):
-------
wh: DataFrame
"""
if not hasattr(cond,'shape'):
raise ValueError('where requires an ndarray like object for its condition')

if isinstance(cond, np.ndarray):
if cond.shape != self.shape:
raise ValueError('Array onditional must be same shape as self')
Expand All @@ -4905,13 +4897,17 @@ def where(self, cond, other):
if isinstance(other, DataFrame):
_, other = self.align(other, join='left', fill_value=NA)

if inplace:
np.putmask(self.values, cond, other)
return self

rs = np.where(cond, self, other)
return self._constructor(rs, self.index, self.columns)

def mask(self, cond):
"""
Returns copy of self whose values are replaced with nan if the
corresponding entry in cond is False
inverted condition is True

Parameters
----------
Expand All @@ -4921,7 +4917,7 @@ def mask(self, cond):
-------
wh: DataFrame
"""
return self.where(cond, NA)
return self.where(~cond, NA)

_EMPTY_SERIES = Series([])

Expand Down
26 changes: 23 additions & 3 deletions pandas/tests/test_frame.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,12 @@ def test_getitem_boolean(self):

self.assertRaises(ValueError, self.tsframe.__getitem__, self.tsframe)

# test df[df >0] works
bif = self.tsframe[self.tsframe > 0]
bifw = DataFrame(np.where(self.tsframe>0,self.tsframe,np.nan),index=self.tsframe.index,columns=self.tsframe.columns)
self.assert_(isinstance(bif,DataFrame))
self.assert_(bif.shape == self.tsframe.shape)
assert_frame_equal(bif,bifw)

def test_getitem_boolean_list(self):
df = DataFrame(np.arange(12).reshape(3,4))
Expand Down Expand Up @@ -278,7 +284,11 @@ def test_setitem_boolean(self):
values[values == 5] = 0
assert_almost_equal(df.values, values)

self.assertRaises(Exception, df.__setitem__, df[:-1] > 0, 2)
# a df that needs alignment first
df[df[:-1]<0] = 2
np.putmask(values[:-1],values[:-1]<0,2)
assert_almost_equal(df.values, values)

self.assertRaises(Exception, df.__setitem__, df * 0, 2)

# index with DataFrame
Expand Down Expand Up @@ -5204,14 +5214,24 @@ def test_where(self):
for k, v in rs.iteritems():
assert_series_equal(v, np.where(cond[k], df[k], other5))

assert_frame_equal(rs, df.mask(cond))

err1 = (df + 1).values[0:2, :]
self.assertRaises(ValueError, df.where, cond, err1)

err2 = cond.ix[:2, :].values
self.assertRaises(ValueError, df.where, err2, other1)

# invalid conditions
self.assertRaises(ValueError, df.mask, True)
self.assertRaises(ValueError, df.mask, 0)

def test_mask(self):
df = DataFrame(np.random.randn(5, 3))
cond = df > 0

rs = df.where(cond, np.nan)
assert_frame_equal(rs, df.mask(df <= 0))
assert_frame_equal(rs, df.mask(~cond))


#----------------------------------------------------------------------
# Transposing
Expand Down