Skip to content

Commit

Permalink
ENH: _maybe_upcast_putmask now has a keyword, `change`, to provide inline
Browse files Browse the repository at this point in the history
       putmask changes to an object (series)
     apply in BlockManager now has a keyword, `filter`, to allow acting
       on only those items contained in the filter (if supplied)
CLN: consolidated all replace subs to main replace in DataFrame
       (which calls replace in BlockManager)
TST: now passes the test in GH 3064
  • Loading branch information
jreback committed Mar 16, 2013
1 parent fbfd16a commit 1cae798
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 58 deletions.
37 changes: 28 additions & 9 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,17 +745,36 @@ def _maybe_promote(dtype, fill_value=np.nan):
return dtype, fill_value


def _maybe_upcast_putmask(result, mask, other, dtype=None):
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
""" a safe version of put mask that (potentially) upcasts the result
return the result and a changed flag """
try:
np.putmask(result, mask, other)
except:
# our type is wrong here, need to upcast
if (-mask).any():
result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
return the result
if change is not None, then MUTATE the change (and change the dtype)
return a changed flag
"""

if mask.any():

def changeit():
# our type is wrong here, need to upcast
if (-mask).any():
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(r, mask, other)

# we need to actually change the dtype here
if change is not None:
change.dtype = r.dtype
change[:] = r

return r, True

new_dtype, fill_value = _maybe_promote(result.dtype,other)
if new_dtype != result.dtype:
return changeit()

try:
np.putmask(result, mask, other)
return result, True
except:
return changeit()

return result, False

Expand Down
66 changes: 28 additions & 38 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3467,14 +3467,21 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
if len(self.columns) == 0:
return self

new_data = self._data
if isinstance(to_replace, dict):
if isinstance(value, dict): # {'A' : NA} -> {'A' : 0}
return self._replace_both_dict(to_replace, value, inplace)
new_data = self._data
for c, src in to_replace.iteritems():
if c in value and c in self:
new_data = new_data.replace(src, value[c], filter = [ c ], inplace=inplace)

elif not isinstance(value, (list, np.ndarray)):
return self._replace_src_dict(to_replace, value, inplace)

raise ValueError('Fill value must be scalar or dict')
new_data = self._data
for k, src in to_replace.iteritems():
if k in self:
new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace)
else:
raise ValueError('Fill value must be scalar or dict')

elif isinstance(to_replace, (list, np.ndarray)):
# [NA, ''] -> [0, 'missing']
Expand All @@ -3491,25 +3498,29 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
new_data = self._data.replace(to_replace, value,
inplace=inplace)

if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)
else:

# dest iterable dict-like
if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1}
return self._replace_dest_dict(to_replace, value, inplace)

new_data = self._data
for k, v in value.iteritems():
if k in self:
new_data = new_data.replace(to_replace, v, filter = [ k ], inplace=inplace)

elif not isinstance(value, (list, np.ndarray)): # NA -> 0
new_data = self._data.replace(to_replace, value,
inplace=inplace)
if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)
else:
raise ValueError('Invalid to_replace type: %s' %
type(to_replace)) # pragma: no cover


raise ValueError('Invalid to_replace type: %s' %
type(to_replace)) # pragma: no cover
if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)

def _interpolate(self, to_replace, method, axis, inplace, limit):
if self._is_mixed_type and axis == 1:
Expand Down Expand Up @@ -3543,27 +3554,6 @@ def _interpolate(self, to_replace, method, axis, inplace, limit):
else:
return self._constructor(new_data)

def _replace_dest_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for k, v in value.iteritems():
if k in rs:
rs[k].replace(to_replace, v, inplace=True)
return rs if not inplace else None

def _replace_src_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for k, src in to_replace.iteritems():
if k in rs:
rs[k].replace(src, value, inplace=True)
return rs if not inplace else None

def _replace_both_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for c, src in to_replace.iteritems():
if c in value and c in rs:
rs[c].replace(src, value[c], inplace=True)
return rs if not inplace else None

#----------------------------------------------------------------------
# Rename

Expand Down
15 changes: 14 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,10 +880,23 @@ def _verify_integrity(self):
'block items')

def apply(self, f, *args, **kwargs):
""" iterate over the blocks, collect and create a new block manager """
""" iterate over the blocks, collect and create a new block manager
Parameters
----------
f : the callable or function name to operate on at the block level
axes : optional (if not supplied, use self.axes)
filter : list-like of items, if supplied, only call f on blocks that contain at least one of these items (other blocks are passed through unchanged)
"""

axes = kwargs.pop('axes',None)
filter = kwargs.pop('filter',None)
result_blocks = []
for blk in self.blocks:
if filter is not None:
if not blk.items.isin(filter).any():
result_blocks.append(blk)
continue
if callable(f):
applied = f(blk, *args, **kwargs)
else:
Expand Down
21 changes: 11 additions & 10 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,13 +732,8 @@ def where(self, cond, other=nan, inplace=False):
if len(other) != len(ser):
raise ValueError('Length of replacements must equal series length')

result, changed = com._maybe_upcast_putmask(ser,~cond,other)
if changed:

# need to actually change ser here
if inplace:
ser.dtype = result.dtype
ser[:] = result
change = ser if inplace else None
result, changed = com._maybe_upcast_putmask(ser,~cond,other,change=change)

return None if inplace else ser

Expand Down Expand Up @@ -2680,11 +2675,17 @@ def replace(self, to_replace, value=None, method='pad', inplace=False,
-------
replaced : Series
"""
result = self.copy() if not inplace else self

if inplace:
result = self
change = self
else:
result = self.copy()
change = None

def _rep_one(s, to_rep, v): # replace single value
mask = com.mask_missing(s.values, to_rep)
np.putmask(s.values, mask, v)
com._maybe_upcast_putmask(s.values,mask,v,change=change)

def _rep_dict(rs, to_rep): # replace {[src] -> dest}

Expand All @@ -2701,7 +2702,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest}
masks[d] = com.mask_missing(rs.values, sset)

for d, m in masks.iteritems():
np.putmask(rs.values, m, d)
com._maybe_upcast_putmask(rs.values,m,d,change=change)
else: # if no risk of clobbering then simple
for d, sset in dd.iteritems():
_rep_one(rs, sset, d)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5621,6 +5621,16 @@ def test_replace_mixed(self):
result = df.replace([1,2], ['foo','bar'])
assert_frame_equal(result,expected)

# test case from GH 3064
from pandas.util.testing import makeCustomDataframe as mkdf
df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') })
result = df.replace(3, df.mean().to_dict())
expected = df.copy().astype('float64')
m = df.mean()
expected.iloc[0,0] = m[0]
expected.iloc[1,1] = m[1]
assert_frame_equal(result,expected)

def test_replace_interpolate(self):
padded = self.tsframe.replace(nan, method='pad')
assert_frame_equal(padded, self.tsframe.fillna(method='pad'))
Expand Down

0 comments on commit 1cae798

Please sign in to comment.