Skip to content

Commit

Permalink
Merge pull request #3068 from jreback/replace2
Browse files (browse the repository at this point in the history)
BUG: fixes in replace to deal with block upcasting
  • Loading branch information
jreback committed Mar 16, 2013
2 parents fbfd16a + 1cae798 commit 32ad737
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 58 deletions.
37 changes: 28 additions & 9 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -745,17 +745,36 @@ def _maybe_promote(dtype, fill_value=np.nan):
return dtype, fill_value


def _maybe_upcast_putmask(result, mask, other, dtype=None):
def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None):
""" a safe version of put mask that (potentially upcasts the result
return the result and a changed flag """
try:
np.putmask(result, mask, other)
except:
# our type is wrong here, need to upcast
if (-mask).any():
result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
return the result
if change is not None, then MUTATE the change (and change the dtype)
return a changed flag
"""

if mask.any():

def changeit():
# our type is wrong here, need to upcast
if (-mask).any():
r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True)
np.putmask(r, mask, other)

# we need to actually change the dtype here
if change is not None:
change.dtype = r.dtype
change[:] = r

return r, True

new_dtype, fill_value = _maybe_promote(result.dtype,other)
if new_dtype != result.dtype:
return changeit()

try:
np.putmask(result, mask, other)
return result, True
except:
return changeit()

return result, False

Expand Down
66 changes: 28 additions & 38 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3467,14 +3467,21 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
if len(self.columns) == 0:
return self

new_data = self._data
if isinstance(to_replace, dict):
if isinstance(value, dict): # {'A' : NA} -> {'A' : 0}
return self._replace_both_dict(to_replace, value, inplace)
new_data = self._data
for c, src in to_replace.iteritems():
if c in value and c in self:
new_data = new_data.replace(src, value[c], filter = [ c ], inplace=inplace)

elif not isinstance(value, (list, np.ndarray)):
return self._replace_src_dict(to_replace, value, inplace)

raise ValueError('Fill value must be scalar or dict')
new_data = self._data
for k, src in to_replace.iteritems():
if k in self:
new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace)
else:
raise ValueError('Fill value must be scalar or dict')

elif isinstance(to_replace, (list, np.ndarray)):
# [NA, ''] -> [0, 'missing']
Expand All @@ -3491,25 +3498,29 @@ def replace(self, to_replace, value=None, method='pad', axis=0,
new_data = self._data.replace(to_replace, value,
inplace=inplace)

if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)
else:

# dest iterable dict-like
if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1}
return self._replace_dest_dict(to_replace, value, inplace)

new_data = self._data
for k, v in value.iteritems():
if k in self:
new_data = new_data.replace(to_replace, v, filter = [ k ], inplace=inplace)

elif not isinstance(value, (list, np.ndarray)): # NA -> 0
new_data = self._data.replace(to_replace, value,
inplace=inplace)
if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)
else:
raise ValueError('Invalid to_replace type: %s' %
type(to_replace)) # pragma: no cover


raise ValueError('Invalid to_replace type: %s' %
type(to_replace)) # pragma: no cover
if inplace:
self._data = new_data
return self
else:
return self._constructor(new_data)

def _interpolate(self, to_replace, method, axis, inplace, limit):
if self._is_mixed_type and axis == 1:
Expand Down Expand Up @@ -3543,27 +3554,6 @@ def _interpolate(self, to_replace, method, axis, inplace, limit):
else:
return self._constructor(new_data)

def _replace_dest_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for k, v in value.iteritems():
if k in rs:
rs[k].replace(to_replace, v, inplace=True)
return rs if not inplace else None

def _replace_src_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for k, src in to_replace.iteritems():
if k in rs:
rs[k].replace(src, value, inplace=True)
return rs if not inplace else None

def _replace_both_dict(self, to_replace, value, inplace):
rs = self if inplace else self.copy()
for c, src in to_replace.iteritems():
if c in value and c in rs:
rs[c].replace(src, value[c], inplace=True)
return rs if not inplace else None

#----------------------------------------------------------------------
# Rename

Expand Down
15 changes: 14 additions & 1 deletion pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -880,10 +880,23 @@ def _verify_integrity(self):
'block items')

def apply(self, f, *args, **kwargs):
""" iterate over the blocks, collect and create a new block manager """
""" iterate over the blocks, collect and create a new block manager
Parameters
----------
f : the callable or function name to operate on at the block level
axes : optional (if not supplied, use self.axes)
filter : callable, if supplied, only call the block if the filter is True
"""

axes = kwargs.pop('axes',None)
filter = kwargs.pop('filter',None)
result_blocks = []
for blk in self.blocks:
if filter is not None:
if not blk.items.isin(filter).any():
result_blocks.append(blk)
continue
if callable(f):
applied = f(blk, *args, **kwargs)
else:
Expand Down
21 changes: 11 additions & 10 deletions pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -732,13 +732,8 @@ def where(self, cond, other=nan, inplace=False):
if len(other) != len(ser):
raise ValueError('Length of replacements must equal series length')

result, changed = com._maybe_upcast_putmask(ser,~cond,other)
if changed:

# need to actually change ser here
if inplace:
ser.dtype = result.dtype
ser[:] = result
change = ser if inplace else None
result, changed = com._maybe_upcast_putmask(ser,~cond,other,change=change)

return None if inplace else ser

Expand Down Expand Up @@ -2680,11 +2675,17 @@ def replace(self, to_replace, value=None, method='pad', inplace=False,
-------
replaced : Series
"""
result = self.copy() if not inplace else self

if inplace:
result = self
change = self
else:
result = self.copy()
change = None

def _rep_one(s, to_rep, v): # replace single value
mask = com.mask_missing(s.values, to_rep)
np.putmask(s.values, mask, v)
com._maybe_upcast_putmask(s.values,mask,v,change=change)

def _rep_dict(rs, to_rep): # replace {[src] -> dest}

Expand All @@ -2701,7 +2702,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest}
masks[d] = com.mask_missing(rs.values, sset)

for d, m in masks.iteritems():
np.putmask(rs.values, m, d)
com._maybe_upcast_putmask(rs.values,m,d,change=change)
else: # if no risk of clobbering then simple
for d, sset in dd.iteritems():
_rep_one(rs, sset, d)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5621,6 +5621,16 @@ def test_replace_mixed(self):
result = df.replace([1,2], ['foo','bar'])
assert_frame_equal(result,expected)

# test case from
from pandas.util.testing import makeCustomDataframe as mkdf
df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') })
result = df.replace(3, df.mean().to_dict())
expected = df.copy().astype('float64')
m = df.mean()
expected.iloc[0,0] = m[0]
expected.iloc[1,1] = m[1]
assert_frame_equal(result,expected)

def test_replace_interpolate(self):
padded = self.tsframe.replace(nan, method='pad')
assert_frame_equal(padded, self.tsframe.fillna(method='pad'))
Expand Down

0 comments on commit 32ad737

Please sign in to comment.