From 1cae798d232556132058203113fb8d34a936e2e7 Mon Sep 17 00:00:00 2001 From: jreback Date: Sat, 16 Mar 2013 15:38:54 -0400 Subject: [PATCH] ENH: _maybe_upcast_putmask now has the keyword, change to provide inline putmask changes to an object (series) apply in BlockManager now has a keyword, filter to allow acting on only those items contained in the filter (if supplied) CLN: consolidated all replace subs to main replace in DataFrame (which calls replace in BlockManager) TST: now passed test in GH 3064 --- pandas/core/common.py | 37 +++++++++++++++------ pandas/core/frame.py | 66 ++++++++++++++++---------------------- pandas/core/internals.py | 15 ++++++++- pandas/core/series.py | 21 ++++++------ pandas/tests/test_frame.py | 10 ++++++ 5 files changed, 91 insertions(+), 58 deletions(-) diff --git a/pandas/core/common.py b/pandas/core/common.py index 20c6ae05349ec..1568018174e68 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -745,17 +745,36 @@ def _maybe_promote(dtype, fill_value=np.nan): return dtype, fill_value -def _maybe_upcast_putmask(result, mask, other, dtype=None): +def _maybe_upcast_putmask(result, mask, other, dtype=None, change=None): """ a safe version of put mask that (potentially upcasts the result - return the result and a changed flag """ - try: - np.putmask(result, mask, other) - except: - # our type is wrong here, need to upcast - if (-mask).any(): - result, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True) + return the result + if change is not None, then MUTATE the change (and change the dtype) + return a changed flag + """ + + if mask.any(): + + def changeit(): + # our type is wrong here, need to upcast + if (-mask).any(): + r, fill_value = _maybe_upcast(result, fill_value=other, dtype=dtype, copy=True) + np.putmask(r, mask, other) + + # we need to actually change the dtype here + if change is not None: + change.dtype = r.dtype + change[:] = r + + return r, True + + new_dtype, fill_value = 
_maybe_promote(result.dtype,other) + if new_dtype != result.dtype: + return changeit() + + try: np.putmask(result, mask, other) - return result, True + except: + return changeit() return result, False diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4eba4b52aaa81..5603730974c7e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3467,14 +3467,21 @@ def replace(self, to_replace, value=None, method='pad', axis=0, if len(self.columns) == 0: return self + new_data = self._data if isinstance(to_replace, dict): if isinstance(value, dict): # {'A' : NA} -> {'A' : 0} - return self._replace_both_dict(to_replace, value, inplace) + new_data = self._data + for c, src in to_replace.iteritems(): + if c in value and c in self: + new_data = new_data.replace(src, value[c], filter = [ c ], inplace=inplace) elif not isinstance(value, (list, np.ndarray)): - return self._replace_src_dict(to_replace, value, inplace) - - raise ValueError('Fill value must be scalar or dict') + new_data = self._data + for k, src in to_replace.iteritems(): + if k in self: + new_data = new_data.replace(src, value, filter = [ k ], inplace=inplace) + else: + raise ValueError('Fill value must be scalar or dict') elif isinstance(to_replace, (list, np.ndarray)): # [NA, ''] -> [0, 'missing'] @@ -3491,25 +3498,29 @@ def replace(self, to_replace, value=None, method='pad', axis=0, new_data = self._data.replace(to_replace, value, inplace=inplace) - if inplace: - self._data = new_data - return self - else: - return self._constructor(new_data) else: + + # dest iterable dict-like if isinstance(value, dict): # NA -> {'A' : 0, 'B' : -1} - return self._replace_dest_dict(to_replace, value, inplace) + + new_data = self._data + for k, v in value.iteritems(): + if k in self: + new_data = new_data.replace(to_replace, v, filter = [ k ], inplace=inplace) + elif not isinstance(value, (list, np.ndarray)): # NA -> 0 new_data = self._data.replace(to_replace, value, inplace=inplace) - if inplace: - self._data 
= new_data - return self - else: - return self._constructor(new_data) + else: + raise ValueError('Invalid to_replace type: %s' % + type(to_replace)) # pragma: no cover + - raise ValueError('Invalid to_replace type: %s' % - type(to_replace)) # pragma: no cover + if inplace: + self._data = new_data + return self + else: + return self._constructor(new_data) def _interpolate(self, to_replace, method, axis, inplace, limit): if self._is_mixed_type and axis == 1: @@ -3543,27 +3554,6 @@ def _interpolate(self, to_replace, method, axis, inplace, limit): else: return self._constructor(new_data) - def _replace_dest_dict(self, to_replace, value, inplace): - rs = self if inplace else self.copy() - for k, v in value.iteritems(): - if k in rs: - rs[k].replace(to_replace, v, inplace=True) - return rs if not inplace else None - - def _replace_src_dict(self, to_replace, value, inplace): - rs = self if inplace else self.copy() - for k, src in to_replace.iteritems(): - if k in rs: - rs[k].replace(src, value, inplace=True) - return rs if not inplace else None - - def _replace_both_dict(self, to_replace, value, inplace): - rs = self if inplace else self.copy() - for c, src in to_replace.iteritems(): - if c in value and c in rs: - rs[c].replace(src, value[c], inplace=True) - return rs if not inplace else None - #---------------------------------------------------------------------- # Rename diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 0228baf238bcd..4163c6ad8f60f 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -880,10 +880,23 @@ def _verify_integrity(self): 'block items') def apply(self, f, *args, **kwargs): - """ iterate over the blocks, collect and create a new block manager """ + """ iterate over the blocks, collect and create a new block manager + + Parameters + ---------- + f : the callable or function name to operate on at the block level + axes : optional (if not supplied, use self.axes) + filter : callable, if supplied, only call 
the block if the filter is True + """ + axes = kwargs.pop('axes',None) + filter = kwargs.pop('filter',None) result_blocks = [] for blk in self.blocks: + if filter is not None: + if not blk.items.isin(filter).any(): + result_blocks.append(blk) + continue if callable(f): applied = f(blk, *args, **kwargs) else: diff --git a/pandas/core/series.py b/pandas/core/series.py index c6fe396b08867..fcefbbe216aa3 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -732,13 +732,8 @@ def where(self, cond, other=nan, inplace=False): if len(other) != len(ser): raise ValueError('Length of replacements must equal series length') - result, changed = com._maybe_upcast_putmask(ser,~cond,other) - if changed: - - # need to actually change ser here - if inplace: - ser.dtype = result.dtype - ser[:] = result + change = ser if inplace else None + result, changed = com._maybe_upcast_putmask(ser,~cond,other,change=change) return None if inplace else ser @@ -2680,11 +2675,17 @@ def replace(self, to_replace, value=None, method='pad', inplace=False, ------- replaced : Series """ - result = self.copy() if not inplace else self + + if inplace: + result = self + change = self + else: + result = self.copy() + change = None def _rep_one(s, to_rep, v): # replace single value mask = com.mask_missing(s.values, to_rep) - np.putmask(s.values, mask, v) + com._maybe_upcast_putmask(s.values,mask,v,change=change) def _rep_dict(rs, to_rep): # replace {[src] -> dest} @@ -2701,7 +2702,7 @@ def _rep_dict(rs, to_rep): # replace {[src] -> dest} masks[d] = com.mask_missing(rs.values, sset) for d, m in masks.iteritems(): - np.putmask(rs.values, m, d) + com._maybe_upcast_putmask(rs.values,m,d,change=change) else: # if no risk of clobbering then simple for d, sset in dd.iteritems(): _rep_one(rs, sset, d) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 2cdb4488b8126..f143d0fcacc2f 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -5621,6 +5621,16 @@ def 
test_replace_mixed(self): result = df.replace([1,2], ['foo','bar']) assert_frame_equal(result,expected) + # test case from GH 3064 + from pandas.util.testing import makeCustomDataframe as mkdf + df = DataFrame({'A' : Series([3,0],dtype='int64'), 'B' : Series([0,3],dtype='int64') }) + result = df.replace(3, df.mean().to_dict()) + expected = df.copy().astype('float64') + m = df.mean() + expected.iloc[0,0] = m[0] + expected.iloc[1,1] = m[1] + assert_frame_equal(result,expected) + def test_replace_interpolate(self): padded = self.tsframe.replace(nan, method='pad') assert_frame_equal(padded, self.tsframe.fillna(method='pad'))