Skip to content

Commit

Permalink
ENH: can pass dict of values per column to DataFrame.fillna, close #661
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Mar 13, 2012
1 parent 041d2a3 commit 79decd7
Show file tree
Hide file tree
Showing 4 changed files with 54 additions and 9 deletions.
18 changes: 15 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2402,8 +2402,10 @@ def fillna(self, value=None, method='pad', inplace=False):
Method to use for filling holes in reindexed Series
pad / ffill: propagate last valid observation forward to next valid
backfill / bfill: use NEXT valid observation to fill gap
value : any kind (should be same type as array)
Value to use to fill holes (e.g. 0)
value : scalar or dict
Value to use to fill holes (e.g. 0). Alternatively, a dict mapping
each column name to the value used to fill that column (columns not
in the dict will not be filled)
inplace : boolean, default False
If True, fill the DataFrame in place. Note: this will modify any
other views on this DataFrame, like if you took a no-copy slice of
Expand Down Expand Up @@ -2438,7 +2440,17 @@ def fillna(self, value=None, method='pad', inplace=False):
# Float type values
if len(self.columns) == 0:
return self
new_data = self._data.fillna(value)
if np.isscalar(value):
new_data = self._data.fillna(value, inplace=inplace)
elif isinstance(value, dict):
result = self if inplace else self.copy()
for k, v in value.iteritems():
if k not in result:
continue
result[k].fillna(v, inplace=True)
return result
else: # pragma: no cover
raise TypeError('Invalid fill value type: %s' % type(value))

if inplace:
self._data = new_data
Expand Down
20 changes: 14 additions & 6 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,15 +201,20 @@ def split_block_at(self, item):
left_block = make_block(self.values[:loc],
self.items[:loc].copy(), self.ref_items)
right_block = make_block(self.values[loc + 1:],
self.items[loc + 1:].copy(), self.ref_items)
self.items[loc + 1:].copy(),
self.ref_items)

return left_block, right_block

def fillna(self, value):
new_values = self.values.copy()
def fillna(self, value, inplace=False):
new_values = self.values if inplace else self.values.copy()
mask = com.isnull(new_values.ravel())
new_values.flat[mask] = value
return make_block(new_values, self.items, self.ref_items)

if inplace:
return self
else:
return make_block(new_values, self.items, self.ref_items)

def interpolate(self, method='pad', inplace=False):
values = self.values if inplace else self.values.copy()
Expand Down Expand Up @@ -931,11 +936,14 @@ def add_suffix(self, suffix):
f = ('%s' + ('%s' % suffix)).__mod__
return self.rename_items(f)

def fillna(self, value):
def fillna(self, value, inplace=False):
"""
"""
new_blocks = [b.fillna(value) for b in self.blocks]
new_blocks = [b.fillna(value, inplace=inplace)
for b in self.blocks]
if inplace:
return self
return BlockManager(new_blocks, self.axes)

@property
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2968,6 +2968,21 @@ def test_fillna_inplace(self):
self.assert_(df2 is df)
assert_frame_equal(df2, expected)

def test_fillna_dict(self):
    """fillna with a dict fills each listed column with its own value."""
    df = DataFrame({'a': [nan, 1, 2, nan, nan],
                    'b': [1, 2, 3, nan, nan],
                    'c': [nan, 1, 2, 3, 4]})

    result = df.fillna({'a': 0, 'b': 5})

    # Column 'c' is not named in the dict, so its NaN must be left alone;
    # the expected frame is built by filling 'a' and 'b' one at a time.
    expected = df.copy()
    expected['a'] = expected['a'].fillna(0)
    expected['b'] = expected['b'].fillna(5)
    assert_frame_equal(result, expected)

    # Keys that do not name a column ('d') are skipped, so the outcome must
    # match the two-key call rather than raise or gain a new column.
    result = df.fillna({'a': 0, 'b': 5, 'd': 7})
    assert_frame_equal(result, expected)

def test_truncate(self):
offset = datetools.bday

Expand Down
10 changes: 10 additions & 0 deletions vb_suite/frame_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,13 @@
frame_fancy_lookup_all = Benchmark('df.lookup(row_labels_all, col_labels_all)',
setup,
start_date=datetime(2012, 1, 12))

#----------------------------------------------------------------------
# fillna in place

# Benchmark fixture: a 10000 x 100 frame built from randn, with every other
# row (0, 2, 4, ...) overwritten with NaN so half of the rows need filling.
# NOTE(review): the NaN assignment goes through df.values, so it assumes
# that attribute exposes the frame's own buffer rather than a copy -- confirm.
setup = common_setup + """
df = DataFrame(randn(10000, 100))
df.values[::2] = np.nan
"""

# Times a scalar fill done in place on the frame created by ``setup``.
# NOTE(review): the statement mutates df, so if the harness does not re-run
# ``setup`` between repetitions only the first call will see any NaN; also,
# unlike the lookup benchmark above, no start_date is passed -- confirm both.
frame_fillna_inplace = Benchmark('df.fillna(0, inplace=True)', setup)

0 comments on commit 79decd7

Please sign in to comment.