Skip to content

Commit

Permalink
ENH: implement DataFrame.lookup for label-based vector fancy indexing…
Browse files Browse the repository at this point in the history
…, GH #338
  • Loading branch information
wesm committed Jan 12, 2012
1 parent 1f6d10c commit f494fbd
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 0 deletions.
4 changes: 4 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ pandas 0.7.0
for potential speedups (GH #595)
- Can pass MaskedArray to Series constructor (PR #563)
- Add Panel item access via attributes and IPython completion (GH #554)
- Implement ``DataFrame.lookup``, fancy-indexing analogue for retrieving
values given equal-length sequences of row and column labels (GH #338)

**API Changes**

Expand Down Expand Up @@ -187,6 +189,8 @@ pandas 0.7.0
- Fix exception caused by parser converter returning strings (GH #583)
- Fix MultiIndex formatting bug with integer names (GH #601)
- Fix bug in handling of non-numeric aggregates in Series.groupby (GH #612)
- Fix TypeError with tuple subclasses (e.g. namedtuple) in
DataFrame.from_records (GH #611)

Thanks
------
Expand Down
45 changes: 45 additions & 0 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1430,6 +1430,51 @@ def xs(self, key, axis=0, copy=True):
result.index = _maybe_droplevels(result.index, key)
return result

def lookup(self, row_labels, col_labels):
    """
    Label-based "fancy indexing" function for DataFrame. Given equal-length
    arrays of row and column labels, return an array of the values
    corresponding to each (row, col) pair.

    Parameters
    ----------
    row_labels : sequence
        Row labels, one per value to retrieve.
    col_labels : sequence
        Column labels; must have the same length as ``row_labels``.

    Returns
    -------
    values : ndarray
        ``values[i]`` is the entry located at
        ``(row_labels[i], col_labels[i])``.

    Raises
    ------
    ValueError
        If ``row_labels`` and ``col_labels`` differ in length.
    KeyError
        On the vectorized path, if any row or column label is not present
        in the corresponding axis. On the element-wise path, missing labels
        surface as whatever ``get_value`` raises.

    Notes
    -----
    Akin to

        result = []
        for row, col in zip(row_labels, col_labels):
            result.append(df.get_value(row, col))
    """
    n = len(row_labels)
    # Explicit check rather than ``assert`` so it still runs under ``-O``.
    if n != len(col_labels):
        raise ValueError('Row labels must have same size as column labels')

    # Above this many lookups, materializing ``self.values`` once is taken
    # to be cheaper than n scalar ``get_value`` calls, even for mixed frames.
    thresh = 1000
    if not self._is_mixed_type or n > thresh:
        values = self.values
        ridx = self.index.get_indexer(row_labels)
        cidx = self.columns.get_indexer(col_labels)

        # get_indexer flags labels it cannot find with -1; feeding those into
        # the flat-index arithmetic below would silently read the wrong cell.
        if (ridx == -1).any():
            raise KeyError('One or more row labels was not found')
        if (cidx == -1).any():
            raise KeyError('One or more column labels was not found')

        # Position of (row, col) in the row-major flattened 2-D values.
        flat_index = ridx * len(self.columns) + cidx
        result = values.flat[flat_index]
    else:
        result = np.empty(n, dtype='O')
        # Builtin zip: at most ``thresh`` pairs are ever built here.
        for i, (r, c) in enumerate(zip(row_labels, col_labels)):
            result[i] = self.get_value(r, c)

    # Object results may hold homogeneous scalars; try to recover a
    # narrower dtype.
    if result.dtype == 'O':
        result = lib.maybe_convert_objects(result)

    return result

#----------------------------------------------------------------------
# Reindexing and alignment

Expand Down
26 changes: 26 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,32 @@ def test_get_value(self):
expected = self.frame[col][idx]
assert_almost_equal(result, expected)

def test_lookup(self):
    """Check DataFrame.lookup against an element-wise get_value loop."""

    def alt(df, rows, cols):
        # Reference implementation: one scalar get_value per (row, col).
        return [df.get_value(r, c) for r, c in zip(rows, cols)]

    def testit(df):
        # Repeat each axis by the length of the other so both label lists
        # have the same length before pairing them up.
        n_rows, n_cols = len(df.index), len(df.columns)
        rows = list(df.index) * n_cols
        cols = list(df.columns) * n_rows
        assert_almost_equal(df.lookup(rows, cols), alt(df, rows, cols))

    for frame in (self.mixed_frame, self.frame):
        testit(frame)

    # Pick, per row, the boolean column named by that row's 'label' value.
    df = DataFrame({'label' : ['a', 'b', 'a', 'c'],
                    'mask_a' : [True, True, False, True],
                    'mask_b' : [True, False, False, False],
                    'mask_c' : [False, True, False, True]})
    wanted_cols = 'mask_' + df['label']
    df['mask'] = df.lookup(df.index, wanted_cols)
    exp_mask = alt(df, df.index, wanted_cols)
    assert_almost_equal(df['mask'], exp_mask)
    # Result of looking up only booleans should come back as bool dtype.
    self.assert_(df['mask'].dtype == np.bool_)

def test_set_value(self):
for idx in self.frame.index:
for col in self.frame.columns:
Expand Down

0 comments on commit f494fbd

Please sign in to comment.