From f494fbd2bbb278669ac6f128bcc55aa04dd9f70b Mon Sep 17 00:00:00 2001
From: Wes McKinney
Date: Thu, 12 Jan 2012 14:52:57 -0500
Subject: [PATCH] ENH: implement DataFrame.lookup for label-based vector fancy indexing, GH #338

---
 RELEASE.rst                |  4 +++
 pandas/core/frame.py       | 68 ++++++++++++++++++++++++++++++++++++++
 pandas/tests/test_frame.py | 32 ++++++++++++++++++
 3 files changed, 104 insertions(+)

diff --git a/RELEASE.rst b/RELEASE.rst
index ef8c45610a483..35d01fa5757ab 100644
--- a/RELEASE.rst
+++ b/RELEASE.rst
@@ -64,6 +64,8 @@ pandas 0.7.0
     for potential speedups (GH #595)
   - Can pass MaskedArray to Series constructor (PR #563)
   - Add Panel item access via attributes and IPython completion (GH #554)
+  - Implement ``DataFrame.lookup``, fancy-indexing analogue for retrieving
+    values given a sequence of row and column labels (GH #338)
 
 **API Changes**
 
@@ -187,6 +189,8 @@ pandas 0.7.0
   - Fix exception caused by parser converter returning strings (GH #583)
   - Fix MultiIndex formatting bug with integer names (GH #601)
   - Fix bug in handling of non-numeric aggregates in Series.groupby (GH #612)
+  - Fix TypeError with tuple subclasses (e.g. namedtuple) in
+    DataFrame.from_records (GH #611)
 
 Thanks
 ------
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d8c792fa47f33..6372327850036 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -1430,6 +1430,74 @@ def xs(self, key, axis=0, copy=True):
             result.index = _maybe_droplevels(result.index, key)
         return result
 
+    def lookup(self, row_labels, col_labels):
+        """
+        Label-based "fancy indexing" function for DataFrame. Given equal-length
+        arrays of row and column labels, return an array of the values
+        corresponding to each (row, col) pair.
+
+        Parameters
+        ----------
+        row_labels : sequence
+            Row labels, one per requested value
+        col_labels : sequence
+            Column labels; must have the same length as row_labels
+
+        Returns
+        -------
+        values : ndarray
+            Element i is the value found at (row_labels[i], col_labels[i])
+
+        Raises
+        ------
+        ValueError
+            If row_labels and col_labels differ in length
+        KeyError
+            If a label is missing on the vectorized path; the label-by-label
+            path raises whatever get_value raises for a missing label
+
+        Notes
+        -----
+        Akin to
+
+        result = []
+        for row, col in zip(row_labels, col_labels):
+            result.append(df.get_value(row, col))
+        """
+        from itertools import izip
+
+        n = len(row_labels)
+        if n != len(col_labels):
+            # raise explicitly: an assert is stripped when running python -O
+            raise ValueError('Row labels must have same size as '
+                             'column labels')
+
+        # mixed-type frames take the vectorized path only for large requests
+        thresh = 1000
+        if not self._is_mixed_type or n > thresh:
+            values = self.values
+            ridx = self.index.get_indexer(row_labels)
+            cidx = self.columns.get_indexer(col_labels)
+            # get_indexer reports a missing label as -1; used as-is that
+            # would wrap around and silently return an unrelated value
+            if (ridx == -1).any():
+                raise KeyError('One or more row labels was not found')
+            if (cidx == -1).any():
+                raise KeyError('One or more column labels was not found')
+            # offset of each (row, col) pair in the row-major flat view
+            flat_index = ridx * len(self.columns) + cidx
+            result = values.flat[flat_index]
+        else:
+            result = np.empty(n, dtype='O')
+            for i, (r, c) in enumerate(izip(row_labels, col_labels)):
+                result[i] = self.get_value(r, c)
+
+        if result.dtype == 'O':
+            # object results may hold a single simpler type (e.g. bool)
+            result = lib.maybe_convert_objects(result)
+
+        return result
+
     #----------------------------------------------------------------------
     # Reindexing and alignment
 
diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
index 4cbd2f7b4e862..b90ea0c92de99 100644
--- a/pandas/tests/test_frame.py
+++ b/pandas/tests/test_frame.py
@@ -702,6 +702,38 @@ def test_get_value(self):
                 expected = self.frame[col][idx]
                 assert_almost_equal(result, expected)
 
+    def test_lookup(self):
+        def alt(df, rows, cols):
+            result = []
+            for r, c in zip(rows, cols):
+                result.append(df.get_value(r, c))
+            return result
+
+        def testit(df):
+            rows = list(df.index) * len(df.columns)
+            cols = list(df.columns) * len(df.index)
+            result = df.lookup(rows, cols)
+            expected = alt(df, rows, cols)
+            assert_almost_equal(result, expected)
+
+        testit(self.mixed_frame)
+        testit(self.frame)
+
+        df = DataFrame({'label' : ['a', 'b', 'a', 'c'],
+                        'mask_a' : [True, True, False, True],
+                        'mask_b' : [True, False, False, False],
+                        'mask_c' : [False, True, False, True]})
+        df['mask'] = df.lookup(df.index, 'mask_' + df['label'])
+        exp_mask = alt(df, df.index, 'mask_' + df['label'])
+        assert_almost_equal(df['mask'], exp_mask)
+        self.assert_(df['mask'].dtype == np.bool_)
+
+        # all-float frame: expected to take the vectorized path
+        df = DataFrame({'a' : [1., 2.], 'b' : [3., 4.]})
+        self.assertRaises(KeyError, df.lookup, [0, 5], ['a', 'b'])
+        self.assertRaises(KeyError, df.lookup, [0, 1], ['a', 'z'])
+        self.assertRaises(ValueError, df.lookup, [0, 1], ['a'])
+
     def test_set_value(self):
         for idx in self.frame.index:
             for col in self.frame.columns: