ENH: implement DataFrame.lookup for label-based vector fancy indexing…

…, GH #338
pandas-dev · Jan 12, 2012 · f494fbd · f494fbd
1 parent 1f6d10c
commit f494fbd
Show file tree

Hide file tree

Showing 3 changed files with 75 additions and 0 deletions.
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -64,6 +64,8 @@ pandas 0.7.0
     for potential speedups (GH #595)
   - Can pass MaskedArray to Series constructor (PR #563)
   - Add Panel item access via attributes and IPython completion (GH #554)
+  - Implement ``DataFrame.lookup``, fancy-indexing analogue for retrieving
+    values given a sequence of row and column labels (GH #338)
 
 **API Changes**
 
@@ -187,6 +189,8 @@ pandas 0.7.0
   - Fix exception caused by parser converter returning strings (GH #583)
   - Fix MultiIndex formatting bug with integer names (GH #601)
   - Fix bug in handling of non-numeric aggregates in Series.groupby (GH #612)
+  - Fix TypeError with tuple subclasses (e.g. namedtuple) in
+    DataFrame.from_records (GH #611)
 
 Thanks
 ------

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -1430,6 +1430,51 @@ def xs(self, key, axis=0, copy=True):
             result.index = _maybe_droplevels(result.index, key)
             return result
 
+    def lookup(self, row_labels, col_labels):
+        """
+        Label-based "fancy indexing" function for DataFrame. Given equal-length
+        arrays of row and column labels, return an array of the values
+        corresponding to each (row, col) pair.
+
+        Parameters
+        ----------
+        row_labels : sequence
+            Row labels to look up, one per requested value
+        col_labels : sequence
+            Column labels to look up; must have the same length as row_labels
+
+        Returns
+        -------
+        values : ndarray
+            The value found at each (row, col) pair, in input order
+
+        Raises
+        ------
+        ValueError
+            If row_labels and col_labels differ in length
+        KeyError
+            If a row or column label is not found (vectorized path; the
+            element-wise path defers to get_value for missing labels)
+
+        Notes
+        -----
+        Akin to
+
+        result = []
+        for row, col in zip(row_labels, col_labels):
+            result.append(df.get_value(row, col))
+        """
+        n = len(row_labels)
+        # Explicit check rather than assert: asserts vanish under python -O
+        if n != len(col_labels):
+            raise ValueError('Row labels must have same size as '
+                             'column labels')
+
+        # For mixed-type frames self.values must build a single consolidated
+        # array, so only take the vectorized path when there are enough
+        # lookups to amortize that cost
+        thresh = 1000
+        if not self._is_mixed_type or n > thresh:
+            values = self.values
+            ridx = self.index.get_indexer(row_labels)
+            cidx = self.columns.get_indexer(col_labels)
+            # get_indexer marks missing labels with -1; left unchecked those
+            # would be folded into the flat offset and fetch an unrelated
+            # element instead of failing
+            if (ridx == -1).any():
+                raise KeyError('One or more row labels was not found')
+            if (cidx == -1).any():
+                raise KeyError('One or more column labels was not found')
+            # Row-major offset into the 2-D values array
+            flat_index = ridx * len(self.columns) + cidx
+            result = values.flat[flat_index]
+        else:
+            # At most thresh pairs reach this branch, so builtin zip is fine
+            result = np.empty(n, dtype='O')
+            for i, (r, c) in enumerate(zip(row_labels, col_labels)):
+                result[i] = self.get_value(r, c)
+
+        # Try to recover a more specific dtype from an object result
+        if result.dtype == 'O':
+            result = lib.maybe_convert_objects(result)
+
+        return result
+
     #----------------------------------------------------------------------
     # Reindexing and alignment
 

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -702,6 +702,32 @@ def test_get_value(self):
                 expected = self.frame[col][idx]
                 assert_almost_equal(result, expected)
 
+    def test_lookup(self):
+        # Reference implementation: one scalar get_value per (row, col) pair
+        def alt(df, rows, cols):
+            return [df.get_value(r, c) for r, c in zip(rows, cols)]
+
+        # Compare lookup against the scalar reference on paired labels
+        def testit(df):
+            index_labels = list(df.index)
+            column_labels = list(df.columns)
+            rows = index_labels * len(column_labels)
+            cols = column_labels * len(index_labels)
+            expected = alt(df, rows, cols)
+            result = df.lookup(rows, cols)
+            assert_almost_equal(result, expected)
+
+        for frame in (self.mixed_frame, self.frame):
+            testit(frame)
+
+        # Pick, per row, the boolean from the column named by that row's label
+        data = {'label' : ['a', 'b', 'a', 'c'],
+                'mask_a' : [True, True, False, True],
+                'mask_b' : [True, False, False, False],
+                'mask_c' : [False, True, False, True]}
+        df = DataFrame(data)
+        df['mask'] = df.lookup(df.index, 'mask_' + df['label'])
+        exp_mask = alt(df, df.index, 'mask_' + df['label'])
+        assert_almost_equal(df['mask'], exp_mask)
+        # Object results should have been converted back to a bool dtype
+        self.assert_(df['mask'].dtype == np.bool_)
+
     def test_set_value(self):
         for idx in self.frame.index:
             for col in self.frame.columns: