ENH: take a crack at #614
wesm committed Jan 12, 2012
1 parent a99c057 commit 4ef29e4
Showing 5 changed files with 70 additions and 10 deletions.
16 changes: 11 additions & 5 deletions pandas/core/frame.py
@@ -2656,7 +2656,8 @@ def apply(self, func, axis=0, broadcast=False, raw=False,
                 is_reduction = not isinstance(f(_EMPTY_SERIES),
                                               np.ndarray)
                 if is_reduction:
-                    return Series(np.nan, index=self._get_agg_axis(axis))
+                    return Series(np.nan,
+                                  index=self._get_agg_axis(axis))
                 else:
                     return self.copy()
 
@@ -2670,7 +2671,7 @@ def apply(self, func, axis=0, broadcast=False, raw=False,
     def _apply_raw(self, func, axis):
         try:
             result = lib.reduce(self.values, func, axis=axis)
-        except Exception:
+        except Exception, e:
             result = np.apply_along_axis(func, axis, self.values)
 
         # TODO: mixed type case
@@ -2715,8 +2716,13 @@ def _apply_standard(self, func, axis, ignore_failures=False):
             if len(successes) < len(res_index):
                 res_index = res_index.take(successes)
         else:
-            for k, v in series_gen:
-                results[k] = func(v)
+            try:
+                for k, v in series_gen:
+                    results[k] = func(v)
+            except Exception, e:
+                if hasattr(e, 'args'):
+                    e.args = e.args + ('occurred at index %s' % str(k),)
+                raise
 
         if len(results) > 0 and _is_sequence(results.values()[0]):
             if not isinstance(results.values()[0], Series):
@@ -2729,7 +2735,7 @@ def _apply_standard(self, func, axis, ignore_failures=False):
             if axis == 1:
                 result = result.T
 
-            return result
+            return result.convert_objects()
         else:
             return Series(results, index=res_index)
 
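
The net effect of the frame.py change: when a function passed to DataFrame.apply raises, the offending row or column label is appended to the exception's args before the exception is re-raised, and the assembled result is passed through convert_objects() so object columns are downcast back to their natural dtypes. A minimal sketch of the user-visible behaviour, assuming a pandas build with this patch applied (the frame and the bad() function below are made up for illustration, not part of the commit):

    import numpy as np
    from pandas import DataFrame

    df = DataFrame({'a': np.arange(3.), 'b': ['x', 'y', 'z']})

    def bad(row):
        # fails on the row labelled 1
        if row['b'] == 'y':
            raise ValueError('boom')
        return row

    try:
        df.apply(bad, axis=1)
    except ValueError as e:
        # with this patch the failing label is appended to the exception's
        # args, e.g. ('boom', 'occurred at index 1')
        print(e.args)
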
4 changes: 4 additions & 0 deletions pandas/sparse/frame.py
@@ -107,6 +107,10 @@ def _consolidate_inplace(self):
         # do nothing when DataFrame calls this method
         pass
 
+    def convert_objects(self):
+        # XXX
+        return self
+
     @property
     def _constructor(self):
         def wrapper(data, index=None, columns=None):
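
The convert_objects stub is presumably there because _apply_standard above now finishes with result.convert_objects(), and a SparseDataFrame produced on that code path has nothing to convert, so the method simply hands back self. A rough, hedged illustration (the sparse frame here is invented for the example):

    from pandas import DataFrame

    sdf = DataFrame({'a': [1.0, 2.0, 3.0]}).to_sparse()
    # with the stub in place this is a no-op that returns the same object
    assert sdf.convert_objects() is sdf
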
7 changes: 3 additions & 4 deletions pandas/src/reduce.pyx
@@ -67,6 +67,9 @@ cdef class Reducer:
                 PyArray_SETITEM(result, PyArray_ITER_DATA(it), res)
                 chunk.data = chunk.data + self.increment
                 PyArray_ITER_NEXT(it)
+        except Exception, e:
+            if hasattr(e, 'args'):
+                e.args = e.args + (i,)
         finally:
             # so we don't free the wrong memory
             chunk.data = dummy_buf
@@ -80,10 +83,6 @@ cdef class Reducer:
             assert(not (isinstance(res, list) and len(res) == len(self.dummy)))
 
             result = np.empty(self.nresults, dtype='O')
-            # if hasattr(res, 'dtype'):
-            #     result = np.empty(self.nresults, dtype=res.dtype)
-            # else:
-            #     result = np.empty(self.nresults, dtype='O')
             result[0] = res
         except Exception:
             raise ValueError('function does not reduce')
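
Both this Cython fast path and the Python loop in frame.py use the same error-annotation trick: extra context is appended to the exception's args tuple instead of wrapping the exception, so the original exception type is preserved and the location still shows up in the traceback (the reducer appends the integer chunk position i, the frame.py loop a formatted 'occurred at index ...' label). A standalone sketch of the pattern; the helper name and inputs are illustrative, not from the commit:

    def apply_with_location(func, items):
        # items: iterable of (label, value) pairs; on failure, append the
        # label to the exception's args and let the exception propagate
        for label, value in items:
            try:
                func(value)
            except Exception as e:
                if hasattr(e, 'args'):
                    e.args = e.args + ('occurred at index %s' % str(label),)
                raise

    apply_with_location(int, [('a', '1'), ('b', '2'), ('c', 'oops')])
    # raises ValueError: ("invalid literal for int() ...", 'occurred at index c')
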
52 changes: 52 additions & 0 deletions pandas/tests/test_frame.py
@@ -3162,6 +3162,58 @@ def test_apply_differently_indexed(self):
                               columns=df.index).T
         assert_frame_equal(result1, expected1)
 
+    def test_apply_modify_traceback(self):
+        data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
+                                 'bar', 'bar', 'bar', 'bar',
+                                 'foo', 'foo', 'foo'],
+                          'B' : ['one', 'one', 'one', 'two',
+                                 'one', 'one', 'one', 'two',
+                                 'two', 'two', 'one'],
+                          'C' : ['dull', 'dull', 'shiny', 'dull',
+                                 'dull', 'shiny', 'shiny', 'dull',
+                                 'shiny', 'shiny', 'shiny'],
+                          'D' : np.random.randn(11),
+                          'E' : np.random.randn(11),
+                          'F' : np.random.randn(11)})
+
+        data['C'][4] = np.nan
+
+        def transform(row):
+            if row['C'].startswith('shin') and row['A'] == 'foo':
+                row['D'] = 7
+            return row
+
+        def transform2(row):
+            if (notnull(row['C']) and row['C'].startswith('shin')
+                and row['A'] == 'foo'):
+                row['D'] = 7
+            return row
+
+        try:
+            transformed = data.apply(transform, axis=1)
+        except Exception, e:
+            pass
+
+        self.assertEqual(len(e.args), 2)
+        self.assertEqual(e.args[1], 'occurred at index 4')
+
+    def test_apply_convert_objects(self):
+        data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
+                                 'bar', 'bar', 'bar', 'bar',
+                                 'foo', 'foo', 'foo'],
+                          'B' : ['one', 'one', 'one', 'two',
+                                 'one', 'one', 'one', 'two',
+                                 'two', 'two', 'one'],
+                          'C' : ['dull', 'dull', 'shiny', 'dull',
+                                 'dull', 'shiny', 'shiny', 'dull',
+                                 'shiny', 'shiny', 'shiny'],
+                          'D' : np.random.randn(11),
+                          'E' : np.random.randn(11),
+                          'F' : np.random.randn(11)})
+
+        result = data.apply(lambda x: x, axis=1)
+        assert_frame_equal(result, data)
+
     def test_applymap(self):
         applied = self.frame.applymap(lambda x: x * 2)
         assert_frame_equal(applied, self.frame * 2)
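
test_apply_convert_objects pins down the dtype half of the change: on a mixed-dtype frame, an axis=1 apply hands each row to the function as an object-dtype Series and rebuilds the frame from those rows, so without the final convert_objects() call the numeric columns would come back as object dtype and the round-trip assert would fail. A hedged illustration, with a made-up frame rather than the one in the test:

    import numpy as np
    from pandas import DataFrame

    df = DataFrame({'x': np.arange(3.), 'y': ['a', 'b', 'c']})
    result = df.apply(lambda row: row, axis=1)
    # with this patch result['x'] should come back as float64 rather than
    # object, so assert_frame_equal(result, df) can pass
    print(result['x'].dtype)
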
1 change: 0 additions & 1 deletion scripts/roll_median_leak.py
@@ -14,7 +14,6 @@
 pid = os.getpid()
 proc = psutil.Process(pid)
 
-
 s = Series(np.random.randn(10000))
 
 for _ in xrange(5):
