ENH: handle differently-indexed results in DataFrame.apply, GH #498

pandas-dev · Dec 18, 2011 · commit a7f8d6c
1 parent 19cb89e
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 19 deletions.
diff --git a/bench/zoo_bench.py b/bench/zoo_bench.py
@@ -1,16 +1,21 @@
 from pandas import *
 from pandas.util.testing import rands
 
-# from la import larry
-
 n = 1000000
 # indices = Index([rands(10) for _ in xrange(n)])
-
 def sample(values, k):
     sampler = np.random.permutation(len(values))
     return values.take(sampler[:k])
+sz = 500000
+rng = np.arange(0, 10000000000000, 10000000)
+stamps = np.datetime64(datetime.now()).view('i8') + rng
+idx1 = np.sort(sample(stamps, sz))
+idx2 = np.sort(sample(stamps, sz))
+ts1 = Series(np.random.randn(sz), idx1)
+ts2 = Series(np.random.randn(sz), idx2)
 
-subsample_size = 90000
+
+# subsample_size = 90000
 
 # x = Series(np.random.randn(100000), indices)
 # y = Series(np.random.randn(subsample_size),
@@ -20,19 +25,6 @@ def sample(values, k):
 # lx = larry(np.random.randn(100000), [list(indices)])
 # ly = larry(np.random.randn(subsample_size), [list(y.index)])
 
-sz = 500000
-
-rng = np.arange(0, 10000000000000, 10000000)
-stamps = np.datetime64(datetime.now()).view('i8') + rng
-
-# stamps = np.random.randint(1000000000, 1000000000000, 2000000)
-
-idx1 = np.sort(sample(stamps, sz))
-idx2 = np.sort(sample(stamps, sz))
-
-ts1 = Series(np.random.randn(sz), idx1)
-ts2 = Series(np.random.randn(sz), idx2)
-
 # Benchmark 1: Two 1-million length time series (int64-based index) with
 # randomly chosen timestamps
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -2406,8 +2406,12 @@ def _apply_standard(self, func, axis, ignore_failures=False):
                 results[k] = func(v)
 
         if len(results) > 0 and _is_sequence(results.values()[0]):
-            result = self._constructor(data=results, index=res_columns,
-                                       columns=res_index)
+            if not isinstance(results.values()[0], Series):
+                index = res_columns
+            else:
+                index = None
+
+            result = self._constructor(data=results, index=index)
 
             if axis == 1:
                 result = result.T

diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py
@@ -3001,6 +3001,21 @@ def test_apply_reduce_Series(self):
         expected = self.frame.mean(1)
         assert_series_equal(result, expected)
 
+    def test_apply_differently_indexed(self):
+        df = DataFrame(np.random.randn(20, 10))
+
+        result0 = df.apply(Series.describe, axis=0)
+        expected0 = DataFrame(dict((i, v.describe())
+                                   for i, v in df.iteritems()),
+                              columns=df.columns)
+        assert_frame_equal(result0, expected0)
+
+        result1 = df.apply(Series.describe, axis=1)
+        expected1 = DataFrame(dict((i, v.describe())
+                                   for i, v in df.T.iteritems()),
+                              columns=df.index).T
+        assert_frame_equal(result1, expected1)
+
     def test_applymap(self):
         applied = self.frame.applymap(lambda x: x * 2)
         assert_frame_equal(applied, self.frame * 2)