Skip to content

Commit

Permalink
ENH: handle differently-indexed results in DataFrame.apply, GH #498
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Dec 18, 2011
1 parent 19cb89e commit a7f8d6c
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 19 deletions.
26 changes: 9 additions & 17 deletions bench/zoo_bench.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,21 @@
from pandas import *
from pandas.util.testing import rands

# from la import larry

n = 1000000
# indices = Index([rands(10) for _ in xrange(n)])

def sample(values, k):
    """Return ``k`` randomly chosen elements of ``values``.

    Positions are taken from the front of a random permutation of
    ``range(len(values))``, so no position is picked twice.
    """
    shuffled_positions = np.random.permutation(len(values))
    chosen = shuffled_positions[:k]
    return values.take(chosen)
# Number of observations in each benchmark series.
sz = 500000
# 1,000,000 evenly spaced integer offsets, 10,000,000 apart.
rng = np.arange(0, 10000000000000, 10000000)
# Candidate timestamps: the current time viewed as int64, shifted by each offset.
stamps = np.datetime64(datetime.now()).view('i8') + rng
# Two separate random draws of sz timestamps from the pool, each sorted ascending.
idx1 = np.sort(sample(stamps, sz))
idx2 = np.sort(sample(stamps, sz))
# Random-normal series indexed by the two sampled timestamp sets.
ts1 = Series(np.random.randn(sz), idx1)
ts2 = Series(np.random.randn(sz), idx2)

subsample_size = 90000

# subsample_size = 90000

# x = Series(np.random.randn(100000), indices)
# y = Series(np.random.randn(subsample_size),
Expand All @@ -20,19 +25,6 @@ def sample(values, k):
# lx = larry(np.random.randn(100000), [list(indices)])
# ly = larry(np.random.randn(subsample_size), [list(y.index)])

sz = 500000

rng = np.arange(0, 10000000000000, 10000000)
stamps = np.datetime64(datetime.now()).view('i8') + rng

# stamps = np.random.randint(1000000000, 1000000000000, 2000000)

idx1 = np.sort(sample(stamps, sz))
idx2 = np.sort(sample(stamps, sz))

ts1 = Series(np.random.randn(sz), idx1)
ts2 = Series(np.random.randn(sz), idx2)

# Benchmark 1: Two 500,000-length time series (int64-based index) with
# randomly chosen timestamps

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -2406,8 +2406,12 @@ def _apply_standard(self, func, axis, ignore_failures=False):
results[k] = func(v)

if len(results) > 0 and _is_sequence(results.values()[0]):
result = self._constructor(data=results, index=res_columns,
columns=res_index)
if not isinstance(results.values()[0], Series):
index = res_columns
else:
index = None

result = self._constructor(data=results, index=index)

if axis == 1:
result = result.T
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3001,6 +3001,21 @@ def test_apply_reduce_Series(self):
expected = self.frame.mean(1)
assert_series_equal(result, expected)

def test_apply_differently_indexed(self):
    """apply() with a function returning a differently-indexed Series.

    ``Series.describe`` returns a Series whose index differs from the
    frame's own axes; the applied result must match a frame assembled
    by hand from the per-column (or per-row) describe() output.
    """
    frame = DataFrame(np.random.randn(20, 10))

    # axis=0: one describe() result per column.
    result0 = frame.apply(Series.describe, axis=0)
    per_column = dict((label, col.describe())
                      for label, col in frame.iteritems())
    expected0 = DataFrame(per_column, columns=frame.columns)
    assert_frame_equal(result0, expected0)

    # axis=1: one describe() result per row, built via the transpose
    # and transposed back so rows line up with the original index.
    result1 = frame.apply(Series.describe, axis=1)
    per_row = dict((label, row.describe())
                   for label, row in frame.T.iteritems())
    expected1 = DataFrame(per_row, columns=frame.index).T
    assert_frame_equal(result1, expected1)

def test_applymap(self):
applied = self.frame.applymap(lambda x: x * 2)
assert_frame_equal(applied, self.frame * 2)
Expand Down

0 comments on commit a7f8d6c

Please sign in to comment.