ENH: pivot table docs and basic tests, GH #234

pandas-dev · Oct 21, 2011 · a663412 · a663412
1 parent f789bb7
commit a663412
Show file tree

Hide file tree

Showing 3 changed files with 97 additions and 2 deletions.
diff --git a/pandas/__init__.py b/pandas/__init__.py
@@ -26,3 +26,4 @@
 from pandas.stats.api import *
 from pandas.util.testing import debug
 
+from pandas.tools.pivot import pivot_table
diff --git a/pandas/tools/pivot.py b/pandas/tools/pivot.py
@@ -4,9 +4,49 @@
 def pivot_table(data, values=None, xby=None, yby=None, aggfunc=np.mean,
                 fill_value=None):
     """
-
+    Create a spreadsheet-style pivot table as a DataFrame. The levels in the
+    pivot table will be stored in MultiIndex objects (hierarchical indexes) on
+    the index and columns of the result DataFrame
+
+    Parameters
+    ----------
+    data : DataFrame
+    values : column to aggregate, optional
+    xby : list
+        Columns to group on the x-axis of the pivot table
+    yby : list
+        Columns to group on the x-axis of the pivot table
+    aggfunc : function, default numpy.mean
+    fill_value : scalar, default None
+        Value to replace missing values with
+
+    Examples
+    --------
+    >>> df
+       A   B   C      D
+    0  foo one small  1
+    1  foo one large  2
+    2  foo one large  2
+    3  foo two small  3
+    4  foo two small  3
+    5  bar one large  4
+    6  bar one small  5
+    7  bar two small  6
+    8  bar two large  7
+
+    >>> table = pivot_table(df, values='D', xby=['A, 'B'],
+                            yby=['C'], aggfunc=np.sum)
+    >>> table
+              small  large
+    foo  one  1      4
+         two  6      NaN
+    bar  one  5      4
+         two  6      7
+
+    Returns
+    -------
+    table : DataFrame
     """
-
     xby = [] if xby is None else list(xby)
     yby = [] if yby is None else list(yby)
 

diff --git a/pandas/tools/tests/test_pivot.py b/pandas/tools/tests/test_pivot.py
@@ -0,0 +1,54 @@
+import unittest
+
+import numpy as np
+
+from pandas import DataFrame
+from pandas.tools.pivot import pivot_table
+from pandas.util.testing import assert_frame_equal
+
+class TestPivotTable(unittest.TestCase):
+
+    def setUp(self):
+        self.data = DataFrame({'A' : ['foo', 'foo', 'foo', 'foo',
+                                      'bar', 'bar', 'bar', 'bar',
+                                      'foo', 'foo', 'foo'],
+                               'B' : ['one', 'one', 'one', 'two',
+                                      'one', 'one', 'one', 'two',
+                                      'two', 'two', 'one'],
+                               'C' : ['dull', 'dull', 'shiny', 'dull',
+                                      'dull', 'shiny', 'shiny', 'dull',
+                                      'shiny', 'shiny', 'shiny'],
+                               'D' : np.random.randn(11),
+                               'E' : np.random.randn(11)})
+
+    def test_pivot_table(self):
+        xby = ['A', 'B']
+        yby=  ['C']
+        table = pivot_table(self.data, values='D', xby=xby, yby=yby)
+
+        if len(xby) > 1:
+            self.assertEqual(table.index.names, xby)
+        else:
+            self.assertEqual(table.index.name, xby[0])
+
+        if len(yby) > 1:
+            self.assertEqual(table.columns.names, yby)
+        else:
+            self.assertEqual(table.columns.name, yby[0])
+
+        expected = self.data.groupby(xby + yby)['D'].agg(np.mean).unstack()
+        assert_frame_equal(table, expected)
+
+    def test_pivot_table_multiple(self):
+        xby = ['A', 'B']
+        yby=  ['C']
+        table = pivot_table(self.data, xby=xby, yby=yby)
+        expected = self.data.groupby(xby + yby).agg(np.mean).unstack()
+        assert_frame_equal(table, expected)
+
+if __name__ == '__main__':
+    import nose
+    nose.runmodule(argv=[__file__,'-vvs','-x','--pdb', '--pdb-failure'],
+                   exit=False)
+
+
Original file line number	Diff line number	Diff line change
Expand Up		@@ -26,3 +26,4 @@
		from pandas.stats.api import *
		from pandas.util.testing import debug

		from pandas.tools.pivot import pivot_table