From 719327784f37a07a744759f215fcb35374fec2b1 Mon Sep 17 00:00:00 2001
From: Jeff Reback <jeff@reback.net>
Date: Tue, 24 Jan 2017 17:44:31 -0500
Subject: [PATCH] ENH: add MultiIndex.to_dataframe ENH: allow hashing of
 MultiIndex

closes #12397
---
 doc/source/api.rst                 |  1 +
 doc/source/whatsnew/v0.20.0.txt    |  2 +-
 pandas/indexes/multi.py            | 24 +++++++++++++++++++
 pandas/tests/indexes/test_multi.py | 37 ++++++++++++++++++++++++++++++
 pandas/tools/hashing.py            |  8 ++++++-
 pandas/tools/tests/test_hashing.py | 16 +++++--------
 6 files changed, 76 insertions(+), 12 deletions(-)

diff --git a/doc/source/api.rst b/doc/source/api.rst
index 04a85bf63a6f88..7215cc3d3574fb 100644
--- a/doc/source/api.rst
+++ b/doc/source/api.rst
@@ -1460,6 +1460,7 @@ MultiIndex Components
    MultiIndex.set_levels
    MultiIndex.set_labels
    MultiIndex.to_hierarchical
+   MultiIndex.to_dataframe
    MultiIndex.is_lexsorted
    MultiIndex.droplevel
    MultiIndex.swaplevel
diff --git a/doc/source/whatsnew/v0.20.0.txt b/doc/source/whatsnew/v0.20.0.txt
index 6aaed803c5352d..2b7f1f0664bf2f 100644
--- a/doc/source/whatsnew/v0.20.0.txt
+++ b/doc/source/whatsnew/v0.20.0.txt
@@ -131,7 +131,7 @@ Other enhancements
 - New ``UnsortedIndexError`` (subclass of ``KeyError``) raised when indexing/slicing into an
   unsorted MultiIndex (:issue:`11897`). This allows differentiation between errors due to lack
   of sorting or an incorrect key. See :ref:`here <advanced.unsorted>`
-
+- ``MultiIndex`` has gained a ``.to_dataframe()`` method to convert to a ``DataFrame`` (:issue:`12397`)
 - ``pd.cut`` and ``pd.qcut`` now support datetime64 and timedelta64 dtypes (:issue:`14714`, :issue:`14798`)
 - ``pd.qcut`` has gained the ``duplicates='raise'|'drop'`` option to control whether to raise on duplicated edges (:issue:`7751`)
 - ``Series`` provides a ``to_excel`` method to output Excel files (:issue:`8825`)
diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py
index 2afafaeb544d15..99fcf789221388 100644
--- a/pandas/indexes/multi.py
+++ b/pandas/indexes/multi.py
@@ -827,6 +827,30 @@ def _to_safe_for_reshape(self):
         """ convert to object if we are a categorical """
         return self.set_levels([i._to_safe_for_reshape() for i in self.levels])
 
+    def to_dataframe(self, index=True):
+        """
+        Create a DataFrame with the levels of the MultiIndex as columns
+
+        .. versionadded:: 0.20.0
+
+        Parameters
+        ----------
+        index : boolean, default True
+            return this MultiIndex as the index
+
+        Returns
+        -------
+        DataFrame
+        """
+        from collections import OrderedDict  # keep columns in level order
+        from pandas import DataFrame
+        result = DataFrame(OrderedDict(
+            (level if name is None else name, self.get_level_values(level))
+            for level, name in enumerate(self.names)))  # only None is unnamed
+        if index:
+            result.index = self
+        return result
+
     def to_hierarchical(self, n_repeat, n_shuffle=1):
         """
         Return a MultiIndex reshaped to conform to the
diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py
index 343078aeafaf00..b72dc84b337dc3 100644
--- a/pandas/tests/indexes/test_multi.py
+++ b/pandas/tests/indexes/test_multi.py
@@ -1348,6 +1348,43 @@ def test_format_sparse_config(self):
 
         warnings.filters = warn_filters
 
+    def test_to_dataframe(self):
+        # every scenario is verified twice: as a plain frame (index=False)
+        # and with the MultiIndex attached as the index (the default)
+        pairs = [(1, 'one'), (1, 'two'), (2, 'one'), (2, 'two')]
+
+        # unnamed levels: columns are labelled by level position
+        mi = MultiIndex.from_tuples(pairs)
+        expected = DataFrame(pairs)
+        without_index = mi.to_dataframe(index=False)
+        tm.assert_frame_equal(without_index, expected)
+        expected.index = mi
+        with_index = mi.to_dataframe()
+        tm.assert_frame_equal(with_index, expected)
+
+        # named levels: columns are labelled by level name
+        mi = MultiIndex.from_tuples(pairs, names=['first', 'second'])
+        expected = DataFrame(pairs)
+        expected.columns = ['first', 'second']
+        without_index = mi.to_dataframe(index=False)
+        tm.assert_frame_equal(without_index, expected)
+        expected.index = mi
+        with_index = mi.to_dataframe()
+        tm.assert_frame_equal(with_index, expected)
+
+        # integer x datetime levels built from a cartesian product
+        # (5 integers x 3 dates = 15 rows)
+        dates = pd.date_range('20130101', periods=3)
+        mi = MultiIndex.from_product([range(5), dates])
+        expected = DataFrame(
+            {0: np.repeat(np.arange(5, dtype='int64'), 3),
+             1: np.tile(dates, 5)})
+        without_index = mi.to_dataframe(index=False)
+        tm.assert_frame_equal(without_index, expected)
+        expected.index = mi
+        with_index = mi.to_dataframe()
+        tm.assert_frame_equal(with_index, expected)
+
     def test_to_hierarchical(self):
         index = MultiIndex.from_tuples([(1, 'one'), (1, 'two'), (2, 'one'), (
             2, 'two')])
diff --git a/pandas/tools/hashing.py b/pandas/tools/hashing.py
index 6d2186fdab34c5..7e7b7dc76bb74c 100644
--- a/pandas/tools/hashing.py
+++ b/pandas/tools/hashing.py
@@ -3,7 +3,7 @@
 """
 
 import numpy as np
-from pandas import _hash, Series, factorize, Categorical, Index
+from pandas import _hash, Series, factorize, Categorical, Index, MultiIndex
 from pandas.lib import is_bool_array
 from pandas.types.generic import ABCIndexClass, ABCSeries, ABCDataFrame
 from pandas.types.common import (is_categorical_dtype, is_numeric_dtype,
@@ -45,6 +45,12 @@ def adder(h, hashed_to_add):
         h = np.multiply(h, np.uint(3), h)
         return np.add(h, hashed_to_add, h)
 
+    if isinstance(obj, MultiIndex):
+        # efficiently turn us into a DataFrame and hash
+        return hash_pandas_object(obj.to_dataframe(index=False),
+                                  index=False, encoding=encoding,
+                                  hash_key=hash_key, categorize=categorize)
+
     if isinstance(obj, ABCIndexClass):
         h = hash_array(obj.values, encoding, hash_key,
                        categorize).astype('uint64')
diff --git a/pandas/tools/tests/test_hashing.py b/pandas/tools/tests/test_hashing.py
index 7913706f5658b1..f3205f6ef8d5e7 100644
--- a/pandas/tools/tests/test_hashing.py
+++ b/pandas/tools/tests/test_hashing.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pandas as pd
 
-from pandas import DataFrame, Series, Index
+from pandas import DataFrame, Series, Index, MultiIndex
 from pandas.tools.hashing import hash_array, hash_pandas_object
 import pandas.util.testing as tm
 
@@ -72,7 +72,11 @@ def test_hash_pandas_object(self):
                     tm.makeMixedDataFrame(),
                     tm.makeTimeDataFrame(),
                     tm.makeTimeSeries(),
-                    tm.makeTimedeltaIndex()]:
+                    tm.makeTimedeltaIndex(),
+                    MultiIndex.from_product(
+                        [range(5),
+                         ['foo', 'bar', 'baz'],
+                         pd.date_range('20130101', periods=2)])]:
             self.check_equal(obj)
             self.check_not_equal_with_index(obj)
 
@@ -140,14 +144,6 @@ def f():
             hash_pandas_object(obj)
         self.assertRaises(TypeError, f)
 
-        # MultiIndex are represented as tuples
-        obj = Series([1, 2, 3], index=pd.MultiIndex.from_tuples(
-            [('a', 1), ('a', 2), ('b', 1)]))
-
-        def f():
-            hash_pandas_object(obj)
-        self.assertRaises(TypeError, f)
-
     def test_alread_encoded(self):
         # if already encoded then ok