From e54c734556ac46fa085d65861fd411190747358c Mon Sep 17 00:00:00 2001
From: Chris Billington <chrisjbillington@gmail.com>
Date: Thu, 27 Jul 2017 13:48:55 -0400
Subject: [PATCH 1/5] Allow unicode empty strings to be used as placeholders in
 multilevel column names in Python 2

---
 pandas/core/frame.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index e546e96f253c7..586d4318dd3df 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2112,8 +2112,9 @@ def _getitem_multilevel(self, key):
                 result = result.__finalize__(self)
             if len(result.columns) == 1:
                 top = result.columns[0]
-                if ((type(top) == str and top == '') or
-                        (type(top) == tuple and top[0] == '')):
+                if isinstance(top, tuple):
+                    top = top[0]
+                if top == '':
                     result = result['']
                     if isinstance(result, Series):
                         result = self._constructor_sliced(result,

From a798b70e6d540d71d2bd7f23369b73be365f61e5 Mon Sep 17 00:00:00 2001
From: Chris Billington <chrisjbillington@gmail.com>
Date: Thu, 27 Jul 2017 15:47:40 -0400
Subject: [PATCH 2/5] added test for unicode multilevel column placeholders

---
 pandas/tests/test_multilevel.py | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 0b2dc9ba70f03..f61078a00286e 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1694,6 +1694,28 @@ def test_mixed_depth_get(self):
         tm.assert_series_equal(result, expected, check_names=False)
         assert result.name == ('routine1', 'result1')
 
+    def test_mixed_depth_get_unicode_placeholders_py2(self):
+        # Note this is only different to test_mixed_depth_get() on Python 2
+        arrays = [[u('a'), u('top'), u('top'),
+                   u('routine1'), u('routine1'), u('routine2')],
+                  [u(''), u('OD'), u('OD'),
+                   u('result1'), u('result2'), u('result1')],
+                  [u(''), u('wx'), u('wy'), u(''), u(''), u('')]]
+
+        tuples = sorted(zip(*arrays))
+        index = MultiIndex.from_tuples(tuples)
+        df = DataFrame(randn(4, 6), columns=index)
+
+        result = df['a']
+        expected = df['a', '', '']
+        tm.assert_series_equal(result, expected, check_names=False)
+        assert result.name == 'a'
+
+        result = df['routine1', 'result1']
+        expected = df['routine1', 'result1', '']
+        tm.assert_series_equal(result, expected, check_names=False)
+        assert result.name == ('routine1', 'result1')
+
     def test_mixed_depth_insert(self):
         arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                   ['', 'OD', 'OD', 'result1', 'result2', 'result1'],

From 63071bd5e1597655bd3c3d39ecc8f5dab49fb774 Mon Sep 17 00:00:00 2001
From: Chris Billington <chrisjbillington@gmail.com>
Date: Thu, 27 Jul 2017 22:23:52 -0400
Subject: [PATCH 3/5] Added whatsnew note, modified tests for mixed level
 columns based on pull request review

---
 doc/source/whatsnew/v0.21.0.txt |  1 +
 pandas/tests/test_multilevel.py | 25 ++++++++++++-------------
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index 0025f8d098d81..5a5146a74375c 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -273,6 +273,7 @@ Indexing
 - Fixes bug where indexing with ``np.inf`` caused an ``OverflowError`` to be raised (:issue:`16957`)
 - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
 - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
+- Fixes getting a column by name when unicode empty strings are used as placeholders in multilevel columns in Python 2 (:issue:`17099`)
 
 I/O
 ^^^
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index f61078a00286e..4c8976cc9324c 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1682,20 +1682,20 @@ def test_mixed_depth_get(self):
 
         tuples = sorted(zip(*arrays))
         index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(randn(4, 6), columns=index)
+        df = DataFrame(np.random.randn(4, 6), columns=index)
 
         result = df['a']
-        expected = df['a', '', '']
-        tm.assert_series_equal(result, expected, check_names=False)
-        assert result.name == 'a'
+        expected = df['a', '', ''].rename('a')
+        tm.assert_series_equal(result, expected)
 
         result = df['routine1', 'result1']
         expected = df['routine1', 'result1', '']
-        tm.assert_series_equal(result, expected, check_names=False)
-        assert result.name == ('routine1', 'result1')
+        expected = expected.rename(('routine1', 'result1'))
+        tm.assert_series_equal(result, expected)
 
     def test_mixed_depth_get_unicode_placeholders_py2(self):
-        # Note this is only different to test_mixed_depth_get() on Python 2
+        # Pull request #17099. This is only different to
+        # test_mixed_depth_get() on Python 2
         arrays = [[u('a'), u('top'), u('top'),
                    u('routine1'), u('routine1'), u('routine2')],
                   [u(''), u('OD'), u('OD'),
@@ -1704,17 +1704,16 @@ def test_mixed_depth_get_unicode_placeholders_py2(self):
 
         tuples = sorted(zip(*arrays))
         index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(randn(4, 6), columns=index)
+        df = DataFrame(np.random.randn(4, 6), columns=index)
 
         result = df['a']
-        expected = df['a', '', '']
-        tm.assert_series_equal(result, expected, check_names=False)
-        assert result.name == 'a'
+        expected = df['a', '', ''].rename('a')
+        tm.assert_series_equal(result, expected)
 
         result = df['routine1', 'result1']
         expected = df['routine1', 'result1', '']
-        tm.assert_series_equal(result, expected, check_names=False)
-        assert result.name == ('routine1', 'result1')
+        expected = expected.rename(('routine1', 'result1'))
+        tm.assert_series_equal(result, expected)
 
     def test_mixed_depth_insert(self):
         arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],

From e7c9f97bb52cd9eb47602313981677d28756f9d1 Mon Sep 17 00:00:00 2001
From: Chris Billington <chrisjbillington@gmail.com>
Date: Sat, 29 Jul 2017 13:22:33 -0400
Subject: [PATCH 4/5] Dedup test, added descriptive comment to
 _getitem_multilevel().

---
 pandas/core/frame.py            |  7 +++++++
 pandas/tests/test_multilevel.py | 31 ++++++++++---------------------
 2 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 586d4318dd3df..d5f1a4845cd42 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -2110,6 +2110,13 @@ def _getitem_multilevel(self, key):
                 result = self._constructor(new_values, index=self.index,
                                            columns=result_columns)
                 result = result.__finalize__(self)
+
+            # If there is only one column being returned, and its name is
+            # either an empty string, or a tuple with an empty string as its
+            # first element, then treat the empty string as a placeholder
+            # and return the column as if the user had provided that empty
+            # string in the key. If the result is a Series, exclude the
+            # implied empty string from its name.
             if len(result.columns) == 1:
                 top = result.columns[0]
                 if isinstance(top, tuple):
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index 4c8976cc9324c..dca8fdef3f29f 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1675,11 +1675,18 @@ def test_int_series_slicing(self):
         expected = self.ymd.reindex(s.index[5:])
         tm.assert_frame_equal(result, expected)
 
-    def test_mixed_depth_get(self):
+    def test_mixed_depth_get(self, unicode_strings_py2=False):
+        # If unicode_strings_py2 is True, then the column labels in dataframe
+        # construction will use unicode strings in Python 2. In Python 3 they
+        # are unicode strings regardless.
+
         arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                   ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                   ['', 'wx', 'wy', '', '', '']]
 
+        if unicode_strings_py2:
+            arrays = [[u(s) for s in arr] for arr in arrays]
+
         tuples = sorted(zip(*arrays))
         index = MultiIndex.from_tuples(tuples)
         df = DataFrame(np.random.randn(4, 6), columns=index)
@@ -1694,26 +1701,8 @@ def test_mixed_depth_get(self):
         tm.assert_series_equal(result, expected)
 
     def test_mixed_depth_get_unicode_placeholders_py2(self):
-        # Pull request #17099. This is only different to
-        # test_mixed_depth_get() on Python 2
-        arrays = [[u('a'), u('top'), u('top'),
-                   u('routine1'), u('routine1'), u('routine2')],
-                  [u(''), u('OD'), u('OD'),
-                   u('result1'), u('result2'), u('result1')],
-                  [u(''), u('wx'), u('wy'), u(''), u(''), u('')]]
-
-        tuples = sorted(zip(*arrays))
-        index = MultiIndex.from_tuples(tuples)
-        df = DataFrame(np.random.randn(4, 6), columns=index)
-
-        result = df['a']
-        expected = df['a', '', ''].rename('a')
-        tm.assert_series_equal(result, expected)
-
-        result = df['routine1', 'result1']
-        expected = df['routine1', 'result1', '']
-        expected = expected.rename(('routine1', 'result1'))
-        tm.assert_series_equal(result, expected)
+        # Pull request #17099.
+        self.test_mixed_depth_get(unicode_strings_py2=True)
 
     def test_mixed_depth_insert(self):
         arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],

From f1edf68bb99c7d7c9afeb3a76e8ecd97e6be4199 Mon Sep 17 00:00:00 2001
From: Chris Billington <chrisjbillington@gmail.com>
Date: Thu, 10 Aug 2017 00:54:31 -0400
Subject: [PATCH 5/5] parametrize test, simplified whatsnew note

---
 doc/source/whatsnew/v0.21.0.txt |  2 +-
 pandas/tests/test_multilevel.py | 15 ++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/doc/source/whatsnew/v0.21.0.txt b/doc/source/whatsnew/v0.21.0.txt
index ec3dad0c02b28..f601c4e8a321b 100644
--- a/doc/source/whatsnew/v0.21.0.txt
+++ b/doc/source/whatsnew/v0.21.0.txt
@@ -323,7 +323,7 @@ Indexing
 - Bug in reindexing on an empty ``CategoricalIndex`` (:issue:`16770`)
 - Fixes ``DataFrame.loc`` for setting with alignment and tz-aware ``DatetimeIndex`` (:issue:`16889`)
 - Avoids ``IndexError`` when passing an Index or Series to ``.iloc`` with older numpy (:issue:`17193`)
-- Fixes getting a column by name when unicode empty strings are used as placeholders in multilevel columns in Python 2 (:issue:`17099`)
+- Allow unicode empty strings as placeholders in multilevel columns in Python 2 (:issue:`17099`)
 
 I/O
 ^^^
diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py
index dca8fdef3f29f..a765e2c4ca1bf 100644
--- a/pandas/tests/test_multilevel.py
+++ b/pandas/tests/test_multilevel.py
@@ -1675,16 +1675,17 @@ def test_int_series_slicing(self):
         expected = self.ymd.reindex(s.index[5:])
         tm.assert_frame_equal(result, expected)
 
-    def test_mixed_depth_get(self, unicode_strings_py2=False):
-        # If unicode_strings_py2 is True, then the column labels in dataframe
-        # construction will use unicode strings in Python 2. In Python 3 they
-        # are unicode strings regardless.
+    @pytest.mark.parametrize('unicode_strings', [True, False])
+    def test_mixed_depth_get(self, unicode_strings):
+        # If unicode_strings is True, the column labels in dataframe
+        # construction will use unicode strings in Python 2 (pull request
+        # #17099).
 
         arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                   ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                   ['', 'wx', 'wy', '', '', '']]
 
-        if unicode_strings_py2:
+        if unicode_strings:
             arrays = [[u(s) for s in arr] for arr in arrays]
 
         tuples = sorted(zip(*arrays))
@@ -1700,10 +1701,6 @@ def test_mixed_depth_get(self, unicode_strings_py2=False):
         expected = expected.rename(('routine1', 'result1'))
         tm.assert_series_equal(result, expected)
 
-    def test_mixed_depth_get_unicode_placeholders_py2(self):
-        # Pull request #17099.
-        self.test_mixed_depth_get(unicode_strings_py2=True)
-
     def test_mixed_depth_insert(self):
         arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                   ['', 'OD', 'OD', 'result1', 'result2', 'result1'],