Fix multi-index with categorical values. (#3860)

* Fix bug for multi-index with categorical values. See issue #3674.
* Blacked.
* Add line in whats-new.rst.
* Remove forgotten print.

Co-authored-by: Matthieu Ancellin <matthieu.ancellin@ens-paris-saclay.fr>
pydata · Mar 13, 2020 · ae03616
1 parent 0d95eba
commit ae03616
Show file tree

Hide file tree

Showing 3 changed files with 16 additions and 0 deletions.
diff --git a/doc/whats-new.rst b/doc/whats-new.rst
@@ -58,6 +58,9 @@ Bug fixes
   indexed variable (:issue:`3252`).
   By `David Huard <https://github.com/huard>`_.
 
+
+- Fix use of multi-index with categorical values (:issue:`3674`).
+  By `Matthieu Ancellin <https://github.com/mancellin>`_.
 - Fix alignment with ``join="override"`` when some dimensions are unindexed. (:issue:`3681`).
   By `Deepak Cherian <https://github.com/dcherian>`_.
 - Fix :py:meth:`Dataset.swap_dims` and :py:meth:`DataArray.swap_dims` producing

diff --git a/xarray/core/indexes.py b/xarray/core/indexes.py
@@ -22,6 +22,8 @@ def remove_unused_levels_categories(index):
             for i, level in enumerate(index.levels):
                 if isinstance(level, pd.CategoricalIndex):
                     level = level[index.codes[i]].remove_unused_categories()
+                else:
+                    level = level[index.codes[i]]
                 levels.append(level)
             index = pd.MultiIndex.from_arrays(levels, names=index.names)
     elif isinstance(index, pd.CategoricalIndex):

diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
@@ -1458,6 +1458,17 @@ def test_categorical_reindex(self):
         actual = ds.reindex(cat=["foo"])["cat"].values
         assert (actual == np.array(["foo"])).all()
 
+    def test_categorical_multiindex(self):
+        i1 = pd.Series([0, 0])
+        cat = pd.CategoricalDtype(categories=["foo", "baz", "bar"])
+        i2 = pd.Series(["baz", "bar"], dtype=cat)
+
+        df = pd.DataFrame({"i1": i1, "i2": i2, "values": [1, 2]}).set_index(
+            ["i1", "i2"]
+        )
+        actual = df.to_xarray()
+        assert actual["values"].shape == (1, 2)
+
     def test_sel_drop(self):
         data = Dataset({"foo": ("x", [1, 2, 3])}, {"x": [0, 1, 2]})
         expected = Dataset({"foo": 1})