diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 9281c51059087..959858fb50f89 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,4 +1,4 @@ - [ ] closes #xxxx - [ ] tests added / passed - - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` + - [ ] passes ``git diff upstream/master --name-only -- '*.py' | flake8 --diff`` (On Windows, ``git diff upstream/master -u -- "*.py" | flake8 --diff`` might work as an alternative.) - [ ] whatsnew entry diff --git a/doc/source/contributing.rst b/doc/source/contributing.rst index aacfe25b91564..cd444f796fabb 100644 --- a/doc/source/contributing.rst +++ b/doc/source/contributing.rst @@ -525,6 +525,12 @@ run this slightly modified command:: git diff master --name-only -- '*.py' | grep 'pandas/' | xargs flake8 +Note that on Windows, ``grep``, ``xargs``, and other tools are likely +unavailable. However, this has been shown to work on smaller commits in the +standard Windows command line:: + + git diff master -u -- "*.py" | flake8 --diff + Backwards Compatibility ~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/whatsnew/v0.20.3.txt b/doc/source/whatsnew/v0.20.3.txt index 17195715d0f95..f8916f5464276 100644 --- a/doc/source/whatsnew/v0.20.3.txt +++ b/doc/source/whatsnew/v0.20.3.txt @@ -78,6 +78,7 @@ Sparse Reshaping ^^^^^^^^^ +- Bug in joining on a ``MultiIndex`` with a ``category`` dtype for a level (:issue:`16627`). Numeric diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index d9e0c218bfafc..d13636e8b43e2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -560,6 +560,9 @@ def take(self, indices, axis=0, allow_fill=True, na_value=-1) return self._create_from_codes(taken) + def is_dtype_equal(self, other): + return self._data.is_dtype_equal(other) + take_nd = take def map(self, mapper): diff --git a/pandas/tests/test_join.py b/pandas/tests/test_join.py index 3fc13d23b53f7..cde1cab37d09c 100644 --- a/pandas/tests/test_join.py +++ b/pandas/tests/test_join.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- import numpy as np -from pandas import Index +from pandas import Index, DataFrame, Categorical, merge from pandas._libs import join as _join import pandas.util.testing as tm -from pandas.util.testing import assert_almost_equal +from pandas.util.testing import assert_almost_equal, assert_frame_equal class TestIndexer(object): @@ -192,3 +192,43 @@ def test_inner_join_indexer2(): exp_ridx = np.array([0, 1, 2, 3], dtype=np.int64) assert_almost_equal(ridx, exp_ridx) + + +def test_merge_join_categorical_multiindex(): + # From issue 16627 + a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'], + ['a', 'b', 'c']), + 'Int1': [0, 1, 0, 1, 0, 0]} + a = DataFrame(a) + + b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + ['a', 'b', 'c']), + 'Int': [0, 0, 0, 1, 1, 1], + 'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]} + b = DataFrame(b).set_index(['Cat', 'Int'])['Factor'] + + expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'], + right_on=['Cat', 'Int'], how='left') + result = a.join(b, on=['Cat1', 'Int1']) + expected = expected.drop(['Cat', 'Int'], axis=1) + assert_frame_equal(expected, result) + + # Same test, but with ordered categorical + a = {'Cat1': Categorical(['a', 'b', 'a', 'c', 'a', 'b'], + ['b', 'a', 'c'], + ordered=True), + 'Int1': [0, 1, 0, 1, 0, 0]} + a = DataFrame(a) + + b = {'Cat': Categorical(['a', 'b', 'c', 'a', 'b', 'c'], + ['b', 'a', 'c'], + ordered=True), + 'Int': [0, 0, 0, 1, 1, 1], + 'Factor': [1.1, 1.2, 1.3, 1.4, 1.5, 1.6]} + b = DataFrame(b).set_index(['Cat', 'Int'])['Factor'] + + expected = merge(a, b.reset_index(), left_on=['Cat1', 'Int1'], + right_on=['Cat', 'Int'], how='left') + result = a.join(b, on=['Cat1', 'Int1']) + expected = expected.drop(['Cat', 'Int'], axis=1) + assert_frame_equal(expected, result)