diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 690e6b8f725ad..bb8c059d70b60 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -231,6 +231,7 @@ Other enhancements - :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`) - :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`) - :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`) +- Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`) .. _whatsnew_120.api_breaking.python: diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 978597e3c7686..aa883d518f8d1 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -1203,11 +1203,9 @@ def _validate_specification(self): if self.left_index and self.right_index: self.left_on, self.right_on = (), () elif self.left_index: - if self.right_on is None: - raise MergeError("Must pass right_on or right_index=True") + raise MergeError("Must pass right_on or right_index=True") elif self.right_index: - if self.left_on is None: - raise MergeError("Must pass left_on or left_index=True") + raise MergeError("Must pass left_on or left_index=True") else: # use the common columns common_cols = self.left.columns.intersection(self.right.columns) @@ -1228,8 +1226,19 @@ def _validate_specification(self): 'Can only pass argument "on" OR "left_on" ' 'and "right_on", not a combination of both.' ) + if self.left_index or self.right_index: + raise MergeError( + 'Can only pass argument "on" OR "left_index" ' + 'and "right_index", not a combination of both.' + ) self.left_on = self.right_on = self.on elif self.left_on is not None: + if self.left_index: + raise MergeError( + 'Can only pass argument "left_on" OR "left_index" not both.' + ) + if not self.right_index and self.right_on is None: + raise MergeError('Must pass "right_on" OR "right_index".') n = len(self.left_on) if self.right_index: if len(self.left_on) != self.right.index.nlevels: @@ -1239,6 +1248,12 @@ def _validate_specification(self): ) self.right_on = [None] * n elif self.right_on is not None: + if self.right_index: + raise MergeError( + 'Can only pass argument "right_on" OR "right_index" not both.' + ) + if not self.left_index and self.left_on is None: + raise MergeError('Must pass "left_on" OR "left_index".') n = len(self.right_on) if self.left_index: if len(self.right_on) != self.left.index.nlevels: diff --git a/pandas/tests/reshape/merge/test_merge.py b/pandas/tests/reshape/merge/test_merge.py index a58372040c7f3..999b827fe0571 100644 --- a/pandas/tests/reshape/merge/test_merge.py +++ b/pandas/tests/reshape/merge/test_merge.py @@ -2283,3 +2283,57 @@ def test_merge_join_categorical_multiindex(): expected = expected.drop(["Cat", "Int"], axis=1) result = a.join(b, on=["Cat1", "Int1"]) tm.assert_frame_equal(expected, result) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + ("kwargs", "err_msg"), + [ + ({"left_on": "a", "left_index": True}, ["left_on", "left_index"]), + ({"right_on": "a", "right_index": True}, ["right_on", "right_index"]), + ], +) +def test_merge_join_cols_error_reporting_duplicates(func, kwargs, err_msg): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = rf'Can only pass argument "{err_msg[0]}" OR "{err_msg[1]}" not both\.' + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, **kwargs) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + ("kwargs", "err_msg"), + [ + ({"left_on": "a"}, ["right_on", "right_index"]), + ({"right_on": "a"}, ["left_on", "left_index"]), + ], +) +def test_merge_join_cols_error_reporting_missing(func, kwargs, err_msg): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = rf'Must pass "{err_msg[0]}" OR "{err_msg[1]}"\.' + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, **kwargs) + + +@pytest.mark.parametrize("func", ["merge", "merge_asof"]) +@pytest.mark.parametrize( + "kwargs", + [ + {"right_index": True}, + {"left_index": True}, + ], +) +def test_merge_join_cols_error_reporting_on_and_index(func, kwargs): + # GH: 16228 + left = DataFrame({"a": [1, 2], "b": [3, 4]}) + right = DataFrame({"a": [1, 1], "c": [5, 6]}) + msg = ( + r'Can only pass argument "on" OR "left_index" ' + r'and "right_index", not a combination of both\.' + ) + with pytest.raises(MergeError, match=msg): + getattr(pd, func)(left, right, on="a", **kwargs)