Skip to content

Commit

Permalink
ENH: Improve error reporting for wrong merge cols (#37547)
Browse files Browse the repository at this point in the history
  • Loading branch information
phofl authored Nov 8, 2020
1 parent 5fd478d commit 0755915
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 4 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,7 @@ Other enhancements
- :class:`Window` now supports all Scipy window types in ``win_type`` with flexible keyword argument support (:issue:`34556`)
- :meth:`testing.assert_index_equal` now has a ``check_order`` parameter that allows indexes to be checked in an order-insensitive manner (:issue:`37478`)
- :func:`read_csv` supports memory-mapping for compressed files (:issue:`37621`)
- Improve error reporting for :meth:`DataFrame.merge()` when invalid merge column definitions were given (:issue:`16228`)

.. _whatsnew_120.api_breaking.python:

Expand Down
23 changes: 19 additions & 4 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -1203,11 +1203,9 @@ def _validate_specification(self):
if self.left_index and self.right_index:
self.left_on, self.right_on = (), ()
elif self.left_index:
if self.right_on is None:
raise MergeError("Must pass right_on or right_index=True")
raise MergeError("Must pass right_on or right_index=True")
elif self.right_index:
if self.left_on is None:
raise MergeError("Must pass left_on or left_index=True")
raise MergeError("Must pass left_on or left_index=True")
else:
# use the common columns
common_cols = self.left.columns.intersection(self.right.columns)
Expand All @@ -1228,8 +1226,19 @@ def _validate_specification(self):
'Can only pass argument "on" OR "left_on" '
'and "right_on", not a combination of both.'
)
if self.left_index or self.right_index:
raise MergeError(
'Can only pass argument "on" OR "left_index" '
'and "right_index", not a combination of both.'
)
self.left_on = self.right_on = self.on
elif self.left_on is not None:
if self.left_index:
raise MergeError(
'Can only pass argument "left_on" OR "left_index" not both.'
)
if not self.right_index and self.right_on is None:
raise MergeError('Must pass "right_on" OR "right_index".')
n = len(self.left_on)
if self.right_index:
if len(self.left_on) != self.right.index.nlevels:
Expand All @@ -1239,6 +1248,12 @@ def _validate_specification(self):
)
self.right_on = [None] * n
elif self.right_on is not None:
if self.right_index:
raise MergeError(
'Can only pass argument "right_on" OR "right_index" not both.'
)
if not self.left_index and self.left_on is None:
raise MergeError('Must pass "left_on" OR "left_index".')
n = len(self.right_on)
if self.left_index:
if len(self.right_on) != self.left.index.nlevels:
Expand Down
54 changes: 54 additions & 0 deletions pandas/tests/reshape/merge/test_merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -2283,3 +2283,57 @@ def test_merge_join_categorical_multiindex():
expected = expected.drop(["Cat", "Int"], axis=1)
result = a.join(b, on=["Cat1", "Int1"])
tm.assert_frame_equal(expected, result)


@pytest.mark.parametrize("func", ["merge", "merge_asof"])
@pytest.mark.parametrize(
("kwargs", "err_msg"),
[
({"left_on": "a", "left_index": True}, ["left_on", "left_index"]),
({"right_on": "a", "right_index": True}, ["right_on", "right_index"]),
],
)
def test_merge_join_cols_error_reporting_duplicates(func, kwargs, err_msg):
# GH: 16228
left = DataFrame({"a": [1, 2], "b": [3, 4]})
right = DataFrame({"a": [1, 1], "c": [5, 6]})
msg = rf'Can only pass argument "{err_msg[0]}" OR "{err_msg[1]}" not both\.'
with pytest.raises(MergeError, match=msg):
getattr(pd, func)(left, right, **kwargs)


@pytest.mark.parametrize("func", ["merge", "merge_asof"])
@pytest.mark.parametrize(
("kwargs", "err_msg"),
[
({"left_on": "a"}, ["right_on", "right_index"]),
({"right_on": "a"}, ["left_on", "left_index"]),
],
)
def test_merge_join_cols_error_reporting_missing(func, kwargs, err_msg):
# GH: 16228
left = DataFrame({"a": [1, 2], "b": [3, 4]})
right = DataFrame({"a": [1, 1], "c": [5, 6]})
msg = rf'Must pass "{err_msg[0]}" OR "{err_msg[1]}"\.'
with pytest.raises(MergeError, match=msg):
getattr(pd, func)(left, right, **kwargs)


@pytest.mark.parametrize("func", ["merge", "merge_asof"])
@pytest.mark.parametrize(
"kwargs",
[
{"right_index": True},
{"left_index": True},
],
)
def test_merge_join_cols_error_reporting_on_and_index(func, kwargs):
# GH: 16228
left = DataFrame({"a": [1, 2], "b": [3, 4]})
right = DataFrame({"a": [1, 1], "c": [5, 6]})
msg = (
r'Can only pass argument "on" OR "left_index" '
r'and "right_index", not a combination of both\.'
)
with pytest.raises(MergeError, match=msg):
getattr(pd, func)(left, right, on="a", **kwargs)

0 comments on commit 0755915

Please sign in to comment.