Skip to content

Commit

Permalink
FEAT-modin-project#2013: More accurate handling of broken edge cases …
Browse files Browse the repository at this point in the history
…in Pandas.

Signed-off-by: Itamar Turner-Trauring <itamar@itamarst.org>
  • Loading branch information
itamarst committed Nov 24, 2020
1 parent 6ea4a36 commit a62c908
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 5 deletions.
17 changes: 12 additions & 5 deletions modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,20 +161,27 @@ def merge_asof(

ErrorMessage.default_to_pandas("`merge_asof`")

# As of Pandas 1.2 these should raise an error; before that it did
# something likely random:
if (
(on and (left_index or right_index))
or (left_on and left_index)
or (right_on and right_index)
):
raise ValueError("Can't combine left/right_index with left/right_on or on.")

# Pandas fallbacks for tricky cases:
if (
# No idea how this works or why it does what it does:
# No idea how this works or why it does what it does; and in fact
# there's a Pandas bug suggesting it's wrong:
# https://github.com/pandas-dev/pandas/issues/33463
(left_index and right_on is not None)
# This is the case where by is a list of columns. If we're copying lots
# of columns out of Pandas, maybe not worth trying our path, it's not
# clear it's any better:
or not isinstance(by, (str, type(None)))
or not isinstance(left_by, (str, type(None)))
or not isinstance(right_by, (str, type(None)))
# What does this even mean?! Pandas does _something_...
or (on and (left_index or right_index))
or (left_on and left_index)
or (right_on and right_index)
):
if isinstance(right, DataFrame):
right = to_pandas(right)
Expand Down
14 changes: 14 additions & 0 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,20 @@ def test_merge_asof_bad_arguments():
with pytest.raises(ValueError):
pd.merge_asof(modin_left, modin_right, on="a", right_on="can't do with by")

# Can't mix left_index with left_on or on, similarly for right.
with pytest.raises(ValueError):
pd.merge_asof(modin_left, modin_right, on="a", right_index=True)
with pytest.raises(ValueError):
pd.merge_asof(
modin_left, modin_right, left_on="a", right_on="a", right_index=True
)
with pytest.raises(ValueError):
pd.merge_asof(modin_left, modin_right, on="a", left_index=True)
with pytest.raises(ValueError):
pd.merge_asof(
modin_left, modin_right, left_on="a", right_on="a", left_index=True
)

# Need both left and right
with pytest.raises(Exception): # Pandas bug, didn't validate inputs sufficiently
pandas.merge_asof(pandas_left, pandas_right, left_on="a")
Expand Down

0 comments on commit a62c908

Please sign in to comment.