Skip to content

Commit

Permalink
FEAT-modin-project#2013: Support for on and index variations.
Browse files Browse the repository at this point in the history
Signed-off-by: Itamar Turner-Trauring <itamar@itamarst.org>
  • Loading branch information
itamarst committed Nov 4, 2020
1 parent 1e69546 commit 76e2c9a
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 7 deletions.
29 changes: 26 additions & 3 deletions modin/pandas/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,29 @@ def merge_asof(

ErrorMessage.default_to_pandas("`merge_asof`")

if left_index and right_on:
# The semantics of left_index combined with right_on are unclear, so fall back to the Pandas slow path.
if isinstance(right, DataFrame):
right = to_pandas(right)
return DataFrame(
pandas.merge_asof(
to_pandas(left),
right,
on=on,
left_on=left_on,
right_on=right_on,
left_index=left_index,
right_index=right_index,
by=by,
left_by=left_by,
right_by=right_by,
suffixes=suffixes,
tolerance=tolerance,
allow_exact_matches=allow_exact_matches,
direction=direction,
)
)

left_column = None
right_column = None

Expand Down Expand Up @@ -190,7 +213,6 @@ def merge_asof(

# Working sketch of the new proposed algorithm. Currently just supports
# "on".
# TODO support left_on/right_on/left_index/right_index
# TODO support suffixes
# TODO what does "by" do?

Expand Down Expand Up @@ -221,15 +243,15 @@ def merge_asof(
right_subset.index = left.index

# 4. Merge left and the new shrunken right:
# 4. Merge left and the new shrunken right.
# TODO: maybe concatenate instead of merging, given that all the rows line up?
result = merge(
left,
right_subset,
left_index=True,
right_index=True,
how="left",
)

# 5. Clean up to match Pandas output:
if left_on is not None and right_index:
result.insert(
list(result.columns).index(left_on + suffixes[0]),
Expand All @@ -238,6 +260,7 @@ def merge_asof(
)
if not left_index and not right_index:
result.index = pandas.RangeIndex(start=0, stop=len(result))

return result


Expand Down
5 changes: 1 addition & 4 deletions modin/pandas/test/test_general.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def test_merge_asof_on_variations():
# Simplest possible test, just to try out the basic approach
left = {"a": [1, 5, 10], "left_val": ["a", "b", "c"]}
left_index = [6, 8, 12]
right = {"a": [1, 2, 3, 6, 7], "right_val": [1, 2, 3, 6, 7]}
right = {"a": [1, 2, 3, 6, 7], "right_val": ["d", "e", "f", "g", "h"]}
right_index = [6, 7, 8, 9, 15]
pandas_left, pandas_right = (
pandas.DataFrame(left, index=left_index),
Expand All @@ -239,9 +239,6 @@ def test_merge_asof_on_variations():
]:
pandas_merged = pandas.merge_asof(pandas_left, pandas_right, **on_arguments)
modin_merged = pd.merge_asof(modin_left, modin_right, **on_arguments)
print(on_arguments)
print(pandas_merged)
print(modin_merged)
df_equals(pandas_merged, modin_merged)


Expand Down

0 comments on commit 76e2c9a

Please sign in to comment.