Skip to content

Commit

Permalink
fix(python): Fix by argument handling in join_asof (#10447)
Browse files Browse the repository at this point in the history
  • Loading branch information
zundertj authored Aug 13, 2023
1 parent bdaad51 commit befaa30
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 12 deletions.
22 changes: 10 additions & 12 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3004,18 +3004,16 @@ def join_asof(
if left_on is None or right_on is None:
raise ValueError("You should pass the column to join on as an argument.")

by_left_: Sequence[str] | None
by_left_ = [by_left] if isinstance(by_left, str) else by_left

by_right_: Sequence[str] | None
by_right_ = [by_right] if isinstance(by_right, (str, pl.Expr)) else by_right

if isinstance(by, str):
by_left_ = [by]
by_right_ = [by]
elif isinstance(by, list):
by_left_ = by
by_right_ = by
if by is not None:
by_left_ = [by] if isinstance(by, str) else by
by_right_ = by_left_
elif (by_left is not None) and (by_right is not None):
by_left_ = [by_left] if isinstance(by_left, str) else by_left
by_right_ = [by_right] if isinstance(by_right, str) else by_right
else:
# no by
by_left_ = None
by_right_ = None

tolerance_str: str | None = None
tolerance_num: float | int | None = None
Expand Down
35 changes: 35 additions & 0 deletions py-polars/tests/unit/operations/test_join_asof.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,3 +557,38 @@ def test_asof_join_string_err() -> None:
).sort("date_str")
with pytest.raises(pl.InvalidOperationError):
left.join_asof(right, on="date_str")


def test_join_asof_by_argument_parsing() -> None:
    """Check that ``join_asof`` handles list and tuple ``by`` arguments alike.

    The same as-of join on ``n`` is run with the grouping keys passed as a
    list and as a tuple, first through ``by`` and then through the
    ``by_left`` / ``by_right`` pair. Every variant must produce the same
    frame as the ``by=[...]`` call.
    """
    # Both frames share the same grouping columns; only "n" and the payload
    # column differ between the left and right side.
    group_columns = {
        "id1": [0, 0, 3, 3, 5, 5],
        "id2": [1, 2, 1, 2, 1, 2],
    }
    left = pl.DataFrame(
        {
            "n": [10, 20, 30, 40, 50, 60],
            **group_columns,
            "x": ["a", "b", "c", "d", "e", "f"],
        }
    ).sort(by="n")
    right = pl.DataFrame(
        {
            "n": [25, 8, 5, 23, 15, 35],
            **group_columns,
            "y": ["A", "B", "C", "D", "E", "F"],
        }
    ).sort(by="n")

    keys = ["id1", "id2"]

    # Any sequence is allowed for the `by` argument, so a list and a tuple
    # should give the same result.
    by_list = left.join_asof(right, on="n", by=list(keys))
    by_tuple = left.join_asof(right, on="n", by=tuple(keys))
    assert_frame_equal(by_list, by_tuple)

    # The same holds when the keys go through the by_left / by_right kwargs.
    for as_sequence in (list, tuple):
        by_split = left.join_asof(
            right,
            on="n",
            by_left=as_sequence(keys),
            by_right=as_sequence(keys),
        )
        assert_frame_equal(by_split, by_list)

0 comments on commit befaa30

Please sign in to comment.