From 762b5600bbc282d6e2f5e739c5cfaa7123b72614 Mon Sep 17 00:00:00 2001 From: mfatihaktas Date: Tue, 19 Mar 2024 16:45:18 -0400 Subject: [PATCH] refactor(api): remove `by` of asof_join() in favor of `predicates` --- ibis/backends/dask/tests/test_join.py | 6 ++++-- ibis/backends/pandas/tests/test_join.py | 4 ++-- ibis/backends/tests/test_asof_join.py | 2 +- ibis/expr/types/joins.py | 3 +-- ibis/expr/types/relations.py | 7 +------ ibis/tests/expr/test_table.py | 4 ++-- 6 files changed, 11 insertions(+), 15 deletions(-) diff --git a/ibis/backends/dask/tests/test_join.py b/ibis/backends/dask/tests/test_join.py index 13900659ca06..75b1235d5182 100644 --- a/ibis/backends/dask/tests/test_join.py +++ b/ibis/backends/dask/tests/test_join.py @@ -229,7 +229,7 @@ def test_asof_join(time_left, time_right, time_df1, time_df2): def test_keyed_asof_join( time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 ): - expr = time_keyed_left.asof_join(time_keyed_right, "time", by="key")[ + expr = time_keyed_left.asof_join(time_keyed_right, "time", predicates="key")[ time_keyed_left, time_keyed_right.other_value ] result = expr.compile() @@ -254,7 +254,9 @@ def test_asof_join_overlapping_non_predicate( time_keyed_df1.assign(collide=time_keyed_df1["key"] + time_keyed_df1["value"]) time_keyed_df2.assign(collide=time_keyed_df2["key"] + time_keyed_df2["other_value"]) - expr = time_keyed_left.asof_join(time_keyed_right, on="time", by=[("key", "key")]) + expr = time_keyed_left.asof_join( + time_keyed_right, on="time", predicates=[("key", "key")] + ) result = expr.compile() expected = dd.merge_asof( time_keyed_df1, time_keyed_df2, on="time", by="key", suffixes=("", "_right") diff --git a/ibis/backends/pandas/tests/test_join.py b/ibis/backends/pandas/tests/test_join.py index 711da76f954c..cddedbfebb3b 100644 --- a/ibis/backends/pandas/tests/test_join.py +++ b/ibis/backends/pandas/tests/test_join.py @@ -333,7 +333,7 @@ def test_asof_join_predicate(time_left, time_right, time_df1, time_df2): def test_keyed_asof_join( time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 ): - expr = time_keyed_left.asof_join(time_keyed_right, "time", by="key") + expr = time_keyed_left.asof_join(time_keyed_right, "time", predicates="key") expr = expr.select(time_keyed_left, time_keyed_right.other_value) result = expr.execute() expected = pd.merge_asof(time_keyed_df1, time_keyed_df2, on="time", by="key") @@ -345,7 +345,7 @@ def test_keyed_asof_join_with_tolerance( time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2 ): expr = time_keyed_left.asof_join( - time_keyed_right, "time", by="key", tolerance=2 * ibis.interval(days=1) + time_keyed_right, "time", predicates="key", tolerance=2 * ibis.interval(days=1) ) result = expr.execute() expected = pd.merge_asof( diff --git a/ibis/backends/tests/test_asof_join.py b/ibis/backends/tests/test_asof_join.py index 5f2d1ac067c2..ffc16df9b908 100644 --- a/ibis/backends/tests/test_asof_join.py +++ b/ibis/backends/tests/test_asof_join.py @@ -152,7 +152,7 @@ def test_keyed_asof_join_with_tolerance( ): on = op(time_keyed_left["time"], time_keyed_right["time"]) expr = time_keyed_left.asof_join( - time_keyed_right, on=on, by="key", tolerance=ibis.interval(days=2) + time_keyed_right, on=on, predicates="key", tolerance=ibis.interval(days=2) ) result = con.execute(expr) diff --git a/ibis/expr/types/joins.py b/ibis/expr/types/joins.py index 6e3021780507..71a8b11ab061 100644 --- a/ibis/expr/types/joins.py +++ b/ibis/expr/types/joins.py @@ -335,13 +335,12 @@ def asof_join( right: Table, on, predicates=(), - by=(), tolerance=None, *, lname: str = "", rname: str = "{name}_right", ): - predicates = util.promote_list(predicates) + util.promote_list(by) + predicates = util.promote_list(predicates) if tolerance is not None: # `tolerance` parameter is mimicking the pandas API, but we express # it at the expression level by a sequence of operations: diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index 9fb7e1a18c0c..f839ce88f2a3 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -3043,7 +3043,6 @@ def asof_join( right: Table, on: str | ir.BooleanColumn, predicates: str | ir.Column | Sequence[str | ir.Column] = (), - by: str | ir.Column | Sequence[str | ir.Column] = (), tolerance: str | ir.IntervalScalar | None = None, *, lname: str = "", @@ -3054,8 +3053,6 @@ def asof_join( Similar to a left join except that the match is done on nearest key rather than equal keys. - Optionally, match keys with `by` before joining with `predicates`. - Parameters ---------- left @@ -3066,8 +3063,6 @@ def asof_join( Closest match inequality condition predicates Additional join predicates - by - Additional equality join predicates tolerance Amount of time to look behind when joining lname @@ -3085,7 +3080,7 @@ def asof_join( from ibis.expr.types.joins import Join return Join(left.op()).asof_join( - right, on, predicates, by=by, tolerance=tolerance, lname=lname, rname=rname + right, on, predicates, tolerance=tolerance, lname=lname, rname=rname ) def cross_join( diff --git a/ibis/tests/expr/test_table.py b/ibis/tests/expr/test_table.py index 961c330024d8..2024f5f584fb 100644 --- a/ibis/tests/expr/test_table.py +++ b/ibis/tests/expr/test_table.py @@ -984,7 +984,7 @@ def test_asof_join_with_by(): ) assert join_without_by.op() == expected - join_with_by = api.asof_join(left, right, "time", by="key") + join_with_predicates = api.asof_join(left, right, "time", predicates="key") with join_tables(left, right) as (r1, r2): expected = ops.JoinChain( first=r1, @@ -1000,7 +1000,7 @@ def test_asof_join_with_by(): "value2": r2.value2, }, ) - assert join_with_by.op() == expected + assert join_with_predicates.op() == expected @pytest.mark.parametrize(