From 89056b9bbf14bf5f46149addaa778b4c917082ba Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Tue, 9 Jan 2024 04:34:52 -0500 Subject: [PATCH] fix(backends): preserve `order_by` position in window function when subsequent expressions are duplicated (#7943) ## Description of changes This PR fixes an issue where duplicate window expressions were given the position of the new (duplicated value) of the expression, rather than preserving the original order. This was because we were iterating over the new keys first and putting those expressions into a `dict`. BY iterating over the old keys first, we preserve the original order. ## Issues closed Closes #7940. --- ibis/backends/tests/test_window.py | 33 ++++++++++++++++++++++++++++++ ibis/expr/analysis.py | 9 +++++++- ibis/expr/types/generic.py | 1 - 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/ibis/backends/tests/test_window.py b/ibis/backends/tests/test_window.py index 25890c530f97..b97cde5ab36d 100644 --- a/ibis/backends/tests/test_window.py +++ b/ibis/backends/tests/test_window.py @@ -1251,3 +1251,36 @@ def test_rank_followed_by_over_call_merge_frames(backend, alltypes, df): ) backend.assert_series_equal(result, expected) + + +@pytest.mark.notyet( + ["pandas", "dask"], + reason="multiple ordering keys in a window function not supported for ranking", + raises=ValueError, +) +@pytest.mark.notyet( + ["mssql"], + reason="IS NULL not valid syntax for mssql", + raises=sa.exc.ProgrammingError, +) +@pytest.mark.notimpl(["polars"], raises=com.OperationNotDefinedError) +@pytest.mark.notyet(["flink"], raises=com.UnsupportedOperationError) +@pytest.mark.broken( + ["pyspark"], reason="pyspark requires CURRENT ROW", raises=PySparkAnalysisException +) +def test_ordering_order(con): + table = ibis.memtable({"bool_col": [True, False, False, None, True]}) + window = ibis.window( + order_by=[ + ibis.asc(table["bool_col"].isnull()), + ibis.asc(table["bool_col"]), + ], + ) + expr = table.select( + rank=table["bool_col"].rank().over(window), + bool_col=table["bool_col"], + ) + + result = con.execute(expr) + value = result.bool_col.loc[result["rank"] == 4].item() + assert pd.isna(value) diff --git a/ibis/expr/analysis.py b/ibis/expr/analysis.py index d086a7c827e3..386ede17090f 100644 --- a/ibis/expr/analysis.py +++ b/ibis/expr/analysis.py @@ -182,7 +182,14 @@ def merge_windows(_, default_frame): group_by = tuple(toolz.unique(_.frame.group_by + default_frame.group_by)) order_by = {} - for sort_key in _.frame.order_by + default_frame.order_by: + # iterate in the order of the existing keys followed by the new keys + # + # this allows duplicates to be overridden with no effect on the original + # position + # + # see https://github.com/ibis-project/ibis/issues/7940 for how this + # originally manifested + for sort_key in default_frame.order_by + _.frame.order_by: order_by[sort_key.expr] = sort_key.ascending order_by = tuple(ops.SortKey(k, v) for k, v in order_by.items()) diff --git a/ibis/expr/types/generic.py b/ibis/expr/types/generic.py index 70a6c62eaa4e..0f4ff974389c 100644 --- a/ibis/expr/types/generic.py +++ b/ibis/expr/types/generic.py @@ -765,7 +765,6 @@ def bind(table): ) return expr - op = self.op() if isinstance(window, bl.WindowBuilder): if table := an.find_first_base_table(self.op()): return bind(table)