From b35582e4f92d277bb7f8edaba5bea20788d9752a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 1 Jul 2024 09:05:57 -0400 Subject: [PATCH] fix(ir): avoid deduplicating filters based solely on their name (#9476) This PR fixes an issue where we were using `unwrap_aliases` to deduplicate and enforce expressions by their name in `filter`s. That approach makes sense in the context of projections, but the name of predicate expressions in a `filter` is irrelevant. The solution was to continue to unwrap aliases but avoid deduplicating expressions based on their name, and do it solely on the hash value of expressions. Closes #9474. --- ibis/expr/tests/test_newrels.py | 11 +++++++++++ ibis/expr/types/relations.py | 17 +++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 5e17023a479e..e92def62957e 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1737,3 +1737,14 @@ def test_projections_with_different_field_order_are_unequal(): t2 = t.select(b=2, a=1) assert not t1.equals(t2) + + +def test_filters_are_allowed_to_have_the_same_name(): + t = ibis.table({"a": "int64", "b": "string"}, name="t") + f1 = t.filter(t.a > 1, t.a > 1) + f2 = t.filter(t.a > 1) + f3 = t.filter((t.a > 1).name("a")) + f4 = t.filter((t.a > 1).name("a"), (t.a > 1).name("b")) + assert f1.equals(f2) + assert f1.equals(f3) + assert f1.equals(f4) diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index cb6e4fc9c395..4d80735888e8 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -2542,8 +2542,21 @@ def filter( from ibis.expr.rewrites import flatten_predicates, rewrite_filter_input preds = self.bind(*predicates) - preds = unwrap_aliases(preds) - preds = flatten_predicates(list(preds.values())) + + # we can't use `unwrap_aliases` here because that function + # deduplicates based on name alone + # + # it's perfectly valid to repeat a filter, even if it might be + # useless, so enforcing uniquely named expressions here doesn't make + # sense + # + # instead, compute all distinct unaliased predicates + result = toolz.unique( + node.arg if isinstance(node := value.op(), ops.Alias) else node + for value in preds + ) + + preds = flatten_predicates(list(result)) preds = list(map(rewrite_filter_input, preds)) if not preds: raise com.IbisInputError("You must pass at least one predicate to filter")