From b35582e4f92d277bb7f8edaba5bea20788d9752a Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 1 Jul 2024 09:05:57 -0400
Subject: [PATCH] fix(ir): avoid deduplicating filters based solely on their
 name (#9476)

This PR fixes an issue where we were using `unwrap_aliases` to
deduplicate and
enforce expressions by their name in `filter`s.

That approach makes sense in the context of projections, but the name of
predicate expressions in a `filter` is irrelevant.

The solution was to continue to unwrap aliases but avoid deduplicating
expressions based on their name, and do it solely on the hash value of
expressions.

Closes #9474.
---
 ibis/expr/tests/test_newrels.py | 11 +++++++++++
 ibis/expr/types/relations.py    | 17 +++++++++++++++--
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py
index 5e17023a479e..e92def62957e 100644
--- a/ibis/expr/tests/test_newrels.py
+++ b/ibis/expr/tests/test_newrels.py
@@ -1737,3 +1737,14 @@ def test_projections_with_different_field_order_are_unequal():
     t2 = t.select(b=2, a=1)
 
     assert not t1.equals(t2)
+
+
+def test_filters_are_allowed_to_have_the_same_name():
+    t = ibis.table({"a": "int64", "b": "string"}, name="t")
+    f1 = t.filter(t.a > 1, t.a > 1)
+    f2 = t.filter(t.a > 1)
+    f3 = t.filter((t.a > 1).name("a"))
+    f4 = t.filter((t.a > 1).name("a"), (t.a > 1).name("b"))
+    assert f1.equals(f2)
+    assert f1.equals(f3)
+    assert f1.equals(f4)
diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py
index cb6e4fc9c395..4d80735888e8 100644
--- a/ibis/expr/types/relations.py
+++ b/ibis/expr/types/relations.py
@@ -2542,8 +2542,21 @@ def filter(
         from ibis.expr.rewrites import flatten_predicates, rewrite_filter_input
 
         preds = self.bind(*predicates)
-        preds = unwrap_aliases(preds)
-        preds = flatten_predicates(list(preds.values()))
+
+        # we can't use `unwrap_aliases` here because that function
+        # deduplicates based on name alone
+        #
+        # it's perfectly valid to repeat a filter, even if it might be
+        # useless, so enforcing uniquely named expressions here doesn't make
+        # sense
+        #
+        # instead, compute all distinct unaliased predicates
+        result = toolz.unique(
+            node.arg if isinstance(node := value.op(), ops.Alias) else node
+            for value in preds
+        )
+
+        preds = flatten_predicates(list(result))
         preds = list(map(rewrite_filter_input, preds))
         if not preds:
             raise com.IbisInputError("You must pass at least one predicate to filter")