From cd9a34ce7643fcdaf1e8f8166b968fd8a175308a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Wed, 27 Jul 2022 05:57:58 -0400 Subject: [PATCH] feat(api): add `distinct` to `Intersection` and `Difference` operations --- ibis/expr/operations/relations.py | 7 ++++--- ibis/expr/types/relations.py | 15 +++++++++------ 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index bb7e5691de70..58cb57f97ba5 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -254,13 +254,14 @@ def __init__(self, left, right, by, predicates, **kwargs): class SetOp(TableNode, sch.HasSchema): left = rlz.table right = rlz.table + distinct = rlz.optional(rlz.instance_of(bool), default=False) - def __init__(self, left, right, **kwargs): + def __init__(self, left, right, distinct: bool, **kwargs): if not left.schema().equals(right.schema()): raise com.RelationError( 'Table schemas must be equal for set operations' ) - super().__init__(left=left, right=right, **kwargs) + super().__init__(left=left, right=right, distinct=distinct, **kwargs) @property def schema(self): @@ -272,7 +273,7 @@ def blocks(self): @public class Union(SetOp): - distinct = rlz.optional(rlz.instance_of(bool), default=False) + pass @public diff --git a/ibis/expr/types/relations.py b/ibis/expr/types/relations.py index f107c5d3c857..25d96dc16c6e 100644 --- a/ibis/expr/types/relations.py +++ b/ibis/expr/types/relations.py @@ -292,7 +292,7 @@ def view(self) -> Table: return ops.SelfReference(self).to_expr() - def difference(self, right: Table) -> Table: + def difference(self, right: Table, distinct: bool = True) -> Table: """Compute the set difference of two table expressions. The input tables must have identical schemas. 
@@ -301,6 +301,8 @@ def difference(self, right: Table) -> Table: ---------- right Table expression + distinct + Only return distinct rows from the difference Returns ------- @@ -309,7 +311,7 @@ def difference(self, right: Table) -> Table: """ from ibis.expr import operations as ops - return ops.Difference(self, right).to_expr() + return ops.Difference(self, right, distinct=distinct).to_expr() def aggregate( self, @@ -455,8 +457,7 @@ def union( right Table expression distinct - Only union distinct rows not occurring in the calling table (this - can be very expensive, be careful) + Only union distinct rows not occurring in the calling table Returns ------- @@ -467,7 +468,7 @@ def union( return ops.Union(self, right, distinct=distinct).to_expr() - def intersect(self, right: Table) -> Table: + def intersect(self, right: Table, distinct: bool = True) -> Table: """Compute the set intersection of two table expressions. The input tables must have identical schemas. @@ -476,6 +477,8 @@ def intersect(self, right: Table) -> Table: ---------- right Table expression + distinct + Only return distinct rows from the intersection Returns ------- @@ -484,7 +487,7 @@ def intersect(self, right: Table) -> Table: """ from ibis.expr import operations as ops - return ops.Intersection(self, right).to_expr() + return ops.Intersection(self, right, distinct=distinct).to_expr() def to_array(self) -> ir.Column: """View a single column table as an array.