Skip to content

Commit

Permalink
feat(api): add distinct to Intersection and Difference operations
Browse files: browse the repository at this point in the history
  • Loading branch information
cpcloud committed Jul 29, 2022
1 parent 772f56e commit cd9a34c
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
7 changes: 4 additions & 3 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,13 +254,14 @@ def __init__(self, left, right, by, predicates, **kwargs):
class SetOp(TableNode, sch.HasSchema):
left = rlz.table
right = rlz.table
distinct = rlz.optional(rlz.instance_of(bool), default=False)

def __init__(self, left, right, **kwargs):
def __init__(self, left, right, distinct: bool, **kwargs):
if not left.schema().equals(right.schema()):
raise com.RelationError(
'Table schemas must be equal for set operations'
)
super().__init__(left=left, right=right, **kwargs)
super().__init__(left=left, right=right, distinct=distinct, **kwargs)

@property
def schema(self):
Expand All @@ -272,7 +273,7 @@ def blocks(self):

@public
class Union(SetOp):
distinct = rlz.optional(rlz.instance_of(bool), default=False)
pass


@public
Expand Down
15 changes: 9 additions & 6 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ def view(self) -> Table:

return ops.SelfReference(self).to_expr()

def difference(self, right: Table) -> Table:
def difference(self, right: Table, distinct: bool = True) -> Table:
"""Compute the set difference of two table expressions.
The input tables must have identical schemas.
Expand All @@ -301,6 +301,8 @@ def difference(self, right: Table) -> Table:
----------
right
Table expression
distinct
If True, return only the distinct rows of the difference
Returns
-------
Expand All @@ -309,7 +311,7 @@ def difference(self, right: Table) -> Table:
"""
from ibis.expr import operations as ops

return ops.Difference(self, right).to_expr()
return ops.Difference(self, right, distinct=distinct).to_expr()

def aggregate(
self,
Expand Down Expand Up @@ -455,8 +457,7 @@ def union(
right
Table expression
distinct
Only union distinct rows not occurring in the calling table (this
can be very expensive, be careful)
Only union distinct rows not occurring in the calling table
Returns
-------
Expand All @@ -467,7 +468,7 @@ def union(

return ops.Union(self, right, distinct=distinct).to_expr()

def intersect(self, right: Table) -> Table:
def intersect(self, right: Table, distinct: bool = True) -> Table:
"""Compute the set intersection of two table expressions.
The input tables must have identical schemas.
Expand All @@ -476,6 +477,8 @@ def intersect(self, right: Table) -> Table:
----------
right
Table expression
distinct
If True, return only the distinct rows of the intersection
Returns
-------
Expand All @@ -484,7 +487,7 @@ def intersect(self, right: Table) -> Table:
"""
from ibis.expr import operations as ops

return ops.Intersection(self, right).to_expr()
return ops.Intersection(self, right, distinct=distinct).to_expr()

def to_array(self) -> ir.Column:
"""View a single column table as an array.
Expand Down

0 comments on commit cd9a34c

Please sign in to comment.