Skip to content

Commit

Permalink
refactor(ir): loosen the join integrity checks (#8817)
Browse files Browse the repository at this point in the history
  • Loading branch information
kszucs authored Apr 3, 2024
1 parent cd9219b commit 2bc903d
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
6 changes: 4 additions & 2 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,11 @@ class JoinChain(Relation):

def __init__(self, first, rest, values):
allowed_parents = {first}
assert first.index == 0
for join in rest:
assert join.table.index == len(allowed_parents)
if join.table in allowed_parents:
raise IntegrityError(
f"Cannot add {join.table!r} to the join chain, it is already in the chain"
)
allowed_parents.add(join.table)
_check_integrity(join.predicates, allowed_parents)
_check_integrity(values.values(), allowed_parents)
Expand Down
21 changes: 20 additions & 1 deletion ibis/expr/tests/test_newrels.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,6 @@ def test_project_before_and_after_filter():
)


# TODO(kszucs): add test for failing integrity checks
def test_join():
t1 = ibis.table(name="t1", schema={"a": "int64", "b": "string"})
t2 = ibis.table(name="t2", schema={"c": "int64", "d": "string"})
Expand Down Expand Up @@ -581,6 +580,26 @@ def test_join():
)


def test_join_integrity_checks():
    """Check that a JoinChain rejects a join table that is already in the chain.

    Two scenarios are exercised against the same underlying table:

    * join tables built with different second arguments compare unequal,
      hash differently, and can be combined into a ``JoinChain``;
    * join tables built with identical arguments compare equal, hash equal,
      and constructing a ``JoinChain`` from them raises ``IntegrityError``.
    """
    base = ibis.table(name="t1", schema={"a": "int64", "b": "string"})

    # Accepted: the same table wrapped twice, distinguished by the second
    # JoinTable argument (10 vs 20), so the two references are distinct.
    left = ops.JoinTable(base, 10)
    right = ops.JoinTable(base, 20)
    assert left != right
    assert hash(left) != hash(right)
    inner_link = ops.JoinLink("inner", right, [True])
    chain = ops.JoinChain(left, [inner_link], values={})
    assert isinstance(chain, JoinChain)

    # Rejected: both references are built from identical arguments, so they
    # are equal and the chain would contain the same join table twice.
    left = ops.JoinTable(base, 10)
    duplicate = ops.JoinTable(base, 10)
    assert left == duplicate
    assert hash(left) == hash(duplicate)
    with pytest.raises(IntegrityError):
        # The link is built inside the guarded block, as in the original.
        ops.JoinChain(left, [ops.JoinLink("inner", duplicate, [True])], values={})


def test_join_unambiguous_select():
a = ibis.table(name="a", schema={"a_int": "int64", "a_str": "string"})
b = ibis.table(name="b", schema={"b_int": "int64", "b_str": "string"})
Expand Down

0 comments on commit 2bc903d

Please sign in to comment.