Skip to content

Commit

Permalink
feat: improve error message on unequal schemas during set ops
Browse files: browse the repository at this point in the history.
  • Loading branch information
NickCrews committed May 3, 2024
1 parent 4e7a00c commit 82d43e8
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 11 deletions.
22 changes: 18 additions & 4 deletions ibis/expr/operations/relations.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -288,10 +288,24 @@ class Set(Relation):
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
if dict(left.schema) != dict(right.schema):
raise RelationError("Table schemas must be equal for set operations")
elif left.schema.names != right.schema.names:
err_msg = "Table schemas must be equal for set operations."
try:
missing_from_left = right.schema - left.schema
missing_from_right = left.schema - right.schema
except ValueError as e:
if "Conflicting values for key" in str(e):
raise RelationError(err_msg) from e
else:
raise
if missing_from_left or missing_from_right:
msgs = [err_msg]
if missing_from_left:
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
if missing_from_right:
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")

Check warning on line 305 in ibis/expr/operations/relations.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/operations/relations.py#L305

Added line #L305 was not covered by tests
raise RelationError("\n".join(msgs))

if left.schema.names != right.schema.names:
# rewrite so that both sides have the columns in the same order making it
# easier for the backends to implement set operations
cols = {name: Field(right, name) for name in left.schema.names}
Expand Down
15 changes: 8 additions & 7 deletions ibis/expr/schema.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,7 +67,7 @@ def _name_locs(self) -> dict[str, int]:
return {v: i for i, v in enumerate(self.names)}

def equals(self, other: Schema) -> bool:
"""Return whether `other` is equal to `self`.
"""Return whether `other` is equal to `self`. Order matters.
Parameters
----------
Expand All @@ -77,12 +77,13 @@ def equals(self, other: Schema) -> bool:
Examples
--------
>>> import ibis
>>> first = ibis.schema({"a": "int"})
>>> second = ibis.schema({"a": "int"})
>>> assert first.equals(second)
>>> third = ibis.schema({"a": "array<int>"})
>>> assert not first.equals(third)
>>> xy = ibis.schema({"x": int, "y": str})
>>> xy2 = ibis.schema({"x": int, "y": str})
>>> yx = ibis.schema({"y": str, "x": int})
>>> xy_float = ibis.schema({"x": float, "y": str})
>>> assert xy.equals(xy2)
>>> assert not xy.equals(yx)
>>> assert not xy.equals(xy_float)
"""
if not isinstance(other, Schema):
raise TypeError(
Expand Down

0 comments on commit 82d43e8

Please sign in to comment.