Skip to content

Commit

Permalink
feat: improve error message on unequal schemas during set ops
Browse files (browse the repository at this point in the history)
  • Loading branch information
NickCrews committed May 3, 2024
1 parent 4e7a00c commit 50f3693
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 17 deletions.
13 changes: 7 additions & 6 deletions ibis/common/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,13 @@ def _check_conflict(self, other: collections.abc.Mapping) -> set[K]:
# A key-value pair is conflicting if the key is the same but the value is
# different.
common_keys = self.keys() & other.keys()
for key in common_keys:
left, right = self[key], other[key]
if left != right:
raise ValueError(
f"Conflicting values for key `{key}`: {left} != {right}"
)
conflicts = {
f" `{key}`: {self[key]} != {other[key]}"
for key in common_keys
if self[key] != other[key]
}
if conflicts:
raise ValueError("Conflicting values for keys:\n" + "\n".join(conflicts))
return common_keys

def __ge__(self, other: collections.abc.Mapping) -> bool:
Expand Down
22 changes: 18 additions & 4 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,10 +288,24 @@ class Set(Relation):
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
if dict(left.schema) != dict(right.schema):
raise RelationError("Table schemas must be equal for set operations")
elif left.schema.names != right.schema.names:
err_msg = "Table schemas must be equal for set operations."
try:
missing_from_left = right.schema - left.schema
missing_from_right = left.schema - right.schema
except ValueError as e:
if "Conflicting values" in str(e):
raise RelationError(err_msg) from e
else:
raise
if missing_from_left or missing_from_right:
msgs = [err_msg]
if missing_from_left:
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
if missing_from_right:
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")

[Codecov / codecov/patch annotation — check warning on line 305 in ibis/expr/operations/relations.py (ibis/expr/operations/relations.py#L305): added line #L305 was not covered by tests. View check run for this annotation.]
raise RelationError("\n".join(msgs))

if left.schema.names != right.schema.names:
# rewrite so that both sides have the columns in the same order making it
# easier for the backends to implement set operations
cols = {name: Field(right, name) for name in left.schema.names}
Expand Down
15 changes: 8 additions & 7 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _name_locs(self) -> dict[str, int]:
return {v: i for i, v in enumerate(self.names)}

def equals(self, other: Schema) -> bool:
"""Return whether `other` is equal to `self`.
"""Return whether `other` is equal to `self`. Order matters.
Parameters
----------
Expand All @@ -77,12 +77,13 @@ def equals(self, other: Schema) -> bool:
Examples
--------
>>> import ibis
>>> first = ibis.schema({"a": "int"})
>>> second = ibis.schema({"a": "int"})
>>> assert first.equals(second)
>>> third = ibis.schema({"a": "array<int>"})
>>> assert not first.equals(third)
>>> xy = ibis.schema({"x": int, "y": str})
>>> xy2 = ibis.schema({"x": int, "y": str})
>>> yx = ibis.schema({"y": str, "x": int})
>>> xy_float = ibis.schema({"x": float, "y": str})
>>> assert xy.equals(xy2)
>>> assert not xy.equals(yx)
>>> assert not xy.equals(xy_float)
"""
if not isinstance(other, Schema):
raise TypeError(
Expand Down

0 comments on commit 50f3693

Please sign in to comment.