Skip to content

Commit

Permalink
feat: improve error message on unequal schemas during set ops
Browse files Browse the repository at this point in the history
  • Loading branch information
NickCrews committed May 6, 2024
1 parent 4e7a00c commit 3de6eae
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 19 deletions.
23 changes: 17 additions & 6 deletions ibis/common/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,16 @@ def __eq__(self, other):
return dict(self.items()) == dict(other.items())


class ConflictingValuesError(ValueError):
    """A single key has conflicting values in two different mappings.

    Parameters
    ----------
    conflicts
        Set of ``(key, left_value, right_value)`` triples, one for each key
        whose values differ between the two mappings.

    Attributes
    ----------
    conflicts
        The set of conflict triples exactly as passed in, so callers can
        inspect the conflicts programmatically instead of parsing the
        message.
    """

    def __init__(self, conflicts: set[tuple[K, V, V]]):
        self.conflicts = conflicts
        # Sets have no defined iteration order (and string hashing is
        # randomized per process), so order the lines by key to keep the
        # error message stable from run to run.
        ordered = sorted(conflicts, key=lambda conflict: str(conflict[0]))
        msgs = [f" `{key}`: {v1} != {v2}" for key, v1, v2 in ordered]
        msg = "Conflicting values for keys:\n" + "\n".join(msgs)
        super().__init__(msg)


@public
class MapSet(Mapping[K, V]):
"""A mapping that also supports set-like operations.
Expand Down Expand Up @@ -202,12 +212,13 @@ def _check_conflict(self, other: collections.abc.Mapping) -> set[K]:
# A key-value pair is conflicting if the key is the same but the value is
# different.
common_keys = self.keys() & other.keys()
for key in common_keys:
left, right = self[key], other[key]
if left != right:
raise ValueError(
f"Conflicting values for key `{key}`: {left} != {right}"
)
conflicts = {
(key, self[key], other[key])
for key in common_keys
if self[key] != other[key]
}
if conflicts:
raise ConflictingValuesError(conflicts)
return common_keys

def __ge__(self, other: collections.abc.Mapping) -> bool:
Expand Down
25 changes: 20 additions & 5 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, FrozenOrderedDict
from ibis.common.collections import (
ConflictingValuesError,
FrozenDict,
FrozenOrderedDict,
)
from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError
from ibis.common.grounds import Concrete
from ibis.common.patterns import Between, InstanceOf
Expand Down Expand Up @@ -288,10 +292,21 @@ class Set(Relation):
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
if dict(left.schema) != dict(right.schema):
raise RelationError("Table schemas must be equal for set operations")
elif left.schema.names != right.schema.names:
err_msg = "Table schemas must be equal for set operations."
try:
missing_from_left = right.schema - left.schema
missing_from_right = left.schema - right.schema
except ConflictingValuesError as e:
raise RelationError(err_msg + "\n" + str(e)) from e
if missing_from_left or missing_from_right:
msgs = [err_msg]
if missing_from_left:
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
if missing_from_right:
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")
raise RelationError("\n".join(msgs))

if left.schema.names != right.schema.names:
# rewrite so that both sides have the columns in the same order making it
# easier for the backends to implement set operations
cols = {name: Field(right, name) for name in left.schema.names}
Expand Down
15 changes: 8 additions & 7 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def _name_locs(self) -> dict[str, int]:
return {v: i for i, v in enumerate(self.names)}

def equals(self, other: Schema) -> bool:
"""Return whether `other` is equal to `self`.
"""Return whether `other` is equal to `self`. Order matters.
Parameters
----------
Expand All @@ -77,12 +77,13 @@ def equals(self, other: Schema) -> bool:
Examples
--------
>>> import ibis
>>> first = ibis.schema({"a": "int"})
>>> second = ibis.schema({"a": "int"})
>>> assert first.equals(second)
>>> third = ibis.schema({"a": "array<int>"})
>>> assert not first.equals(third)
>>> xy = ibis.schema({"x": int, "y": str})
>>> xy2 = ibis.schema({"x": int, "y": str})
>>> yx = ibis.schema({"y": str, "x": int})
>>> xy_float = ibis.schema({"x": float, "y": str})
>>> assert xy.equals(xy2)
>>> assert not xy.equals(yx)
>>> assert not xy.equals(xy_float)
"""
if not isinstance(other, Schema):
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_set_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class D:

@pytest.mark.parametrize("method", ["union", "intersect", "difference"])
def test_operation_requires_equal_schemas(method):
with pytest.raises(RelationError):
with pytest.raises(RelationError, match="`c`: string != float64"):
getattr(a, method)(d)


Expand Down

0 comments on commit 3de6eae

Please sign in to comment.