From fb0926c97f7858b60aab24777b07641b914a69d8 Mon Sep 17 00:00:00 2001 From: Nick Crews Date: Tue, 7 May 2024 10:05:23 -0800 Subject: [PATCH] feat: improve error message on unequal schemas during set ops --- ibis/common/collections.py | 14 ++++++++------ ibis/common/exceptions.py | 12 +++++++++++- ibis/expr/operations/relations.py | 25 ++++++++++++++++++++----- ibis/expr/schema.py | 15 +++++++++------ ibis/tests/expr/test_set_operations.py | 2 +- 5 files changed, 49 insertions(+), 19 deletions(-) diff --git a/ibis/common/collections.py b/ibis/common/collections.py index 6394df918b8a..31de94dac372 100644 --- a/ibis/common/collections.py +++ b/ibis/common/collections.py @@ -8,6 +8,7 @@ from public import public from ibis.common.bases import Abstract, Hashable +from ibis.common.exceptions import ConflictingValuesError if TYPE_CHECKING: from typing_extensions import Self @@ -202,12 +203,13 @@ def _check_conflict(self, other: collections.abc.Mapping) -> set[K]: # A key-value pair is conflicting if the key is the same but the value is # different. common_keys = self.keys() & other.keys() - for key in common_keys: - left, right = self[key], other[key] - if left != right: - raise ValueError( - f"Conflicting values for key `{key}`: {left} != {right}" - ) + conflicts = { + (key, self[key], other[key]) + for key in common_keys + if self[key] != other[key] + } + if conflicts: + raise ConflictingValuesError(conflicts) return common_keys def __ge__(self, other: collections.abc.Mapping) -> bool: diff --git a/ibis/common/exceptions.py b/ibis/common/exceptions.py index feb89b76cd34..a88bdd27a5b1 100644 --- a/ibis/common/exceptions.py +++ b/ibis/common/exceptions.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import Callable +from typing import Any, Callable class IbisError(Exception): @@ -143,6 +143,16 @@ def __str__(self) -> str: return f"Only the `@udf` decorator is allowed in user-defined function: `{name}`; found lines {lines}" +class ConflictingValuesError(ValueError): + """A single key has conflicting values in two different mappings.""" + + def __init__(self, conflicts: set[tuple[Any, Any, Any]]): + self.conflicts = conflicts + msgs = [f" `{key}`: {v1} != {v2}" for key, v1, v2 in conflicts] + msg = "Conflicting values for keys:\n" + "\n".join(msgs) + super().__init__(msg) + + def mark_as_unsupported(f: Callable) -> Callable: """Decorate an unsupported method.""" diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 1a54bdb93e31..b44882d55106 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -10,7 +10,11 @@ import ibis.expr.datashape as ds import ibis.expr.datatypes as dt from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict, FrozenOrderedDict +from ibis.common.collections import ( + ConflictingValuesError, + FrozenDict, + FrozenOrderedDict, +) from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError from ibis.common.grounds import Concrete from ibis.common.patterns import Between, InstanceOf @@ -288,10 +292,21 @@ class Set(Relation): values = FrozenOrderedDict() def __init__(self, left, right, **kwargs): - # convert to dictionary first, to get key-unordered comparison semantics - if dict(left.schema) != dict(right.schema): - raise RelationError("Table schemas must be equal for set operations") - elif left.schema.names != right.schema.names: + err_msg = "Table schemas must be equal for set operations." + try: + missing_from_left = right.schema - left.schema + missing_from_right = left.schema - right.schema + except ConflictingValuesError as e: + raise RelationError(err_msg + "\n" + str(e)) from e + if missing_from_left or missing_from_right: + msgs = [err_msg] + if missing_from_left: + msgs.append(f"Columns missing from the left:\n{missing_from_left}.") + if missing_from_right: + msgs.append(f"Columns missing from the right:\n{missing_from_right}.") + raise RelationError("\n".join(msgs)) + + if left.schema.names != right.schema.names: # rewrite so that both sides have the columns in the same order making it # easier for the backends to implement set operations cols = {name: Field(right, name) for name in left.schema.names} diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index 65d4a88df52c..786b49736ee1 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -69,6 +69,8 @@ def _name_locs(self) -> dict[str, int]: def equals(self, other: Schema) -> bool: """Return whether `other` is equal to `self`. + The order of fields in the schema is taken into account when computing equality. + Parameters ---------- other @@ -77,12 +79,13 @@ def equals(self, other: Schema) -> bool: Examples -------- >>> import ibis - >>> first = ibis.schema({"a": "int"}) - >>> second = ibis.schema({"a": "int"}) - >>> assert first.equals(second) - >>> third = ibis.schema({"a": "array"}) - >>> assert not first.equals(third) - + >>> xy = ibis.schema({"x": int, "y": str}) + >>> xy2 = ibis.schema({"x": int, "y": str}) + >>> yx = ibis.schema({"y": str, "x": int}) + >>> xy_float = ibis.schema({"x": float, "y": str}) + >>> assert xy.equals(xy2) + >>> assert not xy.equals(yx) + >>> assert not xy.equals(xy_float) """ if not isinstance(other, Schema): raise TypeError( diff --git a/ibis/tests/expr/test_set_operations.py b/ibis/tests/expr/test_set_operations.py index b872520ea403..3012c7a0ac10 100644 --- a/ibis/tests/expr/test_set_operations.py +++ b/ibis/tests/expr/test_set_operations.py @@ -40,7 +40,7 @@ class D: @pytest.mark.parametrize("method", ["union", "intersect", "difference"]) def test_operation_requires_equal_schemas(method): - with pytest.raises(RelationError): + with pytest.raises(RelationError, match="`c`: string != float64"): getattr(a, method)(d)