Skip to content

Commit

Permalink
feat: improve error message on unequal schemas during set ops
Browse files Browse the repository at this point in the history
  • Loading branch information
NickCrews committed May 11, 2024
1 parent 8e5cc3b commit fb0926c
Show file tree
Hide file tree
Showing 5 changed files with 49 additions and 19 deletions.
14 changes: 8 additions & 6 deletions ibis/common/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from public import public

from ibis.common.bases import Abstract, Hashable
from ibis.common.exceptions import ConflictingValuesError

if TYPE_CHECKING:
from typing_extensions import Self
Expand Down Expand Up @@ -202,12 +203,13 @@ def _check_conflict(self, other: collections.abc.Mapping) -> set[K]:
# A key-value pair is conflicting if the key is the same but the value is
# different.
common_keys = self.keys() & other.keys()
for key in common_keys:
left, right = self[key], other[key]
if left != right:
raise ValueError(
f"Conflicting values for key `{key}`: {left} != {right}"
)
conflicts = {
(key, self[key], other[key])
for key in common_keys
if self[key] != other[key]
}
if conflicts:
raise ConflictingValuesError(conflicts)
return common_keys

def __ge__(self, other: collections.abc.Mapping) -> bool:
Expand Down
12 changes: 11 additions & 1 deletion ibis/common/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from __future__ import annotations

from typing import Callable
from typing import Any, Callable


class IbisError(Exception):
Expand Down Expand Up @@ -143,6 +143,16 @@ def __str__(self) -> str:
return f"Only the `@udf` decorator is allowed in user-defined function: `{name}`; found lines {lines}"


class ConflictingValuesError(ValueError):
"""A single key has conflicting values in two different mappings."""

def __init__(self, conflicts: set[tuple[Any, Any, Any]]):
self.conflicts = conflicts
msgs = [f" `{key}`: {v1} != {v2}" for key, v1, v2 in conflicts]
msg = "Conflicting values for keys:\n" + "\n".join(msgs)
super().__init__(msg)


def mark_as_unsupported(f: Callable) -> Callable:
"""Decorate an unsupported method."""

Expand Down
25 changes: 20 additions & 5 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, FrozenOrderedDict
from ibis.common.collections import (
ConflictingValuesError,
FrozenDict,
FrozenOrderedDict,
)
from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError
from ibis.common.grounds import Concrete
from ibis.common.patterns import Between, InstanceOf
Expand Down Expand Up @@ -288,10 +292,21 @@ class Set(Relation):
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
if dict(left.schema) != dict(right.schema):
raise RelationError("Table schemas must be equal for set operations")
elif left.schema.names != right.schema.names:
err_msg = "Table schemas must be equal for set operations."
try:
missing_from_left = right.schema - left.schema
missing_from_right = left.schema - right.schema
except ConflictingValuesError as e:
raise RelationError(err_msg + "\n" + str(e)) from e
if missing_from_left or missing_from_right:
msgs = [err_msg]
if missing_from_left:
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
if missing_from_right:
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")

Check warning on line 306 in ibis/expr/operations/relations.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/operations/relations.py#L306

Added line #L306 was not covered by tests
raise RelationError("\n".join(msgs))

if left.schema.names != right.schema.names:
# rewrite so that both sides have the columns in the same order making it
# easier for the backends to implement set operations
cols = {name: Field(right, name) for name in left.schema.names}
Expand Down
15 changes: 9 additions & 6 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def _name_locs(self) -> dict[str, int]:
def equals(self, other: Schema) -> bool:
"""Return whether `other` is equal to `self`.
The order of fields in the schema is taken into account when computing equality.
Parameters
----------
other
Expand All @@ -77,12 +79,13 @@ def equals(self, other: Schema) -> bool:
Examples
--------
>>> import ibis
>>> first = ibis.schema({"a": "int"})
>>> second = ibis.schema({"a": "int"})
>>> assert first.equals(second)
>>> third = ibis.schema({"a": "array<int>"})
>>> assert not first.equals(third)
>>> xy = ibis.schema({"x": int, "y": str})
>>> xy2 = ibis.schema({"x": int, "y": str})
>>> yx = ibis.schema({"y": str, "x": int})
>>> xy_float = ibis.schema({"x": float, "y": str})
>>> assert xy.equals(xy2)
>>> assert not xy.equals(yx)
>>> assert not xy.equals(xy_float)
"""
if not isinstance(other, Schema):
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_set_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class D:

@pytest.mark.parametrize("method", ["union", "intersect", "difference"])
def test_operation_requires_equal_schemas(method):
with pytest.raises(RelationError):
with pytest.raises(RelationError, match="`c`: string != float64"):
getattr(a, method)(d)


Expand Down

0 comments on commit fb0926c

Please sign in to comment.