Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ux): improve error message on unequal schemas during set ops #9115

Merged
merged 1 commit into from
May 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions ibis/common/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from public import public

from ibis.common.bases import Abstract, Hashable
from ibis.common.exceptions import ConflictingValuesError

if TYPE_CHECKING:
from typing_extensions import Self
Expand Down Expand Up @@ -202,12 +203,13 @@ def _check_conflict(self, other: collections.abc.Mapping) -> set[K]:
# A key-value pair is conflicting if the key is the same but the value is
# different.
common_keys = self.keys() & other.keys()
for key in common_keys:
left, right = self[key], other[key]
if left != right:
raise ValueError(
f"Conflicting values for key `{key}`: {left} != {right}"
)
conflicts = {
(key, self[key], other[key])
for key in common_keys
if self[key] != other[key]
}
if conflicts:
raise ConflictingValuesError(conflicts)
return common_keys

def __ge__(self, other: collections.abc.Mapping) -> bool:
Expand Down
12 changes: 11 additions & 1 deletion ibis/common/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from __future__ import annotations

from typing import Callable
from typing import Any, Callable


class IbisError(Exception):
Expand Down Expand Up @@ -143,6 +143,16 @@ def __str__(self) -> str:
return f"Only the `@udf` decorator is allowed in user-defined function: `{name}`; found lines {lines}"


class ConflictingValuesError(ValueError):
"""A single key has conflicting values in two different mappings."""

def __init__(self, conflicts: set[tuple[Any, Any, Any]]):
self.conflicts = conflicts
msgs = [f" `{key}`: {v1} != {v2}" for key, v1, v2 in conflicts]
msg = "Conflicting values for keys:\n" + "\n".join(msgs)
super().__init__(msg)


def mark_as_unsupported(f: Callable) -> Callable:
"""Decorate an unsupported method."""

Expand Down
25 changes: 20 additions & 5 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,11 @@
import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, FrozenOrderedDict
from ibis.common.collections import (
ConflictingValuesError,
FrozenDict,
FrozenOrderedDict,
)
from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError
from ibis.common.grounds import Concrete
from ibis.common.patterns import Between, InstanceOf
Expand Down Expand Up @@ -288,10 +292,21 @@
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
if dict(left.schema) != dict(right.schema):
raise RelationError("Table schemas must be equal for set operations")
elif left.schema.names != right.schema.names:
err_msg = "Table schemas must be equal for set operations."
try:
missing_from_left = right.schema - left.schema
missing_from_right = left.schema - right.schema
except ConflictingValuesError as e:
raise RelationError(err_msg + "\n" + str(e)) from e
if missing_from_left or missing_from_right:
msgs = [err_msg]
if missing_from_left:
msgs.append(f"Columns missing from the left:\n{missing_from_left}.")
if missing_from_right:
msgs.append(f"Columns missing from the right:\n{missing_from_right}.")

Check warning on line 306 in ibis/expr/operations/relations.py

View check run for this annotation

Codecov / codecov/patch

ibis/expr/operations/relations.py#L306

Added line #L306 was not covered by tests
raise RelationError("\n".join(msgs))

if left.schema.names != right.schema.names:
# rewrite so that both sides have the columns in the same order making it
# easier for the backends to implement set operations
cols = {name: Field(right, name) for name in left.schema.names}
Expand Down
15 changes: 9 additions & 6 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def _name_locs(self) -> dict[str, int]:
def equals(self, other: Schema) -> bool:
"""Return whether `other` is equal to `self`.

The order of fields in the schema is taken into account when computing equality.

Parameters
----------
other
Expand All @@ -77,12 +79,13 @@ def equals(self, other: Schema) -> bool:
Examples
--------
>>> import ibis
>>> first = ibis.schema({"a": "int"})
>>> second = ibis.schema({"a": "int"})
>>> assert first.equals(second)
>>> third = ibis.schema({"a": "array<int>"})
>>> assert not first.equals(third)

>>> xy = ibis.schema({"x": int, "y": str})
>>> xy2 = ibis.schema({"x": int, "y": str})
>>> yx = ibis.schema({"y": str, "x": int})
>>> xy_float = ibis.schema({"x": float, "y": str})
>>> assert xy.equals(xy2)
>>> assert not xy.equals(yx)
>>> assert not xy.equals(xy_float)
"""
if not isinstance(other, Schema):
raise TypeError(
Expand Down
2 changes: 1 addition & 1 deletion ibis/tests/expr/test_set_operations.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class D:

@pytest.mark.parametrize("method", ["union", "intersect", "difference"])
def test_operation_requires_equal_schemas(method):
with pytest.raises(RelationError):
with pytest.raises(RelationError, match="`c`: string != float64"):
getattr(a, method)(d)


Expand Down
Loading