From f926995d0d84b81a81ef659a81f3b5e9f7cad321 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Tue, 30 Apr 2024 14:06:53 +0200 Subject: [PATCH] feat(common): introduce `FrozenOrderedDict` (#9081) Continuation of https://github.com/ibis-project/ibis/pull/9068 by adding `FrozenOrderedDict` which calculates its hash from `tuple(self.items()` rather than `frozenset(self.items())` and also checks for item order during equality checks. Closes #9063. --------- Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com> --- ibis/backends/tests/test_client.py | 26 +++++++++++++++++++ ibis/common/collections.py | 23 +++++++++++++++- ibis/common/tests/test_collections.py | 36 ++++++++++++++++++++++++++ ibis/expr/datatypes/core.py | 4 +-- ibis/expr/datatypes/tests/test_core.py | 12 +++++++++ ibis/expr/operations/relations.py | 30 ++++++++++----------- ibis/expr/schema.py | 4 +-- ibis/expr/tests/test_newrels.py | 9 +++++++ ibis/expr/tests/test_schema.py | 12 +++++++++ 9 files changed, 136 insertions(+), 20 deletions(-) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index bfe9448c6a04..d50d487d8df3 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1608,3 +1608,29 @@ def test_json_to_pyarrow(con): if val is not None } assert result == expected + + +@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) +@pytest.mark.notyet( + ["risingwave", "exasol"], + raises=com.UnsupportedOperationError, + reason="no temp table support", +) +@pytest.mark.notyet( + ["impala", "trino"], raises=NotImplementedError, reason="no temp table support" +) +@pytest.mark.notyet( + ["druid"], raises=NotImplementedError, reason="doesn't support create_table" +) +@pytest.mark.notyet( + ["flink"], raises=com.IbisError, reason="no persistent temp table support" +) +def test_schema_with_caching(alltypes): + t1 = alltypes.limit(5).select("bigint_col", "string_col") + t2 = alltypes.limit(5).select("string_col", "bigint_col") + + pt1 = t1.cache() + pt2 = t2.cache() + + assert pt1.schema() == t1.schema() + assert pt2.schema() == t2.schema() diff --git a/ibis/common/collections.py b/ibis/common/collections.py index 213977782229..6394df918b8a 100644 --- a/ibis/common/collections.py +++ b/ibis/common/collections.py @@ -288,7 +288,9 @@ def __hash__(self) -> int: return self.__precomputed_hash__ def __setitem__(self, key: K, value: V) -> None: - raise TypeError("'FrozenDict' object does not support item assignment") + raise TypeError( + f"'{self.__class__.__name__}' object does not support item assignment" + ) def __setattr__(self, name: str, _: Any) -> None: raise TypeError(f"Attribute {name!r} cannot be assigned to frozendict") @@ -297,6 +299,25 @@ def __reduce__(self) -> tuple: return (self.__class__, (dict(self),)) +@public +class FrozenOrderedDict(FrozenDict[K, V]): + def __init__(self, *args, **kwargs): + super(FrozenDict, self).__init__(*args, **kwargs) + hashable = tuple(self.items()) + object.__setattr__(self, "__precomputed_hash__", hash(hashable)) + + def __hash__(self) -> int: + return self.__precomputed_hash__ + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, collections.abc.Mapping): + return NotImplemented + return tuple(self.items()) == tuple(other.items()) + + def __ne__(self, other: Any) -> bool: + return not self == other + + class RewindableIterator(Iterator[V]): """Iterator that can be rewound to a checkpoint. diff --git a/ibis/common/tests/test_collections.py b/ibis/common/tests/test_collections.py index e529376e3091..7a26d8e22fab 100644 --- a/ibis/common/tests/test_collections.py +++ b/ibis/common/tests/test_collections.py @@ -8,6 +8,7 @@ Collection, Container, FrozenDict, + FrozenOrderedDict, Iterable, Iterator, Mapping, @@ -429,6 +430,41 @@ def test_frozendict(): assert_pickle_roundtrip(d) +def test_frozenordereddict(): + d = FrozenOrderedDict({"a": 1, "b": 2, "c": 3}) + e = FrozenOrderedDict(a=1, b=2, c=3) + f = FrozenOrderedDict(a=1, b=2, c=3, d=4) + g = FrozenOrderedDict(a=1, c=3, b=2) + h = FrozenDict(a=1, b=2, c=3) + + assert isinstance(d, Mapping) + assert isinstance(d, collections.abc.Mapping) + + assert d == e + assert d != f + assert e == h + assert h == e + assert e != g + assert g != e + assert g != h + assert h != g + + assert d["a"] == 1 + assert d["b"] == 2 + + msg = "'FrozenOrderedDict' object does not support item assignment" + with pytest.raises(TypeError, match=msg): + d["a"] = 2 + with pytest.raises(TypeError, match=msg): + d["d"] = 4 + + assert hash(FrozenOrderedDict(a=1, b=2)) == hash(FrozenOrderedDict(a=1, b=2)) + assert hash(FrozenOrderedDict(a=1, b=2)) != hash(FrozenOrderedDict(b=2, a=1)) + assert hash(FrozenOrderedDict(a=1, b=2)) != hash(d) + + assert_pickle_roundtrip(d) + + def test_rewindable_iterator(): it = RewindableIterator(range(10)) assert next(it) == 0 diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index c640190fa9b6..6beaa7fc2cdb 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -14,7 +14,7 @@ from typing_extensions import Self, get_args, get_origin from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict, MapSet +from ibis.common.collections import FrozenOrderedDict, MapSet from ibis.common.dispatch import lazy_singledispatch from ibis.common.grounds import Concrete, Singleton from ibis.common.patterns import Coercible, CoercionError @@ -823,7 +823,7 @@ def _pretty_piece(self) -> str: class Struct(Parametric, MapSet): """Structured values.""" - fields: FrozenDict[str, DataType] + fields: FrozenOrderedDict[str, DataType] scalar = "StructScalar" column = "StructColumn" diff --git a/ibis/expr/datatypes/tests/test_core.py b/ibis/expr/datatypes/tests/test_core.py index ba6ad18d35c7..cceef7a8f5bc 100644 --- a/ibis/expr/datatypes/tests/test_core.py +++ b/ibis/expr/datatypes/tests/test_core.py @@ -421,6 +421,18 @@ def test_struct_set_operations(): assert d > c +def test_struct_equality(): + st1 = dt.Struct({"a": dt.int64, "b": dt.string}) + st2 = dt.Struct({"a": dt.int64, "b": dt.string}) + st3 = dt.Struct({"b": dt.string, "a": dt.int64}) + st4 = dt.Struct({"a": dt.int64, "b": dt.string, "c": dt.float64}) + + assert st1 == st2 + assert st1 != st3 + assert st1 != st4 + assert st3 != st2 + + def test_singleton_null(): assert dt.null is dt.Null() diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 4618de7a4d52..1a54bdb93e31 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -10,7 +10,7 @@ import ibis.expr.datashape as ds import ibis.expr.datatypes as dt from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict +from ibis.common.collections import FrozenDict, FrozenOrderedDict from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError from ibis.common.grounds import Concrete from ibis.common.patterns import Between, InstanceOf @@ -41,7 +41,7 @@ def __coerce__(cls, value): @property @abstractmethod - def values(self) -> FrozenDict[str, Value]: + def values(self) -> FrozenOrderedDict[str, Value]: """A mapping of column names to expressions which build up the relation. This attribute is heavily used in rewrites as well as during field @@ -59,13 +59,13 @@ def schema(self) -> Schema: ... @property - def fields(self) -> FrozenDict[str, Column]: + def fields(self) -> FrozenOrderedDict[str, Column]: """A mapping of column names to fields of the relation. This calculated property shouldn't be overridden in subclasses since it is mostly used for convenience. """ - return FrozenDict({k: Field(self, k) for k in self.schema}) + return FrozenOrderedDict({k: Field(self, k) for k in self.schema}) def to_expr(self): from ibis.expr.types import Table @@ -110,7 +110,7 @@ def _check_integrity(values, allowed_parents): @public class Project(Relation): parent: Relation - values: FrozenDict[str, NonSortKey[Unaliased[Value]]] + values: FrozenOrderedDict[str, NonSortKey[Unaliased[Value]]] def __init__(self, parent, values): _check_integrity(values.values(), {parent}) @@ -152,7 +152,7 @@ def schema(self): # TODO(kszucs): remove in favor of View @public class SelfReference(Reference): - values = FrozenDict() + values = FrozenOrderedDict() @public @@ -187,7 +187,7 @@ class JoinLink(Node): class JoinChain(Relation): first: Reference rest: VarTuple[JoinLink] - values: FrozenDict[str, Unaliased[Value]] + values: FrozenOrderedDict[str, Unaliased[Value]] def __init__(self, first, rest, values): allowed_parents = {first} @@ -259,8 +259,8 @@ class Limit(Simple): @public class Aggregate(Relation): parent: Relation - groups: FrozenDict[str, Unaliased[Column]] - metrics: FrozenDict[str, Unaliased[Scalar]] + groups: FrozenOrderedDict[str, Unaliased[Column]] + metrics: FrozenOrderedDict[str, Unaliased[Scalar]] def __init__(self, parent, groups, metrics): _check_integrity(groups.values(), {parent}) @@ -273,7 +273,7 @@ def __init__(self, parent, groups, metrics): @attribute def values(self): - return FrozenDict({**self.groups, **self.metrics}) + return FrozenOrderedDict({**self.groups, **self.metrics}) @attribute def schema(self): @@ -285,7 +285,7 @@ class Set(Relation): left: Relation right: Relation distinct: bool = False - values = FrozenDict() + values = FrozenOrderedDict() def __init__(self, left, right, **kwargs): # convert to dictionary first, to get key-unordered comparison semantics @@ -321,7 +321,7 @@ class Difference(Set): @public class PhysicalTable(Relation): name: str - values = FrozenDict() + values = FrozenOrderedDict() @public @@ -356,7 +356,7 @@ class SQLQueryResult(Relation): query: str schema: Schema source: Any - values = FrozenDict() + values = FrozenOrderedDict() @public @@ -378,12 +378,12 @@ class SQLStringView(Relation): child: Relation query: str schema: Schema - values = FrozenDict() + values = FrozenOrderedDict() @public class DummyTable(Relation): - values: FrozenDict[str, Value] + values: FrozenOrderedDict[str, Value] @attribute def schema(self): diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index a5802072bf9e..65d4a88df52c 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -5,7 +5,7 @@ import ibis.expr.datatypes as dt from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict, MapSet +from ibis.common.collections import FrozenOrderedDict, MapSet from ibis.common.dispatch import lazy_singledispatch from ibis.common.exceptions import InputTypeError, IntegrityError from ibis.common.grounds import Concrete @@ -19,7 +19,7 @@ class Schema(Concrete, Coercible, MapSet): """An ordered mapping of str -> [datatype](./datatypes.qmd), used to hold a [Table](./expression-tables.qmd#ibis.expr.tables.Table)'s schema.""" - fields: FrozenDict[str, dt.DataType] + fields: FrozenOrderedDict[str, dt.DataType] """A mapping of [](`str`) to [`DataType`](./datatypes.qmd#ibis.expr.datatypes.DataType) objects representing the type of each column.""" diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 22588deb65d8..e6454e5b7fcf 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1711,3 +1711,12 @@ def test_mutate_ambiguty_check_not_too_strict(): values={"id": first.id, "v": first.v, "v2": first.id}, ) assert second.op() == expected + + +def test_projections_with_different_field_order_are_unequal(): + t = ibis.table({"a": "int64", "b": "string"}, name="t") + + t1 = t.select(a=1, b=2) + t2 = t.select(b=2, a=1) + + assert not t1.equals(t2) diff --git a/ibis/expr/tests/test_schema.py b/ibis/expr/tests/test_schema.py index 05467619de17..e9a7766b33e2 100644 --- a/ibis/expr/tests/test_schema.py +++ b/ibis/expr/tests/test_schema.py @@ -203,6 +203,18 @@ def test_schema_mapping_api(): assert tuple(s.items()) == tuple(zip(s.names, s.types)) +def test_schema_equality(): + s1 = sch.schema({"a": "int64", "b": "string"}) + s2 = sch.schema({"a": "int64", "b": "string"}) + s3 = sch.schema({"b": "string", "a": "int64"}) + s4 = sch.schema({"a": "int64", "b": "int64", "c": "string"}) + + assert s1 == s2 + assert s1 != s3 + assert s1 != s4 + assert s3 != s2 + + class BarSchema: a: int b: str