From 0941568574d1c22812a859dfda9f101bed2a1dfc Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sat, 27 Apr 2024 07:25:50 -0700 Subject: [PATCH 1/3] fix(ir): ensure that schema column order matters --- ibis/backends/tests/test_client.py | 26 ++++++++++++++++++++++++++ ibis/common/collections.py | 5 +++++ ibis/expr/schema.py | 9 +++++++++ ibis/expr/tests/test_dereference.py | 2 +- 4 files changed, 41 insertions(+), 1 deletion(-) diff --git a/ibis/backends/tests/test_client.py b/ibis/backends/tests/test_client.py index bfe9448c6a04..d50d487d8df3 100644 --- a/ibis/backends/tests/test_client.py +++ b/ibis/backends/tests/test_client.py @@ -1608,3 +1608,29 @@ def test_json_to_pyarrow(con): if val is not None } assert result == expected + + +@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError) +@pytest.mark.notyet( + ["risingwave", "exasol"], + raises=com.UnsupportedOperationError, + reason="no temp table support", +) +@pytest.mark.notyet( + ["impala", "trino"], raises=NotImplementedError, reason="no temp table support" +) +@pytest.mark.notyet( + ["druid"], raises=NotImplementedError, reason="doesn't support create_table" +) +@pytest.mark.notyet( + ["flink"], raises=com.IbisError, reason="no persistent temp table support" +) +def test_schema_with_caching(alltypes): + t1 = alltypes.limit(5).select("bigint_col", "string_col") + t2 = alltypes.limit(5).select("string_col", "bigint_col") + + pt1 = t1.cache() + pt2 = t2.cache() + + assert pt1.schema() == t1.schema() + assert pt2.schema() == t2.schema() diff --git a/ibis/common/collections.py b/ibis/common/collections.py index 213977782229..6fcbcc63e9c6 100644 --- a/ibis/common/collections.py +++ b/ibis/common/collections.py @@ -287,6 +287,11 @@ def __init__(self, *args, **kwargs): def __hash__(self) -> int: return self.__precomputed_hash__ + def __eq__(self, other: Any) -> bool: + if not isinstance(other, collections.abc.Mapping): + return NotImplemented + return collections.OrderedDict(self) == collections.OrderedDict(other) + def __setitem__(self, key: K, value: V) -> None: raise TypeError("'FrozenDict' object does not support item assignment") diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index a5802072bf9e..e50b22068f81 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -1,5 +1,6 @@ from __future__ import annotations +import collections from collections.abc import Iterable, Iterator, Mapping from typing import TYPE_CHECKING, Any, Union @@ -90,6 +91,14 @@ def equals(self, other: Schema) -> bool: ) return self == other + def __hash__(self) -> int: + return super().__hash__() + + def __eq__(self, other): + if not isinstance(other, Mapping): + return NotImplemented + return collections.OrderedDict(self) == collections.OrderedDict(other) + @classmethod def from_tuples( cls, diff --git a/ibis/expr/tests/test_dereference.py b/ibis/expr/tests/test_dereference.py index 4396c0ac5fb0..e5b42c617251 100644 --- a/ibis/expr/tests/test_dereference.py +++ b/ibis/expr/tests/test_dereference.py @@ -24,8 +24,8 @@ def test_dereference_project(): expected = dereference_expect( { p.int_col: p.int_col, - p.double_col: p.double_col, t.int_col: p.int_col, + p.double_col: p.double_col, t.double_col: p.double_col, } ) From a1c726a4b141578a72f04369ce74f28fc63dc723 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 29 Apr 2024 16:07:49 -0400 Subject: [PATCH 2/3] chore: bump ibis-testing-data --- nix/overlay.nix | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nix/overlay.nix b/nix/overlay.nix index de617bebb1fe..191b50b21459 100644 --- a/nix/overlay.nix +++ b/nix/overlay.nix @@ -21,8 +21,8 @@ in name = "ibis-testing-data"; owner = "ibis-project"; repo = "testing-data"; - rev = "2c6a4bb5d5d525058d8d5b2312a9fee5dafc5476"; - sha256 = "sha256-Lq503bqh9ESZJSk6yVq/uZwkAubzmSmoTBZSsqMm0DY="; + rev = "1922bd4617546b877e66e78bb2b87abeb510cf8e"; + sha256 = "sha256-l5d7r/6Voy6N2pXq3IivLX3N0tNfKKwsbZXRexzc8Z8="; }; ibis39 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python39; }; From 63e05635213a0af888c8439d0773bd5fa1d4b5c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= <szucs.krisztian@gmail.com> Date: Mon, 29 Apr 2024 23:28:25 +0200 Subject: [PATCH 3/3] feat(common): introduce `FrozenOrderedDict` --- ibis/common/collections.py | 28 +++++++++++++++----- ibis/common/tests/test_collections.py | 36 ++++++++++++++++++++++++++ ibis/expr/datatypes/core.py | 4 +-- ibis/expr/datatypes/tests/test_core.py | 12 +++++++++ ibis/expr/operations/relations.py | 30 ++++++++++----------- ibis/expr/schema.py | 13 ++-------- ibis/expr/tests/test_dereference.py | 2 +- ibis/expr/tests/test_newrels.py | 9 +++++++ ibis/expr/tests/test_schema.py | 12 +++++++++ 9 files changed, 111 insertions(+), 35 deletions(-) diff --git a/ibis/common/collections.py b/ibis/common/collections.py index 6fcbcc63e9c6..6394df918b8a 100644 --- a/ibis/common/collections.py +++ b/ibis/common/collections.py @@ -287,13 +287,10 @@ def __init__(self, *args, **kwargs): def __hash__(self) -> int: return self.__precomputed_hash__ - def __eq__(self, other: Any) -> bool: - if not isinstance(other, collections.abc.Mapping): - return NotImplemented - return collections.OrderedDict(self) == collections.OrderedDict(other) - def __setitem__(self, key: K, value: V) -> None: - raise TypeError("'FrozenDict' object does not support item assignment") + raise TypeError( + f"'{self.__class__.__name__}' object does not support item assignment" + ) def __setattr__(self, name: str, _: Any) -> None: raise TypeError(f"Attribute {name!r} cannot be assigned to frozendict") @@ -302,6 +299,25 @@ def __reduce__(self) -> tuple: return (self.__class__, (dict(self),)) +@public +class FrozenOrderedDict(FrozenDict[K, V]): + def __init__(self, *args, **kwargs): + super(FrozenDict, self).__init__(*args, **kwargs) + hashable = tuple(self.items()) + object.__setattr__(self, "__precomputed_hash__", hash(hashable)) + + def __hash__(self) -> int: + return self.__precomputed_hash__ + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, collections.abc.Mapping): + return NotImplemented + return tuple(self.items()) == tuple(other.items()) + + def __ne__(self, other: Any) -> bool: + return not self == other + + class RewindableIterator(Iterator[V]): """Iterator that can be rewound to a checkpoint. diff --git a/ibis/common/tests/test_collections.py b/ibis/common/tests/test_collections.py index e529376e3091..7a26d8e22fab 100644 --- a/ibis/common/tests/test_collections.py +++ b/ibis/common/tests/test_collections.py @@ -8,6 +8,7 @@ Collection, Container, FrozenDict, + FrozenOrderedDict, Iterable, Iterator, Mapping, @@ -429,6 +430,41 @@ def test_frozendict(): assert_pickle_roundtrip(d) +def test_frozenordereddict(): + d = FrozenOrderedDict({"a": 1, "b": 2, "c": 3}) + e = FrozenOrderedDict(a=1, b=2, c=3) + f = FrozenOrderedDict(a=1, b=2, c=3, d=4) + g = FrozenOrderedDict(a=1, c=3, b=2) + h = FrozenDict(a=1, b=2, c=3) + + assert isinstance(d, Mapping) + assert isinstance(d, collections.abc.Mapping) + + assert d == e + assert d != f + assert e == h + assert h == e + assert e != g + assert g != e + assert g != h + assert h != g + + assert d["a"] == 1 + assert d["b"] == 2 + + msg = "'FrozenOrderedDict' object does not support item assignment" + with pytest.raises(TypeError, match=msg): + d["a"] = 2 + with pytest.raises(TypeError, match=msg): + d["d"] = 4 + + assert hash(FrozenOrderedDict(a=1, b=2)) == hash(FrozenOrderedDict(a=1, b=2)) + assert hash(FrozenOrderedDict(a=1, b=2)) != hash(FrozenOrderedDict(b=2, a=1)) + assert hash(FrozenOrderedDict(a=1, b=2)) != hash(d) + + assert_pickle_roundtrip(d) + + def test_rewindable_iterator(): it = RewindableIterator(range(10)) assert next(it) == 0 diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index c640190fa9b6..6beaa7fc2cdb 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -14,7 +14,7 @@ from typing_extensions import Self, get_args, get_origin from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict, MapSet +from ibis.common.collections import FrozenOrderedDict, MapSet from ibis.common.dispatch import lazy_singledispatch from ibis.common.grounds import Concrete, Singleton from ibis.common.patterns import Coercible, CoercionError @@ -823,7 +823,7 @@ def _pretty_piece(self) -> str: class Struct(Parametric, MapSet): """Structured values.""" - fields: FrozenDict[str, DataType] + fields: FrozenOrderedDict[str, DataType] scalar = "StructScalar" column = "StructColumn" diff --git a/ibis/expr/datatypes/tests/test_core.py b/ibis/expr/datatypes/tests/test_core.py index ba6ad18d35c7..cceef7a8f5bc 100644 --- a/ibis/expr/datatypes/tests/test_core.py +++ b/ibis/expr/datatypes/tests/test_core.py @@ -421,6 +421,18 @@ def test_struct_set_operations(): assert d > c +def test_struct_equality(): + st1 = dt.Struct({"a": dt.int64, "b": dt.string}) + st2 = dt.Struct({"a": dt.int64, "b": dt.string}) + st3 = dt.Struct({"b": dt.string, "a": dt.int64}) + st4 = dt.Struct({"a": dt.int64, "b": dt.string, "c": dt.float64}) + + assert st1 == st2 + assert st1 != st3 + assert st1 != st4 + assert st3 != st2 + + def test_singleton_null(): assert dt.null is dt.Null() diff --git a/ibis/expr/operations/relations.py b/ibis/expr/operations/relations.py index 4618de7a4d52..1a54bdb93e31 100644 --- a/ibis/expr/operations/relations.py +++ b/ibis/expr/operations/relations.py @@ -10,7 +10,7 @@ import ibis.expr.datashape as ds import ibis.expr.datatypes as dt from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict +from ibis.common.collections import FrozenDict, FrozenOrderedDict from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError from ibis.common.grounds import Concrete from ibis.common.patterns import Between, InstanceOf @@ -41,7 +41,7 @@ def __coerce__(cls, value): @property @abstractmethod - def values(self) -> FrozenDict[str, Value]: + def values(self) -> FrozenOrderedDict[str, Value]: """A mapping of column names to expressions which build up the relation. This attribute is heavily used in rewrites as well as during field @@ -59,13 +59,13 @@ def schema(self) -> Schema: ... @property - def fields(self) -> FrozenDict[str, Column]: + def fields(self) -> FrozenOrderedDict[str, Column]: """A mapping of column names to fields of the relation. This calculated property shouldn't be overridden in subclasses since it is mostly used for convenience. """ - return FrozenDict({k: Field(self, k) for k in self.schema}) + return FrozenOrderedDict({k: Field(self, k) for k in self.schema}) def to_expr(self): from ibis.expr.types import Table @@ -110,7 +110,7 @@ def _check_integrity(values, allowed_parents): @public class Project(Relation): parent: Relation - values: FrozenDict[str, NonSortKey[Unaliased[Value]]] + values: FrozenOrderedDict[str, NonSortKey[Unaliased[Value]]] def __init__(self, parent, values): _check_integrity(values.values(), {parent}) @@ -152,7 +152,7 @@ def schema(self): # TODO(kszucs): remove in favor of View @public class SelfReference(Reference): - values = FrozenDict() + values = FrozenOrderedDict() @public @@ -187,7 +187,7 @@ class JoinLink(Node): class JoinChain(Relation): first: Reference rest: VarTuple[JoinLink] - values: FrozenDict[str, Unaliased[Value]] + values: FrozenOrderedDict[str, Unaliased[Value]] def __init__(self, first, rest, values): allowed_parents = {first} @@ -259,8 +259,8 @@ class Limit(Simple): @public class Aggregate(Relation): parent: Relation - groups: FrozenDict[str, Unaliased[Column]] - metrics: FrozenDict[str, Unaliased[Scalar]] + groups: FrozenOrderedDict[str, Unaliased[Column]] + metrics: FrozenOrderedDict[str, Unaliased[Scalar]] def __init__(self, parent, groups, metrics): _check_integrity(groups.values(), {parent}) @@ -273,7 +273,7 @@ def __init__(self, parent, groups, metrics): @attribute def values(self): - return FrozenDict({**self.groups, **self.metrics}) + return FrozenOrderedDict({**self.groups, **self.metrics}) @attribute def schema(self): @@ -285,7 +285,7 @@ class Set(Relation): left: Relation right: Relation distinct: bool = False - values = FrozenDict() + values = FrozenOrderedDict() def __init__(self, left, right, **kwargs): # convert to dictionary first, to get key-unordered comparison semantics @@ -321,7 +321,7 @@ class Difference(Set): @public class PhysicalTable(Relation): name: str - values = FrozenDict() + values = FrozenOrderedDict() @public @@ -356,7 +356,7 @@ class SQLQueryResult(Relation): query: str schema: Schema source: Any - values = FrozenDict() + values = FrozenOrderedDict() @public @@ -378,12 +378,12 @@ class SQLStringView(Relation): child: Relation query: str schema: Schema - values = FrozenDict() + values = FrozenOrderedDict() @public class DummyTable(Relation): - values: FrozenDict[str, Value] + values: FrozenOrderedDict[str, Value] @attribute def schema(self): diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index e50b22068f81..65d4a88df52c 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -1,12 +1,11 @@ from __future__ import annotations -import collections from collections.abc import Iterable, Iterator, Mapping from typing import TYPE_CHECKING, Any, Union import ibis.expr.datatypes as dt from ibis.common.annotations import attribute -from ibis.common.collections import FrozenDict, MapSet +from ibis.common.collections import FrozenOrderedDict, MapSet from ibis.common.dispatch import lazy_singledispatch from ibis.common.exceptions import InputTypeError, IntegrityError from ibis.common.grounds import Concrete @@ -20,7 +19,7 @@ class Schema(Concrete, Coercible, MapSet): """An ordered mapping of str -> [datatype](./datatypes.qmd), used to hold a [Table](./expression-tables.qmd#ibis.expr.tables.Table)'s schema.""" - fields: FrozenDict[str, dt.DataType] + fields: FrozenOrderedDict[str, dt.DataType] """A mapping of [](`str`) to [`DataType`](./datatypes.qmd#ibis.expr.datatypes.DataType) objects representing the type of each column.""" @@ -91,14 +90,6 @@ def equals(self, other: Schema) -> bool: ) return self == other - def __hash__(self) -> int: - return super().__hash__() - - def __eq__(self, other): - if not isinstance(other, Mapping): - return NotImplemented - return collections.OrderedDict(self) == collections.OrderedDict(other) - @classmethod def from_tuples( cls, diff --git a/ibis/expr/tests/test_dereference.py b/ibis/expr/tests/test_dereference.py index e5b42c617251..4396c0ac5fb0 100644 --- a/ibis/expr/tests/test_dereference.py +++ b/ibis/expr/tests/test_dereference.py @@ -24,8 +24,8 @@ def test_dereference_project(): expected = dereference_expect( { p.int_col: p.int_col, - t.int_col: p.int_col, p.double_col: p.double_col, + t.int_col: p.int_col, t.double_col: p.double_col, } ) diff --git a/ibis/expr/tests/test_newrels.py b/ibis/expr/tests/test_newrels.py index 22588deb65d8..e6454e5b7fcf 100644 --- a/ibis/expr/tests/test_newrels.py +++ b/ibis/expr/tests/test_newrels.py @@ -1711,3 +1711,12 @@ def test_mutate_ambiguty_check_not_too_strict(): values={"id": first.id, "v": first.v, "v2": first.id}, ) assert second.op() == expected + + +def test_projections_with_different_field_order_are_unequal(): + t = ibis.table({"a": "int64", "b": "string"}, name="t") + + t1 = t.select(a=1, b=2) + t2 = t.select(b=2, a=1) + + assert not t1.equals(t2) diff --git a/ibis/expr/tests/test_schema.py b/ibis/expr/tests/test_schema.py index 05467619de17..e9a7766b33e2 100644 --- a/ibis/expr/tests/test_schema.py +++ b/ibis/expr/tests/test_schema.py @@ -203,6 +203,18 @@ def test_schema_mapping_api(): assert tuple(s.items()) == tuple(zip(s.names, s.types)) +def test_schema_equality(): + s1 = sch.schema({"a": "int64", "b": "string"}) + s2 = sch.schema({"a": "int64", "b": "string"}) + s3 = sch.schema({"b": "string", "a": "int64"}) + s4 = sch.schema({"a": "int64", "b": "int64", "c": "string"}) + + assert s1 == s2 + assert s1 != s3 + assert s1 != s4 + assert s3 != s2 + + class BarSchema: a: int b: str