Skip to content

Commit

Permalink
feat(common): introduce FrozenOrderedDict (#9081)
Browse files Browse the repository at this point in the history
Continuation of #9068 by adding
`FrozenOrderedDict` which calculates its hash from `tuple(self.items()`
rather than `frozenset(self.items())` and also checks for item order
during equality checks.

Closes #9063.

---------

Co-authored-by: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
  • Loading branch information
kszucs and cpcloud authored Apr 30, 2024
1 parent f5d9084 commit f926995
Show file tree
Hide file tree
Showing 9 changed files with 136 additions and 20 deletions.
26 changes: 26 additions & 0 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1608,3 +1608,29 @@ def test_json_to_pyarrow(con):
if val is not None
}
assert result == expected


@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError)
@pytest.mark.notyet(
["risingwave", "exasol"],
raises=com.UnsupportedOperationError,
reason="no temp table support",
)
@pytest.mark.notyet(
["impala", "trino"], raises=NotImplementedError, reason="no temp table support"
)
@pytest.mark.notyet(
["druid"], raises=NotImplementedError, reason="doesn't support create_table"
)
@pytest.mark.notyet(
["flink"], raises=com.IbisError, reason="no persistent temp table support"
)
def test_schema_with_caching(alltypes):
t1 = alltypes.limit(5).select("bigint_col", "string_col")
t2 = alltypes.limit(5).select("string_col", "bigint_col")

pt1 = t1.cache()
pt2 = t2.cache()

assert pt1.schema() == t1.schema()
assert pt2.schema() == t2.schema()
23 changes: 22 additions & 1 deletion ibis/common/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,9 @@ def __hash__(self) -> int:
return self.__precomputed_hash__

def __setitem__(self, key: K, value: V) -> None:
raise TypeError("'FrozenDict' object does not support item assignment")
raise TypeError(
f"'{self.__class__.__name__}' object does not support item assignment"
)

def __setattr__(self, name: str, _: Any) -> None:
raise TypeError(f"Attribute {name!r} cannot be assigned to frozendict")
Expand All @@ -297,6 +299,25 @@ def __reduce__(self) -> tuple:
return (self.__class__, (dict(self),))


@public
class FrozenOrderedDict(FrozenDict[K, V]):
def __init__(self, *args, **kwargs):
super(FrozenDict, self).__init__(*args, **kwargs)
hashable = tuple(self.items())
object.__setattr__(self, "__precomputed_hash__", hash(hashable))

def __hash__(self) -> int:
return self.__precomputed_hash__

def __eq__(self, other: Any) -> bool:
if not isinstance(other, collections.abc.Mapping):
return NotImplemented
return tuple(self.items()) == tuple(other.items())

def __ne__(self, other: Any) -> bool:
return not self == other


class RewindableIterator(Iterator[V]):
"""Iterator that can be rewound to a checkpoint.
Expand Down
36 changes: 36 additions & 0 deletions ibis/common/tests/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Collection,
Container,
FrozenDict,
FrozenOrderedDict,
Iterable,
Iterator,
Mapping,
Expand Down Expand Up @@ -429,6 +430,41 @@ def test_frozendict():
assert_pickle_roundtrip(d)


def test_frozenordereddict():
d = FrozenOrderedDict({"a": 1, "b": 2, "c": 3})
e = FrozenOrderedDict(a=1, b=2, c=3)
f = FrozenOrderedDict(a=1, b=2, c=3, d=4)
g = FrozenOrderedDict(a=1, c=3, b=2)
h = FrozenDict(a=1, b=2, c=3)

assert isinstance(d, Mapping)
assert isinstance(d, collections.abc.Mapping)

assert d == e
assert d != f
assert e == h
assert h == e
assert e != g
assert g != e
assert g != h
assert h != g

assert d["a"] == 1
assert d["b"] == 2

msg = "'FrozenOrderedDict' object does not support item assignment"
with pytest.raises(TypeError, match=msg):
d["a"] = 2
with pytest.raises(TypeError, match=msg):
d["d"] = 4

assert hash(FrozenOrderedDict(a=1, b=2)) == hash(FrozenOrderedDict(a=1, b=2))
assert hash(FrozenOrderedDict(a=1, b=2)) != hash(FrozenOrderedDict(b=2, a=1))
assert hash(FrozenOrderedDict(a=1, b=2)) != hash(d)

assert_pickle_roundtrip(d)


def test_rewindable_iterator():
it = RewindableIterator(range(10))
assert next(it) == 0
Expand Down
4 changes: 2 additions & 2 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing_extensions import Self, get_args, get_origin

from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, MapSet
from ibis.common.collections import FrozenOrderedDict, MapSet
from ibis.common.dispatch import lazy_singledispatch
from ibis.common.grounds import Concrete, Singleton
from ibis.common.patterns import Coercible, CoercionError
Expand Down Expand Up @@ -823,7 +823,7 @@ def _pretty_piece(self) -> str:
class Struct(Parametric, MapSet):
"""Structured values."""

fields: FrozenDict[str, DataType]
fields: FrozenOrderedDict[str, DataType]

scalar = "StructScalar"
column = "StructColumn"
Expand Down
12 changes: 12 additions & 0 deletions ibis/expr/datatypes/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,18 @@ def test_struct_set_operations():
assert d > c


def test_struct_equality():
st1 = dt.Struct({"a": dt.int64, "b": dt.string})
st2 = dt.Struct({"a": dt.int64, "b": dt.string})
st3 = dt.Struct({"b": dt.string, "a": dt.int64})
st4 = dt.Struct({"a": dt.int64, "b": dt.string, "c": dt.float64})

assert st1 == st2
assert st1 != st3
assert st1 != st4
assert st3 != st2


def test_singleton_null():
assert dt.null is dt.Null()

Expand Down
30 changes: 15 additions & 15 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict
from ibis.common.collections import FrozenDict, FrozenOrderedDict
from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError
from ibis.common.grounds import Concrete
from ibis.common.patterns import Between, InstanceOf
Expand Down Expand Up @@ -41,7 +41,7 @@ def __coerce__(cls, value):

@property
@abstractmethod
def values(self) -> FrozenDict[str, Value]:
def values(self) -> FrozenOrderedDict[str, Value]:
"""A mapping of column names to expressions which build up the relation.
This attribute is heavily used in rewrites as well as during field
Expand All @@ -59,13 +59,13 @@ def schema(self) -> Schema:
...

@property
def fields(self) -> FrozenDict[str, Column]:
def fields(self) -> FrozenOrderedDict[str, Column]:
"""A mapping of column names to fields of the relation.
This calculated property shouldn't be overridden in subclasses since it
is mostly used for convenience.
"""
return FrozenDict({k: Field(self, k) for k in self.schema})
return FrozenOrderedDict({k: Field(self, k) for k in self.schema})

def to_expr(self):
from ibis.expr.types import Table
Expand Down Expand Up @@ -110,7 +110,7 @@ def _check_integrity(values, allowed_parents):
@public
class Project(Relation):
parent: Relation
values: FrozenDict[str, NonSortKey[Unaliased[Value]]]
values: FrozenOrderedDict[str, NonSortKey[Unaliased[Value]]]

def __init__(self, parent, values):
_check_integrity(values.values(), {parent})
Expand Down Expand Up @@ -152,7 +152,7 @@ def schema(self):
# TODO(kszucs): remove in favor of View
@public
class SelfReference(Reference):
values = FrozenDict()
values = FrozenOrderedDict()


@public
Expand Down Expand Up @@ -187,7 +187,7 @@ class JoinLink(Node):
class JoinChain(Relation):
first: Reference
rest: VarTuple[JoinLink]
values: FrozenDict[str, Unaliased[Value]]
values: FrozenOrderedDict[str, Unaliased[Value]]

def __init__(self, first, rest, values):
allowed_parents = {first}
Expand Down Expand Up @@ -259,8 +259,8 @@ class Limit(Simple):
@public
class Aggregate(Relation):
parent: Relation
groups: FrozenDict[str, Unaliased[Column]]
metrics: FrozenDict[str, Unaliased[Scalar]]
groups: FrozenOrderedDict[str, Unaliased[Column]]
metrics: FrozenOrderedDict[str, Unaliased[Scalar]]

def __init__(self, parent, groups, metrics):
_check_integrity(groups.values(), {parent})
Expand All @@ -273,7 +273,7 @@ def __init__(self, parent, groups, metrics):

@attribute
def values(self):
return FrozenDict({**self.groups, **self.metrics})
return FrozenOrderedDict({**self.groups, **self.metrics})

@attribute
def schema(self):
Expand All @@ -285,7 +285,7 @@ class Set(Relation):
left: Relation
right: Relation
distinct: bool = False
values = FrozenDict()
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
Expand Down Expand Up @@ -321,7 +321,7 @@ class Difference(Set):
@public
class PhysicalTable(Relation):
name: str
values = FrozenDict()
values = FrozenOrderedDict()


@public
Expand Down Expand Up @@ -356,7 +356,7 @@ class SQLQueryResult(Relation):
query: str
schema: Schema
source: Any
values = FrozenDict()
values = FrozenOrderedDict()


@public
Expand All @@ -378,12 +378,12 @@ class SQLStringView(Relation):
child: Relation
query: str
schema: Schema
values = FrozenDict()
values = FrozenOrderedDict()


@public
class DummyTable(Relation):
values: FrozenDict[str, Value]
values: FrozenOrderedDict[str, Value]

@attribute
def schema(self):
Expand Down
4 changes: 2 additions & 2 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, MapSet
from ibis.common.collections import FrozenOrderedDict, MapSet
from ibis.common.dispatch import lazy_singledispatch
from ibis.common.exceptions import InputTypeError, IntegrityError
from ibis.common.grounds import Concrete
Expand All @@ -19,7 +19,7 @@
class Schema(Concrete, Coercible, MapSet):
"""An ordered mapping of str -> [datatype](./datatypes.qmd), used to hold a [Table](./expression-tables.qmd#ibis.expr.tables.Table)'s schema."""

fields: FrozenDict[str, dt.DataType]
fields: FrozenOrderedDict[str, dt.DataType]
"""A mapping of [](`str`) to
[`DataType`](./datatypes.qmd#ibis.expr.datatypes.DataType)
objects representing the type of each column."""
Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/tests/test_newrels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1711,3 +1711,12 @@ def test_mutate_ambiguty_check_not_too_strict():
values={"id": first.id, "v": first.v, "v2": first.id},
)
assert second.op() == expected


def test_projections_with_different_field_order_are_unequal():
t = ibis.table({"a": "int64", "b": "string"}, name="t")

t1 = t.select(a=1, b=2)
t2 = t.select(b=2, a=1)

assert not t1.equals(t2)
12 changes: 12 additions & 0 deletions ibis/expr/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,18 @@ def test_schema_mapping_api():
assert tuple(s.items()) == tuple(zip(s.names, s.types))


def test_schema_equality():
s1 = sch.schema({"a": "int64", "b": "string"})
s2 = sch.schema({"a": "int64", "b": "string"})
s3 = sch.schema({"b": "string", "a": "int64"})
s4 = sch.schema({"a": "int64", "b": "int64", "c": "string"})

assert s1 == s2
assert s1 != s3
assert s1 != s4
assert s3 != s2


class BarSchema:
a: int
b: str
Expand Down

0 comments on commit f926995

Please sign in to comment.