Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(common): introduce FrozenOrderedDict #9081

Merged
merged 3 commits into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1608,3 +1608,29 @@ def test_json_to_pyarrow(con):
if val is not None
}
assert result == expected


@pytest.mark.notyet(["mssql"], raises=PyODBCProgrammingError)
@pytest.mark.notyet(
["risingwave", "exasol"],
raises=com.UnsupportedOperationError,
reason="no temp table support",
)
@pytest.mark.notyet(
["impala", "trino"], raises=NotImplementedError, reason="no temp table support"
)
@pytest.mark.notyet(
["druid"], raises=NotImplementedError, reason="doesn't support create_table"
)
@pytest.mark.notyet(
["flink"], raises=com.IbisError, reason="no persistent temp table support"
)
def test_schema_with_caching(alltypes):
t1 = alltypes.limit(5).select("bigint_col", "string_col")
t2 = alltypes.limit(5).select("string_col", "bigint_col")

pt1 = t1.cache()
pt2 = t2.cache()

assert pt1.schema() == t1.schema()
assert pt2.schema() == t2.schema()
23 changes: 22 additions & 1 deletion ibis/common/collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,9 @@
return self.__precomputed_hash__

def __setitem__(self, key: K, value: V) -> None:
raise TypeError("'FrozenDict' object does not support item assignment")
raise TypeError(
f"'{self.__class__.__name__}' object does not support item assignment"
)

def __setattr__(self, name: str, _: Any) -> None:
raise TypeError(f"Attribute {name!r} cannot be assigned to frozendict")
Expand All @@ -297,6 +299,25 @@
return (self.__class__, (dict(self),))


@public
class FrozenOrderedDict(FrozenDict[K, V]):
def __init__(self, *args, **kwargs):
super(FrozenDict, self).__init__(*args, **kwargs)
hashable = tuple(self.items())
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hash value is different then FrozenDict's hash which uses frozenset() to calculate the hash.

object.__setattr__(self, "__precomputed_hash__", hash(hashable))

def __hash__(self) -> int:
return self.__precomputed_hash__

def __eq__(self, other: Any) -> bool:
if not isinstance(other, collections.abc.Mapping):
return NotImplemented

Check warning on line 314 in ibis/common/collections.py

View check run for this annotation

Codecov / codecov/patch

ibis/common/collections.py#L314

Added line #L314 was not covered by tests
return tuple(self.items()) == tuple(other.items())

def __ne__(self, other: Any) -> bool:
return not self == other


class RewindableIterator(Iterator[V]):
"""Iterator that can be rewound to a checkpoint.

Expand Down
36 changes: 36 additions & 0 deletions ibis/common/tests/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Collection,
Container,
FrozenDict,
FrozenOrderedDict,
Iterable,
Iterator,
Mapping,
Expand Down Expand Up @@ -429,6 +430,41 @@ def test_frozendict():
assert_pickle_roundtrip(d)


def test_frozenordereddict():
d = FrozenOrderedDict({"a": 1, "b": 2, "c": 3})
e = FrozenOrderedDict(a=1, b=2, c=3)
f = FrozenOrderedDict(a=1, b=2, c=3, d=4)
g = FrozenOrderedDict(a=1, c=3, b=2)
h = FrozenDict(a=1, b=2, c=3)

assert isinstance(d, Mapping)
assert isinstance(d, collections.abc.Mapping)

assert d == e
assert d != f
assert e == h
assert h == e
assert e != g
assert g != e
assert g != h
assert h != g

assert d["a"] == 1
assert d["b"] == 2

msg = "'FrozenOrderedDict' object does not support item assignment"
with pytest.raises(TypeError, match=msg):
d["a"] = 2
with pytest.raises(TypeError, match=msg):
d["d"] = 4

assert hash(FrozenOrderedDict(a=1, b=2)) == hash(FrozenOrderedDict(a=1, b=2))
assert hash(FrozenOrderedDict(a=1, b=2)) != hash(FrozenOrderedDict(b=2, a=1))
assert hash(FrozenOrderedDict(a=1, b=2)) != hash(d)

assert_pickle_roundtrip(d)


def test_rewindable_iterator():
it = RewindableIterator(range(10))
assert next(it) == 0
Expand Down
4 changes: 2 additions & 2 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from typing_extensions import Self, get_args, get_origin

from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, MapSet
from ibis.common.collections import FrozenOrderedDict, MapSet
from ibis.common.dispatch import lazy_singledispatch
from ibis.common.grounds import Concrete, Singleton
from ibis.common.patterns import Coercible, CoercionError
Expand Down Expand Up @@ -823,7 +823,7 @@ def _pretty_piece(self) -> str:
class Struct(Parametric, MapSet):
"""Structured values."""

fields: FrozenDict[str, DataType]
fields: FrozenOrderedDict[str, DataType]

scalar = "StructScalar"
column = "StructColumn"
Expand Down
12 changes: 12 additions & 0 deletions ibis/expr/datatypes/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,18 @@ def test_struct_set_operations():
assert d > c


def test_struct_equality():
st1 = dt.Struct({"a": dt.int64, "b": dt.string})
st2 = dt.Struct({"a": dt.int64, "b": dt.string})
st3 = dt.Struct({"b": dt.string, "a": dt.int64})
st4 = dt.Struct({"a": dt.int64, "b": dt.string, "c": dt.float64})

assert st1 == st2
assert st1 != st3
assert st1 != st4
assert st3 != st2


def test_singleton_null():
assert dt.null is dt.Null()

Expand Down
30 changes: 15 additions & 15 deletions ibis/expr/operations/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict
from ibis.common.collections import FrozenDict, FrozenOrderedDict
from ibis.common.exceptions import IbisTypeError, IntegrityError, RelationError
from ibis.common.grounds import Concrete
from ibis.common.patterns import Between, InstanceOf
Expand Down Expand Up @@ -41,7 +41,7 @@ def __coerce__(cls, value):

@property
@abstractmethod
def values(self) -> FrozenDict[str, Value]:
def values(self) -> FrozenOrderedDict[str, Value]:
"""A mapping of column names to expressions which build up the relation.

This attribute is heavily used in rewrites as well as during field
Expand All @@ -59,13 +59,13 @@ def schema(self) -> Schema:
...

@property
def fields(self) -> FrozenDict[str, Column]:
def fields(self) -> FrozenOrderedDict[str, Column]:
"""A mapping of column names to fields of the relation.

This calculated property shouldn't be overridden in subclasses since it
is mostly used for convenience.
"""
return FrozenDict({k: Field(self, k) for k in self.schema})
return FrozenOrderedDict({k: Field(self, k) for k in self.schema})

def to_expr(self):
from ibis.expr.types import Table
Expand Down Expand Up @@ -110,7 +110,7 @@ def _check_integrity(values, allowed_parents):
@public
class Project(Relation):
parent: Relation
values: FrozenDict[str, NonSortKey[Unaliased[Value]]]
values: FrozenOrderedDict[str, NonSortKey[Unaliased[Value]]]

def __init__(self, parent, values):
_check_integrity(values.values(), {parent})
Expand Down Expand Up @@ -152,7 +152,7 @@ def schema(self):
# TODO(kszucs): remove in favor of View
@public
class SelfReference(Reference):
values = FrozenDict()
values = FrozenOrderedDict()


@public
Expand Down Expand Up @@ -187,7 +187,7 @@ class JoinLink(Node):
class JoinChain(Relation):
first: Reference
rest: VarTuple[JoinLink]
values: FrozenDict[str, Unaliased[Value]]
values: FrozenOrderedDict[str, Unaliased[Value]]

def __init__(self, first, rest, values):
allowed_parents = {first}
Expand Down Expand Up @@ -259,8 +259,8 @@ class Limit(Simple):
@public
class Aggregate(Relation):
parent: Relation
groups: FrozenDict[str, Unaliased[Column]]
metrics: FrozenDict[str, Unaliased[Scalar]]
groups: FrozenOrderedDict[str, Unaliased[Column]]
metrics: FrozenOrderedDict[str, Unaliased[Scalar]]

def __init__(self, parent, groups, metrics):
_check_integrity(groups.values(), {parent})
Expand All @@ -273,7 +273,7 @@ def __init__(self, parent, groups, metrics):

@attribute
def values(self):
return FrozenDict({**self.groups, **self.metrics})
return FrozenOrderedDict({**self.groups, **self.metrics})

@attribute
def schema(self):
Expand All @@ -285,7 +285,7 @@ class Set(Relation):
left: Relation
right: Relation
distinct: bool = False
values = FrozenDict()
values = FrozenOrderedDict()

def __init__(self, left, right, **kwargs):
# convert to dictionary first, to get key-unordered comparison semantics
Expand Down Expand Up @@ -321,7 +321,7 @@ class Difference(Set):
@public
class PhysicalTable(Relation):
name: str
values = FrozenDict()
values = FrozenOrderedDict()


@public
Expand Down Expand Up @@ -356,7 +356,7 @@ class SQLQueryResult(Relation):
query: str
schema: Schema
source: Any
values = FrozenDict()
values = FrozenOrderedDict()


@public
Expand All @@ -378,12 +378,12 @@ class SQLStringView(Relation):
child: Relation
query: str
schema: Schema
values = FrozenDict()
values = FrozenOrderedDict()


@public
class DummyTable(Relation):
values: FrozenDict[str, Value]
values: FrozenOrderedDict[str, Value]

@attribute
def schema(self):
Expand Down
4 changes: 2 additions & 2 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import ibis.expr.datatypes as dt
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict, MapSet
from ibis.common.collections import FrozenOrderedDict, MapSet
from ibis.common.dispatch import lazy_singledispatch
from ibis.common.exceptions import InputTypeError, IntegrityError
from ibis.common.grounds import Concrete
Expand All @@ -19,7 +19,7 @@
class Schema(Concrete, Coercible, MapSet):
"""An ordered mapping of str -> [datatype](./datatypes.qmd), used to hold a [Table](./expression-tables.qmd#ibis.expr.tables.Table)'s schema."""

fields: FrozenDict[str, dt.DataType]
fields: FrozenOrderedDict[str, dt.DataType]
"""A mapping of [](`str`) to
[`DataType`](./datatypes.qmd#ibis.expr.datatypes.DataType)
objects representing the type of each column."""
Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/tests/test_newrels.py
Original file line number Diff line number Diff line change
Expand Up @@ -1711,3 +1711,12 @@ def test_mutate_ambiguty_check_not_too_strict():
values={"id": first.id, "v": first.v, "v2": first.id},
)
assert second.op() == expected


def test_projections_with_different_field_order_are_unequal():
t = ibis.table({"a": "int64", "b": "string"}, name="t")

t1 = t.select(a=1, b=2)
t2 = t.select(b=2, a=1)

assert not t1.equals(t2)
12 changes: 12 additions & 0 deletions ibis/expr/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,18 @@ def test_schema_mapping_api():
assert tuple(s.items()) == tuple(zip(s.names, s.types))


def test_schema_equality():
s1 = sch.schema({"a": "int64", "b": "string"})
s2 = sch.schema({"a": "int64", "b": "string"})
s3 = sch.schema({"b": "string", "a": "int64"})
s4 = sch.schema({"a": "int64", "b": "int64", "c": "string"})

assert s1 == s2
assert s1 != s3
assert s1 != s4
assert s3 != s2


class BarSchema:
a: int
b: str
Expand Down
4 changes: 2 additions & 2 deletions nix/overlay.nix
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@ in
name = "ibis-testing-data";
owner = "ibis-project";
repo = "testing-data";
rev = "2c6a4bb5d5d525058d8d5b2312a9fee5dafc5476";
sha256 = "sha256-Lq503bqh9ESZJSk6yVq/uZwkAubzmSmoTBZSsqMm0DY=";
rev = "1922bd4617546b877e66e78bb2b87abeb510cf8e";
sha256 = "sha256-l5d7r/6Voy6N2pXq3IivLX3N0tNfKKwsbZXRexzc8Z8=";
};

ibis39 = pkgs.callPackage ./ibis.nix { python3 = pkgs.python39; };
Expand Down
Loading