From ff34c7be985615a307d0b3ff491930b2230c5014 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Mon, 23 Jan 2023 12:15:44 +0100 Subject: [PATCH] refactor(datatype): use a mapping to store `StructType` fields rather than `names` and `types` tuples also schedule `Struct.from_dict()`, `Struct.pairs` and `Struct(names, types)` constructor for removal --- ibis/backends/base/sql/alchemy/datatypes.py | 2 +- ibis/backends/clickhouse/datatypes.py | 2 +- ibis/backends/clickhouse/tests/test_types.py | 8 +- ibis/backends/duckdb/__init__.py | 2 +- ibis/backends/duckdb/tests/test_datatypes.py | 2 +- ibis/backends/polars/datatypes.py | 2 +- ibis/backends/pyarrow/datatypes.py | 2 +- ibis/backends/pyspark/datatypes.py | 22 ++++-- ibis/backends/tests/test_aggregation.py | 2 +- ibis/backends/tests/test_struct.py | 2 +- ibis/backends/tests/test_vectorized_udf.py | 16 ++-- ibis/backends/trino/datatypes.py | 2 +- ibis/expr/datatypes/core.py | 78 ++++++++++++++------ ibis/expr/datatypes/value.py | 9 ++- ibis/expr/schema.py | 2 +- ibis/expr/types/structs.py | 16 ++-- ibis/tests/benchmarks/test_benchmarks.py | 10 +-- ibis/tests/expr/test_datatypes.py | 11 +-- ibis/tests/expr/test_format.py | 2 +- ibis/tests/expr/test_literal.py | 2 +- ibis/tests/expr/test_sql.py | 2 +- ibis/tests/sql/test_sqlalchemy.py | 4 +- ibis/tests/strategies.py | 3 +- 23 files changed, 123 insertions(+), 80 deletions(-) diff --git a/ibis/backends/base/sql/alchemy/datatypes.py b/ibis/backends/base/sql/alchemy/datatypes.py index 926112923706..1ec19403b88e 100644 --- a/ibis/backends/base/sql/alchemy/datatypes.py +++ b/ibis/backends/base/sql/alchemy/datatypes.py @@ -182,7 +182,7 @@ def _pg_map(dialect, itype): @to_sqla_type.register(Dialect, dt.Struct) def _struct(dialect, itype): return StructType( - [(name, to_sqla_type(dialect, type)) for name, type in itype.pairs.items()] + [(name, to_sqla_type(dialect, type)) for name, type in itype.fields.items()] ) diff --git a/ibis/backends/clickhouse/datatypes.py b/ibis/backends/clickhouse/datatypes.py index 723d833d240a..1a508d7f195a 100644 --- a/ibis/backends/clickhouse/datatypes.py +++ b/ibis/backends/clickhouse/datatypes.py @@ -260,7 +260,7 @@ def _(ty: dt.Map) -> str: @serialize_raw.register(dt.Struct) def _(ty: dt.Struct) -> str: fields = ", ".join( - f"{name} {serialize(field_ty)}" for name, field_ty in ty.pairs.items() + f"{name} {serialize(field_ty)}" for name, field_ty in ty.fields.items() ) return f"Tuple({fields})" diff --git a/ibis/backends/clickhouse/tests/test_types.py b/ibis/backends/clickhouse/tests/test_types.py index fe7ee3c534ed..b7603bb9b140 100644 --- a/ibis/backends/clickhouse/tests/test_types.py +++ b/ibis/backends/clickhouse/tests/test_types.py @@ -161,7 +161,7 @@ def test_columns_types_with_additional_argument(con): param("Decimal(10, 3)", dt.Decimal(10, 3, nullable=False), id="decimal"), param( "Tuple(a String, b Array(Nullable(Float64)))", - dt.Struct.from_dict( + dt.Struct( dict( a=dt.String(nullable=False), b=dt.Array(dt.float64, nullable=False), @@ -172,7 +172,7 @@ def test_columns_types_with_additional_argument(con): ), param( "Tuple(String, Array(Nullable(Float64)))", - dt.Struct.from_dict( + dt.Struct( dict( f0=dt.String(nullable=False), f1=dt.Array(dt.float64, nullable=False), @@ -183,7 +183,7 @@ def test_columns_types_with_additional_argument(con): ), param( "Tuple(a String, Array(Nullable(Float64)))", - dt.Struct.from_dict( + dt.Struct( dict( a=dt.String(nullable=False), f1=dt.Array(dt.float64, nullable=False), @@ -194,7 +194,7 @@ def test_columns_types_with_additional_argument(con): ), param( "Nested(a String, b Array(Nullable(Float64)))", - dt.Struct.from_dict( + dt.Struct( dict( a=dt.Array(dt.String(nullable=False), nullable=False), b=dt.Array(dt.Array(dt.float64, nullable=False), nullable=False), diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index d82349cbf71d..57e01fc25fce 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -455,7 +455,7 @@ def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: ["column_name", "column_type", "null"], rows.mappings() ): ibis_type = parse(type) - yield name, ibis_type(nullable=null.lower() == "yes") + yield name, ibis_type.copy(nullable=null.lower() == "yes") def _register_in_memory_table(self, table_op): df = table_op.data.to_frame() diff --git a/ibis/backends/duckdb/tests/test_datatypes.py b/ibis/backends/duckdb/tests/test_datatypes.py index 0b1ab52835dd..80abc59544af 100644 --- a/ibis/backends/duckdb/tests/test_datatypes.py +++ b/ibis/backends/duckdb/tests/test_datatypes.py @@ -49,7 +49,7 @@ P=dt.string, Q=dt.Array(dt.int32), R=dt.Map(dt.string, dt.int64), - S=dt.Struct.from_dict( + S=dt.Struct( dict( a=dt.int32, b=dt.string, diff --git a/ibis/backends/polars/datatypes.py b/ibis/backends/polars/datatypes.py index bbd5d61ba092..860b1a17a4b3 100644 --- a/ibis/backends/polars/datatypes.py +++ b/ibis/backends/polars/datatypes.py @@ -58,7 +58,7 @@ def from_ibis_interval(dtype): def from_ibis_struct(dtype): fields = [ pl.Field(name=name, dtype=to_polars_type(dtype)) - for name, dtype in dtype.pairs.items() + for name, dtype in dtype.fields.items() ] return pl.Struct(fields) diff --git a/ibis/backends/pyarrow/datatypes.py b/ibis/backends/pyarrow/datatypes.py index 7b189bf54299..5ebbf2eb2e8b 100644 --- a/ibis/backends/pyarrow/datatypes.py +++ b/ibis/backends/pyarrow/datatypes.py @@ -51,7 +51,7 @@ def from_ibis_interval(dtype: dt.Interval): @to_pyarrow_type.register def from_ibis_struct(dtype: dt.Struct): return pa.struct( - pa.field(name, to_pyarrow_type(typ)) for name, typ in dtype.pairs.items() + pa.field(name, to_pyarrow_type(typ)) for name, typ in dtype.fields.items() ) diff --git a/ibis/backends/pyspark/datatypes.py b/ibis/backends/pyspark/datatypes.py index 1c261a3cc2a8..8dd6c34f26b4 100644 --- a/ibis/backends/pyspark/datatypes.py +++ b/ibis/backends/pyspark/datatypes.py @@ -6,8 +6,8 @@ import ibis.common.exceptions as com import ibis.expr.datatypes as dt +import ibis.expr.schema as sch from ibis.backends.base.sql.registry import sql_type_names -from ibis.expr.schema import Schema _sql_type_names = dict(sql_type_names, date='date') @@ -72,10 +72,11 @@ def _spark_map(spark_dtype_obj, nullable=True): @dt.dtype.register(pt.StructType) def _spark_struct(spark_dtype_obj, nullable=True): - names = spark_dtype_obj.names - fields = spark_dtype_obj.fields - ibis_types = [dt.dtype(f.dataType, nullable=f.nullable) for f in fields] - return dt.Struct(names, ibis_types, nullable=nullable) + fields = { + n: dt.dtype(f.dataType, nullable=f.nullable) + for n, f in zip(spark_dtype_obj.names, spark_dtype_obj.fields) + } + return dt.Struct(fields, nullable=nullable) _IBIS_DTYPE_TO_SPARK_DTYPE = {v: k for k, v in _SPARK_DTYPE_TO_IBIS_DTYPE.items()} @@ -122,10 +123,17 @@ def _map(ibis_dtype_obj): @spark_dtype.register(dt.Struct) -@spark_dtype.register(Schema) def _struct(ibis_dtype_obj): fields = [ pt.StructField(n, spark_dtype(t), t.nullable) - for n, t in zip(ibis_dtype_obj.names, ibis_dtype_obj.types) + for n, t in ibis_dtype_obj.fields.items() + ] + return pt.StructType(fields) + + +@spark_dtype.register(sch.Schema) +def _schema(ibis_schem_obj): + fields = [ + pt.StructField(n, spark_dtype(t), t.nullable) for n, t in ibis_schem_obj.items() ] return pt.StructType(fields) diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index ca896241894e..cfa6437ddf3f 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -183,7 +183,7 @@ def test_aggregate_multikey_group_reduction_udf(backend, alltypes, df): @reduction( input_type=[dt.double], - output_type=dt.Struct(['mean', 'std'], [dt.double, dt.double]), + output_type=dt.Struct({'mean': dt.double, 'std': dt.double}), ) def mean_and_std(v): return v.mean(), v.std() diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index 77fd49b7e6e2..ae805c1b8ac2 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -79,7 +79,7 @@ def test_null_literal(con, field): def test_struct_column(alltypes, df): t = alltypes expr = ibis.struct(dict(a=t.string_col, b=1, c=t.bigint_col)).name("s") - assert expr.type() == dt.Struct.from_dict(dict(a=dt.string, b=dt.int8, c=dt.int64)) + assert expr.type() == dt.Struct(dict(a=dt.string, b=dt.int8, c=dt.int64)) result = expr.execute() expected = pd.Series( (dict(a=a, b=1, c=c) for a, c in zip(df.string_col, df.bigint_col)), diff --git a/ibis/backends/tests/test_vectorized_udf.py b/ibis/backends/tests/test_vectorized_udf.py index 6a83c5b60648..a666b5333243 100644 --- a/ibis/backends/tests/test_vectorized_udf.py +++ b/ibis/backends/tests/test_vectorized_udf.py @@ -85,7 +85,7 @@ def add_one_struct(v): def create_add_one_struct_udf(result_formatter): return elementwise( input_type=[dt.double], - output_type=dt.Struct(['col1', 'col2'], [dt.double, dt.double]), + output_type=dt.Struct({'col1': dt.double, 'col2': dt.double}), )(_format_struct_udf_return_type(add_one_struct, result_formatter)) @@ -127,7 +127,7 @@ def create_add_one_struct_udf(result_formatter): @elementwise( input_type=[dt.double], - output_type=dt.Struct(['double_col', 'col2'], [dt.double, dt.double]), + output_type=dt.Struct({'double_col': dt.double, 'col2': dt.double}), ) def overwrite_struct_elementwise(v): assert isinstance(v, pd.Series) @@ -137,7 +137,7 @@ def overwrite_struct_elementwise(v): @elementwise( input_type=[dt.double], output_type=dt.Struct( - ['double_col', 'col2', 'float_col'], [dt.double, dt.double, dt.double] + {'double_col': dt.double, 'col2': dt.double, 'float_col': dt.double} ), ) def multiple_overwrite_struct_elementwise(v): @@ -147,7 +147,7 @@ def multiple_overwrite_struct_elementwise(v): @analytic( input_type=[dt.double, dt.double], - output_type=dt.Struct(['double_col', 'demean_weight'], [dt.double, dt.double]), + output_type=dt.Struct({'double_col': dt.double, 'demean_weight': dt.double}), ) def overwrite_struct_analytic(v, w): assert isinstance(v, pd.Series) @@ -165,7 +165,7 @@ def demean_struct(v, w): def create_demean_struct_udf(result_formatter): return analytic( input_type=[dt.double, dt.double], - output_type=dt.Struct(['demean', 'demean_weight'], [dt.double, dt.double]), + output_type=dt.Struct({'demean': dt.double, 'demean_weight': dt.double}), )(_format_struct_udf_return_type(demean_struct, result_formatter)) @@ -203,7 +203,7 @@ def mean_struct(v, w): def create_mean_struct_udf(result_formatter): return reduction( input_type=[dt.double, dt.int64], - output_type=dt.Struct(['mean', 'mean_weight'], [dt.double, dt.double]), + output_type=dt.Struct({'mean': dt.double, 'mean_weight': dt.double}), )(_format_struct_udf_return_type(mean_struct, result_formatter)) @@ -220,7 +220,7 @@ def create_mean_struct_udf(result_formatter): @reduction( input_type=[dt.double, dt.int64], - output_type=dt.Struct(['double_col', 'mean_weight'], [dt.double, dt.double]), + output_type=dt.Struct({'double_col': dt.double, 'mean_weight': dt.double}), ) def overwrite_struct_reduction(v, w): assert isinstance(v, (np.ndarray, pd.Series)) @@ -495,7 +495,7 @@ def test_elementwise_udf_destructure_exact_once( ): @elementwise( input_type=[dt.double], - output_type=dt.Struct(['col1', 'col2'], [dt.double, dt.double]), + output_type=dt.Struct({'col1': dt.double, 'col2': dt.double}), ) def add_one_struct_exact_once(v): key = v.iloc[0] diff --git a/ibis/backends/trino/datatypes.py b/ibis/backends/trino/datatypes.py index 05af545f18d3..f00b91aeeb35 100644 --- a/ibis/backends/trino/datatypes.py +++ b/ibis/backends/trino/datatypes.py @@ -173,7 +173,7 @@ def _string(_, itype): @to_sqla_type.register(TrinoDialect, dt.Struct) def _struct(dialect, itype): return ROW( - [(name, to_sqla_type(dialect, typ)) for name, typ in itype.pairs.items()] + [(name, to_sqla_type(dialect, typ)) for name, typ in itype.fields.items()] ) diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index fe328d3e8f75..0dc72087aefa 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -14,12 +14,13 @@ from ibis.common.grounds import Concrete, Singleton from ibis.common.validators import ( all_of, + frozendict_of, instance_of, isin, map_to, - tuple_of, validator, ) +from ibis.util import deprecated, warn_deprecated dtype = Dispatcher('dtype') @@ -642,18 +643,42 @@ def to_integer_type(self): class Struct(DataType): """Structured values.""" - names = tuple_of(instance_of(str)) - types = tuple_of(datatype) + fields = frozendict_of(instance_of(str), datatype) scalar = ir.StructScalar column = ir.StructColumn - def __init__(self, names, types, **kwargs): - if len(names) != len(types): - raise IbisTypeError( - 'Struct datatype names and types must have the same length' + @classmethod + def __create__(cls, names, types=None, nullable=True): + if types is None: + fields = names + else: + warn_deprecated( + "Struct(names, types)", + as_of="4.1", + removed_in="5.0", + instead=( + "construct a Struct type using a mapping of names to types instead: " + "Struct(dict(zip(names, types)))" + ), ) - super().__init__(names=names, types=types, **kwargs) + if len(names) != len(types): + raise IbisTypeError( + 'Struct datatype names and types must have the same length' + ) + fields = dict(zip(names, types)) + + return super().__create__(fields=fields, nullable=nullable) + + def __reduce__(self): + return (self.__class__, (self.fields, None, self.nullable)) + + def copy(self, fields=None, nullable=None): + if fields is None: + fields = self.fields + if nullable is None: + nullable = self.nullable + return type(self)(fields, nullable=nullable) @classmethod def from_tuples( @@ -673,10 +698,14 @@ def from_tuples( Struct Struct data type instance """ - names, types = zip(*pairs) - return cls(names, types, nullable=nullable) + return cls(dict(pairs), nullable=nullable) @classmethod + @deprecated( + as_of="4.1", + removed_in="5.0", + instead="directly construct a Struct type instead", + ) def from_dict( cls, pairs: Mapping[str, str | DataType], nullable: bool = True ) -> Struct: @@ -694,26 +723,33 @@ def from_dict( Struct Struct data type instance """ - names, types = pairs.keys(), pairs.values() - return cls(names, types, nullable=nullable) + return cls(pairs, nullable=nullable) @property + @deprecated( + as_of="4.1", + removed_in="5.0", + instead="use struct_type.fields attribute instead", + ) def pairs(self) -> Mapping[str, DataType]: - """Return a mapping from names to data type instances. + return self.fields - Returns - ------- - Mapping[str, DataType] - Mapping of field name to data type - """ - return dict(zip(self.names, self.types)) + @property + def names(self) -> tuple[str, ...]: + """Return the names of the struct's fields.""" + return tuple(self.fields.keys()) + + @property + def types(self) -> tuple[DataType, ...]: + """Return the types of the struct's fields.""" + return tuple(self.fields.values()) def __getitem__(self, key: str) -> DataType: - return self.pairs[key] + return self.fields[key] def __repr__(self) -> str: return '{}({}, nullable={})'.format( - self.name, list(self.pairs.items()), self.nullable + self.name, list(self.fields.items()), self.nullable ) @property diff --git a/ibis/expr/datatypes/value.py b/ibis/expr/datatypes/value.py index 1a73a6416112..a7a286412a3f 100644 --- a/ibis/expr/datatypes/value.py +++ b/ibis/expr/datatypes/value.py @@ -32,12 +32,15 @@ def infer(value: Any) -> dt.DataType: raise InputTypeError(value) +# TODO(kszucs): support NamedTuples and dataclasses instead of OrderedDict +# which should trigger infer_map instead @infer.register(collections.OrderedDict) def infer_struct(value: Mapping[str, Any]) -> dt.Struct: """Infer the [`Struct`][ibis.expr.datatypes.Struct] type of `value`.""" if not value: raise TypeError('Empty struct type not supported') - return dt.Struct(list(value.keys()), list(map(infer, value.values()))) + fields = {name: infer(val) for name, val in value.items()} + return dt.Struct(fields) @infer.register(collections.abc.Mapping) @@ -51,7 +54,7 @@ def infer_map(value: Mapping[Any, Any]) -> dt.Map: highest_precedence(map(infer, value.values())), ) except IbisTypeError: - return dt.Struct.from_dict(toolz.valmap(infer, value, factory=type(value))) + return dt.Struct(toolz.valmap(infer, value, factory=type(value))) @infer.register((list, tuple)) @@ -303,7 +306,7 @@ def normalize(typ, value): return frozendict({k: normalize(typ.value_type, v) for k, v in value.items()}) elif typ.is_struct(): return frozendict( - {k: normalize(typ[k], v) for k, v in value.items() if k in typ.pairs} + {k: normalize(typ[k], v) for k, v in value.items() if k in typ.fields} ) elif typ.is_geospatial(): if isinstance(value, (tuple, list)): diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py index ddc78c62112a..370b908e101f 100644 --- a/ibis/expr/schema.py +++ b/ibis/expr/schema.py @@ -216,7 +216,7 @@ def to_pyarrow(self): return ibis_to_pyarrow_schema(self) def as_struct(self) -> dt.Struct: - return dt.Struct(self.names, self.types) + return dt.Struct(dict(self.items())) def __gt__(self, other: Schema) -> bool: """Return whether `self` is a strict superset of `other`.""" diff --git a/ibis/expr/types/structs.py b/ibis/expr/types/structs.py index 4b62c5c1ff04..1f4fa902b168 100644 --- a/ibis/expr/types/structs.py +++ b/ibis/expr/types/structs.py @@ -48,13 +48,13 @@ def struct( """ import ibis.expr.operations as ops - items = dict(value) - values = items.values() - if any(isinstance(value, Value) for value in values): - return ops.StructColumn( - names=tuple(items.keys()), values=tuple(values) - ).to_expr() - return literal(collections.OrderedDict(items), type=type) + fields = dict(value) + if any(isinstance(value, Value) for value in fields.values()): + names = tuple(fields.keys()) + values = tuple(fields.values()) + return ops.StructColumn(names=names, values=values).to_expr() + else: + return literal(collections.OrderedDict(fields), type=type) @public @@ -106,7 +106,7 @@ def types(self) -> Sequence[dt.DataType]: @property def fields(self) -> Mapping[str, dt.DataType]: """Return a mapping from field name to field type of the struct.""" - return util.frozendict(self.type().pairs) + return util.frozendict(self.type().fields) def lift(self) -> ir.Table: """Project the fields of `self` into a table. diff --git a/ibis/tests/benchmarks/test_benchmarks.py b/ibis/tests/benchmarks/test_benchmarks.py index 721b7a257df6..39faa6e7b893 100644 --- a/ibis/tests/benchmarks/test_benchmarks.py +++ b/ibis/tests/benchmarks/test_benchmarks.py @@ -534,9 +534,7 @@ def test_op_args(benchmark): def test_complex_datatype_parse(benchmark): type_str = "array, b: map>>>" expected = dt.Array( - dt.Struct.from_dict( - dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64))) - ) + dt.Struct(dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)))) ) assert dt.parse(type_str) == expected benchmark(dt.parse, type_str) @@ -546,9 +544,7 @@ def test_complex_datatype_parse(benchmark): @pytest.mark.parametrize("func", [str, hash]) def test_complex_datatype_builtins(benchmark, func): datatype = dt.Array( - dt.Struct.from_dict( - dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64))) - ) + dt.Struct(dict(a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)))) ) benchmark(func, datatype) @@ -574,7 +570,7 @@ def test_large_expr_equals(benchmark, tpc_h02): ), pytest.param( dt.Array( - dt.Struct.from_dict( + dt.Struct( dict( a=dt.Array(dt.string), b=dt.Map(dt.string, dt.Array(dt.int64)), diff --git a/ibis/tests/expr/test_datatypes.py b/ibis/tests/expr/test_datatypes.py index d514a6cfb443..b339bbb311f1 100644 --- a/ibis/tests/expr/test_datatypes.py +++ b/ibis/tests/expr/test_datatypes.py @@ -143,16 +143,17 @@ def test_struct_with_string_types(): def test_struct_from_dict(): - result = dt.Struct.from_dict({'b': 'int64', 'a': dt.float64}) + with pytest.warns(FutureWarning): + result = dt.Struct.from_dict({'b': 'int64', 'a': dt.float64}) - assert result == dt.Struct(names=['b', 'a'], types=[dt.int64, dt.float64]) + assert result == dt.Struct({'b': dt.int64, 'a': dt.float64}) def test_struct_names_and_types_legth_must_match(): - with pytest.raises(IbisTypeError): + with pytest.raises(IbisTypeError), pytest.warns(FutureWarning): dt.Struct(names=["a", "b"], types=["int", "str", "float"]) - dtype = dt.Struct(names=["a", "b"], types=["int", "str"]) + dtype = dt.Struct({"a": "int", "b": "str"}) assert isinstance(dtype, dt.Struct) @@ -619,7 +620,7 @@ def test_is_signed_integer(): def test_is_struct(): - assert dt.Struct.from_dict({"a": dt.string}).is_struct() + assert dt.Struct({"a": dt.string}).is_struct() def test_is_unsigned_integer(): diff --git a/ibis/tests/expr/test_format.py b/ibis/tests/expr/test_format.py index 20c70fcfe69b..ebaf2634c29e 100644 --- a/ibis/tests/expr/test_format.py +++ b/ibis/tests/expr/test_format.py @@ -346,7 +346,7 @@ def test_destruct_selection(): @udf.reduction( input_type=['int64'], - output_type=dt.Struct.from_dict( + output_type=dt.Struct( { 'sum': 'int64', 'mean': 'float64', diff --git a/ibis/tests/expr/test_literal.py b/ibis/tests/expr/test_literal.py index 2e34d4e4648f..096cd4e4aac1 100644 --- a/ibis/tests/expr/test_literal.py +++ b/ibis/tests/expr/test_literal.py @@ -124,7 +124,7 @@ def test_struct_literal_non_castable(value): def test_struct_cast_to_empty_struct(): value = ibis.struct({"a": 1, "b": 2.0}) - assert value.type().castable(dt.Struct([], [])) + assert value.type().castable(dt.Struct({})) @pytest.mark.parametrize( diff --git a/ibis/tests/expr/test_sql.py b/ibis/tests/expr/test_sql.py index bbeb049d44c1..b80ae2f999bc 100644 --- a/ibis/tests/expr/test_sql.py +++ b/ibis/tests/expr/test_sql.py @@ -144,7 +144,7 @@ def test_format_short_string_column(): def test_format_nested_column(): - dtype = dt.Struct(["x", "y"], ["int", "float"]) + dtype = dt.Struct({"x": "int", "y": "float"}) values = [{"x": 1, "y": 2.5}, None] fmts, min_len, max_len = format_column(dtype, values) assert str(fmts[1]) == null diff --git a/ibis/tests/sql/test_sqlalchemy.py b/ibis/tests/sql/test_sqlalchemy.py index 69a9d755801a..111a07047bbd 100644 --- a/ibis/tests/sql/test_sqlalchemy.py +++ b/ibis/tests/sql/test_sqlalchemy.py @@ -1100,9 +1100,7 @@ def test_tpc_h11(h11): def test_to_sqla_type_array_of_non_primitive(): - result = to_sqla_type( - DefaultDialect(), dt.Array(dt.Struct.from_dict(dict(a="int"))) - ) + result = to_sqla_type(DefaultDialect(), dt.Array(dt.Struct(dict(a="int")))) [(result_name, result_type)] = result.value_type.pairs expected_name = "a" expected_type = sa.BigInteger() diff --git a/ibis/tests/strategies.py b/ibis/tests/strategies.py index 749f75423591..f59b11dd6448 100644 --- a/ibis/tests/strategies.py +++ b/ibis/tests/strategies.py @@ -100,7 +100,8 @@ def struct_dtypes( num_fields = draw(num_fields) names = draw(st.lists(st.text(), min_size=num_fields, max_size=num_fields)) types = draw(st.lists(item_strategy, min_size=num_fields, max_size=num_fields)) - return dt.Struct(names, types, nullable=draw(nullable)) + fields = dict(zip(names, types)) + return dt.Struct(fields, nullable=draw(nullable)) point_dtype = st.builds(dt.Point, nullable=nullable)