diff --git a/ibis/backends/dask/executor.py b/ibis/backends/dask/executor.py index e35a4140481c..58481ed654c9 100644 --- a/ibis/backends/dask/executor.py +++ b/ibis/backends/dask/executor.py @@ -160,6 +160,12 @@ def visit(cls, op: ops.Array, exprs): lambda row: np.array(row, dtype=object), exprs, name=op.name, dtype=object ) + @classmethod + def visit(cls, op: ops.StructColumn, names, values): + return cls.rowwise( + lambda row: dict(zip(names, row)), values, name=op.name, dtype=object + ) + @classmethod def visit(cls, op: ops.ArrayConcat, arg): dtype = PandasType.from_ibis(op.dtype) diff --git a/ibis/backends/pandas/executor.py b/ibis/backends/pandas/executor.py index 858f49173464..e0b3e19940f9 100644 --- a/ibis/backends/pandas/executor.py +++ b/ibis/backends/pandas/executor.py @@ -223,6 +223,10 @@ def visit(cls, op: ops.FindInSet, needle, values): def visit(cls, op: ops.Array, exprs): return cls.rowwise(lambda row: np.array(row, dtype=object), exprs) + @classmethod + def visit(cls, op: ops.StructColumn, names, values): + return cls.rowwise(lambda row: dict(zip(names, row)), values) + @classmethod def visit(cls, op: ops.ArrayConcat, arg): return cls.rowwise(lambda row: np.concatenate(row.values), arg) diff --git a/ibis/backends/tests/test_generic.py b/ibis/backends/tests/test_generic.py index a43502ac4a44..3d2a7b728ddb 100644 --- a/ibis/backends/tests/test_generic.py +++ b/ibis/backends/tests/test_generic.py @@ -1233,9 +1233,7 @@ def query(t, group_cols): snapshot.assert_match(str(ibis.to_sql(t3, dialect=con.name)), "out.sql") -@pytest.mark.notimpl( - ["dask", "pandas", "oracle", "exasol"], raises=com.OperationNotDefinedError -) +@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError) @pytest.mark.notimpl(["druid"], raises=AssertionError) @pytest.mark.notyet( ["datafusion", "impala", "mssql", "mysql", "sqlite"], diff --git a/ibis/backends/tests/test_struct.py b/ibis/backends/tests/test_struct.py index e7f078f7591a..4e8d94960814 100644 --- a/ibis/backends/tests/test_struct.py +++ b/ibis/backends/tests/test_struct.py @@ -21,7 +21,7 @@ Py4JJavaError, PySparkAnalysisException, ) -from ibis.common.exceptions import IbisError, OperationNotDefinedError +from ibis.common.exceptions import IbisError pytestmark = [ pytest.mark.never(["mysql", "sqlite", "mssql"], reason="No struct support"), @@ -101,7 +101,7 @@ def test_null_literal(backend, con, field): backend.assert_series_equal(result, expected) -@pytest.mark.notimpl(["dask", "pandas", "postgres", "risingwave"]) +@pytest.mark.notimpl(["postgres", "risingwave"]) def test_struct_column(alltypes, df): t = alltypes expr = t.select(s=ibis.struct(dict(a=t.string_col, b=1, c=t.bigint_col))) @@ -113,7 +113,7 @@ def test_struct_column(alltypes, df): tm.assert_frame_equal(result, expected) -@pytest.mark.notimpl(["dask", "pandas", "postgres", "risingwave", "polars"]) +@pytest.mark.notimpl(["postgres", "risingwave", "polars"]) @pytest.mark.notyet( ["flink"], reason="flink doesn't support creating struct columns from collect" ) @@ -253,7 +253,12 @@ def test_keyword_fields(con, nullable): raises=PolarsColumnNotFoundError, reason="doesn't seem to support IN-style subqueries on structs", ) -@pytest.mark.notimpl(["pandas", "dask"], raises=OperationNotDefinedError) +@pytest.mark.notimpl( + # https://github.com/pandas-dev/pandas/issues/58909 + ["pandas", "dask"], + raises=TypeError, + reason="unhashable type: 'dict'", +) @pytest.mark.xfail_version( pyspark=["pyspark<3.5"], reason="requires pyspark 3.5",