Skip to content

Commit

Permalink
feat(pandas,dask): implement ops.StructColumn (#9302)
Browse files Browse the repository at this point in the history
  • Loading branch information
NickCrews authored Jun 3, 2024
1 parent 395c8b5 commit ea81d85
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 7 deletions.
6 changes: 6 additions & 0 deletions ibis/backends/dask/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,12 @@ def visit(cls, op: ops.Array, exprs):
lambda row: np.array(row, dtype=object), exprs, name=op.name, dtype=object
)

@classmethod
def visit(cls, op: ops.StructColumn, names, values):
return cls.rowwise(
lambda row: dict(zip(names, row)), values, name=op.name, dtype=object
)

@classmethod
def visit(cls, op: ops.ArrayConcat, arg):
dtype = PandasType.from_ibis(op.dtype)
Expand Down
4 changes: 4 additions & 0 deletions ibis/backends/pandas/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,10 @@ def visit(cls, op: ops.FindInSet, needle, values):
def visit(cls, op: ops.Array, exprs):
return cls.rowwise(lambda row: np.array(row, dtype=object), exprs)

@classmethod
def visit(cls, op: ops.StructColumn, names, values):
return cls.rowwise(lambda row: dict(zip(names, row)), values)

@classmethod
def visit(cls, op: ops.ArrayConcat, arg):
return cls.rowwise(lambda row: np.concatenate(row.values), arg)
Expand Down
4 changes: 1 addition & 3 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,9 +1233,7 @@ def query(t, group_cols):
snapshot.assert_match(str(ibis.to_sql(t3, dialect=con.name)), "out.sql")


@pytest.mark.notimpl(
["dask", "pandas", "oracle", "exasol"], raises=com.OperationNotDefinedError
)
@pytest.mark.notimpl(["oracle", "exasol"], raises=com.OperationNotDefinedError)
@pytest.mark.notimpl(["druid"], raises=AssertionError)
@pytest.mark.notyet(
["datafusion", "impala", "mssql", "mysql", "sqlite"],
Expand Down
13 changes: 9 additions & 4 deletions ibis/backends/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
Py4JJavaError,
PySparkAnalysisException,
)
from ibis.common.exceptions import IbisError, OperationNotDefinedError
from ibis.common.exceptions import IbisError

pytestmark = [
pytest.mark.never(["mysql", "sqlite", "mssql"], reason="No struct support"),
Expand Down Expand Up @@ -101,7 +101,7 @@ def test_null_literal(backend, con, field):
backend.assert_series_equal(result, expected)


@pytest.mark.notimpl(["dask", "pandas", "postgres", "risingwave"])
@pytest.mark.notimpl(["postgres", "risingwave"])
def test_struct_column(alltypes, df):
t = alltypes
expr = t.select(s=ibis.struct(dict(a=t.string_col, b=1, c=t.bigint_col)))
Expand All @@ -113,7 +113,7 @@ def test_struct_column(alltypes, df):
tm.assert_frame_equal(result, expected)


@pytest.mark.notimpl(["dask", "pandas", "postgres", "risingwave", "polars"])
@pytest.mark.notimpl(["postgres", "risingwave", "polars"])
@pytest.mark.notyet(
["flink"], reason="flink doesn't support creating struct columns from collect"
)
Expand Down Expand Up @@ -253,7 +253,12 @@ def test_keyword_fields(con, nullable):
raises=PolarsColumnNotFoundError,
reason="doesn't seem to support IN-style subqueries on structs",
)
@pytest.mark.notimpl(["pandas", "dask"], raises=OperationNotDefinedError)
@pytest.mark.notimpl(
# https://github.com/pandas-dev/pandas/issues/58909
["pandas", "dask"],
raises=TypeError,
reason="unhashable type: 'dict'",
)
@pytest.mark.xfail_version(
pyspark=["pyspark<3.5"],
reason="requires pyspark 3.5",
Expand Down

0 comments on commit ea81d85

Please sign in to comment.