Skip to content

Commit

Permalink
prototype as_table
Browse files Browse the repository at this point in the history
  • Loading branch information
chelsea-lin committed Feb 6, 2024
1 parent 497b3cb commit 0ab34cb
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 3 deletions.
8 changes: 7 additions & 1 deletion ibis/backends/base/sqlglot/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def fn(node, _, **kwargs):
try:
return result.subquery(alias)
except AttributeError:
return result.as_(alias, quoted=self.quoted)
return result.as_(alias, quoted=self.quoted, table=[alias.name])

This comment has been minimized.

Copy link
@chelsea-lin

chelsea-lin Feb 6, 2024

Author Owner

This line of code causes the "Named columns are not supported in table alias" error. We add it so that the UNNEST node gets a TableAlias rather than a plain Alias, because sqlglot expects a TableAlias for the UNNEST node:

sg.expressions.Unnest(expressions=[unnest], offset=None).as_(alias)
Out[3]: 
Unnest(
  expressions=[
    Column(
      this=Identifier(this=x, quoted=False),
      table=Identifier(this=array_types, quoted=False))],
  alias=Identifier(this=t0, quoted=False))

sg.expressions.Unnest(expressions=[unnest], offset=None).as_(alias, table=['x'])
Out[4]: 
Unnest(
  expressions=[
    Column(
      this=Identifier(this=x, quoted=False),
      table=Identifier(this=array_types, quoted=False))],
  alias=TableAlias(
    this=Identifier(this=t0, quoted=False),
    columns=[
      Identifier(this=x, quoted=False)]))


# apply translate rules in topological order
results = op.map(fn)

This comment has been minimized.

Copy link
@chelsea-lin

chelsea-lin Feb 6, 2024

Author Owner
SELECT
  `t1`.`x`,
  `t1`.`y`,
  `t1`.`z`,
  `t1`.`grouper`,
  `t1`.`scalar_column`,
  `x` AS `x_right`
FROM `bigframes-dev`.`ibis_gbq_testing`.`array_types` AS `t1`
CROSS JOIN UNNEST(`t0`.`x`) AS `t3`

Why does UNNEST refer to t0 rather than t1? Below are the generated nodes in topological order:

image
Expand Down Expand Up @@ -1011,6 +1011,12 @@ def visit_DatabaseTable(
name, db=namespace.schema, catalog=namespace.database, quoted=self.quoted
)

@visit_node.register(ops.ArrayTable)
def visit_ArrayTable(self, op, *, col) -> sge.Unnest:
    """Compile an ``ops.ArrayTable`` node into a sqlglot ``UNNEST`` expression.

    Parameters
    ----------
    op
        The ``ops.ArrayTable`` node being visited (not read here).
    col
        Expression for the array column to unnest.
        NOTE(review): presumably the already-translated sqlglot form of
        ``op.col`` -- confirm against the dispatcher.

    Returns
    -------
    sge.Unnest
        ``UNNEST(col)`` with ``offset=None``.
    """
    # The annotation previously said ``sg.Table``; the object built here is an
    # ``Unnest`` expression, so annotate what is actually returned.
    return sge.Unnest(expressions=[col], offset=None)

@visit_node.register(ops.SelfReference)
def visit_SelfReference(self, op, *, parent, identifier):
return parent
Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,14 @@ def test_unnest_idempotent(backend):
tm.assert_frame_equal(result, expected)


@builtin_array
@pytest.mark.notimpl("dask", raises=ValueError)
@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError)
def test_as_table(backend):
    """Smoke-test SQL generation for a cross join with ``x.as_table()``.

    Only prints the repr of the generated SQL; nothing is asserted yet.
    """
    table = backend.array_types
    joined = table.cross_join(table.x.as_table())
    sql = ibis.to_sql(joined)
    print(repr(sql))


@builtin_array
@pytest.mark.notimpl(["datafusion", "flink"], raises=com.OperationNotDefinedError)
@pytest.mark.broken(
Expand Down
18 changes: 18 additions & 0 deletions ibis/expr/operations/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@
import ibis.expr.datatypes as dt
import ibis.expr.rules as rlz
from ibis.common.annotations import attribute
from ibis.common.collections import FrozenDict
from ibis.common.typing import VarTuple # noqa: TCH001
from ibis.expr.operations.core import Unary, Value
from ibis.expr.operations.relations import Relation, Simple
from ibis.expr.schema import Schema


@public
Expand Down Expand Up @@ -111,6 +114,21 @@ def dtype(self):
return self.arg.dtype.value_type


@public
class ArrayTable(Relation):
    """A relation built from a single array-typed value expression.

    The schema has exactly one column: it keeps the name of ``col`` and has
    the array's element type (``col.dtype.value_type``).
    """

    # Array-typed value expression this relation is built from.
    col: Value[dt.Array]

    @attribute
    def values(self):
        """Return an empty mapping; no value expressions are defined here."""
        return FrozenDict()

    @property
    def schema(self) -> Schema:
        """Return the one-column schema ``(col.name, element type of col)``."""
        column_name = self.col.name
        element_type = self.col.dtype.value_type
        return Schema.from_tuples([(column_name, element_type)])


@public
class ArrayContains(Value):
arg: Value[dt.Array]
Expand Down
16 changes: 14 additions & 2 deletions ibis/expr/types/arrays.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Iterable, Optional
import inspect
from typing import TYPE_CHECKING, Callable

Expand Down Expand Up @@ -305,12 +306,23 @@ def unnest(self) -> ir.Value:
ir.Value
Unnested array
"""
expr = ops.Unnest(self).to_expr()
expr = ops.Unnest(self).to_expr() # -> UNNEST(t0.x) AS AsTable_x
try:
return expr.name(self.get_name())
return expr.name(self.get_name()) # -> UNNEST(t0.x) AS x
except com.ExpressionError:
return expr

def as_table(
    self,
    offset_name: Optional[str] = None,
) -> ir.Table:
    """Return this array expression as a table expression.

    The array is wrapped in an ``ops.ArrayTable`` relation, which is then
    converted to an expression.

    Parameters
    ----------
    offset_name
        Reserved for naming an offset column. Not implemented yet: any value
        other than ``None`` raises instead of being silently ignored.

    Returns
    -------
    ir.Table
        Table expression backed by ``ops.ArrayTable(self)``.

    Raises
    ------
    NotImplementedError
        If `offset_name` is not ``None``.
    """
    if offset_name is not None:
        # TODO(chelsealin): add offset_name support. Until then fail loudly so
        # callers do not get a table that is missing the offset they asked for.
        raise NotImplementedError(
            "as_table() does not support `offset_name` yet"
        )
    return ops.ArrayTable(self).to_expr()

def join(self, sep: str | ir.StringValue) -> ir.StringValue:
"""Join the elements of this array expression with `sep`.
Expand Down

1 comment on commit 0ab34cb

@chelsea-lin
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This commit is an attempt to implement ibis-project#7781. The code works, but with some defects, as shown in the example below:

In [1]: import ibis
   ...: bq = ibis.bigquery.connect(project_id="bigframes-dev")
   ...: table = bq.table("bigframes-dev.ibis_gbq_testing.array_types")
   ...: ibis.to_sql(table.cross_join(table.x.as_table()))
   ...: 
Named columns are not supported in table alias.
Out[1]: 
SELECT
  `t1`.`x`,
  `t1`.`y`,
  `t1`.`z`,
  `t1`.`grouper`,
  `t1`.`scalar_column`,
  `x` AS `x_right`
FROM `bigframes-dev`.`ibis_gbq_testing`.`array_types` AS `t1`
CROSS JOIN UNNEST(`t0`.`x`) AS `t3`
  1. The "Named columns are not supported in table alias" error is emitted.
  2. UNNEST refers to table t0; it should refer to t1.

Please sign in to comment.