Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(internals): ensure that CTEs are emitted in topological order #9726

Merged
merged 5 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions ibis/backends/sql/rewrites.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,17 +249,17 @@ def merge_select_select(_, **kwargs):
return result if complexity(result) <= complexity(_) else _


def extract_ctes(node):
result = []
def extract_ctes(node: ops.Relation) -> set[ops.Relation]:
cte_types = (Select, ops.Aggregate, ops.JoinChain, ops.Set, ops.Limit, ops.Sample)
dont_count = (ops.Field, ops.CountStar, ops.CountDistinctStar)

g = Graph.from_bfs(node, filter=~InstanceOf(dont_count))
result = set()
for op, dependents in g.invert().items():
if isinstance(op, ops.View) or (
len(dependents) > 1 and isinstance(op, cte_types)
):
result.append(op)
result.add(op)

return result

Expand Down Expand Up @@ -315,14 +315,14 @@ def sqlize(
simplified = sqlized

# extract common table expressions while wrapping them in a CTE node
ctes = frozenset(extract_ctes(simplified))
ctes = extract_ctes(simplified)

def wrap(node, _, **kwargs):
new = node.__recreate__(kwargs)
return CTE(new) if node in ctes else new

result = simplified.replace(wrap)
ctes = reversed([cte.parent for cte in result.find(CTE)])
ctes = [cte.parent for cte in result.find(CTE, ordered=True)]

return result, ctes

Expand Down
14 changes: 14 additions & 0 deletions ibis/backends/tests/sql/test_sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -590,3 +590,17 @@ def test_no_cartesian_join(snapshot):
]
)
snapshot.assert_match(ibis.to_sql(final, dialect="duckdb"), "out.sql")


def test_ctes_in_order():
table1 = ibis.table({"id": "int"}, name="table1")
table2 = ibis.table({"id": "int"}, name="table2")
table3 = ibis.table({"id": "int"}, name="table3")

ids_table = table1.union(table2).alias("first")
info_table = ids_table.union(table3).alias("second")

expr = ids_table.union(info_table)

sql = ibis.to_sql(expr, dialect="duckdb")
assert sql.find('"first" AS (') < sql.find('"second" AS (')
5 changes: 5 additions & 0 deletions ibis/common/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,7 @@ def find(
finder: FinderLike,
filter: Optional[FinderLike] = None,
context: Optional[dict] = None,
ordered: bool = False,
) -> list[Node]:
"""Find all nodes matching a given pattern or type in the graph.

Expand All @@ -355,6 +356,8 @@ def find(
the given filter and stop otherwise.
context
Optional context to use if `finder` or `filter` is a pattern.
ordered
Emit nodes in topological order if `True`.

Returns
-------
Expand All @@ -364,6 +367,8 @@ def find(
"""
graph = Graph.from_bfs(self, filter=filter, context=context)
finder = _coerce_finder(finder, context)
if ordered:
graph, _ = graph.toposort()
return [node for node in graph.nodes() if finder(node)]

@experimental
Expand Down