Skip to content

Commit

Permalink
merge_subqueries fixes (#914)
Browse files Browse the repository at this point in the history
* merge_subqueries fixes

* fixup
  • Loading branch information
barakalon authored Jan 12, 2023
1 parent b6d11df commit e30e21b
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 14 deletions.
2 changes: 1 addition & 1 deletion sqlglot/helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@ def csv_reader(read_csv: exp.ReadCSV) -> t.Any:
file.close()


def find_new_name(taken: t.Sequence[str], base: str) -> str:
def find_new_name(taken: t.Collection[str], base: str) -> str:
"""
Searches for a new name.
Expand Down
54 changes: 42 additions & 12 deletions sqlglot/optimizer/merge_subqueries.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,9 @@ def merge_ctes(expression, leave_tables_isolated=False):

singular_cte_selections = [v[0] for k, v in cte_selections.items() if len(v) == 1]
for outer_scope, inner_scope, table in singular_cte_selections:
inner_select = inner_scope.expression.unnest()
from_or_join = table.find_ancestor(exp.From, exp.Join)
if _mergeable(outer_scope, inner_select, leave_tables_isolated, from_or_join):
if _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join):
alias = table.alias_or_name

_rename_inner_sources(outer_scope, inner_scope, alias)
_merge_from(outer_scope, inner_scope, table, alias)
_merge_expressions(outer_scope, inner_scope, alias)
Expand All @@ -80,40 +78,41 @@ def merge_ctes(expression, leave_tables_isolated=False):
_merge_order(outer_scope, inner_scope)
_merge_hints(outer_scope, inner_scope)
_pop_cte(inner_scope)
outer_scope.clear_cache()
return expression


def merge_derived_tables(expression, leave_tables_isolated=False):
for outer_scope in traverse_scope(expression):
for subquery in outer_scope.derived_tables:
inner_select = subquery.unnest()
from_or_join = subquery.find_ancestor(exp.From, exp.Join)
if _mergeable(outer_scope, inner_select, leave_tables_isolated, from_or_join):
alias = subquery.alias_or_name
inner_scope = outer_scope.sources[alias]

alias = subquery.alias_or_name
inner_scope = outer_scope.sources[alias]
if _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join):
_rename_inner_sources(outer_scope, inner_scope, alias)
_merge_from(outer_scope, inner_scope, subquery, alias)
_merge_expressions(outer_scope, inner_scope, alias)
_merge_joins(outer_scope, inner_scope, from_or_join)
_merge_where(outer_scope, inner_scope, from_or_join)
_merge_order(outer_scope, inner_scope)
_merge_hints(outer_scope, inner_scope)
outer_scope.clear_cache()
return expression


def _mergeable(outer_scope, inner_select, leave_tables_isolated, from_or_join):
def _mergeable(outer_scope, inner_scope, leave_tables_isolated, from_or_join):
"""
Return True if `inner_select` can be merged into outer query.
Args:
outer_scope (Scope)
inner_select (exp.Select)
inner_scope (Scope)
leave_tables_isolated (bool)
from_or_join (exp.From|exp.Join)
Returns:
bool: True if can be merged
"""
inner_select = inner_scope.expression.unnest()

def _is_a_window_expression_in_unmergable_operation():
window_expressions = inner_select.find_all(exp.Window)
Expand All @@ -133,10 +132,40 @@ def _is_a_window_expression_in_unmergable_operation():
]
return any(window_expressions_in_unmergable)

def _outer_select_joins_on_inner_select_join():
"""
All columns from the inner select in the ON clause must be from the first FROM table.
That is, this can be merged:
SELECT * FROM x JOIN (SELECT y.a AS a FROM y JOIN z) AS q ON x.a = q.a
^^^ ^
But this can't:
SELECT * FROM x JOIN (SELECT z.a AS a FROM y JOIN z) AS q ON x.a = q.a
^^^ ^
"""
if not isinstance(from_or_join, exp.Join):
return False

alias = from_or_join.this.alias_or_name

on = from_or_join.args.get("on")
if not on:
return False
selections = [c.name for c in on.find_all(exp.Column) if c.table == alias]
inner_from = inner_scope.expression.args.get("from")
if not inner_from:
return False
inner_from_table = inner_from.expressions[0].alias_or_name
inner_projections = {s.alias_or_name: s for s in inner_scope.selects}
return any(
col.table != inner_from_table
for selection in selections
for col in inner_projections[selection].find_all(exp.Column)
)

return (
isinstance(outer_scope.expression, exp.Select)
and isinstance(inner_select, exp.Select)
and isinstance(inner_select, exp.Select)
and not any(inner_select.args.get(arg) for arg in UNMERGABLE_ARGS)
and inner_select.args.get("from")
and not any(e.find(exp.AggFunc, exp.Select) for e in inner_select.expressions)
Expand All @@ -153,6 +182,7 @@ def _is_a_window_expression_in_unmergable_operation():
j.side in {"FULL", "RIGHT"} for j in outer_scope.expression.args.get("joins", [])
)
)
and not _outer_select_joins_on_inner_select_join()
and not _is_a_window_expression_in_unmergable_operation()
)

Expand All @@ -168,7 +198,7 @@ def _rename_inner_sources(outer_scope, inner_scope, alias):
"""
taken = set(outer_scope.selected_sources)
conflicts = taken.intersection(set(inner_scope.selected_sources))
conflicts = conflicts - {alias}
conflicts -= {alias}

for conflict in conflicts:
new_name = find_new_name(taken, conflict)
Expand Down
2 changes: 1 addition & 1 deletion sqlglot/optimizer/optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def optimize(expression, schema=None, db=None, catalog=None, rules=RULES, **kwar
If no schema is provided then the default schema defined at `sqlgot.schema` will be used
db (str): specify the default database, as might be set by a `USE DATABASE db` statement
catalog (str): specify the default catalog, as might be set by a `USE CATALOG c` statement
rules (list): sequence of optimizer rules to use
rules (sequence): sequence of optimizer rules to use
**kwargs: If a rule has a keyword argument with a same name in **kwargs, it will be passed in.
Returns:
sqlglot.Expression: optimized expression
Expand Down
4 changes: 4 additions & 0 deletions sqlglot/optimizer/pushdown_projections.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ def _remove_unused_selections(scope, parent_selections):
order_refs = set()

new_selections = []
removed = False
for i, selection in enumerate(scope.selects):
if (
SELECT_ALL in parent_selections
Expand All @@ -88,12 +89,15 @@ def _remove_unused_selections(scope, parent_selections):
new_selections.append(selection)
else:
removed_indexes.append(i)
removed = True

# If there are no remaining selections, just select a single constant
if not new_selections:
new_selections.append(DEFAULT_SELECTION.copy())

scope.expression.set("expressions", new_selections)
if removed:
scope.clear_cache()
return removed_indexes


Expand Down
39 changes: 39 additions & 0 deletions tests/fixtures/optimizer/merge_subqueries.sql
Original file line number Diff line number Diff line change
Expand Up @@ -311,3 +311,42 @@ FROM
ON
t1.cola = t2.cola;
SELECT /*+ BROADCAST(a2) */ a1.cola AS cola, a2.cola AS cola FROM VALUES (1) AS a1(cola) JOIN VALUES (1) AS a2(cola) ON a1.cola = a2.cola;

# title: Nested subquery selects from same table as another subquery
WITH i AS (
SELECT
x.a AS a
FROM x AS x
), j AS (
SELECT
x.a,
x.b
FROM x AS x
), k AS (
SELECT
j.a,
j.b
FROM j AS j
)
SELECT
i.a,
k.b
FROM i AS i
LEFT JOIN k AS k
ON i.a = k.a;
SELECT x.a AS a, x_2.b AS b FROM x AS x LEFT JOIN x AS x_2 ON x.a = x_2.a;

# title: Outer select joins on inner select join
WITH i AS (
SELECT
x.a AS a
FROM y AS y
JOIN x AS x
ON y.b = x.b
)
SELECT
x.a AS a
FROM x AS x
LEFT JOIN i AS i
ON x.a = i.a;
WITH i AS (SELECT x.a AS a FROM y AS y JOIN x AS x ON y.b = x.b) SELECT x.a AS a FROM x AS x LEFT JOIN i AS i ON x.a = i.a;
3 changes: 3 additions & 0 deletions tests/fixtures/optimizer/pushdown_projections.sql
Original file line number Diff line number Diff line change
Expand Up @@ -54,3 +54,6 @@ WITH t1 AS (SELECT q.cola AS cola FROM UNNEST(ARRAY(STRUCT(1 AS cola, 'test' AS

SELECT x FROM VALUES(1, 2) AS q(x, y);
SELECT q.x AS x FROM (VALUES (1, 2)) AS q(x, y);

SELECT i.a FROM x AS i LEFT JOIN (SELECT a, b FROM (SELECT a, b FROM x)) AS j ON i.a = j.a;
SELECT i.a AS a FROM x AS i LEFT JOIN (SELECT _q_0.a AS a FROM (SELECT x.a AS a FROM x AS x) AS _q_0) AS j ON i.a = j.a;

0 comments on commit e30e21b

Please sign in to comment.