From 8345d6f0dc07d897983cfb1c8002399ebd188e3e Mon Sep 17 00:00:00 2001 From: kasiafi <30203062+kasiafi@users.noreply.github.com> Date: Sun, 2 Aug 2020 15:27:15 +0200 Subject: [PATCH] Handle reused symbols in ProjectNode Fixes processing of ProjectNode with reused symbols In UnaliasSymbolReferences: Assignment of a form `s -> x` establishes new semantics for symbol s. It is possible though that symbol `s` is present in the source plan, and represents the same or different semantics. As a consequence, any symbol mapping derived from the source plan involving symbol `s` becomes potentially invalid, e.g. `s -> y` or `y -> s` refer to the old semantics of symbol `s`. In such case, the underlying mappings are only used to map projection assignments' values to ensure consistency with the source plan. They aren't used to map projection outputs to avoid: - errors from duplicate assignments - incorrect results from mixed semantics of symbols Also, the underlying mappings aren't passed up the plan, and new mappings aren't derived from projection assignments (with the exception for "deduplicating" mappings for repeated assignments). This can be thought of as a "cut-off" at the point of potentially changed semantics. Note: the issue of ambiguous symbols does not apply to symbols involved in context (correlation) mapping. Those symbols are supposed to represent constant semantics throughout the plan. --- .../UnaliasSymbolReferences.java | 45 ++++++++++++++++--- 1 file changed, 39 insertions(+), 6 deletions(-) diff --git a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java index 9f8a54128aa8..e8b92b6246be 100644 --- a/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java +++ b/presto-main/src/main/java/io/prestosql/sql/planner/optimizations/UnaliasSymbolReferences.java @@ -18,6 +18,7 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.collect.ListMultimap; +import com.google.common.collect.Sets; import io.prestosql.Session; import io.prestosql.execution.warnings.WarningCollector; import io.prestosql.metadata.Metadata; @@ -563,12 +564,41 @@ public PlanAndMappings visitFilter(FilterNode node, UnaliasContext context) public PlanAndMappings visitProject(ProjectNode node, UnaliasContext context) { PlanAndMappings rewrittenSource = node.getSource().accept(this, context); + + // Assignment of a form `s -> x` establishes new semantics for symbol s. + // It is possible though that symbol `s` is present in the source plan, and represents the same or different semantics. + // As a consequence, any symbol mapping derived from the source plan involving symbol `s` becomes potentially invalid, + // e.g. `s -> y` or `y -> s` refer to the old semantics of symbol `s`. + // In such case, the underlying mappings are only used to map projection assignments' values to ensure consistency with the source plan. + // They aren't used to map projection outputs to avoid: + // - errors from duplicate assignments + // - incorrect results from mixed semantics of symbols + // Also, the underlying mappings aren't passed up the plan, and new mappings aren't derived from projection assignments + // (with the exception for "deduplicating" mappings for repeated assignments). + // This can be thought of as a "cut-off" at the point of potentially changed semantics. + // Note: the issue of ambiguous symbols does not apply to symbols involved in context (correlation) mapping. + // Those symbols are supposed to represent constant semantics throughout the plan. + + Assignments assignments = node.getAssignments(); + Set newlyAssignedSymbols = assignments.filter(output -> !assignments.isIdentity(output)).getSymbols(); + Set symbolsInSourceMapping = ImmutableSet.builder() + .addAll(rewrittenSource.getMappings().keySet()) + .addAll(rewrittenSource.getMappings().values()) + .build(); + Set symbolsInCorrelationMapping = ImmutableSet.builder() + .addAll(context.getCorrelationMapping().keySet()) + .addAll(context.getCorrelationMapping().values()) + .build(); + boolean ambiguousSymbolsPresent = !Sets.intersection(newlyAssignedSymbols, Sets.difference(symbolsInSourceMapping, symbolsInCorrelationMapping)).isEmpty(); + SymbolMapper mapper = new SymbolMapper(rewrittenSource.getMappings()); // canonicalize ProjectNode assignments ImmutableList.Builder> rewrittenAssignments = ImmutableList.builder(); for (Map.Entry assignment : node.getAssignments().entrySet()) { - rewrittenAssignments.add(new SimpleEntry<>(mapper.map(assignment.getKey()), mapper.map(assignment.getValue()))); + rewrittenAssignments.add(new SimpleEntry<>( + ambiguousSymbolsPresent ? assignment.getKey() : mapper.map(assignment.getKey()), + mapper.map(assignment.getValue()))); } // deduplicate assignments @@ -579,10 +609,10 @@ public PlanAndMappings visitProject(ProjectNode node, UnaliasContext context) })); // derive new mappings for ProjectNode output symbols - Map newMapping = mappingFromAssignments(deduplicateAssignments); + Map newMapping = mappingFromAssignments(deduplicateAssignments, ambiguousSymbolsPresent); Map outputMapping = new HashMap<>(); - outputMapping.putAll(mapper.getMapping()); + outputMapping.putAll(ambiguousSymbolsPresent ? context.getCorrelationMapping() : mapper.getMapping()); outputMapping.putAll(newMapping); mapper = new SymbolMapper(outputMapping); @@ -599,14 +629,17 @@ public PlanAndMappings visitProject(ProjectNode node, UnaliasContext context) mapper.getMapping()); } - private Map mappingFromAssignments(Map assignments) + private Map mappingFromAssignments(Map assignments, boolean ambiguousSymbolsPresent) { Map newMapping = new HashMap<>(); Map inputsToOutputs = new HashMap<>(); for (Map.Entry assignment : assignments.entrySet()) { Expression expression = assignment.getValue(); // 1. for trivial symbol projection, map output symbol to input symbol - if (expression instanceof SymbolReference) { + // If the assignment potentially introduces a reused (ambiguous) symbol, do not map output to input + // to avoid mixing semantics. Input symbols represent semantics as in the source plan, + // while output symbols represent newly established semantics. + if (expression instanceof SymbolReference && !ambiguousSymbolsPresent) { Symbol value = Symbol.from(expression); if (!assignment.getKey().equals(value)) { newMapping.put(assignment.getKey(), value); @@ -706,7 +739,7 @@ public PlanAndMappings visitApply(ApplyNode node, UnaliasContext context) })); // derive new mappings for Subquery assignments outputs - Map newMapping = mappingFromAssignments(deduplicateAssignments); + Map newMapping = mappingFromAssignments(deduplicateAssignments, false); Map assignmentsOutputMapping = new HashMap<>(); assignmentsOutputMapping.putAll(mapper.getMapping());