-
Notifications
You must be signed in to change notification settings - Fork 24.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
SQL: Resolve attributes recursively for improved subquery support #69765
Changes from 3 commits
aba2f63
39e4cf1
015acb3
3b755be
f739e39
932c01c
fa49615
1cb642d
50ab4d1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -316,10 +316,11 @@ private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailur | |
Map<Expression, Node<?>> missing = new LinkedHashMap<>(); | ||
|
||
o.order().forEach(oe -> { | ||
Expression e = oe.child(); | ||
final Expression e = oe.child(); | ||
final Expression resolvedE = attributeRefs.getOrDefault(e, e); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. See my comment above - it's misleading for a map lookup to do resolution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
||
// aggregates are allowed | ||
if (Functions.isAggregate(attributeRefs.getOrDefault(e, e))) { | ||
if (Functions.isAggregate(resolvedE)) { | ||
return; | ||
} | ||
|
||
|
@@ -340,8 +341,12 @@ private static boolean checkGroupByOrder(LogicalPlan p, Set<Failure> localFailur | |
// e.g.: if "GROUP BY f2(f1(field))" you can "ORDER BY f4(f3(f2(f1(field))))" | ||
// | ||
// Also, make sure to compare attributes directly | ||
if (e.anyMatch(expression -> Expressions.anyMatch(groupingAndMatchingAggregatesAliases, | ||
g -> expression.semanticEquals(expression instanceof Attribute ? Expressions.attribute(g) : g)))) { | ||
if (resolvedE.anyMatch(expression -> Expressions.anyMatch(groupingAndMatchingAggregatesAliases, | ||
g -> { | ||
Expression resolvedG = attributeRefs.getOrDefault(g, g); | ||
resolvedG = expression instanceof Attribute ? Expressions.attribute(resolvedG) : resolvedG; | ||
return expression.semanticEquals(resolvedG); | ||
}))) { | ||
return; | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -131,8 +131,8 @@ protected Iterable<RuleExecutor<LogicalPlan>.Batch> batches() { | |
); | ||
|
||
Batch refs = new Batch("Replace References", Limiter.ONCE, | ||
new ReplaceReferenceAttributeWithSource() | ||
); | ||
new ReplaceReferenceAttributeWithSource() | ||
); | ||
|
||
Batch operators = new Batch("Operator Optimization", | ||
// combining | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Noise There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
|
@@ -222,7 +222,7 @@ public LogicalPlan apply(LogicalPlan plan) { | |
AttributeMap.Builder<Expression> builder = AttributeMap.builder(); | ||
// collect aliases | ||
plan.forEachExpressionUp(Alias.class, a -> builder.put(a.toAttribute(), a.child())); | ||
final Map<Attribute, Expression> collectRefs = builder.build(); | ||
final AttributeMap<Expression> collectRefs = builder.build(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Noise. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
java.util.function.Function<ReferenceAttribute, Expression> replaceReference = r -> collectRefs.getOrDefault(r, r); | ||
|
||
plan = plan.transformUp(p -> { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -600,10 +600,12 @@ else if (target.foldable()) { | |
else { | ||
GroupByKey matchingGroup = null; | ||
if (groupingContext != null) { | ||
matchingGroup = groupingContext.groupFor(target); | ||
final Expression resolvedTarget = queryC.aliases().getOrDefault(target, target); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This bit of code is confusing due to the leftovers. Same for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done |
||
matchingGroup = groupingContext.groupFor(resolvedTarget); | ||
Check.notNull(matchingGroup, "Cannot find group [{}]", Expressions.name(ne)); | ||
|
||
queryC = queryC.addColumn(new GroupByRef(matchingGroup.id(), null, isDateBased(ne.dataType())), id); | ||
queryC = queryC.addColumn( | ||
new GroupByRef(matchingGroup.id(), null, isDateBased(ne.dataType())), Expressions.id(resolvedTarget)); | ||
} | ||
// fallback | ||
else { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2599,8 +2599,7 @@ public void testSubqueryFilterOrderByAlias() throws Exception { | |
"WHERE i IS NOT NULL " + | ||
"ORDER BY i"); | ||
} | ||
|
||
@AwaitsFix(bugUrl = "follow-up to https://github.com/elastic/elasticsearch/pull/67216") | ||
|
||
public void testSubqueryGroupByFilterAndOrderByByAlias() throws Exception { | ||
PhysicalPlan p = optimizeAndPlan("SELECT i FROM " + | ||
"( SELECT int AS i FROM test ) " + | ||
|
@@ -2658,4 +2657,29 @@ public void testSubqueryWithAliasOrderByAlias() throws Exception { | |
"( SELECT int AS i FROM test ) AS s " + | ||
"ORDER BY s.i > 10"); | ||
} | ||
|
||
public void testReferenceResolutionInSubqueries() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please break this method into multiple tests that indicate the difference between them in their name. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done |
||
optimizeAndPlan("SELECT i AS j FROM ( SELECT int AS i FROM test) ORDER BY j"); | ||
optimizeAndPlan("SELECT j AS k FROM (SELECT i AS j FROM ( SELECT int AS i FROM test)) ORDER BY k"); | ||
optimizeAndPlan("SELECT int_group AS g, min_date AS d " + | ||
"FROM (" + | ||
" SELECT int % 2 AS int_group, MIN(date) AS min_date " + | ||
" FROM test WHERE date > '1970-01-01'::datetime GROUP BY int_group" + | ||
") " + | ||
"ORDER BY d DESC"); | ||
optimizeAndPlan("SELECT int_group AS g, min_date AS d " + | ||
"FROM (" + | ||
" SELECT int % 2 AS int_group, MIN(date) AS min_date " + | ||
" FROM test WHERE date > '1970-01-01'::datetime GROUP BY int_group " + | ||
")" + | ||
"ORDER BY g DESC"); | ||
optimizeAndPlan("SELECT i AS j FROM ( SELECT int AS i FROM test) GROUP BY j"); | ||
optimizeAndPlan("SELECT j AS k FROM (SELECT i AS j FROM ( SELECT int AS i FROM test)) GROUP BY k"); | ||
optimizeAndPlan("SELECT g FROM (SELECT date AS f, int AS g FROM test) WHERE g IS NOT NULL GROUP BY g ORDER BY g ASC"); | ||
} | ||
|
||
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/69758") | ||
public void testFilterAfterGroupBy() { | ||
optimizeAndPlan("SELECT j AS k FROM (SELECT i AS j FROM ( SELECT int AS i FROM test) GROUP BY j) WHERE j < 5"); | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is backwards.
getOrDefault should delegate to get not vice-versa. The former is an extension of the latter historically and conceptually.
Moreover by making get and getOrDefault recursive the Map contract has been fundamentally broken:
Instead of renaming `get` to `lookup` and modifying the contract, simply introduce a separate method (e.g. `traverse(key)`, `getResolvedValue(key)`, `getRecursive(key)` or something else) to the original map that does the recursive lookup, and use that instead.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think I agree on this as well; I'd prefer to have a dedicated method that does this more complex logic rather than overriding the default `get`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good points, but maybe the solution here is not just a simple rename of the method (`resolve(key)`), but to not implement the `Map<Attribute, E>` interface anymore. The Map contract has been broken since the beginning, because `semanticEquals()` is used instead of `equals()` for key equality checks (which, for example, violates "containsKey(key) returns true if and only if this map contains a mapping for a key k such that (key==null ? k==null : key.equals(k)).")

After that, no `get()`s are required anymore, unless you can think of a place where recursive key resolution would actually cause harm (if that is the case, I'd say the AttributeMap building should be fixed instead, to limit the scope from which the attributes are picked up).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`AttributeMap` is a `Map` for `AttributeWrapper`s; `get` and `contains` rely on `equals`. For convenience, the underlying class is not exposed; instead `Attribute` is used, with the wrapping happening automatically.

It's similar to `IdentityHashMap`, for example.

`resolve(key)` is a compound method that relies on `get`; the latter might not be used publicly now, however I see too many downsides in removing it (and the `Map` contract) instead of keeping it.

For example, `get`/`put` do per-key read/write, but `resolve`, which does a multi-key read, has no write counterpart; removing/replacing `get` breaks too many assumptions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I am not sure about the downsides or the broken assumptions, since we barely utilize anything from the Map interface, practically only some read methods: `isEmpty`, `size` and `get`/`getOrDefault` (which can be renamed to `resolve`). The building of the map (`put`s) happens through the `Builder` methods today; here is what a full minimization of the public methods would look like (at this point the `AttributeMap` is rather an `AttributeResolver`): palesz@fe814ec

Nonetheless, I can keep the current `Map` implementation and create a new `resolve` method for the recursive resolution, switch out all the `get`/`getOrDefault` calls to `resolve` and leave the `get`s there among the other unused methods.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done