-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-26065][SQL] Change query hint from a LogicalPlan
to a field
#23036
Changes from 5 commits
fce106d
785a423
93f33d9
ee0c844
470d682
f51e31d
17b7cce
97377dd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ import scala.collection.mutable | |
import org.apache.spark.internal.Logging | ||
import org.apache.spark.sql.catalyst.expressions.{And, Attribute, AttributeSet, Expression, PredicateHelper} | ||
import org.apache.spark.sql.catalyst.plans.{Inner, InnerLike, JoinType} | ||
import org.apache.spark.sql.catalyst.plans.logical.{BinaryNode, Join, LogicalPlan, Project} | ||
import org.apache.spark.sql.catalyst.plans.logical._ | ||
import org.apache.spark.sql.catalyst.rules.Rule | ||
import org.apache.spark.sql.internal.SQLConf | ||
|
||
|
@@ -40,24 +40,33 @@ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { | |
if (!conf.cboEnabled || !conf.joinReorderEnabled) { | ||
plan | ||
} else { | ||
// Use a map to track the hints on the join items. If a join relation turns out unchanged | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How do we define "unchanged"? If |
||
// at the end of the join reorder, we can apply the original hint back to it if any. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This needs a few test cases to ensure that it works as expected. |
||
val hintMap = new mutable.HashMap[LogicalPlan, HintInfo] | ||
val result = plan transformDown { | ||
// Start reordering with a joinable item, which is an InnerLike join with conditions. | ||
case j @ Join(_, _, _: InnerLike, Some(cond)) => | ||
reorder(j, j.output) | ||
case p @ Project(projectList, Join(_, _, _: InnerLike, Some(cond))) | ||
case j @ Join(_, _, _: InnerLike, Some(cond), _) => | ||
reorder(j, j.output, hintMap) | ||
case p @ Project(projectList, Join(_, _, _: InnerLike, Some(cond), _)) | ||
if projectList.forall(_.isInstanceOf[Attribute]) => | ||
reorder(p, p.output) | ||
reorder(p, p.output, hintMap) | ||
} | ||
|
||
// After reordering is finished, convert OrderedJoin back to Join | ||
result transformDown { | ||
case OrderedJoin(left, right, jt, cond) => Join(left, right, jt, cond) | ||
// After reordering is finished, convert OrderedJoin back to Join. | ||
// Note that this needs to be done bottom-up to make sure the hints can be mapped to any | ||
// unchanged relations. | ||
result transformUp { | ||
case OrderedJoin(left, right, jt, cond) => | ||
Join(left, right, jt, cond, | ||
JoinHint(hintMap.get(left), hintMap.get(right))) | ||
} | ||
} | ||
} | ||
|
||
private def reorder(plan: LogicalPlan, output: Seq[Attribute]): LogicalPlan = { | ||
val (items, conditions) = extractInnerJoins(plan) | ||
private def reorder( | ||
plan: LogicalPlan, | ||
output: Seq[Attribute], | ||
hintMap: mutable.HashMap[LogicalPlan, HintInfo]): LogicalPlan = { | ||
val (items, conditions) = extractInnerJoins(plan, hintMap) | ||
val result = | ||
// Do reordering if the number of items is appropriate and join conditions exist. | ||
// We also need to check if costs of all items can be evaluated. | ||
|
@@ -75,27 +84,31 @@ object CostBasedJoinReorder extends Rule[LogicalPlan] with PredicateHelper { | |
* Extracts items of consecutive inner joins and join conditions. | ||
* This method works for bushy trees and left/right deep trees. | ||
*/ | ||
private def extractInnerJoins(plan: LogicalPlan): (Seq[LogicalPlan], Set[Expression]) = { | ||
private def extractInnerJoins( | ||
plan: LogicalPlan, | ||
hintMap: mutable.HashMap[LogicalPlan, HintInfo]): (Seq[LogicalPlan], Set[Expression]) = { | ||
plan match { | ||
case Join(left, right, _: InnerLike, Some(cond)) => | ||
val (leftPlans, leftConditions) = extractInnerJoins(left) | ||
val (rightPlans, rightConditions) = extractInnerJoins(right) | ||
case Join(left, right, _: InnerLike, Some(cond), hint) => | ||
hint.leftHint.map(hintMap.put(left, _)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For a purely side-effecting function, use `foreach` instead of `map` (e.g. `hint.leftHint.foreach(hintMap.put(left, _))`). |
||
hint.rightHint.map(hintMap.put(right, _)) | ||
val (leftPlans, leftConditions) = extractInnerJoins(left, hintMap) | ||
val (rightPlans, rightConditions) = extractInnerJoins(right, hintMap) | ||
(leftPlans ++ rightPlans, splitConjunctivePredicates(cond).toSet ++ | ||
leftConditions ++ rightConditions) | ||
case Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond))) | ||
case Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond), _)) | ||
if projectList.forall(_.isInstanceOf[Attribute]) => | ||
extractInnerJoins(j) | ||
extractInnerJoins(j, hintMap) | ||
case _ => | ||
(Seq(plan), Set()) | ||
} | ||
} | ||
|
||
private def replaceWithOrderedJoin(plan: LogicalPlan): LogicalPlan = plan match { | ||
case j @ Join(left, right, jt: InnerLike, Some(cond)) => | ||
case j @ Join(left, right, jt: InnerLike, Some(cond), _) => | ||
val replacedLeft = replaceWithOrderedJoin(left) | ||
val replacedRight = replaceWithOrderedJoin(right) | ||
OrderedJoin(replacedLeft, replacedRight, jt, Some(cond)) | ||
case p @ Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond))) => | ||
case p @ Project(projectList, j @ Join(_, _, _: InnerLike, Some(cond), _)) => | ||
p.copy(child = replaceWithOrderedJoin(j)) | ||
case _ => | ||
plan | ||
|
@@ -295,7 +308,7 @@ object JoinReorderDP extends PredicateHelper with Logging { | |
} else { | ||
(otherPlan, onePlan) | ||
} | ||
val newJoin = Join(left, right, Inner, joinConds.reduceOption(And)) | ||
val newJoin = Join(left, right, Inner, joinConds.reduceOption(And), JoinHint.NONE) | ||
val collectedJoinConds = joinConds ++ oneJoinPlan.joinConds ++ otherJoinPlan.joinConds | ||
val remainingConds = conditions -- collectedJoinConds | ||
val neededAttr = AttributeSet(remainingConds.flatMap(_.references)) ++ topOutput | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -115,6 +115,7 @@ abstract class Optimizer(sessionCatalog: SessionCatalog) | |
// However, because we also use the analyzer to canonicalized queries (for view definition), | ||
// we do not eliminate subqueries or compute current time in the analyzer. | ||
Batch("Finish Analysis", Once, | ||
EliminateResolvedHint, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Also add it to `nonExcludableRules`. |
||
EliminateSubqueryAliases, | ||
EliminateView, | ||
ReplaceExpressions, | ||
|
@@ -356,7 +357,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { | |
// not allowed to use the same attributes. We use a blacklist to prevent us from creating a | ||
// situation in which this happens; the rule will only remove an alias if its child | ||
// attribute is not on the black list. | ||
case Join(left, right, joinType, condition) => | ||
case Join(left, right, joinType, condition, hint) => | ||
val newLeft = removeRedundantAliases(left, blacklist ++ right.outputSet) | ||
val newRight = removeRedundantAliases(right, blacklist ++ newLeft.outputSet) | ||
val mapping = AttributeMap( | ||
|
@@ -365,7 +366,7 @@ object RemoveRedundantAliases extends Rule[LogicalPlan] { | |
val newCondition = condition.map(_.transform { | ||
case a: Attribute => mapping.getOrElse(a, a) | ||
}) | ||
Join(newLeft, newRight, joinType, newCondition) | ||
Join(newLeft, newRight, joinType, newCondition, hint) | ||
|
||
case _ => | ||
// Remove redundant aliases in the subtree(s). | ||
|
@@ -460,7 +461,7 @@ object LimitPushDown extends Rule[LogicalPlan] { | |
// on both sides if it is applied multiple times. Therefore: | ||
// - If one side is already limited, stack another limit on top if the new limit is smaller. | ||
// The redundant limit will be collapsed by the CombineLimits rule. | ||
case LocalLimit(exp, join @ Join(left, right, joinType, _)) => | ||
case LocalLimit(exp, join @ Join(left, right, joinType, _, _)) => | ||
val newJoin = joinType match { | ||
case RightOuter => join.copy(right = maybePushLocalLimit(exp, right)) | ||
case LeftOuter => join.copy(left = maybePushLocalLimit(exp, left)) | ||
|
@@ -578,7 +579,7 @@ object ColumnPruning extends Rule[LogicalPlan] { | |
p.copy(child = g.copy(child = newChild, unrequiredChildIndex = unrequiredIndices)) | ||
|
||
// Eliminate unneeded attributes from right side of a Left Existence Join. | ||
case j @ Join(_, right, LeftExistence(_), _) => | ||
case j @ Join(_, right, LeftExistence(_), _, _) => | ||
j.copy(right = prunedChild(right, j.references)) | ||
|
||
// all the columns will be used to compare, so we can't prune them | ||
|
@@ -792,7 +793,7 @@ object InferFiltersFromConstraints extends Rule[LogicalPlan] | |
filter | ||
} | ||
|
||
case join @ Join(left, right, joinType, conditionOpt) => | ||
case join @ Join(left, right, joinType, conditionOpt, _) => | ||
joinType match { | ||
// For inner join, we can infer additional filters for both sides. LeftSemi is kind of an | ||
// inner join, it just drops the right side in the final output. | ||
|
@@ -919,7 +920,6 @@ object RemoveRedundantSorts extends Rule[LogicalPlan] { | |
def canEliminateSort(plan: LogicalPlan): Boolean = plan match { | ||
case p: Project => p.projectList.forall(_.deterministic) | ||
case f: Filter => f.condition.deterministic | ||
case _: ResolvedHint => true | ||
case _ => false | ||
} | ||
} | ||
|
@@ -1094,7 +1094,6 @@ object PushDownPredicate extends Rule[LogicalPlan] with PredicateHelper { | |
// Note that some operators (e.g. project, aggregate, union) are being handled separately | ||
// (earlier in this rule). | ||
case _: AppendColumns => true | ||
case _: ResolvedHint => true | ||
case _: Distinct => true | ||
case _: Generate => true | ||
case _: Pivot => true | ||
|
@@ -1179,7 +1178,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { | |
|
||
def apply(plan: LogicalPlan): LogicalPlan = plan transform { | ||
// push the where condition down into join filter | ||
case f @ Filter(filterCondition, Join(left, right, joinType, joinCondition)) => | ||
case f @ Filter(filterCondition, Join(left, right, joinType, joinCondition, hint)) => | ||
val (leftFilterConditions, rightFilterConditions, commonFilterCondition) = | ||
split(splitConjunctivePredicates(filterCondition), left, right) | ||
joinType match { | ||
|
@@ -1193,7 +1192,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { | |
commonFilterCondition.partition(canEvaluateWithinJoin) | ||
val newJoinCond = (newJoinConditions ++ joinCondition).reduceLeftOption(And) | ||
|
||
val join = Join(newLeft, newRight, joinType, newJoinCond) | ||
val join = Join(newLeft, newRight, joinType, newJoinCond, hint) | ||
if (others.nonEmpty) { | ||
Filter(others.reduceLeft(And), join) | ||
} else { | ||
|
@@ -1205,7 +1204,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { | |
val newRight = rightFilterConditions. | ||
reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) | ||
val newJoinCond = joinCondition | ||
val newJoin = Join(newLeft, newRight, RightOuter, newJoinCond) | ||
val newJoin = Join(newLeft, newRight, RightOuter, newJoinCond, hint) | ||
|
||
(leftFilterConditions ++ commonFilterCondition). | ||
reduceLeftOption(And).map(Filter(_, newJoin)).getOrElse(newJoin) | ||
|
@@ -1215,7 +1214,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { | |
reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) | ||
val newRight = right | ||
val newJoinCond = joinCondition | ||
val newJoin = Join(newLeft, newRight, joinType, newJoinCond) | ||
val newJoin = Join(newLeft, newRight, joinType, newJoinCond, hint) | ||
|
||
(rightFilterConditions ++ commonFilterCondition). | ||
reduceLeftOption(And).map(Filter(_, newJoin)).getOrElse(newJoin) | ||
|
@@ -1225,7 +1224,7 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { | |
} | ||
|
||
// push down the join filter into sub query scanning if applicable | ||
case j @ Join(left, right, joinType, joinCondition) => | ||
case j @ Join(left, right, joinType, joinCondition, hint) => | ||
val (leftJoinConditions, rightJoinConditions, commonJoinCondition) = | ||
split(joinCondition.map(splitConjunctivePredicates).getOrElse(Nil), left, right) | ||
|
||
|
@@ -1238,23 +1237,23 @@ object PushPredicateThroughJoin extends Rule[LogicalPlan] with PredicateHelper { | |
reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) | ||
val newJoinCond = commonJoinCondition.reduceLeftOption(And) | ||
|
||
Join(newLeft, newRight, joinType, newJoinCond) | ||
Join(newLeft, newRight, joinType, newJoinCond, hint) | ||
case RightOuter => | ||
// push down the left side only join filter for left side sub query | ||
val newLeft = leftJoinConditions. | ||
reduceLeftOption(And).map(Filter(_, left)).getOrElse(left) | ||
val newRight = right | ||
val newJoinCond = (rightJoinConditions ++ commonJoinCondition).reduceLeftOption(And) | ||
|
||
Join(newLeft, newRight, RightOuter, newJoinCond) | ||
Join(newLeft, newRight, RightOuter, newJoinCond, hint) | ||
case LeftOuter | LeftAnti | ExistenceJoin(_) => | ||
// push down the right side only join filter for right sub query | ||
val newLeft = left | ||
val newRight = rightJoinConditions. | ||
reduceLeftOption(And).map(Filter(_, right)).getOrElse(right) | ||
val newJoinCond = (leftJoinConditions ++ commonJoinCondition).reduceLeftOption(And) | ||
|
||
Join(newLeft, newRight, joinType, newJoinCond) | ||
Join(newLeft, newRight, joinType, newJoinCond, hint) | ||
case FullOuter => j | ||
case NaturalJoin(_) => sys.error("Untransformed NaturalJoin node") | ||
case UsingJoin(_, _) => sys.error("Untransformed Using join node") | ||
|
@@ -1310,7 +1309,7 @@ object CheckCartesianProducts extends Rule[LogicalPlan] with PredicateHelper { | |
if (SQLConf.get.crossJoinEnabled) { | ||
plan | ||
} else plan transform { | ||
case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _) | ||
case j @ Join(left, right, Inner | LeftOuter | RightOuter | FullOuter, _, _) | ||
if isCartesianProduct(j) => | ||
throw new AnalysisException( | ||
s"""Detected implicit cartesian product for ${j.joinType.sql} join between logical plans | ||
|
@@ -1449,7 +1448,7 @@ object ReplaceIntersectWithSemiJoin extends Rule[LogicalPlan] { | |
case Intersect(left, right, false) => | ||
assert(left.output.size == right.output.size) | ||
val joinCond = left.output.zip(right.output).map { case (l, r) => EqualNullSafe(l, r) } | ||
Distinct(Join(left, right, LeftSemi, joinCond.reduceLeftOption(And))) | ||
Distinct(Join(left, right, LeftSemi, joinCond.reduceLeftOption(And), JoinHint.NONE)) | ||
} | ||
} | ||
|
||
|
@@ -1470,7 +1469,7 @@ object ReplaceExceptWithAntiJoin extends Rule[LogicalPlan] { | |
case Except(left, right, false) => | ||
assert(left.output.size == right.output.size) | ||
val joinCond = left.output.zip(right.output).map { case (l, r) => EqualNullSafe(l, r) } | ||
Distinct(Join(left, right, LeftAnti, joinCond.reduceLeftOption(And))) | ||
Distinct(Join(left, right, LeftAnti, joinCond.reduceLeftOption(And), JoinHint.NONE)) | ||
} | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: two more spaces