-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-29375][SPARK-28940][SPARK-32041][SQL] Whole plan exchange and subquery reuse #28885
Changes from all commits
bf29f1a
c346387
5e6ae6b
5b4b719
7187ebd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.util | ||
|
||
import scala.collection.mutable.{ArrayBuffer, Map} | ||
|
||
import org.apache.spark.sql.catalyst.plans.QueryPlan | ||
import org.apache.spark.sql.types.StructType | ||
|
||
/**
 * Map of plans, bucketed by schema, that can be used to find reuse possibilities.
 *
 * To avoid costly canonicalization of a plan:
 * - we use its schema first to check if it can be replaced with a reused one at all
 * - we compare it (via `sameResult`) only with plans already registered under the same schema,
 *   so a plan whose schema has not been seen before is stored without any comparison
 *
 * @tparam T the type of the node we want to reuse
 * @tparam T2 the type of the canonicalized node; it looks unused but it is the parameter type of
 *            `sameResult`, so it is required for `sameSchema.find(plan.sameResult)` to compile
 */
class ReuseMap[T <: T2, T2 <: QueryPlan[T2]] {
  // Registered plans grouped by their schema; each bucket holds plans that did not match
  // any earlier plan of the same schema.
  private val map = Map[StructType, ArrayBuffer[T]]()

  /**
   * Find a matching plan with the same canonicalized form in the map or add the new plan to the
   * map otherwise.
   *
   * @param plan the input plan
   * @return the matching plan or the input plan
   */
  private def lookupOrElseAdd(plan: T): T = {
    val sameSchema = map.getOrElseUpdate(plan.schema, ArrayBuffer())
    // The first registered plan producing the same result wins; if there is none, the input
    // plan becomes a new candidate in its schema bucket and is returned unchanged.
    sameSchema.find(plan.sameResult).getOrElse {
      sameSchema += plan
      plan
    }
  }

  /**
   * Find a matching plan with the same canonicalized form in the map and apply `f` on it or add
   * the new plan to the map otherwise.
   *
   * If the match is the very same instance as `plan` (reference equality), `plan` is returned
   * as is and `f` is not applied.
   *
   * @param plan the input plan
   * @param f the function to apply
   * @tparam R the type of the reuse node (deliberately not named `T2`, which would shadow the
   *           class type parameter)
   * @return the matching plan with `f` applied or the input plan
   */
  def reuseOrElseAdd[R >: T](plan: T, f: T => R): R = {
    // NOTE(review): the PR discussion attached to this method suggested a different name as a
    // nit; the author agreed but deferred the rename pending more feedback because the method
    // had already been renamed a few times, so the name is kept as is here.
    val found = lookupOrElseAdd(plan)
    if (found eq plan) plan else f(found)
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one or more | ||
* contributor license agreements. See the NOTICE file distributed with | ||
* this work for additional information regarding copyright ownership. | ||
* The ASF licenses this file to You under the Apache License, Version 2.0 | ||
* (the "License"); you may not use this file except in compliance with | ||
* the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package org.apache.spark.sql.util | ||
|
||
import org.apache.spark.SparkFunSuite | ||
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference} | ||
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode} | ||
import org.apache.spark.sql.types.IntegerType | ||
|
||
/**
 * Minimal logical plan node used as the reusable plan in the `ReuseMap` tests.
 *
 * @param children the child nodes of this plan
 * @param output the attributes this node reports as its output
 */
case class TestNode(children: Seq[TestNode], output: Seq[Attribute]) extends LogicalPlan {
  // Build the copy from `newChildren`. The previous `copy(children = children)` ignored the
  // argument and silently kept the old children, making child replacement a no-op.
  // The per-element cast is required because the children arrive typed as `LogicalPlan` while
  // this node only accepts `TestNode` children; anything else fails with a ClassCastException.
  override protected def withNewChildrenInternal(
      newChildren: IndexedSeq[LogicalPlan]): LogicalPlan =
    copy(children = newChildren.map(_.asInstanceOf[TestNode]))
}
/**
 * Unary wrapper node that the tests use as the "reuse" replacement of a registered plan.
 *
 * @param child the plan being wrapped
 */
case class TestReuseNode(child: LogicalPlan) extends UnaryNode {
  /** Exposes exactly the output attributes of the wrapped plan. */
  override def output: Seq[Attribute] = {
    child.output
  }

  /** Creates a new wrapper around `newChild`. */
  override protected def withNewChildInternal(newChild: LogicalPlan): LogicalPlan = {
    TestReuseNode(newChild)
  }
}
|
||
/**
 * Tests `ReuseMap.reuseOrElseAdd` against a map that already contains `leafNode1` and
 * `parentNode1`.
 */
class ReuseMapSuite extends SparkFunSuite {
  private val leafNode1 = TestNode(Nil, Seq(AttributeReference("a", IntegerType)()))
  private val leafNode2 = TestNode(Nil, Seq(AttributeReference("b", IntegerType)()))
  private val parentNode1 = TestNode(Seq(leafNode1), Seq(AttributeReference("a", IntegerType)()))
  private val parentNode2 = TestNode(Seq(leafNode2), Seq(AttributeReference("b", IntegerType)()))

  /** Wraps a node the way a reuse rule would when a match is found. */
  private def reuse(testNode: TestNode) = TestReuseNode(testNode)

  /** Creates a fresh map and registers `leafNode1` and `parentNode1` in it, in that order. */
  private def populatedReuseMap(): ReuseMap[TestNode, LogicalPlan] = {
    val reuseMap = new ReuseMap[TestNode, LogicalPlan]()
    reuseMap.reuseOrElseAdd(leafNode1, reuse)
    reuseMap.reuseOrElseAdd(parentNode1, reuse)
    reuseMap
  }

  test("no reuse if same instance") {
    val registered = populatedReuseMap()

    // Looking up the very instances that were registered must hand them back unwrapped.
    Seq(leafNode1, parentNode1).foreach { node =>
      assert(registered.reuseOrElseAdd(node, reuse) == node)
    }
  }

  test("reuse if different instance with same canonicalized plan") {
    val registered = populatedReuseMap()

    // A clone is a different instance of the same plan, so the registered node must be
    // returned wrapped in the reuse node.
    Seq(leafNode1, parentNode1).foreach { node =>
      val duplicate = node.clone.asInstanceOf[TestNode]
      assert(registered.reuseOrElseAdd(duplicate, reuse) == reuse(node))
    }
  }

  test("no reuse if different canonicalized plan") {
    val registered = populatedReuseMap()

    // Nodes built on attribute "b" do not match anything registered and come back unchanged.
    Seq(leafNode2, parentNode2).foreach { node =>
      assert(registered.reuseOrElseAdd(node, reuse) == node)
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It might look like `T2` is not required, but it is "silently" used at `sameSchema.find(plan.sameResult)` and Scala would complain without it.