diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStage.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStage.scala index a896d83671cbe..617f80fde6ff6 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStage.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStage.scala @@ -33,7 +33,10 @@ import org.apache.spark.util.ThreadUtils /** * In adaptive execution mode, an execution plan is divided into multiple QueryStages. Each - * QueryStage is a sub-tree that runs in a single stage. + * QueryStage is a sub-tree that runs in a single stage. Before executing the current stage, we + * will first submit all its child stages, wait for their completion and collect their statistics. + * Based on the collected data, we can potentially optimize the execution plan in the current + * stage, change the number of reducers and do other optimizations. */ abstract class QueryStage extends UnaryExecNode { diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageInput.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageInput.scala index b8d68d34c5bc6..8c33e83a91d9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageInput.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/adaptive/QueryStageInput.scala @@ -25,13 +25,13 @@ import org.apache.spark.sql.catalyst.plans.physical.{HashPartitioning, Partition import org.apache.spark.sql.execution._ /** - * QueryStageInput is the leaf node of a QueryStage and serves as its input. It is responsible for - * changing the output partition based on the need of its QueryStage. It gets the ShuffledRowRDD + * QueryStageInput is the leaf node of a QueryStage and serves as its input. A QueryStage knows + * its child stages by collecting all the QueryStageInputs. 
For a ShuffleQueryStageInput, it + * controls how to read the ShuffledRowRDD generated by its child stage. It gets the ShuffledRowRDD * from its child stage and creates a new ShuffledRowRDD with different partitions by specifying - * an optional array of partition start indices. For example, a ShuffledQueryStage can be reused - * by two different QueryStages. One QueryStageInput can let the first task read partition 0 to 3, - * while in another stage, the QueryStageInput can let the first task read partition 0 to 1. - * A QueryStage knows its child stages by collecting all the QueryStageInputs. + * an array of partition start indices. For example, a ShuffledQueryStage can be reused by two + * different QueryStages. One QueryStageInput can let the first task read partition 0 to 3, while + * in another stage, the QueryStageInput can let the first task read partition 0 to 1. */ abstract class QueryStageInput extends LeafExecNode {