From 033fe6f77fc56e12f6b978f67b8aa487de0f6beb Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 2 Sep 2024 16:42:30 -0700 Subject: [PATCH] fix: Add output to Comet operators equal and hashCode (#902) * fix: Add output to Comet operators equal and hashCode * Update --- .../spark/sql/comet/CometWindowExec.scala | 3 +- .../apache/spark/sql/comet/operators.scala | 36 +++-- .../q47/explain.txt | 145 ++++++++++-------- .../q47/simplified.txt | 8 +- .../q57/explain.txt | 145 ++++++++++-------- .../q57/simplified.txt | 8 +- .../approved-plans-v1_4/q47/explain.txt | 145 ++++++++++-------- .../approved-plans-v1_4/q47/simplified.txt | 8 +- .../approved-plans-v1_4/q57/explain.txt | 145 ++++++++++-------- .../approved-plans-v1_4/q57/simplified.txt | 8 +- .../q47/explain.txt | 145 ++++++++++-------- .../q47/simplified.txt | 8 +- .../q57/explain.txt | 145 ++++++++++-------- .../q57/simplified.txt | 8 +- .../approved-plans-v2_7/q47/explain.txt | 145 ++++++++++-------- .../approved-plans-v2_7/q47/simplified.txt | 8 +- .../approved-plans-v2_7/q57/explain.txt | 145 ++++++++++-------- .../approved-plans-v2_7/q57/simplified.txt | 8 +- 18 files changed, 690 insertions(+), 573 deletions(-) diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala b/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala index 93b516173..bc257014a 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/CometWindowExec.scala @@ -65,6 +65,7 @@ case class CometWindowExec( override def equals(obj: Any): Boolean = { obj match { case other: CometWindowExec => + this.output == other.output && this.windowExpression == other.windowExpression && this.child == other.child && this.partitionSpec == other.partitionSpec && this.orderSpec == other.orderSpec && this.serializedPlanOpt == other.serializedPlanOpt @@ -74,5 +75,5 @@ case class CometWindowExec( } override def hashCode(): Int = - Objects.hashCode(windowExpression, partitionSpec, orderSpec, child) + Objects.hashCode(output, windowExpression, partitionSpec, orderSpec, child) } diff --git a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala index 546d15c49..35ae8ad62 100644 --- a/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala +++ b/spark/src/main/scala/org/apache/spark/sql/comet/operators.scala @@ -428,6 +428,7 @@ case class CometProjectExec( override def equals(obj: Any): Boolean = { obj match { case other: CometProjectExec => + this.output == other.output && this.projectList == other.projectList && this.child == other.child && this.serializedPlanOpt == other.serializedPlanOpt @@ -436,7 +437,7 @@ case class CometProjectExec( } } - override def hashCode(): Int = Objects.hashCode(projectList, child) + override def hashCode(): Int = Objects.hashCode(output, projectList, child) override protected def outputExpressions: Seq[NamedExpression] = projectList } @@ -462,6 +463,7 @@ case class CometFilterExec( override def equals(obj: Any): Boolean = { obj match { case other: CometFilterExec => + this.output == other.output && this.condition == other.condition && this.child == other.child && this.serializedPlanOpt == other.serializedPlanOpt case _ => @@ -469,7 +471,7 @@ case class CometFilterExec( } } - override def hashCode(): Int = Objects.hashCode(condition, child) + override def hashCode(): Int = Objects.hashCode(output, condition, child) override def verboseStringWithOperatorId(): String = { s""" @@ -501,6 +503,7 @@ case class CometSortExec( override def equals(obj: Any): Boolean = { obj match { case other: CometSortExec => + this.output == other.output && this.sortOrder == other.sortOrder && this.child == other.child && this.serializedPlanOpt == other.serializedPlanOpt case _ => @@ -508,7 +511,7 @@ case class CometSortExec( } } - override def hashCode(): Int = Objects.hashCode(sortOrder, child) + override def hashCode(): Int = Objects.hashCode(output, sortOrder, child) override lazy val metrics: Map[String, SQLMetric] = CometMetricNode.baselineMetrics(sparkContext) ++ @@ -539,6 +542,7 @@ case class CometLocalLimitExec( override def equals(obj: Any): Boolean = { obj match { case other: CometLocalLimitExec => + this.output == other.output && this.limit == other.limit && this.child == other.child && this.serializedPlanOpt == other.serializedPlanOpt case _ => @@ -546,7 +550,7 @@ case class CometLocalLimitExec( } } - override def hashCode(): Int = Objects.hashCode(limit: java.lang.Integer, child) + override def hashCode(): Int = Objects.hashCode(output, limit: java.lang.Integer, child) } case class CometGlobalLimitExec( @@ -569,6 +573,7 @@ case class CometGlobalLimitExec( override def equals(obj: Any): Boolean = { obj match { case other: CometGlobalLimitExec => + this.output == other.output && this.limit == other.limit && this.child == other.child && this.serializedPlanOpt == other.serializedPlanOpt case _ => @@ -576,7 +581,7 @@ case class CometGlobalLimitExec( } } - override def hashCode(): Int = Objects.hashCode(limit: java.lang.Integer, child) + override def hashCode(): Int = Objects.hashCode(output, limit: java.lang.Integer, child) } case class CometExpandExec( @@ -599,6 +604,7 @@ case class CometExpandExec( override def equals(obj: Any): Boolean = { obj match { case other: CometExpandExec => + this.output == other.output && this.projections == other.projections && this.child == other.child && this.serializedPlanOpt == other.serializedPlanOpt case _ => @@ -606,7 +612,7 @@ case class CometExpandExec( } } - override def hashCode(): Int = Objects.hashCode(projections, child) + override def hashCode(): Int = Objects.hashCode(output, projections, child) // TODO: support native Expand metrics override lazy val metrics: Map[String, SQLMetric] = Map.empty @@ -638,12 +644,14 @@ case class CometUnionExec( override def equals(obj: Any): Boolean = { obj match { - case other: CometUnionExec => this.children == other.children + case other: CometUnionExec => + this.output == other.output && + this.children == other.children case _ => false } } - override def hashCode(): Int = Objects.hashCode(children) + override def hashCode(): Int = Objects.hashCode(output, children) } case class CometHashAggregateExec( @@ -677,6 +685,7 @@ case class CometHashAggregateExec( override def equals(obj: Any): Boolean = { obj match { case other: CometHashAggregateExec => + this.output == other.output && this.groupingExpressions == other.groupingExpressions && this.aggregateExpressions == other.aggregateExpressions && this.input == other.input && @@ -689,7 +698,7 @@ case class CometHashAggregateExec( } override def hashCode(): Int = - Objects.hashCode(groupingExpressions, aggregateExpressions, input, mode, child) + Objects.hashCode(output, groupingExpressions, aggregateExpressions, input, mode, child) override protected def outputExpressions: Seq[NamedExpression] = resultExpressions } @@ -729,6 +738,7 @@ case class CometHashJoinExec( override def equals(obj: Any): Boolean = { obj match { case other: CometHashJoinExec => + this.output == other.output && this.leftKeys == other.leftKeys && this.rightKeys == other.rightKeys && this.condition == other.condition && @@ -742,7 +752,7 @@ case class CometHashJoinExec( } override def hashCode(): Int = - Objects.hashCode(leftKeys, rightKeys, condition, buildSide, left, right) + Objects.hashCode(output, leftKeys, rightKeys, condition, buildSide, left, right) override lazy val metrics: Map[String, SQLMetric] = CometMetricNode.hashJoinMetrics(sparkContext) @@ -865,6 +875,7 @@ case class CometBroadcastHashJoinExec( override def equals(obj: Any): Boolean = { obj match { case other: CometBroadcastHashJoinExec => + this.output == other.output && this.leftKeys == other.leftKeys && this.rightKeys == other.rightKeys && this.condition == other.condition && @@ -878,7 +889,7 @@ case class CometBroadcastHashJoinExec( } override def hashCode(): Int = - Objects.hashCode(leftKeys, rightKeys, condition, buildSide, left, right) + Objects.hashCode(output, leftKeys, rightKeys, condition, buildSide, left, right) override lazy val metrics: Map[String, SQLMetric] = CometMetricNode.hashJoinMetrics(sparkContext) @@ -918,6 +929,7 @@ case class CometSortMergeJoinExec( override def equals(obj: Any): Boolean = { obj match { case other: CometSortMergeJoinExec => + this.output == other.output && this.leftKeys == other.leftKeys && this.rightKeys == other.rightKeys && this.condition == other.condition && @@ -930,7 +942,7 @@ case class CometSortMergeJoinExec( } override def hashCode(): Int = - Objects.hashCode(leftKeys, rightKeys, condition, left, right) + Objects.hashCode(output, leftKeys, rightKeys, condition, left, right) override lazy val metrics: Map[String, SQLMetric] = CometMetricNode.sortMergeJoinMetrics(sparkContext) diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/explain.txt index 8221d32a3..f6569a3b6 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.store (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.0000 Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18] Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] -(29) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] -(30) CometSort +(30) CometHashAggregate +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] +Keys [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(ss_sales_price#27))] + +(31) CometExchange +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] Arguments: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16], [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, s_store_name#22 ASC NULLS FIRST, s_company_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -(32) Window +(34) Window Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#27, rn#26] -Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#26] +(35) Project [codegen id : 4] +Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#29, rn#28] +Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#28] -(34) BroadcastExchange -Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#26 + 1)] +Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#28 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27] -Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29] +Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] -(37) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(38) CometSort -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(40) CometSort +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16], [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, s_store_name#32 ASC NULLS FIRST, s_company_name#33 ASC NULLS FIRST, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(40) Window -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(42) Window +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [rank(d_year#34, d_moy#35) windowspecdefinition(i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#36], [i_category#30, i_brand#31, s_store_name#32, s_company_name#33], [d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#16 AS sum_sales#35, rn#34] -Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] +(43) Project [codegen id : 6] +Output [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#16 AS sum_sales#37, rn#36] +Input [8]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16, rn#36] -(42) BroadcastExchange -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#34 - 1)] +Right keys [5]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, (rn#36 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#27 AS psum#36, sum_sales#35 AS nsum#37] -Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] +(46) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#29 AS psum#38, sum_sales#37 AS nsum#39] +Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29, i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] -(45) TakeOrderedAndProject -Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] -Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +(47) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/simplified.txt index 2c8192f43..cf4556774 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q47/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_bra ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,s_store_name,s_company_name] #8 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_bra ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/explain.txt index e8a40a0f7..629959102 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.call_center (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.0000 Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17] Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] -(29) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] -(30) CometSort +(30) CometHashAggregate +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] +Keys [5]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23] +Functions [1]: [sum(UnscaledValue(cs_sales_price#25))] + +(31) CometExchange +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, cc_name#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] Arguments: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, cc_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -(32) Window +(34) Window Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] +Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#25, rn#24] -Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#24] +(35) Project [codegen id : 4] +Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#27, rn#26] +Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#26] -(34) BroadcastExchange -Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#24 + 1)] +Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#26 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25] -Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27] +Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] -(37) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(38) CometSort -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15], [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(40) CometSort +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(40) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#31], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(42) Window +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#28, i_brand#29, cc_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#33], [i_category#28, i_brand#29, cc_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#15 AS sum_sales#32, rn#31] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15, rn#31] +(43) Project [codegen id : 6] +Output [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#15 AS sum_sales#34, rn#33] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15, rn#33] -(42) BroadcastExchange -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#31 - 1)] +Right keys [4]: [i_category#28, i_brand#29, cc_name#30, (rn#33 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#25 AS psum#33, sum_sales#32 AS nsum#34] -Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25, i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] +(46) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#27 AS psum#35, sum_sales#34 AS nsum#36] +Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27, i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] -(45) TakeOrderedAndProject -Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] -Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] +(47) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = cs_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/simplified.txt index 96a3fdfa0..39a7ce9d6 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4-spark3_5/q57/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_ ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,cc_name] #8 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,cc_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_ ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt index 8221d32a3..f6569a3b6 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.store (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.0000 Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18] Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] -(29) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] -(30) CometSort +(30) CometHashAggregate +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] +Keys [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(ss_sales_price#27))] + +(31) CometExchange +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] Arguments: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16], [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, s_store_name#22 ASC NULLS FIRST, s_company_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -(32) Window +(34) Window Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#27, rn#26] -Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#26] +(35) Project [codegen id : 4] +Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#29, rn#28] +Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#28] -(34) BroadcastExchange -Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#26 + 1)] +Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#28 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27] -Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29] +Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] -(37) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(38) CometSort -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(40) CometSort +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16], [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, s_store_name#32 ASC NULLS FIRST, s_company_name#33 ASC NULLS FIRST, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(40) Window -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(42) Window +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [rank(d_year#34, d_moy#35) windowspecdefinition(i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#36], [i_category#30, i_brand#31, s_store_name#32, s_company_name#33], [d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#16 AS sum_sales#35, rn#34] -Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] +(43) Project [codegen id : 6] +Output [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#16 AS sum_sales#37, rn#36] +Input [8]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16, rn#36] -(42) BroadcastExchange -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#34 - 1)] +Right keys [5]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, (rn#36 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#27 AS psum#36, sum_sales#35 AS nsum#37] -Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] +(46) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#29 AS psum#38, sum_sales#37 AS nsum#39] +Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29, i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] -(45) TakeOrderedAndProject -Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] -Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +(47) TakeOrderedAndProject +Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, s_store_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt index 2c8192f43..cf4556774 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q47/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_bra ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,s_store_name,s_company_name] #8 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,s_store_name,i_category,i_bra ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt index e8a40a0f7..629959102 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.call_center (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.0000 Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17] Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] -(29) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] -(30) CometSort +(30) CometHashAggregate +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] +Keys [5]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23] +Functions [1]: [sum(UnscaledValue(cs_sales_price#25))] + +(31) CometExchange +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, cc_name#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] Arguments: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, cc_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -(32) Window +(34) Window Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] +Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#25, rn#24] -Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#24] +(35) Project [codegen id : 4] +Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#27, rn#26] +Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#26] -(34) BroadcastExchange -Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#24 + 1)] +Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#26 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25] -Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27] +Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] -(37) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(38) CometSort -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15], [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(40) CometSort +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(40) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#31], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(42) Window +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#28, i_brand#29, cc_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#33], [i_category#28, i_brand#29, cc_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#15 AS sum_sales#32, rn#31] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15, rn#31] +(43) Project [codegen id : 6] +Output [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#15 AS sum_sales#34, rn#33] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15, rn#33] -(42) BroadcastExchange -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#31 - 1)] +Right keys [4]: [i_category#28, i_brand#29, cc_name#30, (rn#33 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#25 AS psum#33, sum_sales#32 AS nsum#34] -Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25, i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] +(46) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#27 AS psum#35, sum_sales#34 AS nsum#36] +Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27, i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] -(45) TakeOrderedAndProject -Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] -Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] +(47) TakeOrderedAndProject +Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, cc_name#13 ASC NULLS FIRST], [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = cs_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt index 96a3fdfa0..39a7ce9d6 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v1_4/q57/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_ ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,cc_name] #8 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,cc_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,cc_name,i_category,i_brand,d_ ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/explain.txt index 3bc9bb6e0..ab2ebd0a1 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.store (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.0000 Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18] Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] -(29) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] -(30) CometSort +(30) CometHashAggregate +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] +Keys [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(ss_sales_price#27))] + +(31) CometExchange +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] Arguments: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16], [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, s_store_name#22 ASC NULLS FIRST, s_company_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -(32) Window +(34) Window Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#27, rn#26] -Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#26] +(35) Project [codegen id : 4] +Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#29, rn#28] +Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#28] -(34) BroadcastExchange -Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#26 + 1)] +Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#28 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27] -Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29] +Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] -(37) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(38) CometSort -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(40) CometSort +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16], [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, s_store_name#32 ASC NULLS FIRST, s_company_name#33 ASC NULLS FIRST, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(40) Window -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(42) Window +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [rank(d_year#34, d_moy#35) windowspecdefinition(i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#36], [i_category#30, i_brand#31, s_store_name#32, s_company_name#33], [d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#16 AS sum_sales#35, rn#34] -Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] +(43) Project [codegen id : 6] +Output [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#16 AS sum_sales#37, rn#36] +Input [8]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16, rn#36] -(42) BroadcastExchange -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#34 - 1)] +Right keys [5]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, (rn#36 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#27 AS psum#36, sum_sales#35 AS nsum#37] -Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] +(46) Project [codegen id : 7] +Output [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#29 AS psum#38, sum_sales#37 AS nsum#39] +Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29, i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] -(45) TakeOrderedAndProject -Input [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] -Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +(47) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/simplified.txt index c16c278d5..a18f68d83 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q47/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum, ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,s_store_name,s_company_name] #8 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum, ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/explain.txt index 01a5d7c56..97f571929 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.call_center (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.0000 Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17] Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] -(29) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] -(30) CometSort +(30) CometHashAggregate +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] +Keys [5]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23] +Functions [1]: [sum(UnscaledValue(cs_sales_price#25))] + +(31) CometExchange +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, cc_name#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] Arguments: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, cc_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -(32) Window +(34) Window Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] +Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#25, rn#24] -Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#24] +(35) Project [codegen id : 4] +Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#27, rn#26] +Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#26] -(34) BroadcastExchange -Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#24 + 1)] +Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#26 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25] -Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27] +Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] -(37) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(38) CometSort -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15], [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(40) CometSort +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(40) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#31], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(42) Window +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#28, i_brand#29, cc_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#33], [i_category#28, i_brand#29, cc_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#15 AS sum_sales#32, rn#31] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15, rn#31] +(43) Project [codegen id : 6] +Output [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#15 AS sum_sales#34, rn#33] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15, rn#33] -(42) BroadcastExchange -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#31 - 1)] +Right keys [4]: [i_category#28, i_brand#29, cc_name#30, (rn#33 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#25 AS psum#33, sum_sales#32 AS nsum#34] -Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25, i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] +(46) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#27 AS psum#35, sum_sales#34 AS nsum#36] +Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27, i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] -(45) TakeOrderedAndProject -Input [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] -Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] +(47) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = cs_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/simplified.txt index 539839af7..3ea3165fc 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7-spark3_5/q57/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_m ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,cc_name] #8 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,cc_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_m ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt index 3bc9bb6e0..ab2ebd0a1 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.store (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#19) AND (avg_monthly_sales#19 > 0.0000 Output [9]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18] Input [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, _w0#17, rn#18, avg_monthly_sales#19] -(29) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +(29) ReusedExchange [Reuses operator id: 19] +Output [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] -(30) CometSort +(30) CometHashAggregate +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum#26] +Keys [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25] +Functions [1]: [sum(UnscaledValue(ss_sales_price#27))] + +(31) CometExchange +Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] +Arguments: hashpartitioning(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] Arguments: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16], [i_category#20 ASC NULLS FIRST, i_brand#21 ASC NULLS FIRST, s_store_name#22 ASC NULLS FIRST, s_company_name#23 ASC NULLS FIRST, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -(32) Window +(34) Window Input [7]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16] -Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] +Arguments: [rank(d_year#24, d_moy#25) windowspecdefinition(i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#28], [i_category#20, i_brand#21, s_store_name#22, s_company_name#23], [d_year#24 ASC NULLS FIRST, d_moy#25 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#27, rn#26] -Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#26] +(35) Project [codegen id : 4] +Output [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#16 AS sum_sales#29, rn#28] +Input [8]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, d_year#24, d_moy#25, sum_sales#16, rn#28] -(34) BroadcastExchange -Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [6]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#26 + 1)] +Right keys [5]: [i_category#20, i_brand#21, s_store_name#22, s_company_name#23, (rn#28 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27] -Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#27, rn#26] +(38) Project [codegen id : 7] +Output [10]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29] +Input [15]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, i_category#20, i_brand#21, s_store_name#22, s_company_name#23, sum_sales#29, rn#28] -(37) ReusedExchange [Reuses operator id: 21] -Output [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(39) ReusedExchange [Reuses operator id: 31] +Output [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(38) CometSort -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, s_store_name#30 ASC NULLS FIRST, s_company_name#31 ASC NULLS FIRST, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(40) CometSort +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16], [i_category#30 ASC NULLS FIRST, i_brand#31 ASC NULLS FIRST, s_store_name#32 ASC NULLS FIRST, s_company_name#33 ASC NULLS FIRST, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] +(41) ColumnarToRow [codegen id : 5] +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] -(40) Window -Input [7]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16] -Arguments: [rank(d_year#32, d_moy#33) windowspecdefinition(i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#34], [i_category#28, i_brand#29, s_store_name#30, s_company_name#31], [d_year#32 ASC NULLS FIRST, d_moy#33 ASC NULLS FIRST] +(42) Window +Input [7]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16] +Arguments: [rank(d_year#34, d_moy#35) windowspecdefinition(i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#36], [i_category#30, i_brand#31, s_store_name#32, s_company_name#33], [d_year#34 ASC NULLS FIRST, d_moy#35 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#16 AS sum_sales#35, rn#34] -Input [8]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, d_year#32, d_moy#33, sum_sales#16, rn#34] +(43) Project [codegen id : 6] +Output [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#16 AS sum_sales#37, rn#36] +Input [8]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, d_year#34, d_moy#35, sum_sales#16, rn#36] -(42) BroadcastExchange -Input [6]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [6]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], input[3, string, true], (input[5, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [5]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, rn#18] -Right keys [5]: [i_category#28, i_brand#29, s_store_name#30, s_company_name#31, (rn#34 - 1)] +Right keys [5]: [i_category#30, i_brand#31, s_store_name#32, s_company_name#33, (rn#36 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#27 AS psum#36, sum_sales#35 AS nsum#37] -Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#27, i_category#28, i_brand#29, s_store_name#30, s_company_name#31, sum_sales#35, rn#34] +(46) Project [codegen id : 7] +Output [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, sum_sales#29 AS psum#38, sum_sales#37 AS nsum#39] +Input [16]: [i_category#3, i_brand#2, s_store_name#13, s_company_name#14, d_year#10, d_moy#11, sum_sales#16, avg_monthly_sales#19, rn#18, sum_sales#29, i_category#30, i_brand#31, s_store_name#32, s_company_name#33, sum_sales#37, rn#36] -(45) TakeOrderedAndProject -Input [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] -Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#36, nsum#37] +(47) TakeOrderedAndProject +Input [7]: [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] +Arguments: 100, [(sum_sales#16 - avg_monthly_sales#19) ASC NULLS FIRST, d_moy#11 ASC NULLS FIRST], [i_category#3, d_year#10, d_moy#11, avg_monthly_sales#19, sum_sales#16, psum#38, nsum#39] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = ss_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt index c16c278d5..a18f68d83 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q47/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum, ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,s_store_name,s_company_name] #8 + CometHashAggregate [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(ss_sales_price))] + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,s_store_name,s_company_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_moy,i_category,d_year,psum, ColumnarToRow InputAdapter CometSort [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,s_store_name,s_company_name,d_year,d_moy,sum_sales] #8 diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt index 01a5d7c56..97f571929 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/explain.txt @@ -1,9 +1,9 @@ == Physical Plan == -TakeOrderedAndProject (45) -+- * Project (44) - +- * BroadcastHashJoin Inner BuildRight (43) - :- * Project (36) - : +- * BroadcastHashJoin Inner BuildRight (35) +TakeOrderedAndProject (47) ++- * Project (46) + +- * BroadcastHashJoin Inner BuildRight (45) + :- * Project (38) + : +- * BroadcastHashJoin Inner BuildRight (37) : :- * Project (28) : : +- * Filter (27) : : +- Window (26) @@ -32,18 +32,20 @@ TakeOrderedAndProject (45) : : +- CometBroadcastExchange (15) : : +- CometFilter (14) : : +- CometScan parquet spark_catalog.default.call_center (13) - : +- BroadcastExchange (34) - : +- * Project (33) - : +- Window (32) - : +- * ColumnarToRow (31) - : +- CometSort (30) - : +- ReusedExchange (29) - +- BroadcastExchange (42) - +- * Project (41) - +- Window (40) - +- * ColumnarToRow (39) - +- CometSort (38) - +- ReusedExchange (37) + : +- BroadcastExchange (36) + : +- * Project (35) + : +- Window (34) + : +- * ColumnarToRow (33) + : +- CometSort (32) + : +- CometExchange (31) + : +- CometHashAggregate (30) + : +- ReusedExchange (29) + +- BroadcastExchange (44) + +- * Project (43) + +- Window (42) + +- * ColumnarToRow (41) + +- CometSort (40) + +- ReusedExchange (39) (1) Scan parquet spark_catalog.default.item @@ -175,99 +177,108 @@ Condition : ((isnotnull(avg_monthly_sales#18) AND (avg_monthly_sales#18 > 0.0000 Output [8]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17] Input [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, _w0#16, rn#17, avg_monthly_sales#18] -(29) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +(29) ReusedExchange [Reuses operator id: 19] +Output [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] -(30) CometSort +(30) CometHashAggregate +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum#24] +Keys [5]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23] +Functions [1]: [sum(UnscaledValue(cs_sales_price#25))] + +(31) CometExchange +Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] +Arguments: hashpartitioning(i_category#19, i_brand#20, cc_name#21, 5), ENSURE_REQUIREMENTS, CometNativeShuffle, [plan_id=3] + +(32) CometSort Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] Arguments: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15], [i_category#19 ASC NULLS FIRST, i_brand#20 ASC NULLS FIRST, cc_name#21 ASC NULLS FIRST, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(31) ColumnarToRow [codegen id : 3] +(33) ColumnarToRow [codegen id : 3] Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -(32) Window +(34) Window Input [6]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15] -Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#24], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] +Arguments: [rank(d_year#22, d_moy#23) windowspecdefinition(i_category#19, i_brand#20, cc_name#21, d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#26], [i_category#19, i_brand#20, cc_name#21], [d_year#22 ASC NULLS FIRST, d_moy#23 ASC NULLS FIRST] -(33) Project [codegen id : 4] -Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#25, rn#24] -Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#24] +(35) Project [codegen id : 4] +Output [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#15 AS sum_sales#27, rn#26] +Input [7]: [i_category#19, i_brand#20, cc_name#21, d_year#22, d_moy#23, sum_sales#15, rn#26] -(34) BroadcastExchange -Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=3] +(36) BroadcastExchange +Input [5]: [i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] + 1)),false), [plan_id=4] -(35) BroadcastHashJoin [codegen id : 7] +(37) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#24 + 1)] +Right keys [4]: [i_category#19, i_brand#20, cc_name#21, (rn#26 + 1)] Join type: Inner Join condition: None -(36) Project [codegen id : 7] -Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25] -Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#25, rn#24] +(38) Project [codegen id : 7] +Output [9]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27] +Input [13]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, i_category#19, i_brand#20, cc_name#21, sum_sales#27, rn#26] -(37) ReusedExchange [Reuses operator id: 21] -Output [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(39) ReusedExchange [Reuses operator id: 31] +Output [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(38) CometSort -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15], [i_category#26 ASC NULLS FIRST, i_brand#27 ASC NULLS FIRST, cc_name#28 ASC NULLS FIRST, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(40) CometSort +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15], [i_category#28 ASC NULLS FIRST, i_brand#29 ASC NULLS FIRST, cc_name#30 ASC NULLS FIRST, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(39) ColumnarToRow [codegen id : 5] -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] +(41) ColumnarToRow [codegen id : 5] +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] -(40) Window -Input [6]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15] -Arguments: [rank(d_year#29, d_moy#30) windowspecdefinition(i_category#26, i_brand#27, cc_name#28, d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#31], [i_category#26, i_brand#27, cc_name#28], [d_year#29 ASC NULLS FIRST, d_moy#30 ASC NULLS FIRST] +(42) Window +Input [6]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15] +Arguments: [rank(d_year#31, d_moy#32) windowspecdefinition(i_category#28, i_brand#29, cc_name#30, d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST, specifiedwindowframe(RowFrame, unboundedpreceding$(), currentrow$())) AS rn#33], [i_category#28, i_brand#29, cc_name#30], [d_year#31 ASC NULLS FIRST, d_moy#32 ASC NULLS FIRST] -(41) Project [codegen id : 6] -Output [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#15 AS sum_sales#32, rn#31] -Input [7]: [i_category#26, i_brand#27, cc_name#28, d_year#29, d_moy#30, sum_sales#15, rn#31] +(43) Project [codegen id : 6] +Output [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#15 AS sum_sales#34, rn#33] +Input [7]: [i_category#28, i_brand#29, cc_name#30, d_year#31, d_moy#32, sum_sales#15, rn#33] -(42) BroadcastExchange -Input [5]: [i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] -Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=4] +(44) BroadcastExchange +Input [5]: [i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] +Arguments: HashedRelationBroadcastMode(List(input[0, string, true], input[1, string, true], input[2, string, true], (input[4, int, false] - 1)),false), [plan_id=5] -(43) BroadcastHashJoin [codegen id : 7] +(45) BroadcastHashJoin [codegen id : 7] Left keys [4]: [i_category#3, i_brand#2, cc_name#13, rn#17] -Right keys [4]: [i_category#26, i_brand#27, cc_name#28, (rn#31 - 1)] +Right keys [4]: [i_category#28, i_brand#29, cc_name#30, (rn#33 - 1)] Join type: Inner Join condition: None -(44) Project [codegen id : 7] -Output [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#25 AS psum#33, sum_sales#32 AS nsum#34] -Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#25, i_category#26, i_brand#27, cc_name#28, sum_sales#32, rn#31] +(46) Project [codegen id : 7] +Output [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, sum_sales#27 AS psum#35, sum_sales#34 AS nsum#36] +Input [14]: [i_category#3, i_brand#2, cc_name#13, d_year#10, d_moy#11, sum_sales#15, avg_monthly_sales#18, rn#17, sum_sales#27, i_category#28, i_brand#29, cc_name#30, sum_sales#34, rn#33] -(45) TakeOrderedAndProject -Input [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] -Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#33, nsum#34] +(47) TakeOrderedAndProject +Input [8]: [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] +Arguments: 100, [(sum_sales#15 - avg_monthly_sales#18) ASC NULLS FIRST, d_year#10 ASC NULLS FIRST], [i_category#3, i_brand#2, d_year#10, d_moy#11, avg_monthly_sales#18, sum_sales#15, psum#35, nsum#36] ===== Subqueries ===== Subquery:1 Hosting operator id = 3 Hosting Expression = cs_sold_date_sk#7 IN dynamicpruning#8 -BroadcastExchange (49) -+- * ColumnarToRow (48) - +- CometFilter (47) - +- CometScan parquet spark_catalog.default.date_dim (46) +BroadcastExchange (51) ++- * ColumnarToRow (50) + +- CometFilter (49) + +- CometScan parquet spark_catalog.default.date_dim (48) -(46) Scan parquet spark_catalog.default.date_dim +(48) Scan parquet spark_catalog.default.date_dim Output [3]: [d_date_sk#9, d_year#10, d_moy#11] Batched: true Location [not included in comparison]/{warehouse_dir}/date_dim] PushedFilters: [Or(Or(EqualTo(d_year,1999),And(EqualTo(d_year,1998),EqualTo(d_moy,12))),And(EqualTo(d_year,2000),EqualTo(d_moy,1))), IsNotNull(d_date_sk)] ReadSchema: struct -(47) CometFilter +(49) CometFilter Input [3]: [d_date_sk#9, d_year#10, d_moy#11] Condition : ((((d_year#10 = 1999) OR ((d_year#10 = 1998) AND (d_moy#11 = 12))) OR ((d_year#10 = 2000) AND (d_moy#11 = 1))) AND isnotnull(d_date_sk#9)) -(48) ColumnarToRow [codegen id : 1] +(50) ColumnarToRow [codegen id : 1] Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -(49) BroadcastExchange +(51) BroadcastExchange Input [3]: [d_date_sk#9, d_year#10, d_moy#11] -Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=5] +Arguments: HashedRelationBroadcastMode(List(cast(input[0, int, false] as bigint)),false), [plan_id=6] diff --git a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt index 539839af7..3ea3165fc 100644 --- a/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt +++ b/spark/src/test/resources/tpcds-plan-stability/approved-plans-v2_7/q57/simplified.txt @@ -54,9 +54,11 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_m ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + CometExchange [i_category,i_brand,cc_name] #8 + CometHashAggregate [i_category,i_brand,cc_name,d_year,d_moy,sum_sales,sum,sum(UnscaledValue(cs_sales_price))] + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum] #2 InputAdapter - BroadcastExchange #8 + BroadcastExchange #9 WholeStageCodegen (6) Project [i_category,i_brand,cc_name,sum_sales,rn] InputAdapter @@ -65,4 +67,4 @@ TakeOrderedAndProject [sum_sales,avg_monthly_sales,d_year,i_category,i_brand,d_m ColumnarToRow InputAdapter CometSort [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] - ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #1 + ReusedExchange [i_category,i_brand,cc_name,d_year,d_moy,sum_sales] #8