-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-44355][SQL] Move WithCTE into command queries #42036
Changes from all commits
4eabbd8
4349764
4f1cbee
a6633e4
25230f2
58bc261
fedc9a6
45672ac
8a36db1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -46,7 +46,7 @@ trait KeepAnalyzedQuery extends Command { | |
/** | ||
* Base trait for DataSourceV2 write commands | ||
*/ | ||
trait V2WriteCommand extends UnaryCommand with KeepAnalyzedQuery { | ||
trait V2WriteCommand extends UnaryCommand with KeepAnalyzedQuery with CTEInChildren { | ||
def table: NamedRelation | ||
def query: LogicalPlan | ||
def isByName: Boolean | ||
|
@@ -392,9 +392,16 @@ case class WriteDelta( | |
} | ||
} | ||
|
||
trait V2CreateTableAsSelectPlan extends V2CreateTablePlan with AnalysisOnlyCommand { | ||
trait V2CreateTableAsSelectPlan | ||
extends V2CreateTablePlan | ||
with AnalysisOnlyCommand | ||
with CTEInChildren { | ||
def query: LogicalPlan | ||
|
||
override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { | ||
withNameAndQuery(newName = name, newQuery = WithCTE(query, cteDefs)) | ||
} | ||
|
||
override lazy val resolved: Boolean = childrenResolved && { | ||
// the table schema is created from the query schema, so the only resolution needed is to check | ||
// that the columns referenced by the table's partitioning exist in the query schema | ||
|
@@ -1234,12 +1241,16 @@ case class RepairTable( | |
case class AlterViewAs( | ||
child: LogicalPlan, | ||
originalText: String, | ||
query: LogicalPlan) extends BinaryCommand { | ||
query: LogicalPlan) extends BinaryCommand with CTEInChildren { | ||
override def left: LogicalPlan = child | ||
override def right: LogicalPlan = query | ||
override protected def withNewChildrenInternal( | ||
newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = | ||
copy(child = newLeft, query = newRight) | ||
|
||
override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { | ||
withNewChildren(Seq(child, WithCTE(query, cteDefs))) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
} | ||
|
||
/** | ||
|
@@ -1253,12 +1264,16 @@ case class CreateView( | |
originalText: Option[String], | ||
query: LogicalPlan, | ||
allowExisting: Boolean, | ||
replace: Boolean) extends BinaryCommand { | ||
replace: Boolean) extends BinaryCommand with CTEInChildren { | ||
override def left: LogicalPlan = child | ||
override def right: LogicalPlan = query | ||
override protected def withNewChildrenInternal( | ||
newLeft: LogicalPlan, newRight: LogicalPlan): LogicalPlan = | ||
copy(child = newLeft, query = newRight) | ||
|
||
override def withCTEDefs(cteDefs: Seq[CTERelationDef]): LogicalPlan = { | ||
withNewChildren(Seq(child, WithCTE(query, cteDefs))) | ||
} | ||
} | ||
|
||
/** | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3759,6 +3759,15 @@ object SQLConf { | |
.checkValues(LegacyBehaviorPolicy.values.map(_.toString)) | ||
.createWithDefault(LegacyBehaviorPolicy.EXCEPTION.toString) | ||
|
||
val LEGACY_INLINE_CTE_IN_COMMANDS = buildConf("spark.sql.legacy.inlineCTEInCommands") | ||
.internal() | ||
.doc("If true, always inline the CTE relations for the queries in commands. This is the " + | ||
"legacy behavior which may produce incorrect results because Spark may evaluate a CTE " + | ||
"relation more than once, even if it's nondeterministic.") | ||
.version("4.0.0") | ||
.booleanConf | ||
cloud-fan marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.createWithDefault(false) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need version here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good catch! |
||
|
||
val LEGACY_TIME_PARSER_POLICY = buildConf("spark.sql.legacy.timeParserPolicy") | ||
.internal() | ||
.doc("When LEGACY, java.text.SimpleDateFormat is used for formatting and parsing " + | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wonder if it makes sense to assert that we have only 1 child.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It seems fine to have multiple children, we just duplicate the CTE relations. The current code does not allow it though, and go back to inline CTE.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure that it is always fine to duplicate CTE relations into multiple childrens.
For example, if we have a non-deterministic relation definition and 1-1 reference to it in 2 childrens of
CTEInChildren
and then here we duplicate the relations into the 2 childrens then I think theInlineCTE
rule will decide to inline the relation 2 times, which is not correct.But I agree with you, I don't see that this could happen now.