-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-26402][SQL] Accessing nested fields with different cases in case insensitive mode #23353
Changes from 8 commits
43351df
a5998bb
4f21a36
5f1cc66
a22d13e
cd14e14
2a4ec20
5273c3c
81f5e5e
82fa2e1
1ce6487
f7a64cf
e1da199
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,7 @@ package org.apache.spark.sql.catalyst.expressions | |
* | ||
* The following rules are applied: | ||
* - Names and nullability hints for [[org.apache.spark.sql.types.DataType]]s are stripped. | ||
* - Names for [[org.apache.spark.sql.catalyst.expressions.GetStructField]] are stripped. | ||
* - Commutative and associative operations ([[Add]] and [[Multiply]]) have their children ordered | ||
* by `hashCode`. | ||
* - [[EqualTo]] and [[EqualNullSafe]] are reordered by `hashCode`. | ||
|
@@ -37,10 +38,11 @@ object Canonicalize { | |
expressionReorder(ignoreNamesTypes(e)) | ||
} | ||
|
||
/** Remove names and nullability from types. */ | ||
/** Remove names and nullability from types, and names from `GetStructField`. */ | ||
private[expressions] def ignoreNamesTypes(e: Expression): Expression = e match { | ||
case a: AttributeReference => | ||
AttributeReference("none", a.dataType.asNullable)(exprId = a.exprId) | ||
case GetStructField(child, ordinal, Some(_)) => GetStructField(child, ordinal, None) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It looks not precisely matched the comments of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I can change it to There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The comment of
It also needs to be updated. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Actually after this change it is not only for types. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. I re-wrote it a bit. Should look okay now. |
||
case _ => e | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,10 +18,19 @@ | |
package org.apache.spark.sql.catalyst.expressions | ||
|
||
import org.apache.spark.SparkFunSuite | ||
import org.apache.spark.sql.catalyst.dsl.expressions._ | ||
import org.apache.spark.sql.catalyst.dsl.plans._ | ||
import org.apache.spark.sql.catalyst.plans.logical.Range | ||
import org.apache.spark.sql.catalyst.optimizer._ | ||
import org.apache.spark.sql.catalyst.plans.PlanTest | ||
import org.apache.spark.sql.catalyst.plans.logical.{LocalRelation, LogicalPlan, Range} | ||
import org.apache.spark.sql.catalyst.rules.RuleExecutor | ||
import org.apache.spark.sql.types.{IntegerType, StructField, StructType} | ||
|
||
class CanonicalizeSuite extends SparkFunSuite { | ||
class CanonicalizeSuite extends SparkFunSuite with ExpressionEvalHelper with PlanTest { | ||
|
||
object Optimize extends RuleExecutor[LogicalPlan] { | ||
val batches = Batch("SimplifyBinaryComparison", Once, SimplifyBinaryComparison) :: Nil | ||
} | ||
|
||
test("SPARK-24276: IN expression with different order are semantically equal") { | ||
val range = Range(1, 1, 1, 1) | ||
|
@@ -50,4 +59,43 @@ class CanonicalizeSuite extends SparkFunSuite { | |
assert(range.where(arrays1).sameResult(range.where(arrays2))) | ||
assert(!range.where(arrays1).sameResult(range.where(arrays3))) | ||
} | ||
|
||
test("SPARK-26402: GetStructField with different names are semantically equal") { | ||
val expId = NamedExpression.newExprId | ||
val qualifier = Seq.empty[String] | ||
val structType = StructType( | ||
StructField("a", StructType(StructField("b", IntegerType, false) :: Nil), false) :: Nil) | ||
|
||
val fieldB1 = GetStructField( | ||
AttributeReference("data1", structType, false)(expId, qualifier), | ||
0, Some("b1")) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Sorry for nit-picking. This should be There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. val fieldA1 = GetStructField(
AttributeReference("data1", structType, false)(expId, qualifier),
0, Some("a1"))
val fieldA2 = GetStructField(
AttributeReference("data2", structType, false)(expId, qualifier),
0, Some("a2"))
assert(fieldA1.semanticEquals(fieldA2))
val fieldB1 = GetStructField(
GetStructField(
AttributeReference("data1", structType, false)(expId, qualifier),
0, Some("a1")),
0, Some("b1"))
val fieldB2 = GetStructField(
GetStructField(
AttributeReference("data2", structType, false)(expId, qualifier),
0, Some("a2")),
0, Some("b2"))
assert(fieldB1.semanticEquals(fieldB2)) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @dongjoon-hyun I put the ordering wrong. Addressed as you suggested. Thanks! |
||
val fieldB2 = GetStructField( | ||
AttributeReference("data2", structType, false)(expId, qualifier), | ||
0, Some("b2")) | ||
assert(fieldB1.semanticEquals(fieldB2)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line will fail to build. |
||
|
||
val fieldA1 = GetStructField( | ||
GetStructField( | ||
AttributeReference("data1", structType, false)(expId, qualifier), | ||
0, Some("a1")), | ||
0, Some("b1")) | ||
val fieldA2 = GetStructField( | ||
GetStructField( | ||
AttributeReference("data2", structType, false)(expId, qualifier), | ||
0, Some("a2")), | ||
0, Some("b2")) | ||
assert(fieldA1.semanticEquals(fieldA2)) | ||
|
||
val testRelation = LocalRelation('a.int) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not a real end-to-end test... How about add the following test to SQLQuerySuite?
currently it fails with
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one makes sense, and is addressed by this PR. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. then can we remove this part? i.e. code between L89 to L99 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This test can be a part of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm curious that is that removed too when case sensitive mode is turned on? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It will fail at name resolution. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @viirya I added a test to show in case insensitive mode, it will fail. |
||
|
||
val originalQuery = | ||
LocalRelation('a.int) | ||
.where(EqualTo(fieldA1, fieldA2)) | ||
.analyze | ||
|
||
val optimized = Optimize.execute(originalQuery) | ||
val correctAnswer = testRelation.where(Literal.TrueLiteral).analyze | ||
|
||
comparePlans(optimized, correctAnswer) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2937,6 +2937,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { | |
} | ||
} | ||
} | ||
|
||
test("SPARK-26402: GetStructField with different names are semantically equal") { | ||
withTable("t") { | ||
sql("create table t (s struct<i: Int>) using json") | ||
sql("select s.I from t group by s.i") | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. it's a good practice to always check the result, how about There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that's better. |
||
} | ||
} | ||
} | ||
|
||
case class Foo(bar: Option[String]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`[[org.apache.spark.sql.catalyst.expressions.GetStructField]]` -> `[[GetStructField]]`? `GetStructField` is in the same package.