From 037120b2b67dcb7c145211f74bf0b1d996d99ae7 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 28 Jun 2017 15:30:09 -0700 Subject: [PATCH 1/9] [SPARK-21247][SQL] Allow case-insensitive type equality in Set operation --- .../sql/catalyst/analysis/TypeCoercion.scala | 7 ++++ .../org/apache/spark/sql/types/DataType.scala | 7 +++- .../catalyst/analysis/TypeCoercionSuite.scala | 36 ++++++++++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 19 ++++++++++ 4 files changed, 67 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 9ffe646b5e4ec..f435b40adbf83 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.catalyst.analysis +import java.util.Locale import javax.annotation.Nullable import scala.annotation.tailrec @@ -145,6 +146,12 @@ object TypeCoercion { .orElse((t1, t2) match { case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) => findWiderTypeForTwo(et1, et2).map(ArrayType(_, containsNull1 || containsNull2)) + case (st1 @ StructType(fields1), st2 @ StructType(fields2)) if st1.sameType(st2) => + Some(StructType(fields1.zip(fields2).map { case (sf1, sf2) => + val name = if (sf1.name == sf2.name) sf1.name else sf1.name.toLowerCase(Locale.ROOT) + val dataType = findWiderTypeForTwo(sf1.dataType, sf2.dataType).get + StructField(name, dataType, nullable = sf1.nullable || sf2.nullable) + })) case _ => None }) } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala index 30745c6a9d42a..d6e0df12218ad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/DataType.scala @@ -26,6 +26,7 @@ import org.json4s.jackson.JsonMethods._ import org.apache.spark.annotation.InterfaceStability import org.apache.spark.sql.catalyst.expressions.Expression +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.util.Utils /** @@ -80,7 +81,11 @@ abstract class DataType extends AbstractDataType { * (`StructField.nullable`, `ArrayType.containsNull`, and `MapType.valueContainsNull`). */ private[spark] def sameType(other: DataType): Boolean = - DataType.equalsIgnoreNullability(this, other) + if (SQLConf.get.caseSensitiveAnalysis) { + DataType.equalsIgnoreNullability(this, other) + } else { + DataType.equalsIgnoreCaseAndNullability(this, other) + } /** * Returns the same data type but set all nullability fields are true diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index d62e3b6dfe34f..df8ff255a2eec 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -387,7 +387,7 @@ class TypeCoercionSuite extends AnalysisTest { widenTest(ArrayType(IntegerType), StructType(Seq()), None) } - test("wider common type for decimal and array") { + test("wider common type for decimal/array/struct") { def widenTestWithStringPromotion( t1: DataType, t2: DataType, @@ -439,6 +439,40 @@ class TypeCoercionSuite extends AnalysisTest { ArrayType(LongType), ArrayType(StringType), Some(ArrayType(StringType))) widenTestWithStringPromotion( ArrayType(StringType), ArrayType(TimestampType), Some(ArrayType(StringType))) + + // StructType does not widen the types, but supports case-sensitive options. + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType))), + StructType(Seq(StructField("b", IntegerType))), + None) + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", IntegerType, nullable = false))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = false))))) + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", IntegerType, nullable = true))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = true))), + StructType(Seq(StructField("a", IntegerType, nullable = false))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = true))), + StructType(Seq(StructField("a", IntegerType, nullable = true))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType))), + StructType(Seq(StructField("A", IntegerType))), + None) + } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), + StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), + Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) + } } private def ruleTest(rule: Rule[LogicalPlan], initial: Expression, transformed: Expression) { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 93a7777b70b46..2086959333b30 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2646,6 +2646,25 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } + test("SPARK-21247: Allow case-insensitive type equality in Set operation") { + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + sql("SELECT struct(1 a) UNION ALL (SELECT struct(2 A))") + sql("SELECT struct(1 a) EXCEPT (SELECT struct(2 A))") + } + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + val m1 = intercept[AnalysisException] { + sql("SELECT struct(1 a) UNION ALL (SELECT struct(2 A))") + }.message + assert(m1.contains("Union can only be performed on tables with the compatible column types")) + + val m2 = intercept[AnalysisException] { + sql("SELECT struct(1 a) EXCEPT (SELECT struct(2 A))") + }.message + assert(m2.contains("Except can only be performed on tables with the compatible column types")) + } + } + test("SPARK-21335: support un-aliased subquery") { withTempView("v") { Seq(1 -> "a").toDF("i", "j").createOrReplaceTempView("v") From cded71940a7918bdb155e34d06af0cccf9c3362b Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 6 Jul 2017 23:50:59 -0700 Subject: [PATCH 2/9] Add comments. --- .../org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index f435b40adbf83..fe7bda1e621f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -148,6 +148,10 @@ object TypeCoercion { findWiderTypeForTwo(et1, et2).map(ArrayType(_, containsNull1 || containsNull2)) case (st1 @ StructType(fields1), st2 @ StructType(fields2)) if st1.sameType(st2) => Some(StructType(fields1.zip(fields2).map { case (sf1, sf2) => + // Since `st1.sameType(st2)` is true, two StructTypes have the same DataType + // except `name` (in case of `spark.sql.caseSensitive=false`) and `nullable`. + // - Different names: use a lower case name because findWiderTypeForTwo is commutative. + // - Different nullabilities: `nullable` is true iff one of them is nullable. val name = if (sf1.name == sf2.name) sf1.name else sf1.name.toLowerCase(Locale.ROOT) val dataType = findWiderTypeForTwo(sf1.dataType, sf2.dataType).get StructField(name, dataType, nullable = sf1.nullable || sf2.nullable) From 180e1b3d058c7dad98daf494401b4d5fd68a3b9b Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 8 Jul 2017 10:34:50 -0700 Subject: [PATCH 3/9] Address comments. --- .../spark/sql/catalyst/analysis/TypeCoercion.scala | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index fe7bda1e621f2..2c701ef888143 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -181,6 +181,17 @@ object TypeCoercion { case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) => findWiderTypeWithoutStringPromotionForTwo(et1, et2) .map(ArrayType(_, containsNull1 || containsNull2)) + case (st1 @ StructType(fields1), st2 @ StructType(fields2)) if st1.sameType(st2) => + Some(StructType(fields1.zip(fields2).map { case (sf1, sf2) => + // Since `st1.sameType(st2)` is true, two StructTypes have the same DataType + // except `name` (in case of `spark.sql.caseSensitive=false`) and `nullable`. + // - Different names: use a lower case name because + // findWiderTypeWithoutStringPromotionForTwo is commutative. + // - Different nullabilities: `nullable` is true iff one of them is nullable. + val name = if (sf1.name == sf2.name) sf1.name else sf1.name.toLowerCase(Locale.ROOT) + val dataType = findWiderTypeWithoutStringPromotionForTwo(sf1.dataType, sf2.dataType).get + StructField(name, dataType, nullable = sf1.nullable || sf2.nullable) + })) case _ => None }) } From 2343d9781b527e8cef0ec5cc3641b9ca2ec9b4af Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sat, 8 Jul 2017 11:02:57 -0700 Subject: [PATCH 4/9] Add test case. --- .../catalyst/analysis/TypeCoercionSuite.scala | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index df8ff255a2eec..cc6af1db54831 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -445,6 +445,15 @@ class TypeCoercionSuite extends AnalysisTest { StructType(Seq(StructField("a", IntegerType))), StructType(Seq(StructField("b", IntegerType))), None) + widenTestWithStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", DoubleType, nullable = false))), + None) + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", DoubleType, nullable = false))), + None) + widenTestWithStringPromotion( StructType(Seq(StructField("a", IntegerType, nullable = false))), StructType(Seq(StructField("a", IntegerType, nullable = false))), @@ -461,17 +470,43 @@ class TypeCoercionSuite extends AnalysisTest { StructType(Seq(StructField("a", IntegerType, nullable = true))), StructType(Seq(StructField("a", IntegerType, nullable = true))), Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", IntegerType, nullable = false))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = false))))) + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", IntegerType, nullable = true))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = true))), + StructType(Seq(StructField("a", IntegerType, nullable = false))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType, nullable = true))), + StructType(Seq(StructField("a", IntegerType, nullable = true))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { widenTestWithStringPromotion( StructType(Seq(StructField("a", IntegerType))), StructType(Seq(StructField("A", IntegerType))), None) + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType))), + StructType(Seq(StructField("A", IntegerType))), + None) } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { widenTestWithStringPromotion( StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) + widenTestWithoutStringPromotion( + StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), + StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), + Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) } } From 8fa884e112294f4db2f86e3327169838e459771a Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 30 Jul 2017 13:40:16 -0700 Subject: [PATCH 5/9] Move to findTightestCommonType. --- .../sql/catalyst/analysis/TypeCoercion.scala | 32 +++++++------------ 1 file changed, 11 insertions(+), 21 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 2c701ef888143..05eb34ec91232 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -101,6 +101,17 @@ object TypeCoercion { case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) => Some(TimestampType) + case (t1 @ StructType(fields1), t2 @ StructType(fields2)) if t1.sameType(t2) => + Some(StructType(fields1.zip(fields2).map { case (f1, f2) => + // Since `t1.sameType(t2)` is true, two StructTypes have the same DataType + // except `name` (in case of `spark.sql.caseSensitive=false`) and `nullable`. + // - Different names: use a lower case name because findTightestCommonType is commutative. + // - Different nullabilities: `nullable` is true iff one of them is nullable. + val name = if (f1.name == f2.name) f1.name else f1.name.toLowerCase(Locale.ROOT) + val dataType = findTightestCommonType(f1.dataType, f2.dataType).get + StructField(name, dataType, nullable = f1.nullable || f2.nullable) + })) + case _ => None } @@ -146,16 +157,6 @@ object TypeCoercion { .orElse((t1, t2) match { case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) => findWiderTypeForTwo(et1, et2).map(ArrayType(_, containsNull1 || containsNull2)) - case (st1 @ StructType(fields1), st2 @ StructType(fields2)) if st1.sameType(st2) => - Some(StructType(fields1.zip(fields2).map { case (sf1, sf2) => - // Since `st1.sameType(st2)` is true, two StructTypes have the same DataType - // except `name` (in case of `spark.sql.caseSensitive=false`) and `nullable`. - // - Different names: use a lower case name because findWiderTypeForTwo is commutative. - // - Different nullabilities: `nullable` is true iff one of them is nullable. - val name = if (sf1.name == sf2.name) sf1.name else sf1.name.toLowerCase(Locale.ROOT) - val dataType = findWiderTypeForTwo(sf1.dataType, sf2.dataType).get - StructField(name, dataType, nullable = sf1.nullable || sf2.nullable) - })) case _ => None }) } @@ -181,17 +182,6 @@ object TypeCoercion { case (ArrayType(et1, containsNull1), ArrayType(et2, containsNull2)) => findWiderTypeWithoutStringPromotionForTwo(et1, et2) .map(ArrayType(_, containsNull1 || containsNull2)) - case (st1 @ StructType(fields1), st2 @ StructType(fields2)) if st1.sameType(st2) => - Some(StructType(fields1.zip(fields2).map { case (sf1, sf2) => - // Since `st1.sameType(st2)` is true, two StructTypes have the same DataType - // except `name` (in case of `spark.sql.caseSensitive=false`) and `nullable`. - // - Different names: use a lower case name because - // findWiderTypeWithoutStringPromotionForTwo is commutative. - // - Different nullabilities: `nullable` is true iff one of them is nullable. - val name = if (sf1.name == sf2.name) sf1.name else sf1.name.toLowerCase(Locale.ROOT) - val dataType = findWiderTypeWithoutStringPromotionForTwo(sf1.dataType, sf2.dataType).get - StructField(name, dataType, nullable = sf1.nullable || sf2.nullable) - })) case _ => None }) } From b2d13382310d9d53811d47434d8262ad371a4456 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 30 Jul 2017 13:53:34 -0700 Subject: [PATCH 6/9] Move testcase, too. --- .../catalyst/analysis/TypeCoercionSuite.scala | 110 +++++++----------- 1 file changed, 40 insertions(+), 70 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index cc6af1db54831..7a6f9e76f8a26 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -385,9 +385,48 @@ class TypeCoercionSuite extends AnalysisTest { widenTest(NullType, StructType(Seq()), Some(StructType(Seq()))) widenTest(StringType, MapType(IntegerType, StringType, true), None) widenTest(ArrayType(IntegerType), StructType(Seq()), None) + + widenTest( + StructType(Seq(StructField("a", IntegerType))), + StructType(Seq(StructField("b", IntegerType))), + None) + widenTest( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", DoubleType, nullable = false))), + None) + + widenTest( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", IntegerType, nullable = false))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = false))))) + widenTest( + StructType(Seq(StructField("a", IntegerType, nullable = false))), + StructType(Seq(StructField("a", IntegerType, nullable = true))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + widenTest( + StructType(Seq(StructField("a", IntegerType, nullable = true))), + StructType(Seq(StructField("a", IntegerType, nullable = false))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + widenTest( + StructType(Seq(StructField("a", IntegerType, nullable = true))), + StructType(Seq(StructField("a", IntegerType, nullable = true))), + Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) + + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { + widenTest( + StructType(Seq(StructField("a", IntegerType))), + StructType(Seq(StructField("A", IntegerType))), + None) + } + withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { + widenTest( + StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), + StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), + Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) + } } - test("wider common type for decimal/array/struct") { + test("wider common type for decimal and array") { def widenTestWithStringPromotion( t1: DataType, t2: DataType, @@ -439,75 +478,6 @@ class TypeCoercionSuite extends AnalysisTest { ArrayType(LongType), ArrayType(StringType), Some(ArrayType(StringType))) widenTestWithStringPromotion( ArrayType(StringType), ArrayType(TimestampType), Some(ArrayType(StringType))) - - // StructType does not widen the types, but supports case-sensitive options. - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType))), - StructType(Seq(StructField("b", IntegerType))), - None) - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = false))), - StructType(Seq(StructField("a", DoubleType, nullable = false))), - None) - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = false))), - StructType(Seq(StructField("a", DoubleType, nullable = false))), - None) - - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = false))), - StructType(Seq(StructField("a", IntegerType, nullable = false))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = false))))) - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = false))), - StructType(Seq(StructField("a", IntegerType, nullable = true))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = true))), - StructType(Seq(StructField("a", IntegerType, nullable = false))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = true))), - StructType(Seq(StructField("a", IntegerType, nullable = true))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) - - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = false))), - StructType(Seq(StructField("a", IntegerType, nullable = false))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = false))))) - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = false))), - StructType(Seq(StructField("a", IntegerType, nullable = true))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = true))), - StructType(Seq(StructField("a", IntegerType, nullable = false))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType, nullable = true))), - StructType(Seq(StructField("a", IntegerType, nullable = true))), - Some(StructType(Seq(StructField("a", IntegerType, nullable = true))))) - - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType))), - StructType(Seq(StructField("A", IntegerType))), - None) - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType))), - StructType(Seq(StructField("A", IntegerType))), - None) - } - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - widenTestWithStringPromotion( - StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), - StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), - Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) - widenTestWithoutStringPromotion( - StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), - StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), - Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) - } } private def ruleTest(rule: Rule[LogicalPlan], initial: Expression, transformed: Expression) { From c72aa18ccbf23053a06faa35deda77ff1135b818 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 6 Oct 2017 12:57:25 -0700 Subject: [PATCH 7/9] Add test cases. --- .../org/apache/spark/sql/SQLQuerySuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 2086959333b30..100bedee3d16e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2650,6 +2650,12 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { sql("SELECT struct(1 a) UNION ALL (SELECT struct(2 A))") sql("SELECT struct(1 a) EXCEPT (SELECT struct(2 A))") + + withTable("struct1", "struct2") { + sql("CREATE TABLE struct1(a struct) USING parquet") + sql("CREATE TABLE struct2(A struct) USING parquet") + checkAnswer(sql("SELECT * FROM struct1, struct2 WHERE struct1.a = struct2.A"), Seq.empty) + } } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { @@ -2662,6 +2668,15 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { sql("SELECT struct(1 a) EXCEPT (SELECT struct(2 A))") }.message assert(m2.contains("Except can only be performed on tables with the compatible column types")) + + withTable("struct1", "struct2") { + sql("CREATE TABLE struct1(a struct) USING parquet") + sql("CREATE TABLE struct2(A struct) USING parquet") + val m = intercept[AnalysisException] { + sql("SELECT * FROM struct1, struct2 WHERE a = A") + }.message + assert(m.contains("cannot resolve '(struct1.`a` = struct2.`A`)' due to data type mismatch")) + } } } From 67a037c053e596432f83dc0e2383bad895e9ce21 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 6 Oct 2017 14:08:15 -0700 Subject: [PATCH 8/9] Update test cases. --- .../org/apache/spark/sql/SQLQuerySuite.scala | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 100bedee3d16e..f0c58e2e5bf45 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2651,10 +2651,13 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { sql("SELECT struct(1 a) UNION ALL (SELECT struct(2 A))") sql("SELECT struct(1 a) EXCEPT (SELECT struct(2 A))") - withTable("struct1", "struct2") { - sql("CREATE TABLE struct1(a struct) USING parquet") - sql("CREATE TABLE struct2(A struct) USING parquet") - checkAnswer(sql("SELECT * FROM struct1, struct2 WHERE struct1.a = struct2.A"), Seq.empty) + withTable("t", "S") { + sql("CREATE TABLE t(c struct) USING parquet") + sql("CREATE TABLE S(C struct) USING parquet") + Seq(("c", "C"), ("C", "c"), ("c.f", "C.F"), ("C.F", "c.f")).foreach { + case (left, right) => + checkAnswer(sql(s"SELECT * FROM t, S WHERE t.$left = S.$right"), Seq.empty) + } } } @@ -2669,13 +2672,14 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { }.message assert(m2.contains("Except can only be performed on tables with the compatible column types")) - withTable("struct1", "struct2") { - sql("CREATE TABLE struct1(a struct) USING parquet") - sql("CREATE TABLE struct2(A struct) USING parquet") + withTable("t", "S") { + sql("CREATE TABLE t(c struct) USING parquet") + sql("CREATE TABLE S(C struct) USING parquet") + checkAnswer(sql("SELECT * FROM t, S WHERE t.c.f = S.C.F"), Seq.empty) val m = intercept[AnalysisException] { - sql("SELECT * FROM struct1, struct2 WHERE a = A") + sql("SELECT * FROM t, S WHERE c = C") }.message - assert(m.contains("cannot resolve '(struct1.`a` = struct2.`A`)' due to data type mismatch")) + assert(m.contains("cannot resolve '(t.`c` = S.`C`)' due to data type mismatch")) } } } From 52d19d36bb7f704ca79aa398add03393860d69c2 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Wed, 11 Oct 2017 13:02:58 -0700 Subject: [PATCH 9/9] Address comments. --- .../sql/catalyst/analysis/TypeCoercion.scala | 6 ++---- .../catalyst/analysis/TypeCoercionSuite.scala | 17 +++++++++++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala index 05eb34ec91232..532d22dbf2321 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.analysis -import java.util.Locale import javax.annotation.Nullable import scala.annotation.tailrec @@ -105,11 +104,10 @@ object TypeCoercion { Some(StructType(fields1.zip(fields2).map { case (f1, f2) => // Since `t1.sameType(t2)` is true, two StructTypes have the same DataType // except `name` (in case of `spark.sql.caseSensitive=false`) and `nullable`. - // - Different names: use a lower case name because findTightestCommonType is commutative. + // - Different names: use f1.name // - Different nullabilities: `nullable` is true iff one of them is nullable. - val name = if (f1.name == f2.name) f1.name else f1.name.toLowerCase(Locale.ROOT) val dataType = findTightestCommonType(f1.dataType, f2.dataType).get - StructField(name, dataType, nullable = f1.nullable || f2.nullable) + StructField(f1.name, dataType, nullable = f1.nullable || f2.nullable) })) case _ => None diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala index 7a6f9e76f8a26..793e04f66f0f9 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala @@ -131,14 +131,17 @@ class TypeCoercionSuite extends AnalysisTest { widenFunc: (DataType, DataType) => Option[DataType], t1: DataType, t2: DataType, - expected: Option[DataType]): Unit = { + expected: Option[DataType], + isSymmetric: Boolean = true): Unit = { var found = widenFunc(t1, t2) assert(found == expected, s"Expected $expected as wider common type for $t1 and $t2, found $found") // Test both directions to make sure the widening is symmetric. - found = widenFunc(t2, t1) - assert(found == expected, - s"Expected $expected as wider common type for $t2 and $t1, found $found") + if (isSymmetric) { + found = widenFunc(t2, t1) + assert(found == expected, + s"Expected $expected as wider common type for $t2 and $t1, found $found") + } } test("implicit type cast - ByteType") { @@ -419,10 +422,12 @@ class TypeCoercionSuite extends AnalysisTest { None) } withSQLConf(SQLConf.CASE_SENSITIVE.key -> "false") { - widenTest( + checkWidenType( + TypeCoercion.findTightestCommonType, StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType))), StructType(Seq(StructField("A", IntegerType), StructField("b", IntegerType))), - Some(StructType(Seq(StructField("a", IntegerType), StructField("b", IntegerType))))) + Some(StructType(Seq(StructField("a", IntegerType), StructField("B", IntegerType)))), + isSymmetric = false) } }