From 5eed4bceb39b350247837688c189cfdc786ddb89 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Sun, 8 Mar 2015 23:27:59 -0700 Subject: [PATCH 1/4] promote string and do widen types for IN --- .../catalyst/analysis/HiveTypeCoercion.scala | 24 +++++++++++++++++++ .../sql/catalyst/optimizer/Optimizer.scala | 4 ++-- .../org/apache/spark/sql/SQLQuerySuite.scala | 9 +++++++ 3 files changed, 35 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 35c7f00d4e42a..a967ab9270215 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -221,6 +221,22 @@ trait HiveTypeCoercion { b.makeCopy(Array(newLeft, newRight)) }.getOrElse(b) // If there is no applicable conversion, leave expression unchanged. } + + // Also widen types for InExpressions. + case q: LogicalPlan => q transformExpressions { + // Skip nodes who's children have not been resolved yet. + case e if !e.childrenResolved => e + + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => + b.map(_.dataType).foldLeft(None: Option[DataType])((r, c) => r match { + case None => Some(c) + case Some(dt) => findTightestCommonType(dt, c) + }) match { + // If there is no applicable conversion, leave expression unchanged. + case None => i.makeCopy(Array(a, b)) + case Some(dt) => i.makeCopy(Array(Cast(a, dt), b.map(Cast(_, dt)))) + } + } } } @@ -270,6 +286,14 @@ trait HiveTypeCoercion { i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == DateType) => i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) + case i @ In(a, b) if a.dataType == StringType + && b.exists(_.dataType.isInstanceOf[NumericType]) => + i.makeCopy(Array(Cast(a, DoubleType), b)) + case i @ In(a, b) if b.exists(_.dataType == StringType) + && a.dataType.isInstanceOf[NumericType] => + i.makeCopy(Array(a, b.map(_.dataType match{ + case StringType => Cast(a, DoubleType) + }))) case Sum(e) if e.dataType == StringType => Sum(Cast(e, DoubleType)) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 2d03fbfb0d311..9dc6f3c398ae1 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -308,8 +308,8 @@ object OptimizeIn extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsDown { case In(v, list) if !list.exists(!_.isInstanceOf[Literal]) => - val hSet = list.map(e => e.eval(null)) - InSet(v, HashSet() ++ hSet) + val hSet = list.map(e => e.eval(null)) + InSet(v, HashSet() ++ hSet) } } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 9e02e69fda3f2..b7fd2fb37e703 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -88,6 +88,15 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { Row(1, 1) :: Nil) } + test("SPARK-6201 IN string promote") { + jsonRDD(sparkContext.parallelize(Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}"))) + .registerTempTable("d") + + checkAnswer( + sql("select * from d where d.a in (1,2)"), + Seq(Row("1"), Row("2"))) + } + test("SPARK-3176 Added Parser of SQL ABS()") { checkAnswer( sql("SELECT ABS(-1.3)"), From f3f7baf0b1f7873e84095e61043c76913d80c1ba Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Mon, 9 Mar 2015 20:04:43 -0700 Subject: [PATCH 2/4] address comments --- .../catalyst/analysis/HiveTypeCoercion.scala | 32 +++++++++++++------ 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index a967ab9270215..78431cbacc76a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -69,6 +69,7 @@ trait HiveTypeCoercion { val typeCoercionRules = PropagateTypes :: ConvertNaNs :: + PromoteToStrings :: WidenTypes :: PromoteStrings :: DecimalPrecision :: @@ -233,7 +234,7 @@ trait HiveTypeCoercion { case Some(dt) => findTightestCommonType(dt, c) }) match { // If there is no applicable conversion, leave expression unchanged. - case None => i.makeCopy(Array(a, b)) + case None => i case Some(dt) => i.makeCopy(Array(Cast(a, dt), b.map(Cast(_, dt)))) } } @@ -286,14 +287,6 @@ trait HiveTypeCoercion { i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) case i @ In(a, b) if a.dataType == TimestampType && b.forall(_.dataType == DateType) => i.makeCopy(Array(Cast(a, StringType), b.map(Cast(_, StringType)))) - case i @ In(a, b) if a.dataType == StringType - && b.exists(_.dataType.isInstanceOf[NumericType]) => - i.makeCopy(Array(Cast(a, DoubleType), b)) - case i @ In(a, b) if b.exists(_.dataType == StringType) - && a.dataType.isInstanceOf[NumericType] => - i.makeCopy(Array(a, b.map(_.dataType match{ - case StringType => Cast(a, DoubleType) - }))) case Sum(e) if e.dataType == StringType => Sum(Cast(e, DoubleType)) @@ -304,6 +297,27 @@ trait HiveTypeCoercion { } } + /** + * Promotes strings that appear in arithmetic expressions. + */ + object PromoteToStrings extends Rule[LogicalPlan] { + def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { + // For In we need to promote numeric as strings + case i @ In(a, b) if a.dataType == StringType + && b.exists(_.dataType.isInstanceOf[NumericType]) => + i.makeCopy(Array(a, b.map(exp => exp.dataType match { + case n: NumericType => Cast(exp, StringType) + case _ => + }))) + case i @ In(a, b) if b.exists(_.dataType == StringType) + && a.dataType.isInstanceOf[NumericType] => + i.makeCopy(Array(Cast(a, StringType), b.map(exp => exp.dataType match { + case n: NumericType => Cast(exp, StringType) + case _ => + }))) + } + } + // scalastyle:off /** * Calculates and propagates precision for fixed-precision decimals. Hive has a number of From 581fa1cedbc3a8bba104572202a928b48f723b89 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 21 Apr 2015 22:56:27 -0700 Subject: [PATCH 3/4] mysql way --- .../catalyst/analysis/HiveTypeCoercion.scala | 37 +++---------------- .../org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 2 files changed, 6 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index 78431cbacc76a..ac3689f9fc608 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -69,7 +69,7 @@ trait HiveTypeCoercion { val typeCoercionRules = PropagateTypes :: ConvertNaNs :: - PromoteToStrings :: + InConversion :: WidenTypes :: PromoteStrings :: DecimalPrecision :: @@ -222,22 +222,6 @@ trait HiveTypeCoercion { b.makeCopy(Array(newLeft, newRight)) }.getOrElse(b) // If there is no applicable conversion, leave expression unchanged. } - - // Also widen types for InExpressions. - case q: LogicalPlan => q transformExpressions { - // Skip nodes who's children have not been resolved yet. - case e if !e.childrenResolved => e - - case i @ In(a, b) if b.exists(_.dataType != a.dataType) => - b.map(_.dataType).foldLeft(None: Option[DataType])((r, c) => r match { - case None => Some(c) - case Some(dt) => findTightestCommonType(dt, c) - }) match { - // If there is no applicable conversion, leave expression unchanged. - case None => i - case Some(dt) => i.makeCopy(Array(Cast(a, dt), b.map(Cast(_, dt)))) - } - } } } @@ -298,23 +282,12 @@ trait HiveTypeCoercion { } /** - * Promotes strings that appear in arithmetic expressions. + * Convert all expressions in in() list to the left operator type */ - object PromoteToStrings extends Rule[LogicalPlan] { + object InConversion extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - // For In we need to promote numeric as strings - case i @ In(a, b) if a.dataType == StringType - && b.exists(_.dataType.isInstanceOf[NumericType]) => - i.makeCopy(Array(a, b.map(exp => exp.dataType match { - case n: NumericType => Cast(exp, StringType) - case _ => - }))) - case i @ In(a, b) if b.exists(_.dataType == StringType) - && a.dataType.isInstanceOf[NumericType] => - i.makeCopy(Array(Cast(a, StringType), b.map(exp => exp.dataType match { - case n: NumericType => Cast(exp, StringType) - case _ => - }))) + case i @ In(a, b) if b.exists(_.dataType != StringType) => + i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index b7fd2fb37e703..84d70e4a1eb7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -88,7 +88,7 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll { Row(1, 1) :: Nil) } - test("SPARK-6201 IN string promote") { + test("SPARK-6201 IN type conversion") { jsonRDD(sparkContext.parallelize(Seq("{\"a\": \"1\"}}", "{\"a\": \"2\"}}", "{\"a\": \"3\"}}"))) .registerTempTable("d") From 71e05ccae24c5c47fc38bbe79df0ac2d3b97fdd0 Mon Sep 17 00:00:00 2001 From: Daoyuan Wang Date: Tue, 21 Apr 2015 23:28:46 -0700 Subject: [PATCH 4/4] minor fix --- .../apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala index ac3689f9fc608..21d646f4299dd 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/HiveTypeCoercion.scala @@ -286,7 +286,7 @@ trait HiveTypeCoercion { */ object InConversion extends Rule[LogicalPlan] { def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case i @ In(a, b) if b.exists(_.dataType != StringType) => + case i @ In(a, b) if b.exists(_.dataType != a.dataType) => i.makeCopy(Array(a, b.map(Cast(_, a.dataType)))) } }