From 2e8f6ca4342713da88392bb9b0b9d5221e7ad406 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Thu, 7 May 2015 14:55:18 +0800 Subject: [PATCH 1/9] Update Optimizer.scala --- .../apache/spark/sql/catalyst/optimizer/Optimizer.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index e4a60f53d6c09..37456f9e6e72c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -293,11 +293,13 @@ object ConstantFolding extends Rule[LogicalPlan] { // Fold expressions that are foldable. case e if e.foldable => Literal.create(e.eval(null), e.dataType) - // Fold "literal in (item1, item2, ..., literal, ...)" into true directly. - case In(Literal(v, _), list) if list.exists { + // Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly. + case In(Literal(v, _), list) => + val isExists = list.exists { case Literal(candidate, _) if candidate == v => true case _ => false - } => Literal.create(true, BooleanType) + } + if (isExists) Literal.create(true, BooleanType) else Literal.create(false, BooleanType) } } } From 36c194e9d37f91b6e8696d76dc759de7c95eae44 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Thu, 7 May 2015 16:02:39 +0800 Subject: [PATCH 2/9] Update Optimizer.scala --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 37456f9e6e72c..6dc08606ee704 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -294,12 +294,10 @@ object ConstantFolding extends Rule[LogicalPlan] { case e if e.foldable => Literal.create(e.eval(null), e.dataType) // Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly. - case In(Literal(v, _), list) => - val isExists = list.exists { - case Literal(candidate, _) if candidate == v => true - case _ => false - } - if (isExists) Literal.create(true, BooleanType) else Literal.create(false, BooleanType) + case InSet(Literal(v, _), hSet) => { + val isExists = hSet.contains(v) + if(isExists) Literal.create(true, BooleanType) else Literal.create(false, BooleanType) + } } } } From 35ceb7aa66243b984414e66342563d45823c7b46 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Thu, 7 May 2015 19:10:14 +0800 Subject: [PATCH 3/9] Update Optimizer.scala --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 6dc08606ee704..604821227e0ff 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -46,13 +46,13 @@ object DefaultOptimizer extends Optimizer { CombineLimits) :: Batch("ConstantFolding", FixedPoint(100), NullPropagation, + OptimizeIn, ConstantFolding, LikeSimplification, BooleanSimplification, SimplifyFilters, SimplifyCasts, - SimplifyCaseConversionExpressions, - OptimizeIn) :: + SimplifyCaseConversionExpressions) :: Batch("Decimal Optimizations", FixedPoint(100), DecimalAggregates) :: Batch("LocalRelation", FixedPoint(100), @@ -293,11 +293,19 @@ object ConstantFolding extends Rule[LogicalPlan] { // Fold expressions that are foldable. case e if e.foldable => Literal.create(e.eval(null), e.dataType) - // Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly. + // Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly when all + // elements is literal. case InSet(Literal(v, _), hSet) => { val isExists = hSet.contains(v) if(isExists) Literal.create(true, BooleanType) else Literal.create(false, BooleanType) } + + // Fold "literal in (item1, item2, ..., literal, ...)" into true directly when + // not all elements is literal. + case In(Literal(v, _), list) if list.exists { + case Literal(candidate, _) if candidate == v => true + case _ => false + } => Literal.create(true, BooleanType) } } } From f4dbf50ff06006666784122e9be46879872ee512 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Thu, 7 May 2015 19:33:55 +0800 Subject: [PATCH 4/9] Update ConstantFoldingSuite.scala --- .../optimizer/ConstantFoldingSuite.scala | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 18f92150b0966..4e1ce9887f7e2 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -35,6 +35,7 @@ class ConstantFoldingSuite extends PlanTest { Batch("AnalysisNodes", Once, EliminateSubQueries) :: Batch("ConstantFolding", Once, + OptimizeIn, ConstantFolding, BooleanSimplification) :: Nil } @@ -247,4 +248,36 @@ class ConstantFoldingSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + + test("Constant folding test: Fold In(v, list) into true or false") { + var originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), Literal(2)))) + + var optimized = Optimize(originalQuery.analyze) + + var correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + + originalQuery = + testRelation + .select('a) + .where(In(Literal(1), Seq(Literal(1), 'a.attr))) + + optimized = Optimize(originalQuery.analyze) + + correctAnswer = + testRelation + .select('a) + .where(Literal(true)) + .analyze + + comparePlans(optimized, correctAnswer) + } } From 24739bd45852dd3015d4be73236d7f50d3bb4a25 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Thu, 7 May 2015 19:35:42 +0800 Subject: [PATCH 5/9] Update ConstantFoldingSuite.scala --- .../spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala index 4e1ce9887f7e2..e74138c221297 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/ConstantFoldingSuite.scala @@ -255,7 +255,7 @@ class ConstantFoldingSuite extends PlanTest { .select('a) .where(In(Literal(1), Seq(Literal(1), Literal(2)))) - var optimized = Optimize(originalQuery.analyze) + var optimized = Optimize.execute(originalQuery.analyze) var correctAnswer = testRelation @@ -270,7 +270,7 @@ class ConstantFoldingSuite extends PlanTest { .select('a) .where(In(Literal(1), Seq(Literal(1), 'a.attr))) - optimized = Optimize(originalQuery.analyze) + optimized = Optimize.execute(originalQuery.analyze) correctAnswer = testRelation From e34c28aff9382cbfab6c1122289539473f3bf426 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Fri, 8 May 2015 09:29:32 +0800 Subject: [PATCH 6/9] Update predicates.scala --- .../org/apache/spark/sql/catalyst/expressions/predicates.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 26c38c56c04f5..7e4a49c87ea16 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -110,6 +110,7 @@ case class InSet(value: Expression, hset: Set[Any]) override def children: Seq[Expression] = value :: Nil + override def foldable: Boolean = true override def nullable: Boolean = true // TODO: Figure out correct nullability semantics of IN. override def toString: String = s"$value INSET ${hset.mkString("(", ",", ")")}" From fa461a5469721e3704a27df833a3a85d45b450b0 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Fri, 8 May 2015 09:30:41 +0800 Subject: [PATCH 7/9] Update Optimizer.scala --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 604821227e0ff..87b41cc82d0e8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -292,16 +292,8 @@ object ConstantFolding extends Rule[LogicalPlan] { // Fold expressions that are foldable. case e if e.foldable => Literal.create(e.eval(null), e.dataType) - - // Fold "literal in (item1, item2, ..., literal, ...)" into true or false directly when all - // elements is literal. - case InSet(Literal(v, _), hSet) => { - val isExists = hSet.contains(v) - if(isExists) Literal.create(true, BooleanType) else Literal.create(false, BooleanType) - } - // Fold "literal in (item1, item2, ..., literal, ...)" into true directly when - // not all elements is literal. + // Fold "literal in (item1, item2, ..., literal, ...)" into true directly. case In(Literal(v, _), list) if list.exists { case Literal(candidate, _) if candidate == v => true case _ => false From abe2bbb4d088c381d6f4a782a8155c8bdbec7d14 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Fri, 8 May 2015 09:34:02 +0800 Subject: [PATCH 8/9] Update Optimizer.scala --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 87b41cc82d0e8..e10ea2dc52a0f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -292,7 +292,7 @@ object ConstantFolding extends Rule[LogicalPlan] { // Fold expressions that are foldable. case e if e.foldable => Literal.create(e.eval(null), e.dataType) - + // Fold "literal in (item1, item2, ..., literal, ...)" into true directly. case In(Literal(v, _), list) if list.exists { case Literal(candidate, _) if candidate == v => true From 4c722a23096960b4ccd2cb2d2295a5ef9ea7e741 Mon Sep 17 00:00:00 2001 From: Zhongshuai Pei <799203320@qq.com> Date: Fri, 8 May 2015 09:55:21 +0800 Subject: [PATCH 9/9] Update predicates.scala --- .../org/apache/spark/sql/catalyst/expressions/predicates.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 7e4a49c87ea16..ede05f6f754ef 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -110,7 +110,7 @@ case class InSet(value: Expression, hset: Set[Any]) override def children: Seq[Expression] = value :: Nil - override def foldable: Boolean = true + override def foldable: Boolean = value.foldable override def nullable: Boolean = true // TODO: Figure out correct nullability semantics of IN. override def toString: String = s"$value INSET ${hset.mkString("(", ",", ")")}"