Revert "[SPARK-23799][SQL] FilterEstimation.evaluateInSet produces devision by zero in a case of empty table with analyzed statistics"

This reverts commit c2f4ee7.
apache · Apr 23, 2018 · 1c3e820 · 1c3e820
1 parent 8eb9a41
commit 1c3e820
Show file tree

Hide file tree

Showing 3 changed files with 0 additions and 43 deletions.
diff --git a/.../scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala b/.../scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/FilterEstimation.scala
@@ -388,10 +388,6 @@ case class FilterEstimation(plan: Filter) extends Logging {
     val dataType = attr.dataType
     var newNdv = ndv
 
-    if (ndv.toDouble == 0 || colStat.min.isEmpty || colStat.max.isEmpty)  {
-      return Some(0.0)
-    }
-
     // use [min, max] to filter the original hSet
     dataType match {
       case _: NumericType | BooleanType | DateType | TimestampType =>

diff --git a/.../src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala b/.../src/test/scala/org/apache/spark/sql/catalyst/statsEstimation/FilterEstimationSuite.scala
@@ -355,17 +355,6 @@ class FilterEstimationSuite extends StatsEstimationTestBase {
       expectedRowCount = 3)
   }
 
-  test("evaluateInSet with all zeros") {
-    validateEstimatedStats(
-      Filter(InSet(attrString, Set(3, 4, 5)),
-        StatsTestPlan(Seq(attrString), 0,
-          AttributeMap(Seq(attrString ->
-            ColumnStat(distinctCount = Some(0), min = None, max = None,
-              nullCount = Some(0), avgLen = Some(0), maxLen = Some(0)))))),
-      Seq(attrString -> ColumnStat(distinctCount = Some(0))),
-      expectedRowCount = 0)
-  }
-
   test("cint NOT IN (3, 4, 5)") {
     validateEstimatedStats(
       Filter(Not(InSet(attrInt, Set(3, 4, 5))), childStatsTestPlan(Seq(attrInt), 10L)),

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StatisticsCollectionSuite.scala
@@ -372,32 +372,4 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
       }
     }
   }
-
-  test("Simple queries must be working, if CBO is turned on") {
-    withSQLConf(SQLConf.CBO_ENABLED.key -> "true") {
-      withTable("TBL1", "TBL") {
-        import org.apache.spark.sql.functions._
-        val df = spark.range(1000L).select('id,
-          'id * 2 as "FLD1",
-          'id * 12 as "FLD2",
-          lit("aaa") + 'id as "fld3")
-        df.write
-          .mode(SaveMode.Overwrite)
-          .bucketBy(10, "id", "FLD1", "FLD2")
-          .sortBy("id", "FLD1", "FLD2")
-          .saveAsTable("TBL")
-        sql("ANALYZE TABLE TBL COMPUTE STATISTICS ")
-        sql("ANALYZE TABLE TBL COMPUTE STATISTICS FOR COLUMNS ID, FLD1, FLD2, FLD3")
-        val df2 = spark.sql(
-          """
-             |SELECT t1.id, t1.fld1, t1.fld2, t1.fld3
-             |FROM tbl t1
-             |JOIN tbl t2 on t1.id=t2.id
-             |WHERE  t1.fld3 IN (-123.23,321.23)
-          """.stripMargin)
-        df2.createTempView("TBL2")
-        sql("SELECT * FROM tbl2 WHERE fld3 IN ('qqq', 'qwe')  ").queryExecution.executedPlan
-      }
-    }
-  }
 }