Skip to content

Commit

Permalink
Revert "[SPARK-23799][SQL] FilterEstimation.evaluateInSet produces de…
Browse files Browse the repository at this point in the history
…vision by zero in a case of empty table with analyzed statistics"

This reverts commit c2f4ee7.
  • Loading branch information
gatorsmile committed Apr 23, 2018
1 parent 8eb9a41 commit 1c3e820
Show file tree
Hide file tree
Showing 3 changed files with 0 additions and 43 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -388,10 +388,6 @@ case class FilterEstimation(plan: Filter) extends Logging {
val dataType = attr.dataType
var newNdv = ndv

if (ndv.toDouble == 0 || colStat.min.isEmpty || colStat.max.isEmpty) {
return Some(0.0)
}

// use [min, max] to filter the original hSet
dataType match {
case _: NumericType | BooleanType | DateType | TimestampType =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -355,17 +355,6 @@ class FilterEstimationSuite extends StatsEstimationTestBase {
expectedRowCount = 3)
}

test("evaluateInSet with all zeros") {
validateEstimatedStats(
Filter(InSet(attrString, Set(3, 4, 5)),
StatsTestPlan(Seq(attrString), 0,
AttributeMap(Seq(attrString ->
ColumnStat(distinctCount = Some(0), min = None, max = None,
nullCount = Some(0), avgLen = Some(0), maxLen = Some(0)))))),
Seq(attrString -> ColumnStat(distinctCount = Some(0))),
expectedRowCount = 0)
}

test("cint NOT IN (3, 4, 5)") {
validateEstimatedStats(
Filter(Not(InSet(attrInt, Set(3, 4, 5))), childStatsTestPlan(Seq(attrInt), 10L)),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -372,32 +372,4 @@ class StatisticsCollectionSuite extends StatisticsCollectionTestBase with Shared
}
}
}

test("Simple queries must be working, if CBO is turned on") {
withSQLConf(SQLConf.CBO_ENABLED.key -> "true") {
withTable("TBL1", "TBL") {
import org.apache.spark.sql.functions._
val df = spark.range(1000L).select('id,
'id * 2 as "FLD1",
'id * 12 as "FLD2",
lit("aaa") + 'id as "fld3")
df.write
.mode(SaveMode.Overwrite)
.bucketBy(10, "id", "FLD1", "FLD2")
.sortBy("id", "FLD1", "FLD2")
.saveAsTable("TBL")
sql("ANALYZE TABLE TBL COMPUTE STATISTICS ")
sql("ANALYZE TABLE TBL COMPUTE STATISTICS FOR COLUMNS ID, FLD1, FLD2, FLD3")
val df2 = spark.sql(
"""
|SELECT t1.id, t1.fld1, t1.fld2, t1.fld3
|FROM tbl t1
|JOIN tbl t2 on t1.id=t2.id
|WHERE t1.fld3 IN (-123.23,321.23)
""".stripMargin)
df2.createTempView("TBL2")
sql("SELECT * FROM tbl2 WHERE fld3 IN ('qqq', 'qwe') ").queryExecution.executedPlan
}
}
}
}

0 comments on commit 1c3e820

Please sign in to comment.