Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-26503][CORE] Get rid of spark.sql.legacy.timeParser.enabled #23411

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,6 @@ package org.apache.spark.sql.catalyst.util
import java.time.{Instant, ZoneId}
import java.util.Locale

import scala.util.Try

import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.sql.internal.SQLConf

sealed trait DateFormatter extends Serializable {
def parse(s: String): Int // returns days since epoch
def format(days: Int): String
Expand Down Expand Up @@ -56,43 +50,8 @@ class Iso8601DateFormatter(
}
}

/**
 * Date formatter backed by Apache Commons `FastDateFormat`, preserving the
 * pre-`java.time` parsing/formatting behavior.
 */
class LegacyDateFormatter(pattern: String, locale: Locale) extends DateFormatter {
  // Not serialized with the formatter; rebuilt lazily on first use after deserialization.
  @transient
  private lazy val fastFormat = FastDateFormat.getInstance(pattern, locale)

  /** Parses `s` with the legacy pattern and converts the result to days since epoch. */
  override def parse(s: String): Int =
    DateTimeUtils.millisToDays(fastFormat.parse(s).getTime)

  /** Renders an epoch-day count as a string using the legacy pattern. */
  override def format(days: Int): String =
    fastFormat.format(DateTimeUtils.toJavaDate(days))
}

/**
 * [[LegacyDateFormatter]] with the fallback chain used by Spark 2.0/1.x:
 * pattern parse, then lenient `stringToTime` parse, then raw epoch-day integer.
 */
class LegacyFallbackDateFormatter(
    pattern: String,
    locale: Locale) extends LegacyDateFormatter(pattern, locale) {
  override def parse(s: String): Int = {
    val byPattern = Try(super.parse(s))
    // Second attempt mirrors the parsing used in 2.0 and 1.x for backwards
    // compatibility with data written by those versions.
    val withCompat = byPattern.orElse(
      Try(DateTimeUtils.millisToDays(DateTimeUtils.stringToTime(s).getTime)))
    // Spark 1.5.0 stored dates as the number of days since epoch, serialized
    // as a plain integer string — accept that as the last resort.
    withCompat.getOrElse(s.toInt)
  }
}

object DateFormatter {
def apply(format: String, locale: Locale): DateFormatter = {
if (SQLConf.get.legacyTimeParserEnabled) {
new LegacyFallbackDateFormatter(format, locale)
} else {
new Iso8601DateFormatter(format, locale)
}
new Iso8601DateFormatter(format, locale)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,6 @@ import java.time.format.DateTimeParseException
import java.time.temporal.TemporalQueries
import java.util.{Locale, TimeZone}

import scala.util.Try

import org.apache.commons.lang3.time.FastDateFormat

import org.apache.spark.sql.internal.SQLConf

sealed trait TimestampFormatter extends Serializable {
/**
* Parses a timestamp in a string and converts it to microseconds.
Expand Down Expand Up @@ -79,37 +73,8 @@ class Iso8601TimestampFormatter(
}
}

/**
 * Timestamp formatter backed by Apache Commons `FastDateFormat`, preserving
 * the pre-`java.time` behavior for the given pattern, time zone and locale.
 */
class LegacyTimestampFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends TimestampFormatter {
  // Excluded from serialization; rebuilt lazily on first use.
  @transient
  private lazy val fastFormat = FastDateFormat.getInstance(pattern, timeZone, locale)

  /** Parses `s` to epoch milliseconds; subclasses may override to add fallbacks. */
  protected def toMillis(s: String): Long = fastFormat.parse(s).getTime

  /** Parses `s` and converts the millisecond result to microseconds. */
  override def parse(s: String): Long = toMillis(s) * DateTimeUtils.MICROS_PER_MILLIS

  /** Formats a microsecond-precision timestamp using the legacy pattern. */
  override def format(us: Long): String =
    fastFormat.format(DateTimeUtils.toJavaTimestamp(us))
}

/**
 * [[LegacyTimestampFormatter]] that falls back to the lenient `stringToTime`
 * parser when the configured pattern does not match the input.
 */
class LegacyFallbackTimestampFormatter(
    pattern: String,
    timeZone: TimeZone,
    locale: Locale) extends LegacyTimestampFormatter(pattern, timeZone, locale) {
  override def toMillis(s: String): Long = {
    val strict = Try(super.toMillis(s))
    strict.getOrElse(DateTimeUtils.stringToTime(s).getTime)
  }
}

object TimestampFormatter {
def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = {
if (SQLConf.get.legacyTimeParserEnabled) {
new LegacyFallbackTimestampFormatter(format, timeZone, locale)
} else {
new Iso8601TimestampFormatter(format, timeZone, locale)
}
new Iso8601TimestampFormatter(format, timeZone, locale)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1632,13 +1632,6 @@ object SQLConf {
"a SparkConf entry.")
.booleanConf
.createWithDefault(true)

val LEGACY_TIME_PARSER_ENABLED = buildConf("spark.sql.legacy.timeParser.enabled")
.doc("When set to true, java.text.SimpleDateFormat is used for formatting and parsing " +
" dates/timestamps in a locale-sensitive manner. When set to false, classes from " +
"java.time.* packages are used for the same purpose.")
.booleanConf
.createWithDefault(false)
}

/**
Expand Down Expand Up @@ -2064,8 +2057,6 @@ class SQLConf extends Serializable with Logging {
def setCommandRejectsSparkCoreConfs: Boolean =
getConf(SQLConf.SET_COMMAND_REJECTS_SPARK_CORE_CONFS)

def legacyTimeParserEnabled: Boolean = getConf(SQLConf.LEGACY_TIME_PARSER_ENABLED)

/** ********************** SQLConf functionality methods ************ */

/** Set Spark SQL configuration properties. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ import com.fasterxml.jackson.core.JsonFactory

import org.apache.spark.SparkFunSuite
import org.apache.spark.sql.catalyst.plans.SQLHelper
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._

class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper {
Expand All @@ -43,60 +42,44 @@ class JsonInferSchemaSuite extends SparkFunSuite with SQLHelper {
}

test("inferring timestamp type") {
Seq(true, false).foreach { legacyParser =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
checkTimestampType("yyyy", """{"a": "2018"}""")
checkTimestampType("yyyy=MM", """{"a": "2018=12"}""")
checkTimestampType("yyyy MM dd", """{"a": "2018 12 02"}""")
checkTimestampType(
"yyyy-MM-dd'T'HH:mm:ss.SSS",
"""{"a": "2018-12-02T21:04:00.123"}""")
checkTimestampType(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX",
"""{"a": "2018-12-02T21:04:00.123567+01:00"}""")
}
}
checkTimestampType("yyyy", """{"a": "2018"}""")
checkTimestampType("yyyy=MM", """{"a": "2018=12"}""")
checkTimestampType("yyyy MM dd", """{"a": "2018 12 02"}""")
checkTimestampType(
"yyyy-MM-dd'T'HH:mm:ss.SSS",
"""{"a": "2018-12-02T21:04:00.123"}""")
checkTimestampType(
"yyyy-MM-dd'T'HH:mm:ss.SSSSSSXXX",
"""{"a": "2018-12-02T21:04:00.123567+01:00"}""")
}

test("prefer decimals over timestamps") {
Seq(true, false).foreach { legacyParser =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
checkType(
options = Map(
"prefersDecimal" -> "true",
"timestampFormat" -> "yyyyMMdd.HHmmssSSS"
),
json = """{"a": "20181202.210400123"}""",
dt = DecimalType(17, 9)
)
}
}
checkType(
options = Map(
"prefersDecimal" -> "true",
"timestampFormat" -> "yyyyMMdd.HHmmssSSS"
),
json = """{"a": "20181202.210400123"}""",
dt = DecimalType(17, 9)
)
}

test("skip decimal type inferring") {
Seq(true, false).foreach { legacyParser =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
checkType(
options = Map(
"prefersDecimal" -> "false",
"timestampFormat" -> "yyyyMMdd.HHmmssSSS"
),
json = """{"a": "20181202.210400123"}""",
dt = TimestampType
)
}
}
checkType(
options = Map(
"prefersDecimal" -> "false",
"timestampFormat" -> "yyyyMMdd.HHmmssSSS"
),
json = """{"a": "20181202.210400123"}""",
dt = TimestampType
)
}

test("fallback to string type") {
Seq(true, false).foreach { legacyParser =>
withSQLConf(SQLConf.LEGACY_TIME_PARSER_ENABLED.key -> legacyParser.toString) {
checkType(
options = Map("timestampFormat" -> "yyyy,MM,dd.HHmmssSSS"),
json = """{"a": "20181202.210400123"}""",
dt = StringType
)
}
}
checkType(
options = Map("timestampFormat" -> "yyyy,MM,dd.HHmmssSSS"),
json = """{"a": "20181202.210400123"}""",
dt = StringType
)
}
}
Loading