-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-31762][SQL] Fix perf regression of date/timestamp formatting in toHiveString #28582
Changes from 10 commits
2545366
b325c21
af816e6
d8b0ed4
6d8c2ae
1b2bc2f
bfae42e
3914313
864efdd
29d8e51
5222a0a
31e8ffd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -50,7 +50,10 @@ sealed trait TimestampFormatter extends Serializable { | |
@throws(classOf[DateTimeParseException]) | ||
@throws(classOf[DateTimeException]) | ||
def parse(s: String): Long | ||
|
||
def format(us: Long): String | ||
def format(ts: Timestamp): String | ||
def format(instant: Instant): String | ||
} | ||
|
||
class Iso8601TimestampFormatter( | ||
|
@@ -84,9 +87,17 @@ class Iso8601TimestampFormatter( | |
} | ||
} | ||
|
||
override def format(instant: Instant): String = { | ||
formatter.withZone(zoneId).format(instant) | ||
} | ||
|
||
override def format(us: Long): String = { | ||
val instant = DateTimeUtils.microsToInstant(us) | ||
formatter.withZone(zoneId).format(instant) | ||
format(instant) | ||
} | ||
|
||
override def format(ts: Timestamp): String = { | ||
legacyFormatter.format(ts) | ||
} | ||
} | ||
|
||
|
@@ -100,10 +111,26 @@ class Iso8601TimestampFormatter( | |
*/ | ||
class FractionTimestampFormatter(zoneId: ZoneId) | ||
extends Iso8601TimestampFormatter( | ||
"", zoneId, TimestampFormatter.defaultLocale, needVarLengthSecondFraction = false) { | ||
TimestampFormatter.defaultPattern, | ||
zoneId, | ||
TimestampFormatter.defaultLocale, | ||
needVarLengthSecondFraction = false) { | ||
|
||
@transient | ||
override protected lazy val formatter = DateTimeFormatterHelper.fractionFormatter | ||
|
||
// Converts Timestamp to string according to Hive TimestampWritable convention. | ||
// The code is borrowed from Spark 2.4 DateTimeUtils.timestampToString | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. is it really needed? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The reason of making this PR is to pass java.sql.Timestamp to a legacy formatter which can accept the type but the legacy formatter of fractionFormatter is SimpleDateFormat which cannot omit tailing 0.
I think so. What do you propose instead of it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's make the comment more clear:
We don't need to mention hive at all. This is just for internal consistency. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks. I updated the comment. |
||
override def format(ts: Timestamp): String = { | ||
val timestampString = ts.toString | ||
val formatted = legacyFormatter.format(ts) | ||
|
||
if (timestampString.length > 19 && timestampString.substring(19) != ".0") { | ||
formatted + timestampString.substring(19) | ||
} else { | ||
formatted | ||
} | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -149,7 +176,7 @@ class LegacyFastTimestampFormatter( | |
fastDateFormat.getTimeZone, | ||
fastDateFormat.getPattern.count(_ == 'S')) | ||
|
||
def parse(s: String): SQLTimestamp = { | ||
override def parse(s: String): SQLTimestamp = { | ||
cal.clear() // Clear the calendar because it can be re-used many times | ||
if (!fastDateFormat.parse(s, new ParsePosition(0), cal)) { | ||
throw new IllegalArgumentException(s"'$s' is an invalid timestamp") | ||
|
@@ -160,12 +187,20 @@ class LegacyFastTimestampFormatter( | |
rebaseJulianToGregorianMicros(julianMicros) | ||
} | ||
|
||
def format(timestamp: SQLTimestamp): String = { | ||
override def format(timestamp: SQLTimestamp): String = { | ||
val julianMicros = rebaseGregorianToJulianMicros(timestamp) | ||
cal.setTimeInMillis(Math.floorDiv(julianMicros, MICROS_PER_SECOND) * MILLIS_PER_SECOND) | ||
cal.setMicros(Math.floorMod(julianMicros, MICROS_PER_SECOND)) | ||
fastDateFormat.format(cal) | ||
} | ||
|
||
override def format(ts: Timestamp): String = { | ||
format(fromJavaTimestamp(ts)) | ||
} | ||
|
||
override def format(instant: Instant): String = { | ||
format(instantToMicros(instant)) | ||
} | ||
} | ||
|
||
class LegacySimpleTimestampFormatter( | ||
|
@@ -187,6 +222,14 @@ class LegacySimpleTimestampFormatter( | |
override def format(us: Long): String = { | ||
sdf.format(toJavaTimestamp(us)) | ||
} | ||
|
||
override def format(ts: Timestamp): String = { | ||
sdf.format(ts) | ||
} | ||
|
||
override def format(instant: Instant): String = { | ||
format(instantToMicros(instant)) | ||
} | ||
} | ||
|
||
object LegacyDateFormats extends Enumeration { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what's wrong if we still pass the empty string as the pattern?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
empty result
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok, it's used by the legacy formatter.