-
Notifications
You must be signed in to change notification settings - Fork 28.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-26456][SQL] Cast date/timestamp to string by Date/TimestampFormatter #23391
Changes from all commits
e09c972
697688a
fcffc12
f0a9fe7
5f0b0a3
17a32a3
9d90d3f
038ad80
f6308f6
9f85ac6
d348c07
36c9f9a
4580c25
56bdae4
8541602
ecf0e89
c3066e1
eed40d7
94cad6a
0de72c8
1156291
5ae58a3
78c3961
2397401
206c955
483e95b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,7 @@ sealed trait TimestampFormatter extends Serializable { | |
@throws(classOf[ParseException]) | ||
@throws(classOf[DateTimeParseException]) | ||
@throws(classOf[DateTimeException]) | ||
def parse(s: String): Long // returns microseconds since epoch | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why remove this comment? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. because it duplicates method description above. |
||
def parse(s: String): Long | ||
def format(us: Long): String | ||
} | ||
|
||
|
@@ -74,7 +74,18 @@ class Iso8601TimestampFormatter( | |
} | ||
|
||
object TimestampFormatter { | ||
val defaultPattern: String = "yyyy-MM-dd HH:mm:ss" | ||
val defaultLocale: Locale = Locale.US | ||
|
||
def apply(format: String, timeZone: TimeZone, locale: Locale): TimestampFormatter = { | ||
new Iso8601TimestampFormatter(format, timeZone, locale) | ||
} | ||
|
||
def apply(format: String, timeZone: TimeZone): TimestampFormatter = { | ||
apply(format, timeZone, defaultLocale) | ||
} | ||
|
||
def apply(timeZone: TimeZone): TimestampFormatter = { | ||
apply(defaultPattern, timeZone, defaultLocale) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ import java.nio.charset.StandardCharsets | |
import java.sql.{Date, Timestamp} | ||
|
||
import org.apache.spark.sql.Row | ||
import org.apache.spark.sql.catalyst.util.DateTimeUtils | ||
import org.apache.spark.sql.catalyst.util.{DateFormatter, DateTimeUtils, TimestampFormatter} | ||
import org.apache.spark.sql.execution.command.{DescribeTableCommand, ExecutedCommandExec, ShowTablesCommand} | ||
import org.apache.spark.sql.internal.SQLConf | ||
import org.apache.spark.sql.types._ | ||
|
@@ -77,6 +77,10 @@ object HiveResult { | |
TimestampType, | ||
BinaryType) | ||
|
||
private lazy val dateFormatter = DateFormatter() | ||
private lazy val timestampFormatter = TimestampFormatter( | ||
DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone)) | ||
|
||
/** Hive outputs fields of structs slightly differently than top level attributes. */ | ||
private def toHiveStructString(a: (Any, DataType)): String = a match { | ||
case (struct: Row, StructType(fields)) => | ||
|
@@ -111,11 +115,9 @@ object HiveResult { | |
toHiveStructString((key, kType)) + ":" + toHiveStructString((value, vType)) | ||
}.toSeq.sorted.mkString("{", ",", "}") | ||
case (null, _) => "NULL" | ||
case (d: Date, DateType) => | ||
DateTimeUtils.dateToString(DateTimeUtils.fromJavaDate(d)) | ||
case (d: Date, DateType) => dateFormatter.format(DateTimeUtils.fromJavaDate(d)) | ||
case (t: Timestamp, TimestampType) => | ||
val timeZone = DateTimeUtils.getTimeZone(SQLConf.get.sessionLocalTimeZone) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @MaxGekk @cloud-fan by moving getting the timezone from here to a lazy val in the object, it will be initialized only once by the first session that uses it. Another session with a different sessionLocalTimeZone set will get results in wrong timezone. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @juliuszsompolski Thank you for the bug report. I will fix the issue. I think it is ok to create formatters in place because they can be pulled from caches. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. good point! There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here is the PR #28024 |
||
DateTimeUtils.timestampToString(DateTimeUtils.fromJavaTimestamp(t), timeZone) | ||
DateTimeUtils.timestampToString(timestampFormatter, DateTimeUtils.fromJavaTimestamp(t)) | ||
case (bin: Array[Byte], BinaryType) => new String(bin, StandardCharsets.UTF_8) | ||
case (decimal: java.math.BigDecimal, DecimalType()) => formatDecimal(decimal) | ||
case (interval, CalendarIntervalType) => interval.toString | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Both formatters seems to use thread safe implementations. You could consider just returning cached instances here.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At the moment, both formatters are created per partition at least not per row. Do you think it makes sense to cache them?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, lets leave it for now.