Skip to content

Commit

Permalink
Respect of BQ time format spec (#850)
Browse files Browse the repository at this point in the history
  • Loading branch information
RustedBones committed Oct 26, 2023
1 parent 35de174 commit eeffff2
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 36 deletions.
114 changes: 78 additions & 36 deletions bigquery/src/main/scala/magnolify/bigquery/TimestampConverter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,58 +22,100 @@ import java.time.temporal.ChronoField

// https://github.com/googleapis/java-bigquery/blob/master/google-cloud-bigquery/src/main/java/com/google/cloud/bigquery/QueryParameterValue.java
private object TimestampConverter {
// TIME
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type
// [H]H:[M]M:[S]S[.DDDDDD|.F]
//
// Hour, minute and second each accept one or two digits and never a sign.
// The fractional second is optional and carries at most six digits; the decimal point is
// only expected/emitted when at least one fraction digit is present.
private val timeFormatter = new DateTimeFormatterBuilder()
  .appendValue(ChronoField.HOUR_OF_DAY, 1, 2, SignStyle.NEVER)
  .appendLiteral(':')
  .appendValue(ChronoField.MINUTE_OF_HOUR, 1, 2, SignStyle.NEVER)
  .appendLiteral(':')
  .appendValue(ChronoField.SECOND_OF_MINUTE, 1, 2, SignStyle.NEVER)
  .optionalStart()
  .appendFraction(ChronoField.NANO_OF_SECOND, 0, 6, true)
  // Close the optional section explicitly rather than opening a second, empty one and
  // relying on toFormatter() to end every section that is still open.
  .optionalEnd()
  .toFormatter()

// DATE
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#date_type
// YYYY-[M]M-[D]D
//
// Four-digit year, then month and day of one or two digits each, separated by '-'.
private val dateFormatter = {
  val yearPart = new DateTimeFormatterBuilder().appendValue(ChronoField.YEAR, 4)
  val yearMonthPart = yearPart
    .appendLiteral('-')
    .appendValue(ChronoField.MONTH_OF_YEAR, 1, 2, SignStyle.NEVER)
  val fullDate = yearMonthPart
    .appendLiteral('-')
    .appendValue(ChronoField.DAY_OF_MONTH, 1, 2, SignStyle.NEVER)
  fullDate.toFormatter()
}

// TIMESTAMP
// YYYY-[M]M-[D]D[ [H]H:[M]M:[S]S[.DDDDDD]][time zone]
// NOTE(review): this region looks like a rendered diff whose +/- markers were lost. Lines of a
// former single-builder timestamp formatter and of `timestampValidator` appear interleaved with
// the `civilDatePartFormatter` / `timePartFormatter` definitions, so the span does not read as
// one coherent builder chain. Confirm against the repository before editing any code here.
private val timestampFormatter =
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_type

// civil_date_part YYYY-[M]M-[D]D
// Alias of dateFormatter, named after the grammar term used for the composite formats.
private val civilDatePartFormatter = dateFormatter
// time_part { |T|t}[H]H:[M]M:[S]S[.F]
// Separator followed by the time; the time itself is delegated to timeFormatter via the
// `.append(timeFormatter)` line near the end of this span.
private val timePartFormatter =
new DateTimeFormatterBuilder()
.parseLenient()
.append(DateTimeFormatter.ISO_LOCAL_DATE)
.appendLiteral(' ')
.appendValue(ChronoField.HOUR_OF_DAY, 2)
.appendLiteral(':')
.appendValue(ChronoField.MINUTE_OF_HOUR, 2)
.optionalStart()
.appendLiteral(':')
.appendValue(ChronoField.SECOND_OF_MINUTE, 2)
// padNext(1) followed by an optional, case-insensitive 'T': presumably this lets a single
// space stand in for the separator when T/t is absent — TODO confirm.
.padNext(1)
.optionalStart()
.appendFraction(ChronoField.NANO_OF_SECOND, 6, 9, true)
.optionalStart()
.appendOffset("+HHMM", "+00:00")
// Case-insensitivity is switched on only around the 'T' literal and off again right after it.
.parseCaseInsensitive()
.appendLiteral('T')
.parseCaseSensitive()
.optionalEnd()
.toFormatter()
.withZone(ZoneOffset.UTC)
private val timestampValidator =
.append(timeFormatter)
.toFormatter

// time_zone_offset or utc_time_zone
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#time_zones
// {+|-}H[H][:M[M]] or {Z|z}
//
// Built on the standard offset-id element; case-insensitive matching is enabled only while
// that element is added (so a lower-case 'z' is accepted) and restored afterwards.
private val timeZoneOffsetFormatter = {
  val lenientCase = new DateTimeFormatterBuilder().parseCaseInsensitive()
  val withOffsetId = lenientCase.appendOffsetId()
  withOffsetId.parseCaseSensitive().toFormatter
}

// time_zone
// A region-based zone identifier such as "Europe/London".
private val timeZoneFormatter = {
  val regionBuilder = new DateTimeFormatterBuilder()
  regionBuilder.appendZoneRegionId().toFormatter
}

// timestamp
// {
// civil_date_part[time_part [time_zone]] |
// civil_date_part[time_part[time_zone_offset]] |
// civil_date_part[time_part[utc_time_zone]]
// }
// NOTE(review): this chain appears to mix lines from two revisions (e.g. both `.toFormatter()`
// and `.toFormatter`, both `.appendOffsetId()` and `.append(timeZoneOffsetFormatter)`, and a
// self-referencing `.append(timestampFormatter)`). Confirm against the repository which lines
// are current before changing code here.
// NOTE(review): the grammar above marks time_part as optional, but timePartFormatter is
// appended unconditionally — confirm whether date-only timestamps are meant to be rejected.
private val timestampFormatter =
new DateTimeFormatterBuilder()
.parseLenient()
.append(timestampFormatter)
.append(civilDatePartFormatter)
.append(timePartFormatter)
// Optional offset directly after the time (no separating space).
.optionalStart()
.appendOffsetId()
.append(timeZoneOffsetFormatter)
.optionalEnd()
// Optional zone name, separated from the time by a single space.
.optionalStart()
.appendLiteral(' ')
.append(timeZoneFormatter)
.optionalEnd()
.toFormatter()
.toFormatter
// UTC is set as the formatter's zone; presumably it applies when the text names no zone or
// offset — TODO confirm.
.withZone(ZoneOffset.UTC)

// DATE
// YYYY-[M]M-[D]D
// NOTE(review): pattern-based definition. "MM" and "dd" are fixed two-digit fields, so the
// single-digit month/day forms allowed by the grammar above are not matched. The same name is
// also defined with a builder earlier in this listing — presumably this is the removed side of
// the diff; confirm before relying on it.
private val dateFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd")

// TIME
// [H]H:[M]M:[S]S[.DDDDDD]
// NOTE(review): the pattern requires two-digit fields and exactly six fraction digits, which
// is narrower than the grammar above. Also duplicates a builder-based `timeFormatter`.
private val timeFormatter = DateTimeFormatter.ofPattern("HH:mm:ss.SSSSSS")

// DATETIME
// YYYY-[M]M-[D]D[ [H]H:[M]M:[S]S[.DDDDDD]]
// NOTE(review): the pattern always requires the time part with six fraction digits, although
// the grammar above marks it optional.
private val datetimeFormatter = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss.SSSSSS");
// https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types#datetime_type
// civil_date_part[time_part]
//
// The civil date is mandatory; the time part (separator plus time) may follow.
private val datetimeFormatter = {
  val datePart = new DateTimeFormatterBuilder().append(civilDatePartFormatter)
  datePart.appendOptional(timePartFormatter).toFormatter
}

// Conversions between BigQuery textual values and java.time types.
// Every to* method parses `v.toString`; every from* method returns the formatted text as Any.
// NOTE(review): several methods appear twice because both sides of the diff are present in
// this listing; presumably the DateTimeFormatter.ISO_* variants are the current ones — confirm.

// TIMESTAMP <-> Instant
// Variant parsing with timestampValidator and writing with timestampFormatter.
def toInstant(v: Any): Instant = Instant.from(timestampValidator.parse(v.toString))
def fromInstant(i: Instant): Any = timestampFormatter.format(i)
// Variant parsing with timestampFormatter and writing ISO_INSTANT text.
def toInstant(v: Any): Instant = Instant.from(timestampFormatter.parse(v.toString))
def fromInstant(i: Instant): Any = DateTimeFormatter.ISO_INSTANT.format(i)

// DATE <-> LocalDate: parsed with dateFormatter; written with dateFormatter (first variant)
// or ISO_LOCAL_DATE (second variant).
def toLocalDate(v: Any): LocalDate = LocalDate.from(dateFormatter.parse(v.toString))
def fromLocalDate(d: LocalDate): Any = dateFormatter.format(d)
def fromLocalDate(d: LocalDate): Any = DateTimeFormatter.ISO_LOCAL_DATE.format(d)

// TIME <-> LocalTime: parsed with timeFormatter; written with timeFormatter (first variant)
// or ISO_LOCAL_TIME (second variant).
def toLocalTime(v: Any): LocalTime = LocalTime.from(timeFormatter.parse(v.toString))
def fromLocalTime(t: LocalTime): Any = timeFormatter.format(t)
def fromLocalTime(t: LocalTime): Any = DateTimeFormatter.ISO_LOCAL_TIME.format(t)

// DATETIME <-> LocalDateTime: parsed with datetimeFormatter; written with datetimeFormatter
// (first variant) or ISO_LOCAL_DATE_TIME (second variant).
// NOTE(review): the builder-based datetimeFormatter makes the time part optional, while
// LocalDateTime.from needs a time of day — confirm how date-only input is expected to behave.
def toLocalDateTime(v: Any): LocalDateTime =
LocalDateTime.from(datetimeFormatter.parse(v.toString))
def fromLocalDateTime(dt: LocalDateTime): Any = datetimeFormatter.format(dt)
def fromLocalDateTime(dt: LocalDateTime): Any = DateTimeFormatter.ISO_LOCAL_DATE_TIME.format(dt)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package magnolify.bigquery

import scala.util.control.NonFatal

/**
 * Exercises `TimestampConverter` against textual forms of BigQuery TIMESTAMP, DATE, TIME and
 * DATETIME values.
 *
 * Each test first checks that every listed sample parses without throwing, then checks the text
 * produced by the matching `from*` method for one value.
 */
class TimestampConverterSuite extends munit.ScalaCheckSuite {

  /**
   * Applies `parse` to every sample.
   *
   * Any non-fatal failure is rethrown with the offending input in the message, so a rejected
   * sample can be identified directly from the test report.
   */
  private def assertAllParse(samples: Seq[String])(parse: String => Any): Unit =
    samples.foreach { sample =>
      try {
        parse(sample)
      } catch {
        case NonFatal(e) => throw new Exception(s"Failed parsing $sample", e)
      }
    }

  test("TIMESTAMP") {
    assertAllParse(
      Seq(
        // this should be the default format sent by BQ API
        "2023-01-01 10:11:12.123456 UTC",
        // optional digits
        "2023-01-01 10:11:12.123456",
        "2023-01-01 10:11:12.1234",
        "2023-01-01 10:11:12",
        "2023-1-1 0:1:2",
        // time separator
        "2023-01-01T10:11:12.123456 UTC",
        "2023-01-01t10:11:12.123456 UTC",
        // offsets
        "2023-01-01t10:11:12.123456Z",
        "2023-01-01t10:11:12.123456z",
        "2023-01-01t10:11:12.123456+01:02",
        "2023-01-01t10:11:12.123456-01:02",
        // "2023-01-01t10:11:12.123456+1:2", not possible to get +H:M offset pattern
        // "2023-01-01t10:11:12.123456+1", skipped
        // zone
        "2023-01-01t10:11:12.123456 Europe/London",
        "2023-01-01t10:11:12.123456 UTC+01:02",
        "2023-01-01t10:11:12.123456 UTC-01:02"
        // "2023-01-01t10:11:12.123456 UTC+1:2", not possible to get +H:M offset pattern
        // "2023-01-01t10:11:12.123456 UTC+1", skipped
      )
    )(TimestampConverter.toInstant(_))

    // values are written back in ISO form, not in the space-separated input form
    val javaInstant = TimestampConverter.toInstant("2023-01-01 10:11:12.123456 UTC")
    val actual = TimestampConverter.fromInstant(javaInstant)
    assertEquals(actual, "2023-01-01T10:11:12.123456Z")
  }

  test("DATE") {
    assertAllParse(
      Seq(
        "2023-01-02",
        "2023-01-2",
        "2023-1-2"
      )
    )(TimestampConverter.toLocalDate(_))

    // values are written back in ISO form
    val javaLocalDate = TimestampConverter.toLocalDate("2023-01-02")
    val actual = TimestampConverter.fromLocalDate(javaLocalDate)
    assertEquals(actual, "2023-01-02")
  }

  test("TIME") {
    assertAllParse(
      Seq(
        "10:11:12.123456",
        "10:11:12.1234",
        "10:11:12",
        "0:1:2",
        "0:1:2.123456"
      )
    )(TimestampConverter.toLocalTime(_))

    // values are written back in ISO form
    val javaLocalTime = TimestampConverter.toLocalTime("16:03:57.029881")
    val actual = TimestampConverter.fromLocalTime(javaLocalTime)
    assertEquals(actual, "16:03:57.029881")
  }

  test("DATETIME") {
    assertAllParse(
      Seq(
        // optional digits
        "2023-01-01 10:11:12.123456",
        "2023-01-01 10:11:12.1234",
        "2023-01-01 10:11:12",
        "2023-1-1 0:1:2",
        // time separator
        "2023-01-01T10:11:12.123456",
        "2023-01-01t10:11:12.123456"
      )
    )(TimestampConverter.toLocalDateTime(_))

    // values are written back in ISO form ('T' separator)
    val javaLocalDateTime = TimestampConverter.toLocalDateTime("2023-01-01 10:11:12.123456")
    val actual = TimestampConverter.fromLocalDateTime(javaLocalDateTime)
    assertEquals(actual, "2023-01-01T10:11:12.123456")
  }
}

0 comments on commit eeffff2

Please sign in to comment.