Skip to content

Commit

Permalink
Add support for single 'Z' character mapping to GMT in parse_datetime
Browse files Browse the repository at this point in the history
Summary: Single 'Z' character in input string should map to GMT when timezone specifier is supplied.

Reviewed By: pedroerp

Differential Revision: D38563981

fbshipit-source-id: ba8c0126da8a2b048b8b1ee57115c1275317ac1b
  • Loading branch information
Connor Devlin authored and facebook-github-bot committed Aug 10, 2022
1 parent f0cbb46 commit 483aa3f
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 38 deletions.
83 changes: 45 additions & 38 deletions velox/functions/lib/DateTimeFormatter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -243,48 +243,55 @@ int64_t parseTimezoneOffset(const char* cur, const char* end, Date& date) {
// "+00:00"
// 3. '+' or '-' followed by four digits:
// "+0000"
if (cur < end && (*cur == '-' || *cur == '+')) {
// Long format: "+00:00"
if ((end - cur) >= 6 && *(cur + 3) == ':') {
// Fast path for the common case ("+00:00" or "-00:00"), to prevent
// calling getTimeZoneID(), which does a map lookup.
if (std::strncmp(cur + 1, "00:00", 5) == 0) {
date.timezoneId = 0;
} else {
date.timezoneId = util::getTimeZoneID(std::string_view(cur, 6));
if (cur < end) {
if (*cur == '-' || *cur == '+') {
// Long format: "+00:00"
if ((end - cur) >= 6 && *(cur + 3) == ':') {
// Fast path for the common case ("+00:00" or "-00:00"), to prevent
// calling getTimeZoneID(), which does a map lookup.
if (std::strncmp(cur + 1, "00:00", 5) == 0) {
date.timezoneId = 0;
} else {
date.timezoneId = util::getTimeZoneID(std::string_view(cur, 6));
}
return 6;
}
return 6;
}
// Long format without colon: "+0000"
else if ((end - cur) >= 5 && *(cur + 3) != ':') {
// Same fast path described above.
if (std::strncmp(cur + 1, "0000", 4) == 0) {
date.timezoneId = 0;
} else {
// We need to concatenate the 3 first chars with ":" followed by the
// last 2 chars before calling getTimeZoneID, so we use a static
// thread_local buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], cur + 3, 2);
// Long format without colon: "+0000"
else if ((end - cur) >= 5 && *(cur + 3) != ':') {
// Same fast path described above.
if (std::strncmp(cur + 1, "0000", 4) == 0) {
date.timezoneId = 0;
} else {
// We need to concatenate the 3 first chars with ":" followed by the
// last 2 chars before calling getTimeZoneID, so we use a static
// thread_local buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], cur + 3, 2);

date.timezoneId = util::getTimeZoneID(timezoneBuffer);
date.timezoneId = util::getTimeZoneID(timezoneBuffer);
}
return 5;
}
return 5;
}
// Short format: "+00"
else if ((end - cur) >= 3) {
// Same fast path described above.
if (std::strncmp(cur + 1, "00", 2) == 0) {
date.timezoneId = 0;
} else {
// We need to concatenate the 3 first chars with a trailing ":00" before
// calling getTimeZoneID, so we use a static thread_local buffer to
// prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], defaultTrailingOffset, 2);
date.timezoneId = util::getTimeZoneID(timezoneBuffer);
// Short format: "+00"
else if ((end - cur) >= 3) {
// Same fast path described above.
if (std::strncmp(cur + 1, "00", 2) == 0) {
date.timezoneId = 0;
} else {
// We need to concatenate the 3 first chars with a trailing ":00"
// before calling getTimeZoneID, so we use a static thread_local
// buffer to prevent extra allocations.
std::memcpy(&timezoneBuffer[0], cur, 3);
std::memcpy(&timezoneBuffer[4], defaultTrailingOffset, 2);
date.timezoneId = util::getTimeZoneID(timezoneBuffer);
}
return 3;
}
return 3;
}
// Single 'Z' character maps to GMT
else if (*cur == 'Z') {
date.timezoneId = 0;
return 1;
}
}
throw std::runtime_error("Unable to parse timezone offset id.");
Expand Down
9 changes: 9 additions & 0 deletions velox/functions/lib/tests/DateTimeFormatterTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -833,10 +833,14 @@ TEST_F(JodaDateTimeFormatterTest, parseTimezone) {
EXPECT_THROW(parse("+00:", "ZZ"), VeloxUserError);
EXPECT_THROW(parse("+00:0", "ZZ"), VeloxUserError);
EXPECT_THROW(parse("12", "YYZZ"), VeloxUserError);
EXPECT_THROW(parse("ZZ", "Z"), VeloxUserError);
EXPECT_THROW(parse("ZZ", "ZZ"), VeloxUserError);

// GMT
EXPECT_EQ("+00:00", parseTZ("+00:00", "ZZ"));
EXPECT_EQ("+00:00", parseTZ("-00:00", "ZZ"));
EXPECT_EQ("+00:00", parseTZ("Z", "Z"));
EXPECT_EQ("+00:00", parseTZ("Z", "ZZ"));

// Valid long format:
EXPECT_EQ("+00:01", parseTZ("+00:01", "ZZ"));
Expand Down Expand Up @@ -910,6 +914,11 @@ TEST_F(JodaDateTimeFormatterTest, parseMixedYMDFormat) {
result = parseAll("+01332022-03-08+13:00", "ZZYYYY-MM-dd+HH:mm");
EXPECT_EQ(util::fromTimestampString("2022-03-08 13:00:00"), result.timestamp);
EXPECT_EQ("+01:33", util::getTimeZoneName(result.timezoneId));

// Z in the input means GMT in Joda.
EXPECT_EQ(
util::fromTimestampString("2022-07-29 20:03:54.667"),
parse("2022-07-29T20:03:54.667Z", "yyyy-MM-dd'T'HH:mm:ss.SSSZ"));
}

TEST_F(JodaDateTimeFormatterTest, parseMixedWeekFormat) {
Expand Down

0 comments on commit 483aa3f

Please sign in to comment.