Skip to content

Commit

Permalink
Fix Spark WeekFunction on long years (facebookincubator#10713)
Browse files Browse the repository at this point in the history
Summary:
Similar to facebookincubator#10599, in Spark, a long year is any year ending on Thursday and any
leap year ending on Friday. Since the only difference between Presto and Spark
is on return type, this PR extracts the 'getWeek' function to functions/lib and
changed to use 'weeknum' from external/date library for the week calculation.

Pull Request resolved: facebookincubator#10713

Reviewed By: DanielHunte

Differential Revision: D63269551

Pulled By: pedroerp

fbshipit-source-id: cf8cf8b4f2a917d7a7c16a03cc8feed5f3fed5d8
  • Loading branch information
zml1206 authored and facebook-github-bot committed Oct 1, 2024
1 parent fcc1b1c commit 5862a4d
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 97 deletions.
19 changes: 19 additions & 0 deletions velox/functions/lib/TimeUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#include <velox/type/Timestamp.h>
#include "velox/core/QueryConfig.h"
#include "velox/external/date/date.h"
#include "velox/external/date/iso_week.h"
#include "velox/functions/Macros.h"
#include "velox/type/tz/TimeZoneMap.h"

Expand Down Expand Up @@ -92,6 +94,23 @@ FOLLY_ALWAYS_INLINE int32_t getDayOfYear(const std::tm& time) {
return time.tm_yday + 1;
}

FOLLY_ALWAYS_INLINE uint32_t getWeek(
const Timestamp& timestamp,
const tz::TimeZone* timezone,
bool allowOverflow) {
// The computation of ISO week from date follows the algorithm here:
// https://en.wikipedia.org/wiki/ISO_week_date
Timestamp t = timestamp;
if (timezone) {
t.toTimezone(*timezone);
}
const auto timePoint = t.toTimePointMs(allowOverflow);
const auto daysTimePoint = date::floor<date::days>(timePoint);
const date::year_month_day calDate(daysTimePoint);
auto weekNum = date::iso_week::year_weeknum_weekday{calDate}.weeknum();
return (uint32_t)weekNum;
}

template <typename T>
struct InitSessionTimezone {
VELOX_DEFINE_FUNCTION_TYPES(T);
Expand Down
47 changes: 3 additions & 44 deletions velox/functions/prestosql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -196,62 +196,21 @@ struct WeekFunction : public InitSessionTimezone<T>,
public TimestampWithTimezoneSupport<T> {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE int64_t getWeek(const std::tm& time) {
// The computation of ISO week from date follows the algorithm here:
// https://en.wikipedia.org/wiki/ISO_week_date
int64_t week = floor(
10 + (time.tm_yday + 1) -
(time.tm_wday ? time.tm_wday : kDaysInWeek)) /
kDaysInWeek;

if (week == 0) {
// Distance in days between the first day of the current year and the
// Monday of the current week.
auto mondayOfWeek =
time.tm_yday + 1 - (time.tm_wday + kDaysInWeek - 1) % kDaysInWeek;
// Distance in days between the first day and the first Monday of the
// current year.
auto firstMondayOfYear =
1 + (mondayOfWeek + kDaysInWeek - 1) % kDaysInWeek;

// A long year is any year ending on Thursday and any leap year ending on
// Friday.
if ((util::isLeapYear(time.tm_year + 1900 - 1) &&
firstMondayOfYear == 3) ||
firstMondayOfYear == 4) {
week = 53;
} else {
week = 52;
}
} else if (week == 53) {
// Distance in days between the first day of the current year and the
// Monday of the current week.
auto mondayOfWeek =
time.tm_yday + 1 - (time.tm_wday + kDaysInWeek - 1) % kDaysInWeek;
auto daysInYear = util::isLeapYear(time.tm_year + 1900) ? 366 : 365;
if (daysInYear - mondayOfWeek < 3) {
week = 1;
}
}

return week;
}

FOLLY_ALWAYS_INLINE void call(
int64_t& result,
const arg_type<Timestamp>& timestamp) {
result = getWeek(getDateTime(timestamp, this->timeZone_));
result = getWeek(timestamp, this->timeZone_, false);
}

FOLLY_ALWAYS_INLINE void call(int64_t& result, const arg_type<Date>& date) {
result = getWeek(getDateTime(date));
result = getWeek(Timestamp::fromDate(date), nullptr, false);
}

FOLLY_ALWAYS_INLINE void call(
int64_t& result,
const arg_type<TimestampWithTimezone>& timestampWithTimezone) {
auto timestamp = this->toTimestamp(timestampWithTimezone);
result = getWeek(getDateTime(timestamp, nullptr));
result = getWeek(timestamp, nullptr, false);
}
};

Expand Down
12 changes: 6 additions & 6 deletions velox/functions/prestosql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ TEST_F(DateTimeFunctionsTest, week) {
VELOX_USER_FAIL("{}", status.message());
});
auto timestamp =
std::make_optional(Timestamp(ts.getSeconds() * 100'000'000, 0));
std::make_optional(Timestamp(ts.getSeconds() * 100'000, 0));

auto week = evaluateOnce<int64_t>("week(c0)", timestamp).value();
auto weekOfYear =
Expand All @@ -497,11 +497,11 @@ TEST_F(DateTimeFunctionsTest, week) {
};

EXPECT_EQ(1, weekTimestamp("T00:00:00"));
EXPECT_EQ(10, weekTimestamp("T11:59:59"));
EXPECT_EQ(51, weekTimestamp("T06:01:01"));
EXPECT_EQ(24, weekTimestamp("T06:59:59"));
EXPECT_EQ(27, weekTimestamp("T12:00:01"));
EXPECT_EQ(7, weekTimestamp("T12:59:59"));
EXPECT_EQ(47, weekTimestamp("T11:59:59"));
EXPECT_EQ(33, weekTimestamp("T06:01:01"));
EXPECT_EQ(44, weekTimestamp("T06:59:59"));
EXPECT_EQ(47, weekTimestamp("T12:00:01"));
EXPECT_EQ(16, weekTimestamp("T12:59:59"));
}

TEST_F(DateTimeFunctionsTest, weekTimestampWithTimezone) {
Expand Down
47 changes: 1 addition & 46 deletions velox/functions/sparksql/DateTimeFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,53 +64,8 @@ template <typename T>
struct WeekFunction : public InitSessionTimezone<T> {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE int32_t getWeek(const std::tm& time) {
// The computation of ISO week from date follows the algorithm here:
// https://en.wikipedia.org/wiki/ISO_week_date
int32_t week = floor(
10 + (time.tm_yday + 1) -
(time.tm_wday ? time.tm_wday : kDaysInWeek)) /
kDaysInWeek;

if (week == 0) {
// Distance in days between the first day of the current year and the
// Monday of the current week.
auto mondayOfWeek =
time.tm_yday + 1 - (time.tm_wday + kDaysInWeek - 1) % kDaysInWeek;
// Distance in days between the first day and the first Monday of the
// current year.
auto firstMondayOfYear =
1 + (mondayOfWeek + kDaysInWeek - 1) % kDaysInWeek;

if ((util::isLeapYear(time.tm_year + 1900 - 1) &&
firstMondayOfYear == 2) ||
firstMondayOfYear == 3 || firstMondayOfYear == 4) {
week = 53;
} else {
week = 52;
}
} else if (week == 53) {
// Distance in days between the first day of the current year and the
// Monday of the current week.
auto mondayOfWeek =
time.tm_yday + 1 - (time.tm_wday + kDaysInWeek - 1) % kDaysInWeek;
auto daysInYear = util::isLeapYear(time.tm_year + 1900) ? 366 : 365;
if (daysInYear - mondayOfWeek < 3) {
week = 1;
}
}

return week;
}

FOLLY_ALWAYS_INLINE void call(
int32_t& result,
const arg_type<Timestamp>& timestamp) {
result = getWeek(getDateTime(timestamp, this->timeZone_));
}

FOLLY_ALWAYS_INLINE void call(int32_t& result, const arg_type<Date>& date) {
result = getWeek(getDateTime(date));
result = getWeek(Timestamp::fromDate(date), nullptr, false);
}
};

Expand Down
1 change: 0 additions & 1 deletion velox/functions/sparksql/Register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,6 @@ void registerFunctions(const std::string& prefix) {
// Register date functions.
registerFunction<YearFunction, int32_t, Timestamp>({prefix + "year"});
registerFunction<YearFunction, int32_t, Date>({prefix + "year"});
registerFunction<WeekFunction, int32_t, Timestamp>({prefix + "week_of_year"});
registerFunction<WeekFunction, int32_t, Date>({prefix + "week_of_year"});
registerFunction<YearOfWeekFunction, int32_t, Date>(
{prefix + "year_of_week"});
Expand Down
16 changes: 16 additions & 0 deletions velox/functions/sparksql/tests/DateTimeFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,22 @@ TEST_F(DateTimeFunctionsTest, weekOfYear) {
EXPECT_EQ(8, weekOfYear("2008-02-20"));
EXPECT_EQ(15, weekOfYear("2015-04-08"));
EXPECT_EQ(15, weekOfYear("2013-04-08"));

// Test various cases where the last week of the previous year extends into
// the next year.

// Leap year that ends on Thursday.
EXPECT_EQ(53, weekOfYear("2021-01-01"));
// Leap year that ends on Friday.
EXPECT_EQ(53, weekOfYear("2005-01-01"));
// Leap year that ends on Saturday.
EXPECT_EQ(52, weekOfYear("2017-01-01"));
// Common year that ends on Thursday.
EXPECT_EQ(53, weekOfYear("2016-01-01"));
// Common year that ends on Friday.
EXPECT_EQ(52, weekOfYear("2022-01-01"));
// Common year that ends on Saturday.
EXPECT_EQ(52, weekOfYear("2023-01-01"));
}

TEST_F(DateTimeFunctionsTest, unixDate) {
Expand Down
6 changes: 6 additions & 0 deletions velox/type/Timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,12 @@ Timestamp Timestamp::fromDaysAndNanos(int32_t days, int64_t nanos) {
return Timestamp(seconds, remainingNanos);
}

// static
Timestamp Timestamp::fromDate(int32_t date) {
int64_t seconds = (int64_t)date * kSecondsInDay;
return Timestamp(seconds, 0);
}

// static
Timestamp Timestamp::now() {
auto now = std::chrono::system_clock::now();
Expand Down
3 changes: 3 additions & 0 deletions velox/type/Timestamp.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,9 @@ struct Timestamp {
/// and the number of nanoseconds.
static Timestamp fromDaysAndNanos(int32_t days, int64_t nanos);

// date is the number of days since unix epoch.
static Timestamp fromDate(int32_t date);

// Returns the current unix timestamp (ms precision).
static Timestamp now();

Expand Down

0 comments on commit 5862a4d

Please sign in to comment.