Skip to content

Commit

Permalink
ARROW-16741: [C++] Add Benchmarks for Binary Temporal Operations (#13302
Browse files Browse the repository at this point in the history
)

Add all binary temporal benchmarks and documentation to `api_scalar.h`

Authored-by: Ivan Chau <ivan.m.chau@gmail.com>
Signed-off-by: David Li <li.davidm96@gmail.com>
  • Loading branch information
iChauster authored Jun 9, 2022
1 parent a4b14d3 commit 32054f7
Show file tree
Hide file tree
Showing 3 changed files with 219 additions and 0 deletions.
14 changes: 14 additions & 0 deletions cpp/src/arrow/compute/api_scalar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -840,6 +840,20 @@ Result<Datum> Week(const Datum& arg, WeekOptions options, ExecContext* ctx) {
return CallFunction("week", {arg}, &options, ctx);
}

// Eager C++ entry points for the binary temporal "between" functions. Each line
// pairs the public wrapper declared in api_scalar.h with the name string handed to
// SCALAR_EAGER_BINARY. Note that the interval-valued wrappers (MonthsBetween,
// MonthDayNanoBetween, DayTimeBetween) use "*_interval_between" names, which do
// not follow directly from the wrapper name.
SCALAR_EAGER_BINARY(YearsBetween, "years_between")
SCALAR_EAGER_BINARY(QuartersBetween, "quarters_between")
SCALAR_EAGER_BINARY(MonthsBetween, "month_interval_between")
SCALAR_EAGER_BINARY(WeeksBetween, "weeks_between")
SCALAR_EAGER_BINARY(MonthDayNanoBetween, "month_day_nano_interval_between")
SCALAR_EAGER_BINARY(DayTimeBetween, "day_time_interval_between")
SCALAR_EAGER_BINARY(DaysBetween, "days_between")
SCALAR_EAGER_BINARY(HoursBetween, "hours_between")
SCALAR_EAGER_BINARY(MinutesBetween, "minutes_between")
SCALAR_EAGER_BINARY(SecondsBetween, "seconds_between")
SCALAR_EAGER_BINARY(MillisecondsBetween, "milliseconds_between")
SCALAR_EAGER_BINARY(MicrosecondsBetween, "microseconds_between")
SCALAR_EAGER_BINARY(NanosecondsBetween, "nanoseconds_between")

// ----------------------------------------------------------------------
// Structural transforms
Result<Datum> MapLookup(const Datum& arg, MapLookupOptions options, ExecContext* ctx) {
Expand Down
157 changes: 157 additions & 0 deletions cpp/src/arrow/compute/api_scalar.h
Original file line number Diff line number Diff line change
Expand Up @@ -1467,6 +1467,163 @@ ARROW_EXPORT Result<Datum> AssumeTimezone(const Datum& values,
ARROW_EXPORT Result<Datum> IsDaylightSavings(const Datum& values,
ExecContext* ctx = NULLPTR);

/// \brief Years Between finds the number of years between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name "years_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> YearsBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Quarters Between finds the number of quarters between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "quarters_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> QuartersBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Months Between finds the number of months between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "month_interval_between" (not "months_between").
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> MonthsBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Weeks Between finds the number of weeks between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name "weeks_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> WeeksBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Month Day Nano Between finds the number of months, days, and nanoseconds
/// between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "month_day_nano_interval_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> MonthDayNanoBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief DayTime Between finds the number of days and milliseconds between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "day_time_interval_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> DayTimeBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Days Between finds the number of days between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name "days_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> DaysBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Hours Between finds the number of hours between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name "hours_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> HoursBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Minutes Between finds the number of minutes between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "minutes_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> MinutesBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Seconds Between finds the number of seconds between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "seconds_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> SecondsBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Milliseconds Between finds the number of milliseconds between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "milliseconds_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> MillisecondsBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Microseconds Between finds the number of microseconds between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "microseconds_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> MicrosecondsBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Nanoseconds Between finds the number of nanoseconds between two values
///
/// The difference is taken from `left` (start) to `right` (end).
/// Defined in api_scalar.cc via SCALAR_EAGER_BINARY with the name
/// "nanoseconds_between".
///
/// \param[in] left input treated as the start time
/// \param[in] right input treated as the end time
/// \param[in] ctx the function execution context, optional
/// \return the resulting datum
///
/// \since 8.0.0
/// \note API not yet finalized
ARROW_EXPORT Result<Datum> NanosecondsBetween(const Datum& left, const Datum& right,
ExecContext* ctx = NULLPTR);

/// \brief Finds either the FIRST, LAST, or ALL items with a key that matches the given
/// query key in a map.
///
Expand Down
48 changes: 48 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_temporal_benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ void SetArgs(benchmark::internal::Benchmark* bench) {
// Function types of the compute entry points under benchmark. The benchmark
// templates take them by reference as non-type template parameters (e.g.
// `template <BinaryOp& Op, ...>`), so each benchmark is instantiated per function.

// Unary function that additionally takes RoundTemporalOptions.
using UnaryRoundingOp = Result<Datum>(const Datum&, const RoundTemporalOptions,
ExecContext*);
// Unary function with no options argument.
using UnaryOp = Result<Datum>(const Datum&, ExecContext*);
// Binary function taking two Datum inputs (e.g. YearsBetween).
using BinaryOp = Result<Datum>(const Datum&, const Datum&, ExecContext*);

template <UnaryRoundingOp& Op, std::shared_ptr<DataType>& timestamp_type,
RoundTemporalOptions& options>
Expand Down Expand Up @@ -84,6 +85,24 @@ static void BenchmarkTemporal(benchmark::State& state) {
state.SetItemsProcessed(state.iterations() * array_size);
}

// Benchmarks a binary temporal function on two random arrays of the same type.
//
// Template parameters:
//   Op              the binary compute function under test (e.g. YearsBetween)
//   timestamp_type  the temporal type used for both inputs; it must be a
//                   fixed-width type (timestamp, date or time)
//
// The same element count is used both to generate the inputs and to report
// items processed, so the reported throughput matches the work actually done.
template <BinaryOp& Op, std::shared_ptr<DataType>& timestamp_type>
static void BenchmarkTemporalBinary(benchmark::State& state) {
  RegressionArgs args(state);
  ExecContext* ctx = default_exec_context();

  // args.size is divided by an element size, i.e. it is treated as a byte budget.
  // `sizeof(timestamp_type)` would measure the std::shared_ptr handle rather than
  // one value of the type, so take the width from the type itself. Every type this
  // benchmark is registered with is fixed-width, which makes the downcast valid.
  const int64_t value_width =
      static_cast<const FixedWidthType&>(*timestamp_type).bit_width() / 8;
  const int64_t array_size = args.size / value_width;

  auto rand = random::RandomArrayGenerator(kSeed);
  // Generate exactly `array_size` elements per side so that it agrees with the
  // item count reported below.
  auto lhs = rand.ArrayOf(timestamp_type, array_size, args.null_proportion);
  auto rhs = rand.ArrayOf(timestamp_type, array_size, args.null_proportion);

  for (auto _ : state) {
    ABORT_NOT_OK(Op(lhs, rhs, ctx).status());
  }

  state.SetItemsProcessed(state.iterations() * array_size);
}

template <std::shared_ptr<DataType>& timestamp_type>
static void BenchmarkStrftime(benchmark::State& state) {
RegressionArgs args(state);
Expand Down Expand Up @@ -150,6 +169,10 @@ static void BenchmarkAssumeTimezone(benchmark::State& state) {

auto zoned = timestamp(TimeUnit::NANO, "Pacific/Marquesas");
auto non_zoned = timestamp(TimeUnit::NANO);
auto time32_type = time32(TimeUnit::MILLI);
auto time64_type = time64(TimeUnit::NANO);
auto date32_type = date32();
auto date64_type = date64();

#define DECLARE_TEMPORAL_ROUNDING_BENCHMARKS(OPTIONS) \
BENCHMARK_TEMPLATE(BenchmarkTemporalRounding, CeilTemporal, zoned, OPTIONS) \
Expand All @@ -172,6 +195,17 @@ auto non_zoned = timestamp(TimeUnit::NANO);
// Registers BenchmarkTemporal for OP on the `zoned` timestamp type only.
// The expansion already ends with a semicolon.
#define DECLARE_TEMPORAL_BENCHMARKS_ZONED(OP) \
BENCHMARK_TEMPLATE(BenchmarkTemporal, OP, zoned)->Apply(SetArgs);

// Registers BenchmarkTemporalBinary for OP on the timestamp inputs (`non_zoned`,
// `zoned`) and the date inputs (`date64_type`, `date32_type`).
// The expansion already ends with a semicolon.
#define DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(OP) \
BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, non_zoned)->Apply(SetArgs); \
BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, zoned)->Apply(SetArgs); \
BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, date64_type)->Apply(SetArgs); \
BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, date32_type)->Apply(SetArgs);

// Registers everything DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS does
// for OP, plus the time-of-day inputs (`time32_type`, `time64_type`).
#define DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(OP) \
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(OP); \
BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, time32_type)->Apply(SetArgs); \
BENCHMARK_TEMPLATE(BenchmarkTemporalBinary, OP, time64_type)->Apply(SetArgs);

// Temporal rounding benchmarks
auto round_1_minute = RoundTemporalOptions(1, CalendarUnit::MINUTE);
auto round_10_minute = RoundTemporalOptions(10, CalendarUnit::MINUTE);
Expand Down Expand Up @@ -214,5 +248,19 @@ BENCHMARK_TEMPLATE(BenchmarkStrptime, non_zoned)->Apply(SetArgs);
BENCHMARK_TEMPLATE(BenchmarkStrptime, zoned)->Apply(SetArgs);
BENCHMARK(BenchmarkAssumeTimezone)->Apply(SetArgs);

// Binary temporal benchmarks.
// The *_DATES_AND_TIMESTAMPS registrations run on timestamp and date inputs only;
// the *_DATES_TIMES_AND_TIMESTAMPS registrations additionally run on time32/time64
// inputs.
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(YearsBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(QuartersBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(MonthsBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MonthDayNanoBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(WeeksBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(DayTimeBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_AND_TIMESTAMPS(DaysBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(HoursBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MinutesBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(SecondsBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MillisecondsBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(MicrosecondsBetween);
DECLARE_TEMPORAL_BINARY_BENCHMARKS_DATES_TIMES_AND_TIMESTAMPS(NanosecondsBetween);
} // namespace compute
} // namespace arrow

0 comments on commit 32054f7

Please sign in to comment.