From 16fc74f4db4654e49e26bf03ed822568f31ab8b2 Mon Sep 17 00:00:00 2001
From: Frank Du
Date: Tue, 14 Jul 2020 01:19:10 +0000
Subject: [PATCH] Add random test for MinMax

Signed-off-by: Frank Du
---
 cpp/src/arrow/compute/api_aggregate.h         |  17 ---
 .../arrow/compute/kernels/aggregate_test.cc   | 100 ++++++++++++++++++
 2 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/cpp/src/arrow/compute/api_aggregate.h b/cpp/src/arrow/compute/api_aggregate.h
index 5ae3cf9b5fe08..675bd25a6845b 100644
--- a/cpp/src/arrow/compute/api_aggregate.h
+++ b/cpp/src/arrow/compute/api_aggregate.h
@@ -130,23 +130,6 @@ Result<Datum> MinMax(const Datum& value,
                      const MinMaxOptions& options = MinMaxOptions::Defaults(),
                      ExecContext* ctx = NULLPTR);
 
-/// \brief Calculate the min / max of a numeric array.
-///
-/// This function returns both the min and max as a collection. The resulting
-/// datum thus consists of two scalar datums: {Datum(min), Datum(max)}
-///
-/// \param[in] array input array
-/// \param[in] options see MinMaxOptions for more information
-/// \param[in] ctx the function execution context, optional
-/// \return resulting datum containing a {min, max} collection
-///
-/// \since 1.0.0
-/// \note API not yet finalized
-ARROW_EXPORT
-Result<Datum> MinMax(const Array& array,
-                     const MinMaxOptions& options = MinMaxOptions::Defaults(),
-                     ExecContext* ctx = NULLPTR);
-
 /// \brief Calculate the modal (most common) value of a numeric array
 ///
 /// This function returns both mode and count as a struct scalar, with type
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index e51579b7710e8..a22087e50ce69 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -685,5 +685,105 @@ TYPED_TEST(TestFloatingModeKernel, Floats) {
   this->AssertModeIsNull("[]");
 }
 
+// {min, max} pair holding values of the Arrow type's C type.
+template <typename ArrowType>
+using MinMaxResult = std::pair<typename ArrowType::c_type, typename ArrowType::c_type>;
+
+// Naive reference min/max for integer arrays, used to cross-check the MinMax kernel.
+// Returns {min, max} over the non-null values, or {0, 0} when there are none.
+template <typename ArrowType>
+static enable_if_integer<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
+    const Array& array) {
+  using T = typename ArrowType::c_type;
+  using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
+
+  // Downcast to the concrete array type to reach the raw value buffer.
+  const auto& array_numeric = checked_cast<const ArrayType&>(array);
+  const auto values = array_numeric.raw_values();
+
+  if (array.length() <= array.null_count()) {  // All null values
+    return {static_cast<T>(0), static_cast<T>(0)};
+  }
+
+  T min = std::numeric_limits<T>::max();
+  T max = std::numeric_limits<T>::lowest();
+  for (int64_t i = 0; i < array.length(); i++) {
+    if (array.IsValid(i)) {  // Skip null slots
+      min = std::min(min, values[i]);
+      max = std::max(max, values[i]);
+    }
+  }
+
+  return {min, max};
+}
+
+// Naive reference min/max for floating point arrays; same contract as the integer
+// overload. Uses std::fmin/std::fmax, which return the non-NaN operand when exactly
+// one operand is NaN.
+template <typename ArrowType>
+static enable_if_floating_point<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
+    const Array& array) {
+  using T = typename ArrowType::c_type;
+  using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
+
+  const auto& array_numeric = checked_cast<const ArrayType&>(array);
+  const auto values = array_numeric.raw_values();
+
+  if (array.length() <= array.null_count()) {  // All null values
+    return {static_cast<T>(0), static_cast<T>(0)};
+  }
+
+  T min = std::numeric_limits<T>::infinity();
+  T max = -std::numeric_limits<T>::infinity();
+  for (int64_t i = 0; i < array.length(); i++) {
+    if (array.IsValid(i)) {  // Skip null slots
+      min = std::fmin(min, values[i]);
+      max = std::fmax(max, values[i]);
+    }
+  }
+
+  return {min, max};
+}
+
+// Asserts that the MinMax kernel output for `array` equals the NaiveMinMax reference.
+// NOTE(review): for an array without valid values the reference is {0, 0}; this relies
+// on the kernel's output scalars holding 0 in that case -- confirm against the kernel.
+template <typename ArrowType>
+void ValidateMinMax(const Array& array) {
+  using Traits = TypeTraits<ArrowType>;
+  using ScalarType = typename Traits::ScalarType;
+
+  ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array));
+  const StructScalar& value = out.scalar_as<StructScalar>();
+
+  auto expected = NaiveMinMax<ArrowType>(array);
+  const auto& out_min = checked_cast<const ScalarType&>(*value.value[0]);
+  ASSERT_EQ(expected.first, out_min.value);
+
+  const auto& out_max = checked_cast<const ScalarType&>(*value.value[1]);
+  ASSERT_EQ(expected.second, out_max.value);
+}
+
+template <typename T>
+class TestRandomNumericMinMaxKernel : public ::testing::Test {};
+
+TYPED_TEST_SUITE(TestRandomNumericMinMaxKernel, NumericArrowTypes);
+// Cross-checks MinMax against NaiveMinMax on random arrays, per NumericArrowTypes type.
+TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
+  auto rand = random::RandomArrayGenerator(0x8afc055);
+  // Lengths of 2^i - 2 .. 2^i + 2 for i in {3, 5, ..., 13}, i.e. up to 8194 elements.
+  for (int i = 3; i < 14; i += 2) {
+    for (auto null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
+      for (auto length_adjust : {-2, -1, 0, 1, 2}) {
+        // Signed 64-bit arithmetic throughout: `1UL << i` would mix unsigned and
+        // negative signed operands.
+        const int64_t length = (int64_t{1} << i) + length_adjust;
+        auto array = rand.Numeric<TypeParam>(length, 0, 100, null_probability);
+        ValidateMinMax<TypeParam>(*array);
+      }
+    }
+  }
+}
+
 }  // namespace compute
 }  // namespace arrow