Skip to content

Commit

Permalink
Add random test for MinMax
Browse files Browse the repository at this point in the history
Signed-off-by: Frank Du <frank.du@intel.com>
  • Loading branch information
frankdjx committed Aug 13, 2020
1 parent 12e31a5 commit 16fc74f
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 17 deletions.
17 changes: 0 additions & 17 deletions cpp/src/arrow/compute/api_aggregate.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,23 +130,6 @@ Result<Datum> MinMax(const Datum& value,
const MinMaxOptions& options = MinMaxOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Calculate the min / max of a numeric array.
///
/// This function returns both the min and max as a collection. The resulting
/// datum thus consists of two scalar datums: {Datum(min), Datum(max)}
///
/// NOTE(review): test code reads the MinMax result through
/// Datum::scalar_as<StructScalar>(), with min at field 0 and max at field 1;
/// confirm that "collection" here means a struct scalar.
///
/// \param[in] array input array
/// \param[in] options see MinMaxOptions for more information; defaults to
/// MinMaxOptions::Defaults()
/// \param[in] ctx the function execution context, optional (defaults to NULLPTR)
/// \return resulting datum containing a {min, max} collection
///
/// \since 1.0.0
/// \note API not yet finalized
ARROW_EXPORT
Result<Datum> MinMax(const Array& array,
const MinMaxOptions& options = MinMaxOptions::Defaults(),
ExecContext* ctx = NULLPTR);

/// \brief Calculate the modal (most common) value of a numeric array
///
/// This function returns both mode and count as a struct scalar, with type
Expand Down
107 changes: 107 additions & 0 deletions cpp/src/arrow/compute/kernels/aggregate_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -685,5 +685,112 @@ TYPED_TEST(TestFloatingModeKernel, Floats) {
this->AssertModeIsNull("[]");
}

// Pair of C values for the given Arrow type: `first` holds the minimum and
// `second` holds the maximum.
template <typename ArrowType>
using MinMaxResult = std::pair<typename ArrowType::c_type, typename ArrowType::c_type>;

/// \brief Straightforward reference min/max for integer arrays.
///
/// Scans every slot of `array` and tracks the smallest and largest non-null
/// value.
///
/// \param[in] array input array; its dynamic type must be the ArrayType that
///            TypeTraits associates with ArrowType
/// \return {min, max} over the non-null slots, or {0, 0} when the array has no
///         non-null slot (empty or all null)
template <typename ArrowType>
static enable_if_integer<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
    const Array& array) {
  using T = typename ArrowType::c_type;
  using ArrayType = typename TypeTraits<ArrowType>::ArrayType;

  if (array.length() <= array.null_count()) {  // No valid value at all
    return {static_cast<T>(0), static_cast<T>(0)};
  }

  // Downcast with static_cast: converting a base reference to a derived one
  // through reinterpret_cast is not a well-defined hierarchy conversion.
  const auto& array_numeric = static_cast<const ArrayType&>(array);
  const auto values = array_numeric.raw_values();

  // Start from the opposite extremes so the first valid value replaces both.
  T min = std::numeric_limits<T>::max();
  T max = std::numeric_limits<T>::min();
  if (array.null_count() != 0) {  // Some values are null
    // NOTE(review): the bitmap is read starting at array.offset() while
    // `values` is indexed from 0 -- presumably raw_values() already accounts
    // for the offset; confirm.
    internal::BitmapReader reader(array.null_bitmap_data(), array.offset(),
                                  array.length());
    for (int64_t i = 0; i < array.length(); i++) {
      if (reader.IsSet()) {
        min = std::min(min, values[i]);
        max = std::max(max, values[i]);
      }
      reader.Next();
    }
  } else {  // Every slot is valid
    for (int64_t i = 0; i < array.length(); i++) {
      min = std::min(min, values[i]);
      max = std::max(max, values[i]);
    }
  }

  return {min, max};
}

/// \brief Straightforward reference min/max for floating point arrays.
///
/// Scans every slot of `array` and tracks the smallest and largest non-null
/// value using std::fmin / std::fmax, which return the non-NaN operand when
/// exactly one operand is NaN.
///
/// \param[in] array input array; its dynamic type must be the ArrayType that
///            TypeTraits associates with ArrowType
/// \return {min, max} over the non-null slots, or {0, 0} when the array has no
///         non-null slot (empty or all null)
template <typename ArrowType>
static enable_if_floating_point<ArrowType, MinMaxResult<ArrowType>> NaiveMinMax(
    const Array& array) {
  using T = typename ArrowType::c_type;
  using ArrayType = typename TypeTraits<ArrowType>::ArrayType;

  if (array.length() <= array.null_count()) {  // No valid value at all
    return {static_cast<T>(0), static_cast<T>(0)};
  }

  // Downcast with static_cast: converting a base reference to a derived one
  // through reinterpret_cast is not a well-defined hierarchy conversion.
  const auto& array_numeric = static_cast<const ArrayType&>(array);
  const auto values = array_numeric.raw_values();

  // Start from +/- infinity so the first valid value replaces both.
  T min = std::numeric_limits<T>::infinity();
  T max = -std::numeric_limits<T>::infinity();
  if (array.null_count() != 0) {  // Some values are null
    // NOTE(review): the bitmap is read starting at array.offset() while
    // `values` is indexed from 0 -- presumably raw_values() already accounts
    // for the offset; confirm.
    internal::BitmapReader reader(array.null_bitmap_data(), array.offset(),
                                  array.length());
    for (int64_t i = 0; i < array.length(); i++) {
      if (reader.IsSet()) {
        min = std::fmin(min, values[i]);
        max = std::fmax(max, values[i]);
      }
      reader.Next();
    }
  } else {  // Every slot is valid
    for (int64_t i = 0; i < array.length(); i++) {
      min = std::fmin(min, values[i]);
      max = std::fmax(max, values[i]);
    }
  }

  return {min, max};
}

template <typename ArrowType>
void ValidateMinMax(const Array& array) {
using Traits = TypeTraits<ArrowType>;
using ScalarType = typename Traits::ScalarType;

ASSERT_OK_AND_ASSIGN(Datum out, MinMax(array));
const StructScalar& value = out.scalar_as<StructScalar>();

auto expected = NaiveMinMax<ArrowType>(array);
const auto& out_min = checked_cast<const ScalarType&>(*value.value[0]);
ASSERT_EQ(expected.first, out_min.value);

const auto& out_max = checked_cast<const ScalarType&>(*value.value[1]);
ASSERT_EQ(expected.second, out_max.value);
}

// Typed test fixture for the randomized MinMax checks; it carries no state of
// its own and only provides the type parameter to the tests.
template <typename ArrowType>
class TestRandomNumericMinMaxKernel : public ::testing::Test {};

TYPED_TEST_SUITE(TestRandomNumericMinMaxKernel, NumericArrowTypes);

// Cross-checks MinMax against NaiveMinMax on randomly generated arrays of
// several lengths and null densities, using a fixed seed.
TYPED_TEST(TestRandomNumericMinMaxKernel, RandomArrayMinMax) {
  auto generator = random::RandomArrayGenerator(0x8afc055);
  // Base lengths are 2^3, 2^5, ..., 2^13 (8192); each one is nudged by -2..+2
  // so lengths just below and just above each power of two are covered.
  for (const int log2_length : {3, 5, 7, 9, 11, 13}) {
    const int64_t base_length = int64_t{1} << log2_length;
    for (const double null_probability : {0.0, 0.001, 0.1, 0.5, 0.999, 1.0}) {
      for (const int length_adjust : {-2, -1, 0, 1, 2}) {
        const int64_t length = base_length + length_adjust;
        auto array = generator.Numeric<TypeParam>(length, 0, 100, null_probability);
        ValidateMinMax<TypeParam>(*array);
      }
    }
  }
}

} // namespace compute
} // namespace arrow

0 comments on commit 16fc74f

Please sign in to comment.