Skip to content

Commit

Permalink
Assert the output of the REExREE filtering kernel
Browse files Browse the repository at this point in the history
  • Loading branch information
felipecrv committed May 16, 2023
1 parent 5208210 commit b0dbea8
Showing 1 changed file with 150 additions and 12 deletions.
162 changes: 150 additions & 12 deletions cpp/src/arrow/compute/kernels/vector_run_end_selection_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

#include "arrow/testing/gtest_util.h"

#include "arrow/array/array_run_end.h"
#include "arrow/array/concatenate.h"
#include "arrow/array/data.h"
#include "arrow/array/util.h"
Expand Down Expand Up @@ -54,20 +55,55 @@ std::unique_ptr<REEFilterExec> MakeREEFilterExec(const ArraySpan& values,
return exec;
}

Result<std::shared_ptr<Array>> REEFromJson(const std::shared_ptr<DataType>& run_end_type,
const std::shared_ptr<DataType>& value_type,
/// \brief Test-side description of a run-end encoded array: its logical length
/// together with the JSON text of its run-ends and values children.
struct REERep {
  int64_t logical_length;
  std::string run_ends_json;
  std::string values_json;

  /// Store the length and take ownership of both JSON strings.
  REERep(int64_t logical_length, std::string run_ends_json, std::string values_json)
      : logical_length(logical_length),
        run_ends_json(std::move(run_ends_json)),
        values_json(std::move(values_json)) {}

  /// Default representation: zero length with "[]" for both children.
  REERep() : REERep(0, "[]", "[]") {}

  /// Marker value whose JSON strings are empty (not "[]"), so it never compares
  /// equal to a default-constructed REERep.
  static REERep None() { return REERep(0, std::string(), std::string()); }

  /// Field-by-field equality on the length and on the raw JSON text.
  bool operator==(const REERep& other) const {
    if (logical_length != other.logical_length) {
      return false;
    }
    if (run_ends_json != other.run_ends_json) {
      return false;
    }
    return values_json == other.values_json;
  }
};

/// \brief Build a run-end encoded array from the JSON of its logical values.
///
/// `json` is parsed as a plain array and then passed through RunEndEncode;
/// the resulting datum is returned as an array. The value type and the
/// run-end type are both taken from `ree_type`, which is cast to
/// RunEndEncodedType with checked_cast.
/// NOTE(review): presumably callers must pass a run-end encoded type -- confirm.
Result<std::shared_ptr<Array>> REEFromJson(const std::shared_ptr<DataType>& ree_type,
const std::string& json) {
auto array = ArrayFromJSON(value_type, json);
ARROW_ASSIGN_OR_RAISE(auto datum,
RunEndEncode(array, RunEndEncodeOptions{run_end_type}));
auto ree_type_ptr = checked_cast<const RunEndEncodedType*>(ree_type.get());
auto array = ArrayFromJSON(ree_type_ptr->value_type(), json);
ARROW_ASSIGN_OR_RAISE(
auto datum, RunEndEncode(array, RunEndEncodeOptions{ree_type_ptr->run_end_type()}));
return datum.make_array();
}

/// \brief Assemble a run-end encoded array directly from its physical parts.
///
/// The run-ends and values children are parsed from JSON using the child types
/// of `ree_type`, then combined with `logical_length` by RunEndEncodedArray::Make.
Result<std::shared_ptr<Array>> REEFromJson(const std::shared_ptr<DataType>& ree_type,
                                           int64_t logical_length,
                                           const std::string& run_ends_json,
                                           const std::string& values_json) {
  const auto* encoded_type = checked_cast<const RunEndEncodedType*>(ree_type.get());
  const auto& run_end_type = encoded_type->run_end_type();
  const auto& value_type = encoded_type->value_type();
  auto run_ends_child = ArrayFromJSON(run_end_type, run_ends_json);
  auto values_child = ArrayFromJSON(value_type, values_json);
  return RunEndEncodedArray::Make(logical_length, run_ends_child, values_child);
}

/// \brief Build a run-end encoded array of type `ree_type` from a REERep by
/// forwarding its logical length and both JSON strings to REEFromJson.
Result<std::shared_ptr<Array>> REEFromRep(const std::shared_ptr<DataType>& ree_type,
                                          const REERep& rep) {
  const int64_t length = rep.logical_length;
  const std::string& run_ends = rep.run_ends_json;
  const std::string& values = rep.values_json;
  return REEFromJson(ree_type, length, run_ends, values);
}

Result<std::shared_ptr<Array>> FilterFromJson(
const std::shared_ptr<DataType>& filter_type, const std::string& json) {
if (filter_type->id() == Type::RUN_END_ENCODED) {
auto& ree_type = checked_cast<const RunEndEncodedType&>(*filter_type);
return REEFromJson(ree_type.run_end_type(), ree_type.value_type(), json);
return REEFromJson(filter_type, json);
} else {
return ArrayFromJSON(filter_type, json);
}
Expand Down Expand Up @@ -141,6 +177,60 @@ void DoAssertOutputSize(const std::shared_ptr<Array>& values,
expected_physical_output_size);
}

/// \brief Run the REE filter exec on `values`/`filter` and compare its output
/// against `expected`.
///
/// An exec is created with MakeREEFilterExec for the given inputs and
/// `null_options`; it writes into a freshly made zero-length ArrayData of the
/// values' type, and the array made from that data is compared to `expected`.
void DoAssertFilterOutput(const std::shared_ptr<Array>& values,
                          const std::shared_ptr<Array>& filter,
                          const FilterOptions& null_options,
                          const std::shared_ptr<Array>& expected) {
  // The spans are kept as named locals so they stay alive while the exec runs.
  auto values_span = ArraySpan(*values->data());
  auto filter_span = ArraySpan(*filter->data());
  auto filter_exec = MakeREEFilterExec(values_span, filter_span, null_options);

  // Zero-length placeholder with a single null buffer that is handed to Exec().
  auto output = ArrayData::Make(values->type(), 0, {nullptr});
  ASSERT_OK(filter_exec->Exec(output.get()));
  auto output_array = MakeArray(output);
  // Arrow's array assertion takes (expected, actual); passing them in that order
  // keeps the two sides correctly labelled in the failure message.
  ASSERT_ARRAYS_EQUAL(*expected, *output_array);
}

void DoAssertFilterSlicedOutput(const std::shared_ptr<Array>& values,
const std::shared_ptr<Array>& filter,
const FilterOptions& null_options,
const std::shared_ptr<Array>& expected) {
constexpr auto M = 3;
constexpr auto N = 2;
// Check slicing: add M dummy values at the start and end of `values`,
// add N dummy values at the start and end of `filter`.
ARROW_SCOPED_TRACE("for sliced values and filter");
ASSERT_OK_AND_ASSIGN(auto values_filler, MakeArrayOfNull(values->type(), M));
ASSERT_OK_AND_ASSIGN(auto filter_filler,
FilterFromJson(filter->type(), "[true, false]"));
ASSERT_OK_AND_ASSIGN(auto values_with_filler,
Concatenate({values_filler, values, values_filler}));
ASSERT_OK_AND_ASSIGN(auto filter_with_filler,
Concatenate({filter_filler, filter, filter_filler}));
auto values_sliced = values_with_filler->Slice(M, values->length());
auto filter_sliced = filter_with_filler->Slice(N, filter->length());
DoAssertFilterOutput(values_sliced, filter_sliced, null_options, expected);
}

/// \brief Check the filter output for one null-selection behavior, on both the
/// full inputs and on sliced inputs.
///
/// `expected_rep` is turned into a run-end encoded array of the values' type
/// with REEFromRep; that array is the expectation for both checks.
void DoAssertOutput(const std::shared_ptr<Array>& values,
                    const std::shared_ptr<Array>& filter,
                    const FilterOptions& null_options, const REERep& expected_rep) {
  ARROW_SCOPED_TRACE("assert output");
  ARROW_SCOPED_TRACE(null_options.null_selection_behavior ==
                             FilterOptions::NullSelectionBehavior::DROP
                         ? "while dropping nulls"
                         : "while emitting nulls");
  // No exec is built here: DoAssertFilterOutput creates its own from the arrays
  // it is given, so one made at this level would never be used.
  ASSERT_OK_AND_ASSIGN(auto expected, REEFromRep(values->type(), expected_rep));
  {
    ARROW_SCOPED_TRACE("for full values and filter");
    DoAssertFilterOutput(values, filter, null_options, expected);
  }
  DoAssertFilterSlicedOutput(values, filter, null_options, expected);
}

template <typename RunEndTypes>
struct REExREEFilterTest : public ::testing::Test {
using ValueRunEndType = typename RunEndTypes::ValueRunEndType;
Expand All @@ -149,20 +239,24 @@ struct REExREEFilterTest : public ::testing::Test {
std::shared_ptr<DataType> _value_run_end_type;
std::shared_ptr<DataType> _filter_run_end_type;

std::shared_ptr<DataType> _values_type;
std::shared_ptr<DataType> _filter_type;

REExREEFilterTest() {
_value_run_end_type = TypeTraits<ValueRunEndType>::type_singleton();
_filter_run_end_type = TypeTraits<FilterRunEndType>::type_singleton();
_filter_type = run_end_encoded(_filter_run_end_type, boolean());
}

void AssertOutputSize(const std::shared_ptr<DataType>& value_type,
const std::string& values_json, const std::string& filter_json,
std::pair<int64_t, int64_t> expected_output_size_drop_nulls,
std::pair<int64_t, int64_t> expected_output_size_emit_nulls = {
-1, -1}) {
ASSERT_OK_AND_ASSIGN(auto values,
REEFromJson(_value_run_end_type, value_type, values_json));
ASSERT_OK_AND_ASSIGN(auto filter,
REEFromJson(_filter_run_end_type, boolean(), filter_json));
ASSERT_OK_AND_ASSIGN(
auto values,
REEFromJson(run_end_encoded(_value_run_end_type, value_type), values_json));
ASSERT_OK_AND_ASSIGN(auto filter, REEFromJson(_filter_type, filter_json));
DoAssertOutputSize(values, filter, kDropNulls, expected_output_size_drop_nulls.first,
expected_output_size_drop_nulls.second);
if (expected_output_size_emit_nulls == std::pair<int64_t, int64_t>{-1, -1}) {
Expand All @@ -171,6 +265,22 @@ struct REExREEFilterTest : public ::testing::Test {
DoAssertOutputSize(values, filter, kEmitNulls, expected_output_size_emit_nulls.first,
expected_output_size_emit_nulls.second);
}

void AssertOutput(const std::shared_ptr<DataType>& value_type,
const std::string& values_json, const std::string& filter_json,
const REERep& expected_with_drop_nulls,
const REERep& expected_with_emit_nulls = REERep::None()) {
ASSERT_OK_AND_ASSIGN(
auto values,
REEFromJson(run_end_encoded(_value_run_end_type, value_type), values_json));
ASSERT_OK_AND_ASSIGN(auto filter, REEFromJson(_filter_type, filter_json));
DoAssertOutput(values, filter, kDropNulls, expected_with_drop_nulls);
if (expected_with_emit_nulls == REERep::None()) {
DoAssertOutput(values, filter, kEmitNulls, expected_with_drop_nulls);
} else {
DoAssertOutput(values, filter, kEmitNulls, expected_with_emit_nulls);
}
}
};
TYPED_TEST_SUITE_P(REExREEFilterTest);

Expand Down Expand Up @@ -255,8 +365,36 @@ TYPED_TEST_P(REExREEFilterTest, SizeOutputWithBooleans) {
"[null, 0, null, 1, 0, null, 1, 1, 1, 0, 1]", {5, 4}, {8, 5});
}

// Checks the filtered output (not only its size) for boolean values.
// Each expectation is written as {logical_length, run_ends, values}; a second
// expectation is given only where emitting nulls differs from dropping them.
// Only boolean values are exercised here for now.
TYPED_TEST_P(REExREEFilterTest, FilteredOutputWithAFewTypes) {
  auto data_type = boolean();
  // Single-element inputs.
  this->AssertOutput(data_type, "[false]", "[1]", {1, "[1]", "[false]"});
  this->AssertOutput(data_type, "[false]", "[0]", {});

  this->AssertOutput(data_type, "[true]", "[1]", {1, "[1]", "[true]"});
  this->AssertOutput(data_type, "[true]", "[0]", {});
  this->AssertOutput(data_type, "[false]", "[null]", {}, {1, "[1]", "[null]"});
  this->AssertOutput(data_type, "[true]", "[null]", {}, {1, "[1]", "[null]"});

  // Filters without nulls.
  this->AssertOutput(data_type, "[true, false, true, false]", "[0, 1, 1, 0]",
                     {2, "[1, 2]", "[false, true]"});
  // Selected values are [false, true, true]: the last run ends at 3, matching
  // the logical length.
  this->AssertOutput(data_type, "[false, true, false, true]", "[1, 1, 0, 1]",
                     {3, "[1, 3]", "[false, true]"});

  // Filters with nulls: dropped by default, emitted as null values otherwise.
  this->AssertOutput(data_type, "[true, true, true, false]", "[null, 0, 1, 0]",
                     {1, "[1]", "[true]"}, {2, "[1, 2]", "[null, true]"});
  this->AssertOutput(data_type, "[false, true, true, true]", "[1, 1, 0, null]",
                     {2, "[1, 2]", "[false, true]"},
                     {3, "[1, 2, 3]", "[false, true, null]"});

  // Nulls in both the values and the filter.
  this->AssertOutput(data_type,  // linebreak for alignment
                     "[ 1, 0, 0, 1, 1, 1, null, 1, 0, 1, 0]",
                     "[null, 0, null, 1, 0, null, 1, 1, 1, 0, 1]",
                     {5, "[1, 2, 3, 5]", "[1, null, 1, 0]"},
                     {8, "[2, 3, 5, 6, 8]", "[null, 1, null, 1, 0]"});
}

REGISTER_TYPED_TEST_SUITE_P(REExREEFilterTest, SizeOutputWithNulls,
SizeOutputWithBooleans);
SizeOutputWithBooleans, FilteredOutputWithAFewTypes);

template <typename V, typename F>
struct RunEndTypes {
Expand Down

0 comments on commit b0dbea8

Please sign in to comment.