From b0dbea8df777e172b1240c45e548872b322b1733 Mon Sep 17 00:00:00 2001 From: Felipe Oliveira Carvalho Date: Mon, 15 May 2023 23:00:27 -0300 Subject: [PATCH] Assert the output of the REExREE filtering kernel --- .../kernels/vector_run_end_selection_test.cc | 162 ++++++++++++++++-- 1 file changed, 150 insertions(+), 12 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/vector_run_end_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_run_end_selection_test.cc index db293fd85d0d0..6afdfeeace9a8 100644 --- a/cpp/src/arrow/compute/kernels/vector_run_end_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_run_end_selection_test.cc @@ -19,6 +19,7 @@ #include "arrow/testing/gtest_util.h" +#include "arrow/array/array_run_end.h" #include "arrow/array/concatenate.h" #include "arrow/array/data.h" #include "arrow/array/util.h" @@ -54,20 +55,55 @@ std::unique_ptr MakeREEFilterExec(const ArraySpan& values, return exec; } -Result> REEFromJson(const std::shared_ptr& run_end_type, - const std::shared_ptr& value_type, +/// \brief A representation of REE data used in the tests. +struct REERep { + int64_t logical_length; + std::string run_ends_json; + std::string values_json; + + REERep(int64_t logical_length, std::string run_ends_json, std::string values_json) + : logical_length(logical_length), + run_ends_json(std::move(run_ends_json)), + values_json(std::move(values_json)) {} + + REERep() : REERep(0, "[]", "[]") {} + + static REERep None() { return REERep{0, "", ""}; } + + bool operator==(const REERep& other) const { + return logical_length == other.logical_length && + run_ends_json == other.run_ends_json && values_json == other.values_json; + } +}; + +Result> REEFromJson(const std::shared_ptr& ree_type, const std::string& json) { - auto array = ArrayFromJSON(value_type, json); - ARROW_ASSIGN_OR_RAISE(auto datum, - RunEndEncode(array, RunEndEncodeOptions{run_end_type})); + auto ree_type_ptr = checked_cast(ree_type.get()); + auto array = ArrayFromJSON(ree_type_ptr->value_type(), json); + ARROW_ASSIGN_OR_RAISE( + auto datum, RunEndEncode(array, RunEndEncodeOptions{ree_type_ptr->run_end_type()})); return datum.make_array(); } +Result> REEFromJson(const std::shared_ptr& ree_type, + int64_t logical_length, + const std::string& run_ends_json, + const std::string& values_json) { + auto ree_type_ptr = checked_cast(ree_type.get()); + auto run_ends = ArrayFromJSON(ree_type_ptr->run_end_type(), run_ends_json); + auto values = ArrayFromJSON(ree_type_ptr->value_type(), values_json); + return RunEndEncodedArray::Make(logical_length, run_ends, values); +} + +Result> REEFromRep(const std::shared_ptr& ree_type, + const REERep& rep) { + return REEFromJson(ree_type, rep.logical_length, rep.run_ends_json, rep.values_json); +} + Result> FilterFromJson( const std::shared_ptr& filter_type, const std::string& json) { if (filter_type->id() == Type::RUN_END_ENCODED) { - auto& ree_type = checked_cast(*filter_type); - return REEFromJson(ree_type.run_end_type(), ree_type.value_type(), json); + return REEFromJson(filter_type, json); } else { return ArrayFromJSON(filter_type, json); } @@ -141,6 +177,60 @@ void DoAssertOutputSize(const std::shared_ptr& values, expected_physical_output_size); } +void DoAssertFilterOutput(const std::shared_ptr& values, + const std::shared_ptr& filter, + const FilterOptions& null_options, + const std::shared_ptr& expected) { + auto values_span = ArraySpan(*values->data()); + auto filter_span = ArraySpan(*filter->data()); + auto filter_exec = MakeREEFilterExec(values_span, filter_span, null_options); + + auto output = ArrayData::Make(values->type(), 0, {nullptr}); + ASSERT_OK(filter_exec->Exec(output.get())); + auto output_array = MakeArray(output); + ASSERT_ARRAYS_EQUAL(*output_array, *expected); +} + +void DoAssertFilterSlicedOutput(const std::shared_ptr& values, + const std::shared_ptr& filter, + const FilterOptions& null_options, + const std::shared_ptr& expected) { + constexpr auto M = 3; + constexpr auto N = 2; + // Check slicing: add M dummy values at the start and end of `values`, + // add N dummy values at the start and end of `filter`. + ARROW_SCOPED_TRACE("for sliced values and filter"); + ASSERT_OK_AND_ASSIGN(auto values_filler, MakeArrayOfNull(values->type(), M)); + ASSERT_OK_AND_ASSIGN(auto filter_filler, + FilterFromJson(filter->type(), "[true, false]")); + ASSERT_OK_AND_ASSIGN(auto values_with_filler, + Concatenate({values_filler, values, values_filler})); + ASSERT_OK_AND_ASSIGN(auto filter_with_filler, + Concatenate({filter_filler, filter, filter_filler})); + auto values_sliced = values_with_filler->Slice(M, values->length()); + auto filter_sliced = filter_with_filler->Slice(N, filter->length()); + DoAssertFilterOutput(values_sliced, filter_sliced, null_options, expected); +} + +void DoAssertOutput(const std::shared_ptr& values, + const std::shared_ptr& filter, + const FilterOptions& null_options, const REERep& expected_rep) { + ARROW_SCOPED_TRACE("assert output"); + ARROW_SCOPED_TRACE(null_options.null_selection_behavior == + FilterOptions::NullSelectionBehavior::DROP + ? "while dropping nulls" + : "while emitting nulls"); + auto values_span = ArraySpan(*values->data()); + auto filter_span = ArraySpan(*filter->data()); + auto filter_exec = MakeREEFilterExec(values_span, filter_span, null_options); + ASSERT_OK_AND_ASSIGN(auto expected, REEFromRep(values->type(), expected_rep)); + { + ARROW_SCOPED_TRACE("for full values and filter"); + DoAssertFilterOutput(values, filter, null_options, expected); + } + DoAssertFilterSlicedOutput(values, filter, null_options, expected); +} + template struct REExREEFilterTest : public ::testing::Test { using ValueRunEndType = typename RunEndTypes::ValueRunEndType; @@ -149,9 +239,13 @@ struct REExREEFilterTest : public ::testing::Test { std::shared_ptr _value_run_end_type; std::shared_ptr _filter_run_end_type; + std::shared_ptr _values_type; + std::shared_ptr _filter_type; + REExREEFilterTest() { _value_run_end_type = TypeTraits::type_singleton(); _filter_run_end_type = TypeTraits::type_singleton(); + _filter_type = run_end_encoded(_filter_run_end_type, boolean()); } void AssertOutputSize(const std::shared_ptr& value_type, @@ -159,10 +253,10 @@ struct REExREEFilterTest : public ::testing::Test { std::pair expected_output_size_drop_nulls, std::pair expected_output_size_emit_nulls = { -1, -1}) { - ASSERT_OK_AND_ASSIGN(auto values, - REEFromJson(_value_run_end_type, value_type, values_json)); - ASSERT_OK_AND_ASSIGN(auto filter, - REEFromJson(_filter_run_end_type, boolean(), filter_json)); + ASSERT_OK_AND_ASSIGN( + auto values, + REEFromJson(run_end_encoded(_value_run_end_type, value_type), values_json)); + ASSERT_OK_AND_ASSIGN(auto filter, REEFromJson(_filter_type, filter_json)); DoAssertOutputSize(values, filter, kDropNulls, expected_output_size_drop_nulls.first, expected_output_size_drop_nulls.second); if (expected_output_size_emit_nulls == std::pair{-1, -1}) { @@ -171,6 +265,22 @@ struct REExREEFilterTest : public ::testing::Test { DoAssertOutputSize(values, filter, kEmitNulls, expected_output_size_emit_nulls.first, expected_output_size_emit_nulls.second); } + + void AssertOutput(const std::shared_ptr& value_type, + const std::string& values_json, const std::string& filter_json, + const REERep& expected_with_drop_nulls, + const REERep& expected_with_emit_nulls = REERep::None()) { + ASSERT_OK_AND_ASSIGN( + auto values, + REEFromJson(run_end_encoded(_value_run_end_type, value_type), values_json)); + ASSERT_OK_AND_ASSIGN(auto filter, REEFromJson(_filter_type, filter_json)); + DoAssertOutput(values, filter, kDropNulls, expected_with_drop_nulls); + if (expected_with_emit_nulls == REERep::None()) { + DoAssertOutput(values, filter, kEmitNulls, expected_with_drop_nulls); + } else { + DoAssertOutput(values, filter, kEmitNulls, expected_with_emit_nulls); + } + } }; TYPED_TEST_SUITE_P(REExREEFilterTest); @@ -255,8 +365,36 @@ TYPED_TEST_P(REExREEFilterTest, SizeOutputWithBooleans) { "[null, 0, null, 1, 0, null, 1, 1, 1, 0, 1]", {5, 4}, {8, 5}); } +TYPED_TEST_P(REExREEFilterTest, FilteredOutputWithAFewTypes) { + auto data_type = boolean(); + this->AssertOutput(data_type, "[false]", "[1]", {1, "[1]", "[false]"}); + this->AssertOutput(data_type, "[false]", "[0]", {}); + + this->AssertOutput(data_type, "[true]", "[1]", {1, "[1]", "[true]"}); + this->AssertOutput(data_type, "[true]", "[0]", {}); + this->AssertOutput(data_type, "[false]", "[null]", {}, {1, "[1]", "[null]"}); + this->AssertOutput(data_type, "[true]", "[null]", {}, {1, "[1]", "[null]"}); + + this->AssertOutput(data_type, "[true, false, true, false]", "[0, 1, 1, 0]", + {2, "[1, 2]", "[false, true]"}); + this->AssertOutput(data_type, "[false, true, false, true]", "[1, 1, 0, 1]", + {3, "[1, 4]", "[false, true]"}); + + this->AssertOutput(data_type, "[true, true, true, false]", "[null, 0, 1, 0]", + {1, "[1]", "[true]"}, {2, "[1, 2]", "[null, true]"}); + this->AssertOutput(data_type, "[false, true, true, true]", "[1, 1, 0, null]", + {2, "[1, 2]", "[false, true]"}, + {3, "[1, 2, 3]", "[false, true, null]"}); + + this->AssertOutput(data_type, // linebreak for alignment + "[ 1, 0, 0, 1, 1, 1, null, 1, 0, 1, 0]", + "[null, 0, null, 1, 0, null, 1, 1, 1, 0, 1]", + {5, "[1, 2, 3, 5]", "[1, null, 1, 0]"}, + {8, "[2, 3, 5, 6, 8]", "[null, 1, null, 1, 0]"}); +} + REGISTER_TYPED_TEST_SUITE_P(REExREEFilterTest, SizeOutputWithNulls, - SizeOutputWithBooleans); + SizeOutputWithBooleans, FilteredOutputWithAFewTypes); template struct RunEndTypes {