From e965e1af24a0a5746958b252b253537ef5a45192 Mon Sep 17 00:00:00 2001 From: Jin Shang Date: Tue, 10 Jan 2023 11:34:41 +0000 Subject: [PATCH] GH-15290: [C++][Compute] Optimize IfElse kernel AAS/ASA case when the scalar is null --- .../arrow/compute/kernels/scalar_if_else.cc | 41 +++++++++++++++++-- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/compute/kernels/scalar_if_else.cc b/cpp/src/arrow/compute/kernels/scalar_if_else.cc index bb3ac6635e080..d36f1aa2ed36b 100644 --- a/cpp/src/arrow/compute/kernels/scalar_if_else.cc +++ b/cpp/src/arrow/compute/kernels/scalar_if_else.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include #include "arrow/array/builder_nested.h" #include "arrow/array/builder_primitive.h" #include "arrow/array/builder_time.h" @@ -22,6 +23,8 @@ #include "arrow/compute/api.h" #include "arrow/compute/kernels/codegen_internal.h" #include "arrow/compute/kernels/copy_data_internal.h" +#include "arrow/result.h" +#include "arrow/status.h" #include "arrow/util/bit_block_counter.h" #include "arrow/util/bit_run_reader.h" #include "arrow/util/bitmap.h" @@ -470,6 +473,10 @@ struct IfElseFunctor(1), right.length * sizeof(T)); + if (!left.is_valid) { // left is null scalar, only need to copy right data to out + return Status::OK(); + } + // selectively copy values from left data T left_data = internal::UnboxScalar::Unbox(left); @@ -490,6 +497,10 @@ struct IfElseFunctor(1); std::memcpy(out_values, left_data, left.length * sizeof(T)); + if (!right.is_valid) { // right is null scalar, only need to copy left data to out + return Status::OK(); + } + T right_data = internal::UnboxScalar::Unbox(right); RunIfElseLoopInverted(cond, [&](int64_t data_offset, int64_t num_elems) { @@ -723,12 +734,24 @@ struct IfElseFunctor> { // ASA static Status Call(KernelContext* ctx, const ArraySpan& cond, const Scalar& left, const ArraySpan& right, ExecResult* out) { - std::string_view left_data = internal::UnboxScalar::Unbox(left); - auto left_size = static_cast(left_data.size()); - const auto* right_offsets = right.GetValues(1); const uint8_t* right_data = right.buffers[2].data; + if (!left.is_valid) { // left is null scalar, only need to copy left data to out + auto& out_data = *out->array_data(); + auto offset_length = (cond.length + 1) * sizeof(OffsetType); + ARROW_ASSIGN_OR_RAISE(out_data.buffers[1], ctx->Allocate(offset_length)); + memcpy(out_data.buffers[1]->mutable_data(), right_offsets, offset_length); + + auto right_data_length = right_offsets[right.length] - right_offsets[0]; + ARROW_ASSIGN_OR_RAISE(out_data.buffers[2], ctx->Allocate(right_data_length)); + memcpy(out_data.buffers[2]->mutable_data(), right_data, right_data_length); + return Status::OK(); + } + + std::string_view left_data = internal::UnboxScalar::Unbox(left); + auto left_size = static_cast(left_data.size()); + // allocate data buffer conservatively int64_t data_buff_alloc = left_size * cond.length + right_offsets[right.length] - right_offsets[0]; @@ -754,6 +777,18 @@ struct IfElseFunctor> { const auto* left_offsets = left.GetValues(1); const uint8_t* left_data = left.buffers[2].data; + if (!right.is_valid) { // right is null scalar, only need to copy left data to out + auto& out_data = *out->array_data(); + auto offset_length = (cond.length + 1) * sizeof(OffsetType); + ARROW_ASSIGN_OR_RAISE(out_data.buffers[1], ctx->Allocate(offset_length)); + memcpy(out_data.buffers[1]->mutable_data(), left_offsets, offset_length); + + auto left_data_length = left_offsets[left.length] - left_offsets[0]; + ARROW_ASSIGN_OR_RAISE(out_data.buffers[2], ctx->Allocate(left_data_length)); + memcpy(out_data.buffers[2]->mutable_data(), left_data, left_data_length); + return Status::OK(); + } + std::string_view right_data = internal::UnboxScalar::Unbox(right); auto right_size = static_cast(right_data.size());