diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 91e67fb423a23..3be8cad0b7432 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -371,17 +371,17 @@ if(ARROW_COMPUTE) compute/kernels/vector_sort.cc) if(CXX_SUPPORTS_AVX2) - list(APPEND ARROW_SRCS compute/kernels/aggregate_sum_avx2.cc) - set_source_files_properties(compute/kernels/aggregate_sum_avx2.cc PROPERTIES + list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx2.cc) + set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON) - set_source_files_properties(compute/kernels/aggregate_sum_avx2.cc PROPERTIES + set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES COMPILE_FLAGS ${ARROW_AVX2_FLAG}) endif() if(CXX_SUPPORTS_AVX512) - list(APPEND ARROW_SRCS compute/kernels/aggregate_sum_avx512.cc) - set_source_files_properties(compute/kernels/aggregate_sum_avx512.cc PROPERTIES + list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx512.cc) + set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES SKIP_PRECOMPILE_HEADERS ON) - set_source_files_properties(compute/kernels/aggregate_sum_avx512.cc PROPERTIES + set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES COMPILE_FLAGS ${ARROW_AVX512_FLAG}) endif() endif() diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc index 97fcc2fdaa839..b0e4267ac3384 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc @@ -131,9 +131,9 @@ std::unique_ptr MeanInit(KernelContext* ctx, const KernelInitArgs& // MinMax implementation std::unique_ptr MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) { - MinMaxInitState visitor(ctx, *args.inputs[0].type, - args.kernel->signature->out_type().type(), - static_cast(*args.options)); + MinMaxInitState visitor( + ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), + static_cast(*args.options)); return visitor.Create(); } @@ -160,8 +160,7 @@ void AddBasicAggKernels(KernelInit init, void AddMinMaxKernels(KernelInit init, const std::vector>& types, - ScalarAggregateFunction* func, - SimdLevel::type simd_level = SimdLevel::NONE) { + ScalarAggregateFunction* func, SimdLevel::type simd_level) { for (const auto& ty : types) { // array[T] -> scalar[struct] auto out_ty = struct_({field("min", ty), field("max", ty)}); @@ -228,6 +227,18 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) { &default_minmax_options); aggregate::AddMinMaxKernels(aggregate::MinMaxInit, {boolean()}, func.get()); aggregate::AddMinMaxKernels(aggregate::MinMaxInit, NumericTypes(), func.get()); + // Add the SIMD variants for min max +#if defined(ARROW_HAVE_RUNTIME_AVX2) + if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) { + aggregate::AddMinMaxAvx2AggKernels(func.get()); + } +#endif +#if defined(ARROW_HAVE_RUNTIME_AVX512) + if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX512)) { + aggregate::AddMinMaxAvx512AggKernels(func.get()); + } +#endif + DCHECK_OK(registry->AddFunction(std::move(func))); } diff --git a/cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc similarity index 80% rename from cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc rename to cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc index 2811c4cd86539..e0c1118c71494 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_sum_avx2.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx2.cc @@ -67,6 +67,17 @@ std::unique_ptr MeanInitAvx2(KernelContext* ctx, return visitor.Create(); } +// ---------------------------------------------------------------------- +// MinMax implementation + +std::unique_ptr MinMaxInitAvx2(KernelContext* ctx, + const KernelInitArgs& args) { + MinMaxInitState visitor( + ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), + static_cast(*args.options)); + return visitor.Create(); +} + void AddSumAvx2AggKernels(ScalarAggregateFunction* func) { AddBasicAggKernels(SumInitAvx2, internal::SignedIntTypes(), int64(), func, SimdLevel::AVX2); @@ -81,6 +92,12 @@ void AddMeanAvx2AggKernels(ScalarAggregateFunction* func) { SimdLevel::AVX2); } +void AddMinMaxAvx2AggKernels(ScalarAggregateFunction* func) { + // Enable int types for AVX2 variants. + // No auto vectorize for float/double as it use fmin/fmax which has NaN handling. + AddMinMaxKernels(MinMaxInitAvx2, internal::IntTypes(), func, SimdLevel::AVX2); +} + } // namespace aggregate } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc similarity index 80% rename from cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc rename to cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc index 00408027e1ff3..c2c748d3af7d2 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_sum_avx512.cc +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_avx512.cc @@ -68,6 +68,17 @@ std::unique_ptr MeanInitAvx512(KernelContext* ctx, return visitor.Create(); } +// ---------------------------------------------------------------------- +// MinMax implementation + +std::unique_ptr MinMaxInitAvx512(KernelContext* ctx, + const KernelInitArgs& args) { + MinMaxInitState visitor( + ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(), + static_cast(*args.options)); + return visitor.Create(); +} + void AddSumAvx512AggKernels(ScalarAggregateFunction* func) { AddBasicAggKernels(SumInitAvx512, internal::SignedIntTypes(), int64(), func, SimdLevel::AVX512); @@ -82,6 +93,12 @@ void AddMeanAvx512AggKernels(ScalarAggregateFunction* func) { SimdLevel::AVX512); } +void AddMinMaxAvx512AggKernels(ScalarAggregateFunction* func) { + // Enable 32/64 int types for avx512 variants, no advantage on 8/16 int. + AddMinMaxKernels(MinMaxInitAvx512, {int32(), uint32(), int64(), uint64()}, func, + SimdLevel::AVX512); +} + } // namespace aggregate } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h index e5676cfbf40a4..a80d4c9579f87 100644 --- a/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h +++ b/cpp/src/arrow/compute/kernels/aggregate_basic_internal.h @@ -40,20 +40,27 @@ void AddBasicAggKernels(KernelInit init, std::shared_ptr out_ty, ScalarAggregateFunction* func, SimdLevel::type simd_level = SimdLevel::NONE); +void AddMinMaxKernels(KernelInit init, + const std::vector>& types, + ScalarAggregateFunction* func, + SimdLevel::type simd_level = SimdLevel::NONE); + // SIMD variants for kernels void AddSumAvx2AggKernels(ScalarAggregateFunction* func); void AddMeanAvx2AggKernels(ScalarAggregateFunction* func); +void AddMinMaxAvx2AggKernels(ScalarAggregateFunction* func); void AddSumAvx512AggKernels(ScalarAggregateFunction* func); void AddMeanAvx512AggKernels(ScalarAggregateFunction* func); +void AddMinMaxAvx512AggKernels(ScalarAggregateFunction* func); // ---------------------------------------------------------------------- // Sum implementation -template +template struct SumState { using SumType = typename FindAccumulatorType::Type; - using ThisType = SumState; + using ThisType = SumState; using T = typename TypeTraits::CType; using ArrayType = typename TypeTraits::ArrayType; @@ -214,10 +221,10 @@ struct SumState { } }; -template -struct SumState { +template +struct SumState { using SumType = typename FindAccumulatorType::Type; - using ThisType = SumState; + using ThisType = SumState; ThisType& operator+=(const ThisType& rhs) { this->count += rhs.count; @@ -236,10 +243,10 @@ struct SumState { typename SumType::c_type sum = 0; }; -template +template struct SumImpl : public ScalarAggregator { using ArrayType = typename TypeTraits::ArrayType; - using ThisType = SumImpl; + using ThisType = SumImpl; using SumType = typename FindAccumulatorType::Type; using OutputType = typename TypeTraits::ScalarType; @@ -260,11 +267,11 @@ struct SumImpl : public ScalarAggregator { } } - SumState state; + SumState state; }; -template -struct MeanImpl : public SumImpl { +template +struct MeanImpl : public SumImpl { void Finalize(KernelContext*, Datum* out) override { const bool is_valid = this->state.count > 0; const double divisor = static_cast(is_valid ? this->state.count : 1UL); @@ -312,12 +319,12 @@ struct SumLikeInit { // ---------------------------------------------------------------------- // MinMax implementation -template +template struct MinMaxState {}; -template -struct MinMaxState> { - using ThisType = MinMaxState; +template +struct MinMaxState> { + using ThisType = MinMaxState; using T = typename ArrowType::c_type; ThisType& operator+=(const ThisType& rhs) { @@ -339,9 +346,9 @@ struct MinMaxState> { bool has_values = false; }; -template -struct MinMaxState> { - using ThisType = MinMaxState; +template +struct MinMaxState> { + using ThisType = MinMaxState; using T = typename ArrowType::c_type; ThisType& operator+=(const ThisType& rhs) { @@ -363,9 +370,9 @@ struct MinMaxState> { bool has_values = false; }; -template -struct MinMaxState> { - using ThisType = MinMaxState; +template +struct MinMaxState> { + using ThisType = MinMaxState; using T = typename ArrowType::c_type; ThisType& operator+=(const ThisType& rhs) { @@ -387,11 +394,11 @@ struct MinMaxState> { bool has_values = false; }; -template +template struct MinMaxImpl : public ScalarAggregator { using ArrayType = typename TypeTraits::ArrayType; - using ThisType = MinMaxImpl; - using StateType = MinMaxState; + using ThisType = MinMaxImpl; + using StateType = MinMaxState; MinMaxImpl(const std::shared_ptr& out_type, const MinMaxOptions& options) : out_type(out_type), options(options) {} @@ -442,7 +449,7 @@ struct MinMaxImpl : public ScalarAggregator { std::shared_ptr out_type; MinMaxOptions options; - MinMaxState state; + MinMaxState state; private: StateType ConsumeWithNulls(const ArrayType& arr) const { @@ -503,8 +510,12 @@ struct MinMaxImpl : public ScalarAggregator { } }; -struct BooleanMinMaxImpl : public MinMaxImpl { - using MinMaxImpl::MinMaxImpl; +template +struct BooleanMinMaxImpl : public MinMaxImpl { + using StateType = MinMaxState; + using ArrayType = typename TypeTraits::ArrayType; + using MinMaxImpl::MinMaxImpl; + using MinMaxImpl::options; void Consume(KernelContext*, const ExecBatch& batch) override { StateType local; @@ -530,6 +541,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl { } }; +template struct MinMaxInitState { std::unique_ptr state; KernelContext* ctx; @@ -550,13 +562,13 @@ struct MinMaxInitState { } Status Visit(const BooleanType&) { - state.reset(new BooleanMinMaxImpl(out_type, options)); + state.reset(new BooleanMinMaxImpl(out_type, options)); return Status::OK(); } template enable_if_number Visit(const Type&) { - state.reset(new MinMaxImpl(out_type, options)); + state.reset(new MinMaxImpl(out_type, options)); return Status::OK(); }