Skip to content

Commit

Permalink
Add avx version
Browse files Browse the repository at this point in the history
Signed-off-by: Frank Du <frank.du@intel.com>
  • Loading branch information
frankdjx committed Aug 10, 2020
1 parent 10d2217 commit 553266e
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 39 deletions.
12 changes: 6 additions & 6 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -371,17 +371,17 @@ if(ARROW_COMPUTE)
compute/kernels/vector_sort.cc)

if(CXX_SUPPORTS_AVX2)
list(APPEND ARROW_SRCS compute/kernels/aggregate_sum_avx2.cc)
set_source_files_properties(compute/kernels/aggregate_sum_avx2.cc PROPERTIES
list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx2.cc)
set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES
SKIP_PRECOMPILE_HEADERS ON)
set_source_files_properties(compute/kernels/aggregate_sum_avx2.cc PROPERTIES
set_source_files_properties(compute/kernels/aggregate_basic_avx2.cc PROPERTIES
COMPILE_FLAGS ${ARROW_AVX2_FLAG})
endif()
if(CXX_SUPPORTS_AVX512)
list(APPEND ARROW_SRCS compute/kernels/aggregate_sum_avx512.cc)
set_source_files_properties(compute/kernels/aggregate_sum_avx512.cc PROPERTIES
list(APPEND ARROW_SRCS compute/kernels/aggregate_basic_avx512.cc)
set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES
SKIP_PRECOMPILE_HEADERS ON)
set_source_files_properties(compute/kernels/aggregate_sum_avx512.cc PROPERTIES
set_source_files_properties(compute/kernels/aggregate_basic_avx512.cc PROPERTIES
COMPILE_FLAGS ${ARROW_AVX512_FLAG})
endif()
endif()
Expand Down
21 changes: 16 additions & 5 deletions cpp/src/arrow/compute/kernels/aggregate_basic.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,9 @@ std::unique_ptr<KernelState> MeanInit(KernelContext* ctx, const KernelInitArgs&
// MinMax implementation

std::unique_ptr<KernelState> MinMaxInit(KernelContext* ctx, const KernelInitArgs& args) {
MinMaxInitState visitor(ctx, *args.inputs[0].type,
args.kernel->signature->out_type().type(),
static_cast<const MinMaxOptions&>(*args.options));
MinMaxInitState<SimdLevel::NONE> visitor(
ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
static_cast<const MinMaxOptions&>(*args.options));
return visitor.Create();
}

Expand All @@ -160,8 +160,7 @@ void AddBasicAggKernels(KernelInit init,

void AddMinMaxKernels(KernelInit init,
const std::vector<std::shared_ptr<DataType>>& types,
ScalarAggregateFunction* func,
SimdLevel::type simd_level = SimdLevel::NONE) {
ScalarAggregateFunction* func, SimdLevel::type simd_level) {
for (const auto& ty : types) {
// array[T] -> scalar[struct<min: T, max: T>]
auto out_ty = struct_({field("min", ty), field("max", ty)});
Expand Down Expand Up @@ -228,6 +227,18 @@ void RegisterScalarAggregateBasic(FunctionRegistry* registry) {
&default_minmax_options);
aggregate::AddMinMaxKernels(aggregate::MinMaxInit, {boolean()}, func.get());
aggregate::AddMinMaxKernels(aggregate::MinMaxInit, NumericTypes(), func.get());
// Add the SIMD variants for min max
#if defined(ARROW_HAVE_RUNTIME_AVX2)
if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX2)) {
aggregate::AddMinMaxAvx2AggKernels(func.get());
}
#endif
#if defined(ARROW_HAVE_RUNTIME_AVX512)
if (cpu_info->IsSupported(arrow::internal::CpuInfo::AVX512)) {
aggregate::AddMinMaxAvx512AggKernels(func.get());
}
#endif

DCHECK_OK(registry->AddFunction(std::move(func)));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,17 @@ std::unique_ptr<KernelState> MeanInitAvx2(KernelContext* ctx,
return visitor.Create();
}

// ----------------------------------------------------------------------
// MinMax implementation

// Kernel-state initializer for the AVX2 min/max kernels.
//
// Instantiates MinMaxInitState for SimdLevel::AVX2 from the first input's
// type, the output type of the kernel signature and the options attached to
// `args`, then returns whatever its Create() produces.
// Note: `args.options` is reinterpreted as MinMaxOptions with an unchecked
// static_cast.
std::unique_ptr<KernelState> MinMaxInitAvx2(KernelContext* ctx,
                                            const KernelInitArgs& args) {
  const auto& minmax_options = static_cast<const MinMaxOptions&>(*args.options);
  MinMaxInitState<SimdLevel::AVX2> init_state(
      ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
      minmax_options);
  return init_state.Create();
}

void AddSumAvx2AggKernels(ScalarAggregateFunction* func) {
AddBasicAggKernels(SumInitAvx2, internal::SignedIntTypes(), int64(), func,
SimdLevel::AVX2);
Expand All @@ -81,6 +92,12 @@ void AddMeanAvx2AggKernels(ScalarAggregateFunction* func) {
SimdLevel::AVX2);
}

// Registers the AVX2 min/max kernels on `func`.
void AddMinMaxAvx2AggKernels(ScalarAggregateFunction* func) {
  // Only the integer types get an AVX2 variant. Float/double are left out:
  // their min/max goes through fmin/fmax, whose NaN handling prevents
  // auto-vectorization.
  const auto& int_types = internal::IntTypes();
  AddMinMaxKernels(MinMaxInitAvx2, int_types, func, SimdLevel::AVX2);
}

} // namespace aggregate
} // namespace compute
} // namespace arrow
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ std::unique_ptr<KernelState> MeanInitAvx512(KernelContext* ctx,
return visitor.Create();
}

// ----------------------------------------------------------------------
// MinMax implementation

// Kernel-state initializer for the AVX512 min/max kernels.
//
// Instantiates MinMaxInitState for SimdLevel::AVX512 from the first input's
// type, the output type of the kernel signature and the options attached to
// `args`, then returns whatever its Create() produces.
// Note: `args.options` is reinterpreted as MinMaxOptions with an unchecked
// static_cast.
std::unique_ptr<KernelState> MinMaxInitAvx512(KernelContext* ctx,
                                              const KernelInitArgs& args) {
  const auto& minmax_options = static_cast<const MinMaxOptions&>(*args.options);
  MinMaxInitState<SimdLevel::AVX512> init_state(
      ctx, *args.inputs[0].type, args.kernel->signature->out_type().type(),
      minmax_options);
  return init_state.Create();
}

void AddSumAvx512AggKernels(ScalarAggregateFunction* func) {
AddBasicAggKernels(SumInitAvx512, internal::SignedIntTypes(), int64(), func,
SimdLevel::AVX512);
Expand All @@ -82,6 +93,12 @@ void AddMeanAvx512AggKernels(ScalarAggregateFunction* func) {
SimdLevel::AVX512);
}

// Registers the AVX512 min/max kernels on `func`.
void AddMinMaxAvx512AggKernels(ScalarAggregateFunction* func) {
  // Only 32- and 64-bit integers get an AVX512 variant; there is no advantage
  // for 8- and 16-bit integers.
  const std::vector<std::shared_ptr<DataType>> wide_int_types = {int32(), uint32(),
                                                                 int64(), uint64()};
  AddMinMaxKernels(MinMaxInitAvx512, wide_int_types, func, SimdLevel::AVX512);
}

} // namespace aggregate
} // namespace compute
} // namespace arrow
68 changes: 40 additions & 28 deletions cpp/src/arrow/compute/kernels/aggregate_basic_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,20 +40,27 @@ void AddBasicAggKernels(KernelInit init,
std::shared_ptr<DataType> out_ty, ScalarAggregateFunction* func,
SimdLevel::type simd_level = SimdLevel::NONE);

void AddMinMaxKernels(KernelInit init,
const std::vector<std::shared_ptr<DataType>>& types,
ScalarAggregateFunction* func,
SimdLevel::type simd_level = SimdLevel::NONE);

// SIMD variants for kernels
void AddSumAvx2AggKernels(ScalarAggregateFunction* func);
void AddMeanAvx2AggKernels(ScalarAggregateFunction* func);
void AddMinMaxAvx2AggKernels(ScalarAggregateFunction* func);

void AddSumAvx512AggKernels(ScalarAggregateFunction* func);
void AddMeanAvx512AggKernels(ScalarAggregateFunction* func);
void AddMinMaxAvx512AggKernels(ScalarAggregateFunction* func);

// ----------------------------------------------------------------------
// Sum implementation

template <int64_t kRoundSize, typename ArrowType, SimdLevel::type simd_level>
template <int64_t kRoundSize, typename ArrowType, SimdLevel::type SimdLevel>
struct SumState {
using SumType = typename FindAccumulatorType<ArrowType>::Type;
using ThisType = SumState<kRoundSize, ArrowType, simd_level>;
using ThisType = SumState<kRoundSize, ArrowType, SimdLevel>;
using T = typename TypeTraits<ArrowType>::CType;
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;

Expand Down Expand Up @@ -214,10 +221,10 @@ struct SumState {
}
};

template <int64_t kRoundSize, SimdLevel::type simd_level>
struct SumState<kRoundSize, BooleanType, simd_level> {
template <int64_t kRoundSize, SimdLevel::type SimdLevel>
struct SumState<kRoundSize, BooleanType, SimdLevel> {
using SumType = typename FindAccumulatorType<BooleanType>::Type;
using ThisType = SumState<kRoundSize, BooleanType, simd_level>;
using ThisType = SumState<kRoundSize, BooleanType, SimdLevel>;

ThisType& operator+=(const ThisType& rhs) {
this->count += rhs.count;
Expand All @@ -236,10 +243,10 @@ struct SumState<kRoundSize, BooleanType, simd_level> {
typename SumType::c_type sum = 0;
};

template <uint64_t kRoundSize, typename ArrowType, SimdLevel::type simd_level>
template <uint64_t kRoundSize, typename ArrowType, SimdLevel::type SimdLevel>
struct SumImpl : public ScalarAggregator {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
using ThisType = SumImpl<kRoundSize, ArrowType, simd_level>;
using ThisType = SumImpl<kRoundSize, ArrowType, SimdLevel>;
using SumType = typename FindAccumulatorType<ArrowType>::Type;
using OutputType = typename TypeTraits<SumType>::ScalarType;

Expand All @@ -260,11 +267,11 @@ struct SumImpl : public ScalarAggregator {
}
}

SumState<kRoundSize, ArrowType, simd_level> state;
SumState<kRoundSize, ArrowType, SimdLevel> state;
};

template <int64_t kRoundSize, typename ArrowType, SimdLevel::type simd_level>
struct MeanImpl : public SumImpl<kRoundSize, ArrowType, simd_level> {
template <int64_t kRoundSize, typename ArrowType, SimdLevel::type SimdLevel>
struct MeanImpl : public SumImpl<kRoundSize, ArrowType, SimdLevel> {
void Finalize(KernelContext*, Datum* out) override {
const bool is_valid = this->state.count > 0;
const double divisor = static_cast<double>(is_valid ? this->state.count : 1UL);
Expand Down Expand Up @@ -312,12 +319,12 @@ struct SumLikeInit {
// ----------------------------------------------------------------------
// MinMax implementation

template <typename ArrowType, typename Enable = void>
template <typename ArrowType, SimdLevel::type SimdLevel, typename Enable = void>
struct MinMaxState {};

template <typename ArrowType>
struct MinMaxState<ArrowType, enable_if_boolean<ArrowType>> {
using ThisType = MinMaxState<ArrowType>;
template <typename ArrowType, SimdLevel::type SimdLevel>
struct MinMaxState<ArrowType, SimdLevel, enable_if_boolean<ArrowType>> {
using ThisType = MinMaxState<ArrowType, SimdLevel>;
using T = typename ArrowType::c_type;

ThisType& operator+=(const ThisType& rhs) {
Expand All @@ -339,9 +346,9 @@ struct MinMaxState<ArrowType, enable_if_boolean<ArrowType>> {
bool has_values = false;
};

template <typename ArrowType>
struct MinMaxState<ArrowType, enable_if_integer<ArrowType>> {
using ThisType = MinMaxState<ArrowType>;
template <typename ArrowType, SimdLevel::type SimdLevel>
struct MinMaxState<ArrowType, SimdLevel, enable_if_integer<ArrowType>> {
using ThisType = MinMaxState<ArrowType, SimdLevel>;
using T = typename ArrowType::c_type;

ThisType& operator+=(const ThisType& rhs) {
Expand All @@ -363,9 +370,9 @@ struct MinMaxState<ArrowType, enable_if_integer<ArrowType>> {
bool has_values = false;
};

template <typename ArrowType>
struct MinMaxState<ArrowType, enable_if_floating_point<ArrowType>> {
using ThisType = MinMaxState<ArrowType>;
template <typename ArrowType, SimdLevel::type SimdLevel>
struct MinMaxState<ArrowType, SimdLevel, enable_if_floating_point<ArrowType>> {
using ThisType = MinMaxState<ArrowType, SimdLevel>;
using T = typename ArrowType::c_type;

ThisType& operator+=(const ThisType& rhs) {
Expand All @@ -387,11 +394,11 @@ struct MinMaxState<ArrowType, enable_if_floating_point<ArrowType>> {
bool has_values = false;
};

template <typename ArrowType>
template <typename ArrowType, SimdLevel::type SimdLevel>
struct MinMaxImpl : public ScalarAggregator {
using ArrayType = typename TypeTraits<ArrowType>::ArrayType;
using ThisType = MinMaxImpl<ArrowType>;
using StateType = MinMaxState<ArrowType>;
using ThisType = MinMaxImpl<ArrowType, SimdLevel>;
using StateType = MinMaxState<ArrowType, SimdLevel>;

MinMaxImpl(const std::shared_ptr<DataType>& out_type, const MinMaxOptions& options)
: out_type(out_type), options(options) {}
Expand Down Expand Up @@ -442,7 +449,7 @@ struct MinMaxImpl : public ScalarAggregator {

std::shared_ptr<DataType> out_type;
MinMaxOptions options;
MinMaxState<ArrowType> state;
MinMaxState<ArrowType, SimdLevel> state;

private:
StateType ConsumeWithNulls(const ArrayType& arr) const {
Expand Down Expand Up @@ -503,8 +510,12 @@ struct MinMaxImpl : public ScalarAggregator {
}
};

struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType> {
using MinMaxImpl::MinMaxImpl;
template <SimdLevel::type SimdLevel>
struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType, SimdLevel> {
using StateType = MinMaxState<BooleanType, SimdLevel>;
using ArrayType = typename TypeTraits<BooleanType>::ArrayType;
using MinMaxImpl<BooleanType, SimdLevel>::MinMaxImpl;
using MinMaxImpl<BooleanType, SimdLevel>::options;

void Consume(KernelContext*, const ExecBatch& batch) override {
StateType local;
Expand All @@ -530,6 +541,7 @@ struct BooleanMinMaxImpl : public MinMaxImpl<BooleanType> {
}
};

template <SimdLevel::type SimdLevel>
struct MinMaxInitState {
std::unique_ptr<KernelState> state;
KernelContext* ctx;
Expand All @@ -550,13 +562,13 @@ struct MinMaxInitState {
}

Status Visit(const BooleanType&) {
state.reset(new BooleanMinMaxImpl(out_type, options));
state.reset(new BooleanMinMaxImpl<SimdLevel>(out_type, options));
return Status::OK();
}

template <typename Type>
enable_if_number<Type, Status> Visit(const Type&) {
state.reset(new MinMaxImpl<Type>(out_type, options));
state.reset(new MinMaxImpl<Type, SimdLevel>(out_type, options));
return Status::OK();
}

Expand Down

0 comments on commit 553266e

Please sign in to comment.