Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-927] Add macro __AVX512BW__ check for different CPU architecture #975

Merged
merged 2 commits into from
Jun 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion native-sql-engine/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -495,7 +495,7 @@ file(COPY codegen/common/hash_relation_number.h DESTINATION ${root_directory}/re

add_definitions(-DNATIVESQL_SRC_PATH="${root_directory}/releases")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-attributes")
set(NATIVE_AVX512_FLAG "-march=icelake-server")
set(NATIVE_AVX512_FLAG "-march=native")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${NATIVE_AVX512_FLAG}")
set(SPARK_COLUMNAR_PLUGIN_SRCS
jni/jni_wrapper.cc
Expand Down
19 changes: 16 additions & 3 deletions native-sql-engine/cpp/src/operators/columnar_to_row_converter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ arrow::Status ColumnarToRowConverter::Init(
int32_t j = 0;
int32_t* length_data = lengths_.data();

#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512_)) {
__m256i x7_8x = _mm256_load_si256((__m256i*)x_7);
__m256i x8_8x = _mm256_load_si256((__m256i*)x_8);
Expand Down Expand Up @@ -172,6 +173,7 @@ arrow::Status ColumnarToRowConverter::Init(
_mm_prefetch(&offsetarray[j + (128 + 128) / sizeof(offset_type)], _MM_HINT_T0);
}
}
#endif

for (j; j < num_rows_; j++) {
offset_type length = offsetarray[j + 1] - offsetarray[j];
Expand All @@ -192,10 +194,13 @@ arrow::Status ColumnarToRowConverter::Init(
// allocate one more cache line to ease avx operations
if (buffer_ == nullptr || buffer_->capacity() < total_memory_size + 64) {
ARROW_ASSIGN_OR_RAISE(buffer_, AllocateBuffer(total_memory_size + 64, memory_pool_));
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512_)) {
memset(buffer_->mutable_data() + total_memory_size, 0,
buffer_->capacity() - total_memory_size);
} else {
} else
#endif
{
memset(buffer_->mutable_data(), 0, buffer_->capacity());
}
}
Expand Down Expand Up @@ -384,6 +389,7 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
std::vector<uint8_t>& typewidth,
std::vector<std::shared_ptr<arrow::Array>>& arrays,
bool support_avx512) {
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512)) {
__m256i fill_0_8x;
fill_0_8x = _mm256_xor_si256(fill_0_8x, fill_0_8x);
Expand All @@ -395,6 +401,7 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
}
}
}
#endif

for (auto col_index = 0; col_index < num_cols; col_index++) {
auto& array = arrays[col_index];
Expand Down Expand Up @@ -427,6 +434,7 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
offset_type length = BinaryOffsets[j + 1] - BinaryOffsets[j];
auto value = &dataptrs[col_index][2][BinaryOffsets[j]];

#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512)) {
// write the variable value
offset_type k;
Expand All @@ -440,7 +448,9 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
__m256i v = _mm256_maskz_loadu_epi8(mask, value + k);
_mm256_mask_storeu_epi8(buffer_address + offsets[j] + buffer_cursor[j] + k,
mask, v);
} else {
} else
#endif
{
// write the variable value
memcpy(buffer_address + offsets[j] + buffer_cursor[j], value, length);
}
Expand Down Expand Up @@ -508,11 +518,14 @@ inline arrow::Status FillBuffer(int32_t& row_start, int32_t batch_rows,
for (auto j = row_start; j < row_start + batch_rows; j++) {
if (nullvec[col_index] || (!array->IsNull(j))) {
const uint8_t* srcptr = dataptr + (j << shift);
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512)) {
__m256i v = _mm256_maskz_loadu_epi8(mask, srcptr);
_mm256_mask_storeu_epi8(buffer_address_tmp + offsets[j], mask, v);
_mm_prefetch(srcptr + 64, _MM_HINT_T0);
} else {
} else
#endif
{
memcpy(buffer_address_tmp + offsets[j], srcptr, typewidth[col_index]);
}
} else {
Expand Down
5 changes: 4 additions & 1 deletion native-sql-engine/cpp/src/shuffle/splitter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,7 @@ arrow::Status Splitter::SplitBinaryType(const uint8_t* src_addr, const T* src_of
<< " strlen = " << strlength << std::endl;
}
auto value_src_ptr = src_addr + src_offset_addr[src_offset];
#ifdef __AVX512BW__
if (ARROW_PREDICT_TRUE(support_avx512_)) {
// write the variable value
T k;
Expand All @@ -1260,7 +1261,9 @@ arrow::Status Splitter::SplitBinaryType(const uint8_t* src_addr, const T* src_of
auto mask = (1L << (strlength - k)) - 1;
__m256i v = _mm256_maskz_loadu_epi8(mask, value_src_ptr + k);
_mm256_mask_storeu_epi8(dst_value_base + k, mask, v);
} else {
} else
#endif
{
memcpy(dst_value_base, value_src_ptr, strlength);
}
dst_value_base += strlength;
Expand Down