Skip to content

Commit

Permalink
Vectorize find_first_of for 4 and 8 byte elements (#4587)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
  • Loading branch information
AlexGuteniev and StephanTLavavej authored Apr 19, 2024
1 parent f54203f commit 1b06c52
Show file tree
Hide file tree
Showing 3 changed files with 349 additions and 120 deletions.
22 changes: 9 additions & 13 deletions benchmarks/src/find_first_of.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,18 +33,14 @@ void bm(benchmark::State& state) {
}
}

// Shared benchmark argument sets: nine two-value `Args` calls chained together.
// Written to follow `->` on a registered benchmark, e.g. `BENCHMARK(f)->ARGS;`.
//
// The expansion intentionally has no trailing semicolon: the use site supplies it.
// Ending the macro with `;` would make `->ARGS;` expand to `...;;` (a stray empty
// declaration at namespace scope) and would prevent using ARGS inside a larger expression.
#define ARGS                 \
    Args({2, 3})             \
        ->Args({7, 4})       \
        ->Args({9, 3})       \
        ->Args({22, 5})      \
        ->Args({58, 2})      \
        ->Args({102, 4})     \
        ->Args({325, 1})     \
        ->Args({1011, 11})   \
        ->Args({3056, 7})

// Register the benchmark for 1-byte and 2-byte unsigned element types,
// each with the argument sets supplied by the ARGS macro.
BENCHMARK(bm<uint8_t>)->ARGS;
BENCHMARK(bm<uint16_t>)->ARGS;
// Adds the shared set of nine two-value argument lists to the benchmark `bm`.
// Meant to be handed to `Apply`, as the BENCHMARK registrations in this file do.
// NOTE(review): the meaning of each pair is defined by the `bm` benchmark body,
// which is not visible here.
void common_args(auto bm) {
    bm->Args({2, 3});
    bm->Args({7, 4});
    bm->Args({9, 3});
    bm->Args({22, 5});
    bm->Args({58, 2});
    bm->Args({102, 4});
    bm->Args({325, 1});
    bm->Args({1011, 11});
    bm->Args({3056, 7});
}

// Register the benchmark for 1-, 2-, 4- and 8-byte unsigned element types.
// Each registration receives its argument sets through common_args.
BENCHMARK(bm<uint8_t>)->Apply(common_args);
BENCHMARK(bm<uint16_t>)->Apply(common_args);
BENCHMARK(bm<uint32_t>)->Apply(common_args);
BENCHMARK(bm<uint64_t>)->Apply(common_args);

// Provides the program entry point that runs the registered benchmarks.
BENCHMARK_MAIN();
14 changes: 11 additions & 3 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,10 @@ const void* __stdcall __std_find_first_of_trivial_1(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
// Separately compiled find_first_of helpers, one per element size; the numeric suffix is the
// element size in bytes that _Find_first_of_vectorized dispatches on (sizeof(_Ty1) == 2, 4, 8).
// Both ranges are passed as untyped [_First, _Last) pointer pairs; the caller casts the returned
// pointer back to an element pointer.
// NOTE(review): presumably the result points into [_First1, _Last1] - confirm against the implementation.

// 2-byte elements.
const void* __stdcall __std_find_first_of_trivial_2(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
// 4-byte elements.
const void* __stdcall __std_find_first_of_trivial_4(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;
// 8-byte elements.
const void* __stdcall __std_find_first_of_trivial_8(
const void* _First1, const void* _Last1, const void* _First2, const void* _Last2) noexcept;

__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept;
Expand Down Expand Up @@ -202,6 +206,12 @@ _Ty1* _Find_first_of_vectorized(
} else if constexpr (sizeof(_Ty1) == 2) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_2(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 4) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_4(_First1, _Last1, _First2, _Last2)));
} else if constexpr (sizeof(_Ty1) == 8) {
return const_cast<_Ty1*>(
static_cast<const _Ty1*>(::__std_find_first_of_trivial_8(_First1, _Last1, _First2, _Last2)));
} else {
static_assert(false, "Unexpected size");
}
Expand Down Expand Up @@ -230,9 +240,7 @@ _INLINE_VAR constexpr ptrdiff_t _Threshold_find_first_of = 16;

// Can we activate the vector algorithms for find_first_of?
// This diff view shows two forms of the same variable template back to back:
// first the form with an element-size cap, then the form without it.
template <class _It1, class _It2, class _Pr>
constexpr bool _Vector_alg_in_find_first_of_is_safe =
_Equal_memcmp_is_safe<_It1, _It2, _Pr> // can replace value comparison with bitwise comparison
&& sizeof(_Iter_value_t<_It1>) <= 2; // pcmpestri compatible size
// Form without the size cap: only the bitwise-comparison requirement remains, matching the
// sizeof(_Ty1) dispatch in _Find_first_of_vectorized that also handles 4- and 8-byte elements.
constexpr bool _Vector_alg_in_find_first_of_is_safe = _Equal_memcmp_is_safe<_It1, _It2, _Pr>;

// Can we activate the vector algorithms for replace?
template <class _Iter, class _Ty1>
Expand Down
Loading

0 comments on commit 1b06c52

Please sign in to comment.