Skip to content

Commit

Permalink
Vectorize bitset from string construction (#4839)
Browse files Browse the repository at this point in the history
Co-authored-by: Stephan T. Lavavej <stl@nuwen.net>
  • Loading branch information
AlexGuteniev and StephanTLavavej authored Oct 12, 2024
1 parent faccf00 commit ab555ad
Show file tree
Hide file tree
Showing 5 changed files with 423 additions and 14 deletions.
1 change: 1 addition & 0 deletions benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ function(add_benchmark name)
target_link_libraries(benchmark-${name} PRIVATE benchmark::benchmark)
endfunction()

add_benchmark(bitset_from_string src/bitset_from_string.cpp)
add_benchmark(bitset_to_string src/bitset_to_string.cpp)
add_benchmark(efficient_nonlocking_print src/efficient_nonlocking_print.cpp)
add_benchmark(find_and_count src/find_and_count.cpp)
Expand Down
89 changes: 89 additions & 0 deletions benchmarks/src/bitset_from_string.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <array>
#include <benchmark/benchmark.h>
#include <bitset>
#include <cstddef>
#include <random>

using namespace std;

namespace {
// Builds a buffer of back-to-back null-terminated strings, each holding N pseudo-random '0'/'1' characters.
// Enough strings are generated to provide at least Min_length digits in total (rounded up to a whole string).
// The engine is default-constructed, so it always starts from the same default seed.
template <size_t N, class charT, size_t Min_length>
auto random_digits_init() {
    constexpr size_t chunk_count = (Min_length + N - 1) / N; // ceiling of Min_length / N
    static_assert(chunk_count != 0);

    constexpr size_t stride     = N + 1; // N digits followed by one \0
    constexpr size_t total_size = chunk_count * stride;

    mt19937_64 engine{};
    uniform_int_distribution<> pick_digit('0', '1');

    array<charT, total_size> buffer;

    for (size_t chunk = 0; chunk != chunk_count; ++chunk) {
        const size_t base = chunk * stride;

        // Random digits first; the draws happen in increasing index order.
        for (size_t offset = 0; offset != N; ++offset) {
            buffer[base + offset] = static_cast<charT>(pick_digit(engine));
        }

        // Then the terminator that closes this string.
        buffer[base + N] = charT{'\0'};
    }

    return buffer;
}

// Selects which bitset constructor form a benchmark exercises:
// char_count passes an explicit character count, null_term relies on the null terminator.
enum class length_type : bool { char_count, null_term };

// Digit buffer for each <N, charT> combination, built by random_digits_init with a minimum of 2048 digits.
template <size_t N, class charT>
const auto random_digits = random_digits_init<N, charT, 2048>();

// Benchmarks constructing bitset<N> from character data of type charT.
// Each iteration walks the whole random_digits<N, charT> buffer, constructing one bitset per stored string.
// Length picks the constructor form: pointer plus explicit count, or pointer to a null-terminated string.
template <length_type Length, size_t N, class charT>
void BM_bitset_from_string(benchmark::State& state) {
    constexpr size_t stride = N + 1; // each stored string is N digits plus its null terminator
    const auto& digits      = random_digits<N, charT>;

    for (auto _ : state) {
        benchmark::DoNotOptimize(digits);
        const auto first = digits.data();
        const auto total = digits.size();

        for (size_t offset = 0; offset != total; offset += stride) {
            const auto chunk = first + offset;

            if constexpr (Length == length_type::null_term) {
                bitset<N> bs(chunk);
                benchmark::DoNotOptimize(bs);
            } else {
                bitset<N> bs(chunk, N);
                benchmark::DoNotOptimize(bs);
            }
        }
    }
}
} // namespace

// Explicit character count, char elements, bitset sizes from 15 to 2048 bits.
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, char>);

// Explicit character count, wchar_t elements, same set of sizes.
BENCHMARK(BM_bitset_from_string<length_type::char_count, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::char_count, 2048, wchar_t>);

// Null-terminated input, char elements, same set of sizes.
BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, char>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, char>);

// Null-terminated input, wchar_t elements, same set of sizes.
BENCHMARK(BM_bitset_from_string<length_type::null_term, 15, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 16, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 36, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 64, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 512, wchar_t>);
BENCHMARK(BM_bitset_from_string<length_type::null_term, 2048, wchar_t>);

// Google Benchmark macro that supplies the program's main().
BENCHMARK_MAIN();
36 changes: 34 additions & 2 deletions stl/inc/bitset
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,19 @@ _STL_DISABLE_CLANG_WARNINGS
#endif // !defined(_STD_BITSET_TO_STREAM_STACK_RESERVATION)

#if _USE_STD_VECTOR_ALGORITHMS
// These bitset functions sometimes assume that the bit array has zero padding to a multiple of 2 or 4 bytes.
// The assumptions hold true even for the vNext suggestion to use smaller types for small bitsets (see GH-1498)
// due to vectorization thresholds.

extern "C" {
// Separately compiled helpers used by bitset's to-string path, for 1-byte and 2-byte character types.
// As called from bitset::_To_string: _Dest is the output character buffer and _Src is the bitset's bit array.
__declspec(noalias) void __stdcall __std_bitset_to_string_1(
char* _Dest, const void* _Src, size_t _Size_bits, char _Elem0, char _Elem1) noexcept;
__declspec(noalias) void __stdcall __std_bitset_to_string_2(
wchar_t* _Dest, const void* _Src, size_t _Size_bits, wchar_t _Elem0, wchar_t _Elem1) noexcept;
// Separately compiled helpers used by bitset's construct-from-characters path, for 1-byte and 2-byte character types.
// As called from bitset::_Construct: _Dest is the bitset's bit array, _Size_bytes is sizeof of that array,
// _Size_bits is the bitset's bit count, _Size_chars is the source character count, and _Elem0 / _Elem1 are the
// characters standing for zero and one.
// The caller invokes _Xinv() when the result is false.
// NOTE(review): presumably false means a source character matched neither _Elem0 nor _Elem1 - confirm against
// the implementation.
__declspec(noalias) bool __stdcall __std_bitset_from_string_1(void* _Dest, const char* _Src, size_t _Size_bytes,
size_t _Size_bits, size_t _Size_chars, char _Elem0, char _Elem1) noexcept;
__declspec(noalias) bool __stdcall __std_bitset_from_string_2(void* _Dest, const wchar_t* _Src, size_t _Size_bytes,
size_t _Size_bits, size_t _Size_chars, wchar_t _Elem0, wchar_t _Elem1) noexcept;
} // extern "C"
#endif // _USE_STD_VECTOR_ALGORITHMS

Expand Down Expand Up @@ -115,6 +123,30 @@ public:
private:
template <class _Traits, class _Elem>
_CONSTEXPR23 void _Construct(const _Elem* const _Ptr, size_t _Count, const _Elem _Elem0, const _Elem _Elem1) {
#if _USE_STD_VECTOR_ALGORITHMS
constexpr size_t _Bitset_from_string_vector_threshold = 16;
if constexpr (_Bits >= _Bitset_from_string_vector_threshold
&& _Is_implementation_handled_char_traits<_Traits> && sizeof(_Elem) <= 2) {
if (!_STD _Is_constant_evaluated()) {
bool _Result;

if constexpr (sizeof(_Elem) == 1) {
_Result = __std_bitset_from_string_1(_Array, reinterpret_cast<const char*>(_Ptr), sizeof(_Array),
_Bits, _Count, static_cast<char>(_Elem0), static_cast<char>(_Elem1));
} else {
_STL_INTERNAL_STATIC_ASSERT(sizeof(_Elem) == 2);
_Result = __std_bitset_from_string_2(_Array, reinterpret_cast<const wchar_t*>(_Ptr), sizeof(_Array),
_Bits, _Count, static_cast<wchar_t>(_Elem0), static_cast<wchar_t>(_Elem1));
}

if (!_Result) {
_Xinv();
}

return;
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
if (_Count > _Bits) {
for (size_t _Idx = _Bits; _Idx < _Count; ++_Idx) {
const auto _Ch = _Ptr[_Idx];
Expand Down Expand Up @@ -462,8 +494,8 @@ public:
_CONSTEXPR23 void _To_string(
_Elem* const _Buf, const size_t _Len, const _Elem _Elem0, const _Elem _Elem1) const noexcept {
#if _USE_STD_VECTOR_ALGORITHMS
constexpr size_t _Bitset_vector_threshold = 32;
if constexpr (_Bits >= _Bitset_vector_threshold && is_integral_v<_Elem> && sizeof(_Elem) <= 2) {
constexpr size_t _Bitset_to_string_vector_threshold = 32;
if constexpr (_Bits >= _Bitset_to_string_vector_threshold && is_integral_v<_Elem> && sizeof(_Elem) <= 2) {
if (!_Is_constant_evaluated()) {
if constexpr (sizeof(_Elem) == 1) {
__std_bitset_to_string_1(reinterpret_cast<char*>(_Buf), _Array, _Len, static_cast<char>(_Elem0),
Expand Down
Loading

0 comments on commit ab555ad

Please sign in to comment.