Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dedicated ranges::minmax vectorization that does not unnecessarily track element pointer #4384

Merged
merged 36 commits into from
Feb 16, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
2082d8f
initial implementation
AlexGuteniev Feb 10, 2024
e38454c
benchmark
AlexGuteniev Feb 10, 2024
108ed21
move vectorization out
AlexGuteniev Feb 10, 2024
838da2a
missing max
AlexGuteniev Feb 10, 2024
1b61bb0
bemchmark copypaste error
AlexGuteniev Feb 10, 2024
15cd2d4
_val is less confusing
AlexGuteniev Feb 10, 2024
6ea297a
valement
AlexGuteniev Feb 10, 2024
ddedb4a
format
AlexGuteniev Feb 10, 2024
09469ab
format
AlexGuteniev Feb 10, 2024
461ca2b
no top level const in declaration
AlexGuteniev Feb 10, 2024
80e2470
header unit
AlexGuteniev Feb 10, 2024
5448180
ADL
AlexGuteniev Feb 10, 2024
0eb5322
check projection
AlexGuteniev Feb 10, 2024
939dc33
copypasta cleanup
AlexGuteniev Feb 11, 2024
be2aa9b
Improve fallback
AlexGuteniev Feb 11, 2024
1ecc70e
format
AlexGuteniev Feb 11, 2024
b5be1e4
Update stl/inc/__msvc_minmax.hpp
AlexGuteniev Feb 11, 2024
c01bf7d
turn tails
AlexGuteniev Feb 11, 2024
99917b4
Use SSE responsible
AlexGuteniev Feb 11, 2024
e55e7f2
empty vector check
AlexGuteniev Feb 11, 2024
35626ce
Don't mimic `_Minmax_element` fallback
AlexGuteniev Feb 11, 2024
47bc608
clear pointers
AlexGuteniev Feb 11, 2024
c3ba612
no horizontal position
AlexGuteniev Feb 11, 2024
def1e7a
unload extra _Load
AlexGuteniev Feb 11, 2024
f149a83
We'll hide Slavic accent
AlexGuteniev Feb 11, 2024
5a91a03
non-type template param is already const enough
AlexGuteniev Feb 11, 2024
459ae03
constant result
AlexGuteniev Feb 11, 2024
e9185a9
range of `is_constant_evaluated()`
AlexGuteniev Feb 11, 2024
cf1e3da
<!> scope for `_M_ARM64EC` <!>
AlexGuteniev Feb 11, 2024
1743e20
the who understands English articles
AlexGuteniev Feb 11, 2024
1603f0b
Don't need `_To_address` for `initializer_list`.
StephanTLavavej Feb 11, 2024
13155b3
`_STL_ASSERT` non-empty inputs, update citations.
StephanTLavavej Feb 11, 2024
7c678db
Cleanup comment punctuation.
StephanTLavavej Feb 11, 2024
c201a3f
Avoid risk by always guarding with `_Use_sse42()`.
StephanTLavavej Feb 11, 2024
35fd4b7
Make room for comments by renaming lambda `_First` and `_Second` to `…
StephanTLavavej Feb 12, 2024
d604bfd
Comment all `_Minmax_traits_MEOW` intrinsics above SSE2.
StephanTLavavej Feb 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 39 additions & 0 deletions benchmarks/src/minmax_element.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ enum class Op {
Min,
Max,
Both,
Min_val,
Max_val,
Both_val,
};

using namespace std;
Expand All @@ -38,49 +41,85 @@ void bm(benchmark::State& state) {
benchmark::DoNotOptimize(ranges::max_element(a));
} else if constexpr (Operation == Op::Both) {
benchmark::DoNotOptimize(ranges::minmax_element(a));
} else if constexpr (Operation == Op::Min_val) {
benchmark::DoNotOptimize(ranges::min(a));
} else if constexpr (Operation == Op::Max_val) {
benchmark::DoNotOptimize(ranges::max(a));
} else if constexpr (Operation == Op::Both_val) {
benchmark::DoNotOptimize(ranges::minmax(a));
}
}
}

// Benchmark registrations. Every element type below is registered once per Op enumerator, always with 8021 as the
// second template argument (presumably the number of elements -- confirm against the declaration of bm).
// Op::Both measures ranges::minmax_element, while Op::Min_val / Op::Max_val / Op::Both_val measure the
// value-returning ranges::min / ranges::max / ranges::minmax.

// Unsigned integer element types.
BENCHMARK(bm<uint8_t, 8021, Op::Min>);
BENCHMARK(bm<uint8_t, 8021, Op::Max>);
BENCHMARK(bm<uint8_t, 8021, Op::Both>);
BENCHMARK(bm<uint8_t, 8021, Op::Min_val>);
BENCHMARK(bm<uint8_t, 8021, Op::Max_val>);
BENCHMARK(bm<uint8_t, 8021, Op::Both_val>);

BENCHMARK(bm<uint16_t, 8021, Op::Min>);
BENCHMARK(bm<uint16_t, 8021, Op::Max>);
BENCHMARK(bm<uint16_t, 8021, Op::Both>);
BENCHMARK(bm<uint16_t, 8021, Op::Min_val>);
BENCHMARK(bm<uint16_t, 8021, Op::Max_val>);
BENCHMARK(bm<uint16_t, 8021, Op::Both_val>);

BENCHMARK(bm<uint32_t, 8021, Op::Min>);
BENCHMARK(bm<uint32_t, 8021, Op::Max>);
BENCHMARK(bm<uint32_t, 8021, Op::Both>);
BENCHMARK(bm<uint32_t, 8021, Op::Min_val>);
BENCHMARK(bm<uint32_t, 8021, Op::Max_val>);
BENCHMARK(bm<uint32_t, 8021, Op::Both_val>);

BENCHMARK(bm<uint64_t, 8021, Op::Min>);
BENCHMARK(bm<uint64_t, 8021, Op::Max>);
BENCHMARK(bm<uint64_t, 8021, Op::Both>);
BENCHMARK(bm<uint64_t, 8021, Op::Min_val>);
BENCHMARK(bm<uint64_t, 8021, Op::Max_val>);
BENCHMARK(bm<uint64_t, 8021, Op::Both_val>);

// Signed integer element types.
BENCHMARK(bm<int8_t, 8021, Op::Min>);
BENCHMARK(bm<int8_t, 8021, Op::Max>);
BENCHMARK(bm<int8_t, 8021, Op::Both>);
BENCHMARK(bm<int8_t, 8021, Op::Min_val>);
BENCHMARK(bm<int8_t, 8021, Op::Max_val>);
BENCHMARK(bm<int8_t, 8021, Op::Both_val>);

BENCHMARK(bm<int16_t, 8021, Op::Min>);
BENCHMARK(bm<int16_t, 8021, Op::Max>);
BENCHMARK(bm<int16_t, 8021, Op::Both>);
BENCHMARK(bm<int16_t, 8021, Op::Min_val>);
BENCHMARK(bm<int16_t, 8021, Op::Max_val>);
BENCHMARK(bm<int16_t, 8021, Op::Both_val>);

BENCHMARK(bm<int32_t, 8021, Op::Min>);
BENCHMARK(bm<int32_t, 8021, Op::Max>);
BENCHMARK(bm<int32_t, 8021, Op::Both>);
BENCHMARK(bm<int32_t, 8021, Op::Min_val>);
BENCHMARK(bm<int32_t, 8021, Op::Max_val>);
BENCHMARK(bm<int32_t, 8021, Op::Both_val>);

BENCHMARK(bm<int64_t, 8021, Op::Min>);
BENCHMARK(bm<int64_t, 8021, Op::Max>);
BENCHMARK(bm<int64_t, 8021, Op::Both>);
BENCHMARK(bm<int64_t, 8021, Op::Min_val>);
BENCHMARK(bm<int64_t, 8021, Op::Max_val>);
BENCHMARK(bm<int64_t, 8021, Op::Both_val>);

// Floating-point element types.
BENCHMARK(bm<float, 8021, Op::Min>);
BENCHMARK(bm<float, 8021, Op::Max>);
BENCHMARK(bm<float, 8021, Op::Both>);
BENCHMARK(bm<float, 8021, Op::Min_val>);
BENCHMARK(bm<float, 8021, Op::Max_val>);
BENCHMARK(bm<float, 8021, Op::Both_val>);

BENCHMARK(bm<double, 8021, Op::Min>);
BENCHMARK(bm<double, 8021, Op::Max>);
BENCHMARK(bm<double, 8021, Op::Both>);
BENCHMARK(bm<double, 8021, Op::Min_val>);
BENCHMARK(bm<double, 8021, Op::Max_val>);
BENCHMARK(bm<double, 8021, Op::Both_val>);


BENCHMARK_MAIN();
1 change: 1 addition & 0 deletions stl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set(HEADERS
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_formatter.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_int128.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_iter_core.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_minmax.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_print.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_sanitizer_annotate_container.hpp
${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_system_error_abi.hpp
Expand Down
86 changes: 86 additions & 0 deletions stl/inc/__msvc_minmax.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// __msvc_minmax.hpp internal header (core)

// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef __MSVC_MINMAX_HPP
#define __MSVC_MINMAX_HPP
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <cstdint>

#pragma pack(push, _CRT_PACKING)
#pragma warning(push, _STL_WARNING_LEVEL)
#pragma warning(disable : _STL_DISABLED_WARNINGS)
_STL_DISABLE_CLANG_WARNINGS
#pragma push_macro("new")
#undef new

// Plain aggregates used as by-value results of min/max searches. Each one has exactly two members, _Min and _Max.
// They are declared extern "C" so that functions with C linkage can name them as return types.
extern "C" {
// Pair of untyped element pointers (const void*), as opposed to the value-carrying structs below.
struct _Min_max_element_t {
const void* _Min;
const void* _Max;
};

// The remaining structs carry a pair of values rather than pointers. The suffix names the element representation:
// the byte width followed by i (signed) or u (unsigned) for integers, f for float, d for double, p for pointers.

// 1-byte signed integer values.
struct _Min_max_1i {
int8_t _Min;
int8_t _Max;
};

// 1-byte unsigned integer values.
struct _Min_max_1u {
uint8_t _Min;
uint8_t _Max;
};

// 2-byte signed integer values.
struct _Min_max_2i {
int16_t _Min;
int16_t _Max;
};

// 2-byte unsigned integer values.
struct _Min_max_2u {
uint16_t _Min;
uint16_t _Max;
};

// 4-byte signed integer values.
struct _Min_max_4i {
int32_t _Min;
int32_t _Max;
};

// 4-byte unsigned integer values.
struct _Min_max_4u {
uint32_t _Min;
uint32_t _Max;
};

// 8-byte signed integer values.
struct _Min_max_8i {
int64_t _Min;
int64_t _Max;
};

// 8-byte unsigned integer values.
struct _Min_max_8u {
uint64_t _Min;
uint64_t _Max;
};

// float values.
struct _Min_max_f {
float _Min;
float _Max;
};

// double values.
struct _Min_max_d {
double _Min;
double _Max;
};

// Pointer values stored as void*. Here the pointers themselves are the min/max values (not locations of elements),
// and unlike _Min_max_element_t the members are not pointer-to-const.
struct _Min_max_p {
void* _Min;
void* _Max;
};
} // extern "C"

#pragma pop_macro("new")
_STL_RESTORE_CLANG_WARNINGS
#pragma warning(pop)
#pragma pack(pop)
#endif // _STL_COMPILER_PREPROCESSOR
#endif // __MSVC_MINMAX_HPP
76 changes: 69 additions & 7 deletions stl/inc/algorithm
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#define _ALGORITHM_
#include <yvals_core.h>
#if _STL_COMPILER_PREPROCESSOR
#include <__msvc_minmax.hpp>
#include <xmemory>

#if _HAS_CXX23
Expand All @@ -29,11 +30,6 @@ _STL_DISABLE_CLANG_WARNINGS
#if _USE_STD_VECTOR_ALGORITHMS

extern "C" {
struct _Min_max_element_t {
const void* _Min;
const void* _Max;
};

// The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms
// won't be stored beyond the lifetime of the function, and that the function will only reference arrays denoted by
// those pointers. The optimizer also assumes in that case that a pointer parameter is not returned to the caller via
Expand Down Expand Up @@ -61,6 +57,17 @@ const void* __stdcall __std_find_last_trivial_1(const void* _First, const void*
const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept;
const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept;
const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept;

// Value-returning min/max routines. Each takes the range bounds _First and _Last as untyped pointers and returns a
// _Min_max_MEOW struct by value, i.e. the _Min and _Max values themselves rather than pointers to elements.
// The suffix gives the element representation: the byte width followed by i (signed) or u (unsigned) for integers,
// f for float, and d for double.
__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_2u __stdcall __std_minmax_2u(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_4i __stdcall __std_minmax_4i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_4u __stdcall __std_minmax_4u(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_8i __stdcall __std_minmax_8i(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_8u __stdcall __std_minmax_8u(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_f __stdcall __std_minmax_f(const void* _First, const void* _Last) noexcept;
__declspec(noalias) _Min_max_d __stdcall __std_minmax_d(const void* _First, const void* _Last) noexcept;
} // extern "C"

_STD_BEGIN
Expand Down Expand Up @@ -89,6 +96,53 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept {
return {const_cast<_Ty*>(static_cast<const _Ty*>(_Res._Min)), const_cast<_Ty*>(static_cast<const _Ty*>(_Res._Max))};
}

template <class _Ty>
auto __std_minmax(_Ty* _First, _Ty* _Last) noexcept {
constexpr bool _Signed = _STD is_signed_v<_Ty>;

if constexpr (_STD is_pointer_v<_Ty>) {
if constexpr (sizeof(_Ty) == 4) {
auto _Result = ::__std_minmax_4u(_First, _Last);
return _Min_max_p{reinterpret_cast<void*>(_Result._Min), reinterpret_cast<void*>(_Result._Max)};
} else if constexpr (sizeof(_Ty) == 8) {
auto _Result = ::__std_minmax_8u(_First, _Last);
return _Min_max_p{reinterpret_cast<void*>(_Result._Min), reinterpret_cast<void*>(_Result._Max)};
AlexGuteniev marked this conversation as resolved.
Show resolved Hide resolved
} else {
static_assert(_STD _Always_false<_Ty>, "Unexpected size");
}
AlexGuteniev marked this conversation as resolved.
Show resolved Hide resolved
} else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) {
return ::__std_minmax_f(_First, _Last);
} else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) {
return ::__std_minmax_d(_First, _Last);
} else if constexpr (sizeof(_Ty) == 1) {
if constexpr (_Signed) {
return ::__std_minmax_1i(_First, _Last);
} else {
return ::__std_minmax_1u(_First, _Last);
}
} else if constexpr (sizeof(_Ty) == 2) {
if constexpr (_Signed) {
return ::__std_minmax_2i(_First, _Last);
} else {
return ::__std_minmax_2u(_First, _Last);
}
} else if constexpr (sizeof(_Ty) == 4) {
if constexpr (_Signed) {
return ::__std_minmax_4i(_First, _Last);
} else {
return ::__std_minmax_4u(_First, _Last);
}
} else if constexpr (sizeof(_Ty) == 8) {
if constexpr (_Signed) {
return ::__std_minmax_8i(_First, _Last);
} else {
return ::__std_minmax_8u(_First, _Last);
}
} else {
static_assert(_STD _Always_false<_Ty>, "Unexpected size");
}
}

template <class _Ty, class _TVal>
_Ty* __std_find_last_trivial(_Ty* _First, _Ty* _Last, const _TVal _Val) noexcept {
if constexpr (_STD is_pointer_v<_TVal> || _STD is_null_pointer_v<_TVal>) {
Expand Down Expand Up @@ -10070,6 +10124,14 @@ _NODISCARD constexpr pair<const _Ty&, const _Ty&> minmax(const _Ty& _Left _MSVC_
_EXPORT_STD template <class _Ty, class _Pr>
_NODISCARD constexpr pair<_Ty, _Ty> minmax(initializer_list<_Ty> _Ilist, _Pr _Pred) {
// return {leftmost/smallest, rightmost/largest}
#if _USE_STD_VECTOR_ALGORITHMS
if constexpr (_Is_min_max_optimization_safe<const _Ty*, _Pr>) {
if (!_STD _Is_constant_evaluated()) {
const auto _Result = _STD __std_minmax(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()));
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
StephanTLavavej marked this conversation as resolved.
Show resolved Hide resolved
return {static_cast<_Ty>(_Result._Min), static_cast<_Ty>(_Result._Max)};
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
pair<const _Ty*, const _Ty*> _Res =
_STD _Minmax_element_unchecked(_Ilist.begin(), _Ilist.end(), _STD _Pass_fn(_Pred));
return pair<_Ty, _Ty>(*_Res.first, *_Res.second);
Expand Down Expand Up @@ -10197,8 +10259,8 @@ namespace ranges {
if (!_STD is_constant_evaluated()) {
const auto _First_ptr = _STD to_address(_First);
const auto _Last_ptr = _First_ptr + (_Last - _First);
const auto _Result = _STD __std_minmax_element(_First_ptr, _Last_ptr);
return {static_cast<_Vty>(*_Result.first), static_cast<_Vty>(*_Result.second)};
const auto _Result = _STD __std_minmax(_First_ptr, _Last_ptr);
return {static_cast<_Vty>(_Result._Min), static_cast<_Vty>(_Result._Max)};
}
}
#endif // _USE_STD_VECTOR_ALGORITHMS
Expand Down
1 change: 1 addition & 0 deletions stl/inc/header-units.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
"__msvc_formatter.hpp",
"__msvc_int128.hpp",
"__msvc_iter_core.hpp",
"__msvc_minmax.hpp",
"__msvc_print.hpp",
"__msvc_sanitizer_annotate_container.hpp",
"__msvc_system_error_abi.hpp",
Expand Down
Loading