From 2082d8f4fa0aa4dfef2f9097f9c27d611c812e6a Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 17:36:18 +0200 Subject: [PATCH 01/36] initial implementation --- stl/CMakeLists.txt | 1 + stl/inc/__msvc_minmax.hpp | 86 ++++ stl/inc/algorithm | 76 +++- stl/inc/xutility | 135 ++++++ stl/src/vector_algorithms.cpp | 385 +++++++++++++++++- .../include/test_min_max_element_support.hpp | 10 + .../VSO_0000000_vector_algorithms/test.cpp | 5 + 7 files changed, 673 insertions(+), 25 deletions(-) create mode 100644 stl/inc/__msvc_minmax.hpp diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index 207cc6704f..31e1ead9de 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -15,6 +15,7 @@ set(HEADERS ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_formatter.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_int128.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_iter_core.hpp + ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_minmax.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_print.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_sanitizer_annotate_container.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_system_error_abi.hpp diff --git a/stl/inc/__msvc_minmax.hpp b/stl/inc/__msvc_minmax.hpp new file mode 100644 index 0000000000..4602e44724 --- /dev/null +++ b/stl/inc/__msvc_minmax.hpp @@ -0,0 +1,86 @@ +// __msvc_minmax.hpp internal header (core) + +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef __MSVC_MINMAX_HPP +#define __MSVC_MINMAX_HPP +#include +#if _STL_COMPILER_PREPROCESSOR +#include + +#pragma pack(push, _CRT_PACKING) +#pragma warning(push, _STL_WARNING_LEVEL) +#pragma warning(disable : _STL_DISABLED_WARNINGS) +_STL_DISABLE_CLANG_WARNINGS +#pragma push_macro("new") +#undef new + +extern "C" { +struct _Min_max_element_t { + const void* _Min; + const void* _Max; +}; + +struct _Min_max_1i { + int8_t _Min; + int8_t _Max; +}; + +struct _Min_max_1u { + uint8_t _Min; + uint8_t _Max; +}; + +struct _Min_max_2i { + int16_t _Min; + int16_t _Max; +}; + +struct _Min_max_2u { + uint16_t _Min; + uint16_t _Max; +}; + +struct _Min_max_4i { + int32_t _Min; + int32_t _Max; +}; + +struct _Min_max_4u { + uint32_t _Min; + uint32_t _Max; +}; + +struct _Min_max_8i { + int64_t _Min; + int64_t _Max; +}; + +struct _Min_max_8u { + uint64_t _Min; + uint64_t _Max; +}; + +struct _Min_max_f { + float _Min; + float _Max; +}; + +struct _Min_max_d { + double _Min; + double _Max; +}; + +struct _Min_max_p { + void* _Min; + void* _Max; +}; +} // extern "C" + +#pragma pop_macro("new") +_STL_RESTORE_CLANG_WARNINGS +#pragma warning(pop) +#pragma pack(pop) +#endif // _STL_COMPILER_PREPROCESSOR +#endif // __MSVC_MINMAX_HPP diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 5140b26406..a32828166d 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -7,6 +7,7 @@ #define _ALGORITHM_ #include #if _STL_COMPILER_PREPROCESSOR +#include <__msvc_minmax.hpp> #include #if _HAS_CXX23 @@ -29,11 +30,6 @@ _STL_DISABLE_CLANG_WARNINGS #if _USE_STD_VECTOR_ALGORITHMS extern "C" { -struct _Min_max_element_t { - const void* _Min; - const void* _Max; -}; - // The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms // won't be stored beyond the lifetime of the function, and that the function will only reference arrays denoted by // those pointers. 
The optimizer also assumes in that case that a pointer parameter is not returned to the caller via @@ -61,6 +57,17 @@ const void* __stdcall __std_find_last_trivial_1(const void* _First, const void* const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* _Last, uint16_t _Val) noexcept; const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, uint32_t _Val) noexcept; const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; + +__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_2u __stdcall __std_minmax_2u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_4i __stdcall __std_minmax_4i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_4u __stdcall __std_minmax_4u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_8i __stdcall __std_minmax_8i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_8u __stdcall __std_minmax_8u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_f __stdcall __std_minmax_f(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_d __stdcall __std_minmax_d(const void* const _First, const void* const _Last) noexcept; } // extern "C" _STD_BEGIN @@ -89,6 +96,53 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept { return {const_cast<_Ty*>(static_cast(_Res._Min)), const_cast<_Ty*>(static_cast(_Res._Max))}; } +template +auto __std_minmax(_Ty* _First, _Ty* 
_Last) noexcept { + constexpr bool _Signed = _STD is_signed_v<_Ty>; + + if constexpr (_STD is_pointer_v<_Ty>) { + if constexpr (sizeof(_Ty) == 4) { + auto _Result = ::__std_minmax_4u(_First, _Last); + return _Min_max_p{reinterpret_cast(_Result._Min), reinterpret_cast(_Result._Max)}; + } else if constexpr (sizeof(_Ty) == 8) { + auto _Result = ::__std_minmax_8u(_First, _Last); + return _Min_max_p{reinterpret_cast(_Result._Min), reinterpret_cast(_Result._Max)}; + } else { + static_assert(_STD _Always_false<_Ty>, "Unexpected size"); + } + } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { + return ::__std_minmax_f(_First, _Last); + } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { + return ::__std_minmax_d(_First, _Last); + } else if constexpr (sizeof(_Ty) == 1) { + if constexpr (_Signed) { + return ::__std_minmax_1i(_First, _Last); + } else { + return ::__std_minmax_1u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 2) { + if constexpr (_Signed) { + return ::__std_minmax_2i(_First, _Last); + } else { + return ::__std_minmax_2u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 4) { + if constexpr (_Signed) { + return ::__std_minmax_4i(_First, _Last); + } else { + return ::__std_minmax_4u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 8) { + if constexpr (_Signed) { + return ::__std_minmax_8i(_First, _Last); + } else { + return ::__std_minmax_8u(_First, _Last); + } + } else { + static_assert(_STD _Always_false<_Ty>, "Unexpected size"); + } +} + template _Ty* __std_find_last_trivial(_Ty* _First, _Ty* _Last, const _TVal _Val) noexcept { if constexpr (_STD is_pointer_v<_TVal> || _STD is_null_pointer_v<_TVal>) { @@ -10070,6 +10124,14 @@ _NODISCARD constexpr pair minmax(const _Ty& _Left _MSVC_ _EXPORT_STD template _NODISCARD constexpr pair<_Ty, _Ty> minmax(initializer_list<_Ty> _Ilist, _Pr _Pred) { // return {leftmost/smallest, rightmost/largest} +#if _USE_STD_VECTOR_ALGORITHMS + if 
constexpr (_Is_min_max_optimization_safe) { + if (!_STD _Is_constant_evaluated()) { + const auto _Result = _STD __std_minmax(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end())); + return {static_cast<_Ty>(_Result._Min), static_cast<_Ty>(_Result._Max)}; + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS pair _Res = _STD _Minmax_element_unchecked(_Ilist.begin(), _Ilist.end(), _STD _Pass_fn(_Pred)); return pair<_Ty, _Ty>(*_Res.first, *_Res.second); @@ -10197,8 +10259,8 @@ namespace ranges { if (!_STD is_constant_evaluated()) { const auto _First_ptr = _STD to_address(_First); const auto _Last_ptr = _First_ptr + (_Last - _First); - const auto _Result = _STD __std_minmax_element(_First_ptr, _Last_ptr); - return {static_cast<_Vty>(*_Result.first), static_cast<_Vty>(*_Result.second)}; + const auto _Result = _STD __std_minmax(_First_ptr, _Last_ptr); + return {static_cast<_Vty>(_Result._Min), static_cast<_Vty>(_Result._Max)}; } } #endif // _USE_STD_VECTOR_ALGORITHMS diff --git a/stl/inc/xutility b/stl/inc/xutility index f2634616e0..b436b2a1a1 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -108,6 +108,27 @@ const void* __stdcall __std_max_element_4(const void* _First, const void* _Last, const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_max_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; const void* __stdcall __std_max_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; + +__declspec(noalias) int8_t __stdcall __std_min_1i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint8_t __stdcall __std_min_1u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int16_t __stdcall __std_min_2i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint16_t __stdcall __std_min_2u(const void* const _First, const void* const _Last) noexcept; 
+__declspec(noalias) int32_t __stdcall __std_min_4i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint32_t __stdcall __std_min_4u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int64_t __stdcall __std_min_8i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint64_t __stdcall __std_min_8u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) float __stdcall __std_min_f(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) double __stdcall __std_min_d(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int8_t __stdcall __std_max_1i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint8_t __stdcall __std_max_1u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int16_t __stdcall __std_max_2i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint16_t __stdcall __std_max_2u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int32_t __stdcall __std_max_4i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint32_t __stdcall __std_max_4u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int64_t __stdcall __std_max_8i(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) uint64_t __stdcall __std_max_8u(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) float __stdcall __std_max_f(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) double __stdcall __std_max_d(const void* const _First, const void* const _Last) noexcept; } // extern "C" _STD_BEGIN @@ -211,6 +232,96 @@ _Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept { static_assert(_STD _Always_false<_Ty>, "Unexpected size"); } } + +template +auto 
__std_min(_Ty* _First, _Ty* _Last) noexcept { + constexpr bool _Signed = _STD is_signed_v<_Ty>; + + if constexpr (_STD is_pointer_v<_Ty>) { + if constexpr (sizeof(_Ty) == 4) { + return reinterpret_cast(::__std_min_4u(_First, _Last)); + } else if constexpr (sizeof(_Ty) == 8) { + return reinterpret_cast(::__std_min_8u(_First, _Last)); + } else { + static_assert(_STD _Always_false<_Ty>, "Unexpected size"); + } + } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { + return ::__std_min_f(_First, _Last); + } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { + return ::__std_min_d(_First, _Last); + } else if constexpr (sizeof(_Ty) == 1) { + if constexpr (_Signed) { + return ::__std_min_1i(_First, _Last); + } else { + return ::__std_min_1u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 2) { + if constexpr (_Signed) { + return ::__std_min_2i(_First, _Last); + } else { + return ::__std_min_2u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 4) { + if constexpr (_Signed) { + return ::__std_min_4i(_First, _Last); + } else { + return ::__std_min_4u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 8) { + if constexpr (_Signed) { + return ::__std_min_8i(_First, _Last); + } else { + return ::__std_min_8u(_First, _Last); + } + } else { + static_assert(_STD _Always_false<_Ty>, "Unexpected size"); + } +} + +template +auto __std_max(_Ty* _First, _Ty* _Last) noexcept { + constexpr bool _Signed = _STD is_signed_v<_Ty>; + + if constexpr (_STD is_pointer_v<_Ty>) { + if constexpr (sizeof(_Ty) == 4) { + return reinterpret_cast(::__std_max_4u(_First, _Last)); + } else if constexpr (sizeof(_Ty) == 8) { + return reinterpret_cast(::__std_max_8u(_First, _Last)); + } else { + static_assert(_STD _Always_false<_Ty>, "Unexpected size"); + } + } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { + return ::__std_max_f(_First, _Last); + } else if constexpr (_STD _Is_any_of_v<_STD 
remove_const_t<_Ty>, double, long double>) { + return ::__std_max_d(_First, _Last); + } else if constexpr (sizeof(_Ty) == 1) { + if constexpr (_Signed) { + return ::__std_max_1i(_First, _Last); + } else { + return ::__std_max_1u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 2) { + if constexpr (_Signed) { + return ::__std_max_2i(_First, _Last); + } else { + return ::__std_max_2u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 4) { + if constexpr (_Signed) { + return ::__std_max_4i(_First, _Last); + } else { + return ::__std_max_4u(_First, _Last); + } + } else if constexpr (sizeof(_Ty) == 8) { + if constexpr (_Signed) { + return ::__std_max_8i(_First, _Last); + } else { + return ::__std_max_8u(_First, _Last); + } + } else { + static_assert(_STD _Always_false<_Ty>, "Unexpected size"); + } +} _STD_END #endif // _USE_STD_VECTOR_ALGORITHMS @@ -6971,6 +7082,13 @@ namespace ranges { _EXPORT_STD template _NODISCARD constexpr _Ty(min)(initializer_list<_Ty> _Ilist, _Pr _Pred) { // return leftmost/smallest +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_min_max_optimization_safe) { + if (!_Is_constant_evaluated()) { + return static_cast<_Ty>(_STD __std_min(_To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS const _Ty* _Res = _STD _Min_element_unchecked(_Ilist.begin(), _Ilist.end(), _STD _Pass_fn(_Pred)); return *_Res; } @@ -7004,6 +7122,13 @@ namespace ranges { const auto _Last = _Range.end(); _STL_ASSERT(_First != _Last, "An initializer_list passed to std::ranges::min must not be empty. 
(N4950 [alg.min.max]/5)"); +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_min_max_optimization_safe) { + if (!_Is_constant_evaluated()) { + return static_cast<_Ty>(_STD __std_min(_STD _To_address(_First), _STD _To_address(_Last))); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS return *_RANGES _Min_element_unchecked(_First, _Last, _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj)); } @@ -7017,6 +7142,16 @@ namespace ranges { _STL_ASSERT( _UFirst != _ULast, "A range passed to std::ranges::min must not be empty. (N4950 [alg.min.max]/5)"); if constexpr (forward_range<_Rng> && _Prefer_iterator_copies>) { +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe + && sized_sentinel_for) { + if (!_STD is_constant_evaluated()) { + const auto _First_ptr = _STD to_address(_UFirst); + const auto _Last_ptr = _First_ptr + (_ULast - _UFirst); + return static_cast>(_STD __std_min(_First_ptr, _Last_ptr)); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS return static_cast>(*_RANGES _Min_element_unchecked( _STD move(_UFirst), _STD move(_ULast), _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj))); } else { diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 66f8895758..be63f41322 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -6,8 +6,10 @@ #endif #if defined(_M_IX86) || defined(_M_X64) // NB: includes _M_ARM64EC +#include <__msvc_minmax.hpp> #include #include +#include #ifndef _M_ARM64EC #include #include @@ -89,12 +91,6 @@ namespace { } // unnamed namespace extern "C" { -// Must be in sync with _Min_max_element_t in -struct _Min_max_element_t { - const void* _Min; - const void* _Max; -}; - __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias( void* _First1, void* _Last1, void* _First2) noexcept { #ifndef _M_ARM64EC @@ -538,6 +534,46 @@ namespace { return _Res; } + template + _Ty _Min_tail_v(const void* const _First, const void* const _Last, _Ty _Cur) 
noexcept { + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if (*_Ptr < _Cur) { + _Cur = *_Ptr; + } + } + + return _Cur; + } + + template + _Ty _Max_tail_v(const void* const _First, const void* const _Last, _Ty _Cur) noexcept { + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if (_Cur < *_Ptr) { + _Cur = *_Ptr; + } + } + + return _Cur; + } + + template + _Rx _Both_tail_v( + const void* const _First, const void* const _Last, _Ty _Cur_min, _Ty _Cur_max) noexcept { + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if (*_Ptr < _Cur_min) { + _Cur_min = *_Ptr; + } + // Not else! + // * Needed for correctness if start with maximum, as we don't handle specially the first element. + // * Promote branchless code generation. + if (_Cur_max <= *_Ptr) { + _Cur_max = *_Ptr; + } + } + + return {_Cur_min, _Cur_max}; + } + enum _Min_max_mode { _Mode_min = 1 << 0, _Mode_max = 1 << 1, @@ -553,6 +589,9 @@ namespace { static constexpr _Signed_t _Init_min_val = static_cast<_Signed_t>(0x7F); static constexpr _Signed_t _Init_max_val = static_cast<_Signed_t>(0x80); + using _Minmax_i_t = _Min_max_1i; + using _Minmax_u_t = _Min_max_1u; + #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = true; static constexpr size_t _Portion_max = 256; @@ -620,17 +659,29 @@ namespace { return _mm_cmpeq_epi8(_First, _Second); } - static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept { + static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { return _mm_min_epi8(_First, _Second); } - static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept { + static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { return _mm_max_epi8(_First, _Second); } + static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { + return _mm_min_epu8(_First, _Second); + } + + static __m128i _Max_u(const 
__m128i _First, const __m128i _Second) noexcept { + return _mm_max_epu8(_First, _Second); + } + static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } + + static bool _Sse_plain_min_max_available() noexcept { + return _Use_sse2(); + } #endif // !_M_ARM64EC }; @@ -643,6 +694,9 @@ namespace { static constexpr _Signed_t _Init_min_val = static_cast<_Signed_t>(0x7FFF); static constexpr _Signed_t _Init_max_val = static_cast<_Signed_t>(0x8000); + using _Minmax_i_t = _Min_max_2i; + using _Minmax_u_t = _Min_max_2u; + #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = true; static constexpr size_t _Portion_max = 65536; @@ -711,17 +765,29 @@ namespace { return _mm_cmpeq_epi16(_First, _Second); } - static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept { + static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { return _mm_min_epi16(_First, _Second); } - static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept { + static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { return _mm_max_epi16(_First, _Second); } + static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { + return _mm_min_epu16(_First, _Second); + } + + static __m128i _Max_u(const __m128i _First, const __m128i _Second) noexcept { + return _mm_max_epu16(_First, _Second); + } + static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } + + static bool _Sse_plain_min_max_available() noexcept { + return _Use_sse2(); + } #endif // !_M_ARM64EC }; @@ -731,6 +797,9 @@ namespace { using _Signed_t = int32_t; using _Unsigned_t = uint32_t; + using _Minmax_i_t = _Min_max_4i; + using _Minmax_u_t = _Min_max_4u; + static constexpr _Signed_t _Init_min_val = static_cast<_Signed_t>(0x7FFF'FFFFUL); static constexpr _Signed_t _Init_max_val = static_cast<_Signed_t>(0x8000'0000UL); @@ -802,17 +871,29 @@ namespace { return 
_mm_cmpeq_epi32(_First, _Second); } - static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept { + static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { return _mm_min_epi32(_First, _Second); } - static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept { + static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { return _mm_max_epi32(_First, _Second); } + static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { + return _mm_min_epu32(_First, _Second); + } + + static __m128i _Max_u(const __m128i _First, const __m128i _Second) noexcept { + return _mm_max_epu32(_First, _Second); + } + static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } + + static bool _Sse_plain_min_max_available() noexcept { + return _Use_sse2(); + } #endif // !_M_ARM64EC }; @@ -825,6 +906,9 @@ namespace { static constexpr _Signed_t _Init_min_val = static_cast<_Signed_t>(0x7FFF'FFFF'FFFF'FFFFULL); static constexpr _Signed_t _Init_max_val = static_cast<_Signed_t>(0x8000'0000'0000'0000ULL); + using _Minmax_i_t = _Min_max_8i; + using _Minmax_u_t = _Min_max_8u; + #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = false; @@ -904,20 +988,36 @@ namespace { return _mm_blendv_epi8(_First, _Second, _Mask); } + static __m128i _Min(const __m128i _First, const __m128i _Second) noexcept { + return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_First, _Second)); + } + + static __m128i _Max(const __m128i _First, const __m128i _Second) noexcept { + return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_Second, _First)); + } + static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } + + static bool _Sse_plain_min_max_available() noexcept { + return _Use_sse42(); + } #endif // !_M_ARM64EC }; struct _Minmax_traits_f { static constexpr bool _Is_floating = true; - using _Signed_t = float; + using _Signed_t = float; + 
using _Unsigned_t = void; static constexpr _Signed_t _Init_min_val = __builtin_huge_valf(); static constexpr _Signed_t _Init_max_val = -__builtin_huge_valf(); + using _Minmax_i_t = _Min_max_f; + using _Minmax_u_t = void; + #ifndef _M_ARM64EC #ifdef _M_IX86 static constexpr bool _Has_portion_max = false; @@ -992,28 +1092,36 @@ namespace { return _mm_cmpeq_epi32(_First, _Second); } - static __m128 _Min(const __m128 _First, const __m128 _Second, __m128) noexcept { + static __m128 _Min(const __m128 _First, const __m128 _Second, __m128 = _mm_undefined_ps()) noexcept { return _mm_min_ps(_First, _Second); } - static __m128 _Max(const __m128 _First, const __m128 _Second, __m128) noexcept { + static __m128 _Max(const __m128 _First, const __m128 _Second, __m128 = _mm_undefined_ps()) noexcept { return _mm_max_ps(_First, _Second); } static __m128i _Mask_cast(__m128 _Mask) noexcept { return _mm_castps_si128(_Mask); } + + static bool _Sse_plain_min_max_available() noexcept { + return _Use_sse2(); + } #endif // !_M_ARM64EC }; struct _Minmax_traits_d { static constexpr bool _Is_floating = true; - using _Signed_t = double; + using _Signed_t = double; + using _Unsigned_t = void; static constexpr _Signed_t _Init_min_val = __builtin_huge_val(); static constexpr _Signed_t _Init_max_val = -__builtin_huge_val(); + using _Minmax_i_t = _Min_max_d; + using _Minmax_u_t = void; + #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = false; @@ -1092,17 +1200,21 @@ namespace { return _mm_cmpeq_epi64(_First, _Second); } - static __m128d _Min(const __m128d _First, const __m128d _Second, __m128d) noexcept { + static __m128d _Min(const __m128d _First, const __m128d _Second, __m128d = _mm_undefined_pd()) noexcept { return _mm_min_pd(_First, _Second); } - static __m128d _Max(const __m128d _First, const __m128d _Second, __m128d) noexcept { + static __m128d _Max(const __m128d _First, const __m128d _Second, __m128d = _mm_undefined_pd()) noexcept { return _mm_max_pd(_First, _Second); } static 
__m128i _Mask_cast(__m128d _Mask) noexcept { return _mm_castpd_si128(_Mask); } + + static bool _Sse_plain_min_max_available() noexcept { + return _Use_sse2(); + } #endif // !_M_ARM64EC }; @@ -1327,6 +1439,122 @@ namespace { } } + // _Minmax returns the smallest and/or largest value (selected by _Mode) of [_First, _Last). The range must not be + // empty: the scalar path reads the first element unconditionally. The signature is exactly that of the extern "C" + // value functions (__std_min_1i, __std_max_4u, __std_minmax_d, etc.), up to calling convention, so the template + // specialization is fused with it; in optimized builds this avoids an extra call (too large to inline). + template <_Min_max_mode _Mode, class _Traits, const bool _Sign> + auto __stdcall _Minmax(const void* _First, const void* const _Last) noexcept { + using _Ty = std::conditional_t<_Sign, typename _Traits::_Signed_t, typename _Traits::_Unsigned_t>; + + _Ty _Cur_min_val; // set on every path below when _Mode uses it + _Ty _Cur_max_val; // set on every path below when _Mode uses it + + // We don't have unsigned 64-bit stuff, so will use sign correction just for that case + constexpr bool _Sign_correction = sizeof(_Ty) == 8 && !_Sign; + +#ifndef _M_ARM64EC + if (_Byte_length(_First, _Last) >= 16 && _Traits::_Sse_plain_min_max_available()) { + size_t _Portion_byte_size = _Byte_length(_First, _Last) & ~size_t{0xF}; + + const void* _Stop_at = _First; + _Advance_bytes(_Stop_at, _Portion_byte_size); + + auto _Cur_vals = _Traits::_Load(_First); + + if constexpr (_Sign_correction) { + _Cur_vals = _Traits::_Sign_correction(_Cur_vals, false); + } + + auto _Cur_vals_min = _Cur_vals; // vector of vertical minimum values + auto _Cur_vals_max = _Cur_vals; // vector of vertical maximum values + + for (;;) { + _Advance_bytes(_First, 16); + + if (_First == _Stop_at) { + // Reached the end of the vectorized portion: compute the horizontal min and/or max value. 
+ + if constexpr ((_Mode & _Mode_min) != 0) { + if constexpr (_Sign || _Sign_correction) { + const auto _H_min = + _Traits::_H_min(_Cur_vals_min); // Vector populated by the smallest element + _Cur_min_val = _Traits::_Get_any(_H_min); // Get any element of it + } else { + const auto _H_min = + _Traits::_H_min_u(_Cur_vals_min); // Vector populated by the smallest element + _Cur_min_val = _Traits::_Get_any(_H_min); // Get any element of it + } + } + + if constexpr ((_Mode & _Mode_max) != 0) { + if constexpr (_Sign || _Sign_correction) { + const auto _H_max = + _Traits::_H_max(_Cur_vals_max); // Vector populated by the largest element + _Cur_max_val = _Traits::_Get_any(_H_max); // Get any element of it + } else { + const auto _H_max = + _Traits::_H_max_u(_Cur_vals_max); // Vector populated by the largest element + _Cur_max_val = _Traits::_Get_any(_H_max); // Get any element of it + } + } + + if constexpr (_Sign_correction) { + constexpr _Ty _Correction = _Ty{1} << (sizeof(_Ty) * 8 - 1); + + if constexpr ((_Mode & _Mode_min) != 0) { + _Cur_min_val += _Correction; + } + + if constexpr ((_Mode & _Mode_max) != 0) { + _Cur_max_val += _Correction; + } + } + + break; + } + // This is the main part, finding vertical minimum/maximum + + _Cur_vals = _Traits::_Load(_First); + + if constexpr (_Sign_correction) { + _Cur_vals = _Traits::_Sign_correction(_Cur_vals, false); + } + + if constexpr ((_Mode & _Mode_min) != 0) { + if constexpr (_Sign || _Sign_correction) { + _Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals); // Update the current minimum + } else { + _Cur_vals_min = _Traits::_Min_u(_Cur_vals_min, _Cur_vals); // Update the current minimum + } + } + + if constexpr ((_Mode & _Mode_max) != 0) { + if constexpr (_Sign || _Sign_correction) { + _Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals); // Update the current maximum + } else { + _Cur_vals_max = _Traits::_Max_u(_Cur_vals_max, _Cur_vals); // Update the current maximum + } + } + } + } else +#endif // !_M_ARM64EC + { // Scalar seed; kept outside the #ifndef because it is the only path on ARM64EC + _Cur_min_val = 
*reinterpret_cast<const _Ty*>(_First); + _Cur_max_val = *reinterpret_cast<const _Ty*>(_First); + } + + if constexpr (_Mode == _Mode_min) { + return _Min_tail_v(_First, _Last, static_cast<_Ty>(_Cur_min_val)); + } else if constexpr (_Mode == _Mode_max) { + return _Max_tail_v(_First, _Last, static_cast<_Ty>(_Cur_max_val)); + } else { + using _Rx = std::conditional_t<_Sign, typename _Traits::_Minmax_i_t, typename _Traits::_Minmax_u_t>; + + return _Both_tail_v<_Rx>(_First, _Last, static_cast<_Ty>(_Cur_min_val), static_cast<_Ty>(_Cur_max_val)); + } + } + } // unnamed namespace extern "C" { @@ -1420,6 +1648,127 @@ _Min_max_element_t __stdcall __std_minmax_element_d( const void* const _First, const void* const _Last, const bool _Unused) noexcept { return _Minmax_element<_Mode_both, _Minmax_traits_d>(_First, _Last, _Unused); } + +__declspec(noalias) int8_t __stdcall __std_min_1i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_1, true>(_First, _Last); +} + +__declspec(noalias) uint8_t __stdcall __std_min_1u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_1, false>(_First, _Last); +} + +__declspec(noalias) int16_t __stdcall __std_min_2i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_2, true>(_First, _Last); +} + +__declspec(noalias) uint16_t __stdcall __std_min_2u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_2, false>(_First, _Last); +} + +__declspec(noalias) int32_t __stdcall __std_min_4i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_4, true>(_First, _Last); +} + +__declspec(noalias) uint32_t __stdcall __std_min_4u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_4, false>(_First, _Last); +} + +__declspec(noalias) int64_t __stdcall 
__std_min_8i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_8, true>(_First, _Last); +} + +__declspec(noalias) uint64_t __stdcall __std_min_8u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_8, false>(_First, _Last); +} + +__declspec(noalias) float __stdcall __std_min_f(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_f, true>(_First, _Last); +} + +__declspec(noalias) double __stdcall __std_min_d(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_min, _Minmax_traits_d, true>(_First, _Last); +} + +__declspec(noalias) int8_t __stdcall __std_max_1i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_1, true>(_First, _Last); +} + +__declspec(noalias) uint8_t __stdcall __std_max_1u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_1, false>(_First, _Last); +} + +__declspec(noalias) int16_t __stdcall __std_max_2i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_2, true>(_First, _Last); +} + +__declspec(noalias) uint16_t __stdcall __std_max_2u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_2, false>(_First, _Last); +} + +__declspec(noalias) int32_t __stdcall __std_max_4i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_4, true>(_First, _Last); +} + +__declspec(noalias) uint32_t __stdcall __std_max_4u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_4, false>(_First, _Last); +} + +__declspec(noalias) int64_t __stdcall __std_max_8i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_8, 
true>(_First, _Last); +} + +__declspec(noalias) uint64_t __stdcall __std_max_8u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_8, false>(_First, _Last); +} + +__declspec(noalias) float __stdcall __std_max_f(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_f, true>(_First, _Last); +} + +__declspec(noalias) double __stdcall __std_max_d(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_max, _Minmax_traits_d, true>(_First, _Last); +} + +__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_1, true>(_First, _Last); +} + +__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_1, false>(_First, _Last); +} + +__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_2, true>(_First, _Last); +} + +__declspec(noalias) _Min_max_2u __stdcall __std_minmax_2u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_2, false>(_First, _Last); +} + +__declspec(noalias) _Min_max_4i __stdcall __std_minmax_4i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_4, true>(_First, _Last); +} + +__declspec(noalias) _Min_max_4u __stdcall __std_minmax_4u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_4, false>(_First, _Last); +} + +__declspec(noalias) _Min_max_8i __stdcall __std_minmax_8i(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_8, true>(_First, _Last); +} + +__declspec(noalias) _Min_max_8u __stdcall 
__std_minmax_8u(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_8, false>(_First, _Last); +} + +__declspec(noalias) _Min_max_f __stdcall __std_minmax_f(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_f, true>(_First, _Last); +} + +__declspec(noalias) _Min_max_d __stdcall __std_minmax_d(const void* const _First, const void* const _Last) noexcept { + return _Minmax<_Mode_both, _Minmax_traits_d, true>(_First, _Last); +} + } // extern "C" namespace { diff --git a/tests/std/include/test_min_max_element_support.hpp b/tests/std/include/test_min_max_element_support.hpp index 9ade4d6b89..03a5922b93 100644 --- a/tests/std/include/test_min_max_element_support.hpp +++ b/tests/std/include/test_min_max_element_support.hpp @@ -107,5 +107,15 @@ void test_case_min_max_element(const std::vector& input) { assert(expected_max == actual_max_sized_range); assert(expected_minmax.first == actual_minmax_sized_range.min); assert(expected_minmax.second == actual_minmax_sized_range.max); + + if (input.begin() != input.end()) { + auto actual_min_value = std::ranges::min(input); + auto actual_max_value = std::ranges::max(input); + auto actual_minmax_value = std::ranges::minmax(input); + assert(*expected_min == actual_min_value); + assert(*expected_max == actual_max_value); + assert(*expected_minmax.first == actual_minmax_value.min); + assert(*expected_minmax.second == actual_minmax_value.max); + } #endif // _HAS_CXX20 } diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index cafb850318..efd5ea08e8 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -572,6 +572,11 @@ constexpr bool test_constexpr() { assert(ranges::minmax_element(a).min == begin(a) + 1); assert(ranges::minmax_element(a).max == end(a) - 2); + assert(ranges::min(a) 
== 10); + assert(ranges::max(a) == 60); + assert(ranges::minmax(a).min == 10); + assert(ranges::minmax(a).max == 60); + int b[size(a)]; reverse_copy(begin(a), end(a), begin(b)); assert(equal(rbegin(a), rend(a), begin(b), end(b))); From e38454c0123ac7b84a703b07cd49664710bfd673 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 18:00:49 +0200 Subject: [PATCH 02/36] benchmark --- benchmarks/src/minmax_element.cpp | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index da6cf9aff4..b0143d3a37 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -13,6 +13,9 @@ enum class Op { Min, Max, Both, + Min_el, + Max_el, + Both_el, }; using namespace std; @@ -38,6 +41,12 @@ void bm(benchmark::State& state) { benchmark::DoNotOptimize(ranges::max_element(a)); } else if constexpr (Operation == Op::Both) { benchmark::DoNotOptimize(ranges::minmax_element(a)); + } else if constexpr (Operation == Op::Min_el) { + benchmark::DoNotOptimize(ranges::min(a)); + } else if constexpr (Operation == Op::Max_el) { + benchmark::DoNotOptimize(ranges::max(a)); + } else if constexpr (Operation == Op::Both_el) { + benchmark::DoNotOptimize(ranges::minmax(a)); } } } @@ -45,42 +54,72 @@ void bm(benchmark::State& state) { BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); 
+BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK_MAIN(); From 108ed217aba15a01d1ae6e8868a7ea824cc69a7e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 19:10:01 +0200 Subject: [PATCH 03/36] move vectorization out --- stl/inc/xutility | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index b436b2a1a1..38e5079252 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -7141,17 +7141,17 @@ namespace ranges { auto _ULast = _Uend(_Range); _STL_ASSERT( _UFirst != _ULast, "A range passed to std::ranges::min must not be empty. (N4950 [alg.min.max]/5)"); - if constexpr (forward_range<_Rng> && _Prefer_iterator_copies>) { #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe - && sized_sentinel_for) { - if (!_STD is_constant_evaluated()) { - const auto _First_ptr = _STD to_address(_UFirst); - const auto _Last_ptr = _First_ptr + (_ULast - _UFirst); - return static_cast>(_STD __std_min(_First_ptr, _Last_ptr)); - } + if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe + && sized_sentinel_for) { + if (!_STD is_constant_evaluated()) { + const auto _First_ptr = _STD to_address(_UFirst); + const auto _Last_ptr = _First_ptr + (_ULast - _UFirst); + return static_cast>(_STD __std_min(_First_ptr, _Last_ptr)); } + } #endif // _USE_STD_VECTOR_ALGORITHMS + if constexpr (forward_range<_Rng> && _Prefer_iterator_copies>) { return static_cast>(*_RANGES _Min_element_unchecked( _STD move(_UFirst), _STD move(_ULast), _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj))); } else { From 838da2ada426b7c1f287088955f665b725c2c94b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 
19:36:28 +0200 Subject: [PATCH 04/36] missing max --- stl/inc/xutility | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/stl/inc/xutility b/stl/inc/xutility index 38e5079252..04ec21ec2f 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6884,6 +6884,13 @@ namespace ranges { _EXPORT_STD template _NODISCARD constexpr _Ty(max)(initializer_list<_Ty> _Ilist, _Pr _Pred) { // return leftmost/largest +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_min_max_optimization_safe) { + if (!_Is_constant_evaluated()) { + return static_cast<_Ty>(_STD __std_max(_To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS const _Ty* _Res = _STD _Max_element_unchecked(_Ilist.begin(), _Ilist.end(), _STD _Pass_fn(_Pred)); return *_Res; } @@ -6923,6 +6930,13 @@ namespace ranges { const auto _Last = _Range.end(); _STL_ASSERT(_First != _Last, "An initializer_list passed to std::ranges::max must not be empty. (N4950 [alg.min.max]/13)"); +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Is_min_max_optimization_safe) { + if (!_Is_constant_evaluated()) { + return static_cast<_Ty>(_STD __std_max(_STD _To_address(_First), _STD _To_address(_Last))); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS return *_RANGES _Max_element_unchecked(_First, _Last, _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj)); } @@ -6935,6 +6949,16 @@ namespace ranges { auto _ULast = _Uend(_Range); _STL_ASSERT( _UFirst != _ULast, "A range passed to std::ranges::max must not be empty. 
(N4950 [alg.min.max]/13)"); +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe + && sized_sentinel_for) { + if (!_STD is_constant_evaluated()) { + const auto _First_ptr = _STD to_address(_UFirst); + const auto _Last_ptr = _First_ptr + (_ULast - _UFirst); + return static_cast>(_STD __std_max(_First_ptr, _Last_ptr)); + } + } +#endif // _USE_STD_VECTOR_ALGORITHMS if constexpr (forward_range<_Rng> && _Prefer_iterator_copies>) { return static_cast>(*_RANGES _Max_element_unchecked( _STD move(_UFirst), _STD move(_ULast), _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj))); From 1b61bb045af73c2244f9ab18a67c81ef0b37d964 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 19:55:56 +0200 Subject: [PATCH 05/36] bemchmark copypaste error --- benchmarks/src/minmax_element.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index b0143d3a37..faafd6c09e 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -96,9 +96,9 @@ BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); From 15cd2d4860518189a1c79faa99c405be0fc4b1c1 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 20:53:45 +0200 Subject: [PATCH 06/36] _val is less confusing --- benchmarks/src/minmax_element.cpp | 78 +++++++++++++++---------------- 1 file changed, 39 insertions(+), 39 deletions(-) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index faafd6c09e..a5f90c3722 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -13,9 +13,9 @@ enum class Op { Min, Max, Both, - Min_el, - Max_el, - Both_el, + Min_val, + Max_val, + Both_val, }; using namespace std; @@ -36,16 +36,16 @@ void bm(benchmark::State& state) { for 
(auto _ : state) { if constexpr (Operation == Op::Min) { - benchmark::DoNotOptimize(ranges::min_element(a)); + benchmark::DoNotOptimize(ranges::min_valement(a)); } else if constexpr (Operation == Op::Max) { - benchmark::DoNotOptimize(ranges::max_element(a)); + benchmark::DoNotOptimize(ranges::max_valement(a)); } else if constexpr (Operation == Op::Both) { - benchmark::DoNotOptimize(ranges::minmax_element(a)); - } else if constexpr (Operation == Op::Min_el) { + benchmark::DoNotOptimize(ranges::minmax_valement(a)); + } else if constexpr (Operation == Op::Min_val) { benchmark::DoNotOptimize(ranges::min(a)); - } else if constexpr (Operation == Op::Max_el) { + } else if constexpr (Operation == Op::Max_val) { benchmark::DoNotOptimize(ranges::max(a)); - } else if constexpr (Operation == Op::Both_el) { + } else if constexpr (Operation == Op::Both_val) { benchmark::DoNotOptimize(ranges::minmax(a)); } } @@ -54,72 +54,72 @@ void bm(benchmark::State& state) { BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); 
-BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); -BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK_MAIN(); From 6ea297af3b9aa597a5f45638d8aa4de864846b02 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 21:00:35 +0200 Subject: [PATCH 07/36] valement --- benchmarks/src/minmax_element.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index a5f90c3722..812b8c318b 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -36,11 +36,11 @@ void bm(benchmark::State& state) { for (auto _ : state) { if constexpr (Operation == Op::Min) { - benchmark::DoNotOptimize(ranges::min_valement(a)); + benchmark::DoNotOptimize(ranges::min_element(a)); } else if constexpr (Operation == Op::Max) { - benchmark::DoNotOptimize(ranges::max_valement(a)); + benchmark::DoNotOptimize(ranges::max_element(a)); } else if constexpr (Operation == Op::Both) { - benchmark::DoNotOptimize(ranges::minmax_valement(a)); + benchmark::DoNotOptimize(ranges::minmax_element(a)); } else if constexpr (Operation == Op::Min_val) { benchmark::DoNotOptimize(ranges::min(a)); } else if constexpr (Operation == Op::Max_val) { From ddedb4a5c80b889e10ac96c7630ecc1513ff05b4 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 21:06:25 +0200 Subject: [PATCH 08/36] format --- stl/src/vector_algorithms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index be63f41322..375b6acf14 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -557,8 +557,7 @@ namespace { } template - 
_Rx _Both_tail_v( - const void* const _First, const void* const _Last, _Ty _Cur_min, _Ty _Cur_max) noexcept { + _Rx _Both_tail_v(const void* const _First, const void* const _Last, _Ty _Cur_min, _Ty _Cur_max) noexcept { for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { if (*_Ptr < _Cur_min) { _Cur_min = *_Ptr; From 09469ab894cd02be66c9d9caf5d15faab1d18096 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 21:07:06 +0200 Subject: [PATCH 09/36] format --- tests/std/include/test_min_max_element_support.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/include/test_min_max_element_support.hpp b/tests/std/include/test_min_max_element_support.hpp index 03a5922b93..1e89dc7ddb 100644 --- a/tests/std/include/test_min_max_element_support.hpp +++ b/tests/std/include/test_min_max_element_support.hpp @@ -111,7 +111,7 @@ void test_case_min_max_element(const std::vector& input) { if (input.begin() != input.end()) { auto actual_min_value = std::ranges::min(input); auto actual_max_value = std::ranges::max(input); - auto actual_minmax_value = std::ranges::minmax(input); + auto actual_minmax_value = std::ranges::minmax(input); assert(*expected_min == actual_min_value); assert(*expected_max == actual_max_value); assert(*expected_minmax.first == actual_minmax_value.min); From 461ca2beab1a871d5c39803638c99936f428e708 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 21:10:30 +0200 Subject: [PATCH 10/36] no top level const in declaration --- stl/inc/algorithm | 20 ++++++++++---------- stl/inc/xutility | 40 ++++++++++++++++++++-------------------- 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index a32828166d..a9c9fc7672 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -58,16 +58,16 @@ const void* __stdcall __std_find_last_trivial_2(const void* _First, const void* const void* __stdcall __std_find_last_trivial_4(const void* _First, const void* _Last, 
uint32_t _Val) noexcept; const void* __stdcall __std_find_last_trivial_8(const void* _First, const void* _Last, uint64_t _Val) noexcept; -__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_2u __stdcall __std_minmax_2u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_4i __stdcall __std_minmax_4i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_4u __stdcall __std_minmax_4u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_8i __stdcall __std_minmax_8i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_8u __stdcall __std_minmax_8u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_f __stdcall __std_minmax_f(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) _Min_max_d __stdcall __std_minmax_d(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) _Min_max_1i __stdcall __std_minmax_1i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_1u __stdcall __std_minmax_1u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_2i __stdcall __std_minmax_2i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_2u __stdcall __std_minmax_2u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_4i __stdcall __std_minmax_4i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_4u __stdcall __std_minmax_4u(const void* _First, const void* _Last) noexcept; 
+__declspec(noalias) _Min_max_8i __stdcall __std_minmax_8i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_8u __stdcall __std_minmax_8u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_f __stdcall __std_minmax_f(const void* _First, const void* _Last) noexcept; +__declspec(noalias) _Min_max_d __stdcall __std_minmax_d(const void* _First, const void* _Last) noexcept; } // extern "C" _STD_BEGIN diff --git a/stl/inc/xutility b/stl/inc/xutility index 04ec21ec2f..ff3ca2bda3 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -109,26 +109,26 @@ const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, const void* __stdcall __std_max_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; const void* __stdcall __std_max_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; -__declspec(noalias) int8_t __stdcall __std_min_1i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint8_t __stdcall __std_min_1u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) int16_t __stdcall __std_min_2i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint16_t __stdcall __std_min_2u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) int32_t __stdcall __std_min_4i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint32_t __stdcall __std_min_4u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) int64_t __stdcall __std_min_8i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint64_t __stdcall __std_min_8u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) float __stdcall __std_min_f(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) double __stdcall __std_min_d(const void* const _First, const 
void* const _Last) noexcept; -__declspec(noalias) int8_t __stdcall __std_max_1i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint8_t __stdcall __std_max_1u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) int16_t __stdcall __std_max_2i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint16_t __stdcall __std_max_2u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) int32_t __stdcall __std_max_4i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint32_t __stdcall __std_max_4u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) int64_t __stdcall __std_max_8i(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) uint64_t __stdcall __std_max_8u(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) float __stdcall __std_max_f(const void* const _First, const void* const _Last) noexcept; -__declspec(noalias) double __stdcall __std_max_d(const void* const _First, const void* const _Last) noexcept; +__declspec(noalias) int8_t __stdcall __std_min_1i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint8_t __stdcall __std_min_1u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int16_t __stdcall __std_min_2i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint16_t __stdcall __std_min_2u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int32_t __stdcall __std_min_4i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint32_t __stdcall __std_min_4u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int64_t __stdcall __std_min_8i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint64_t __stdcall __std_min_8u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) float 
__stdcall __std_min_f(const void* _First, const void* _Last) noexcept; +__declspec(noalias) double __stdcall __std_min_d(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int8_t __stdcall __std_max_1i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint8_t __stdcall __std_max_1u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int16_t __stdcall __std_max_2i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint16_t __stdcall __std_max_2u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int32_t __stdcall __std_max_4i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint32_t __stdcall __std_max_4u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) int64_t __stdcall __std_max_8i(const void* _First, const void* _Last) noexcept; +__declspec(noalias) uint64_t __stdcall __std_max_8u(const void* _First, const void* _Last) noexcept; +__declspec(noalias) float __stdcall __std_max_f(const void* _First, const void* _Last) noexcept; +__declspec(noalias) double __stdcall __std_max_d(const void* _First, const void* _Last) noexcept; } // extern "C" _STD_BEGIN From 80e2470a9292ee14be66f9e30817cb0dae5fb7b9 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 21:23:10 +0200 Subject: [PATCH 11/36] header unit --- stl/inc/header-units.json | 1 + 1 file changed, 1 insertion(+) diff --git a/stl/inc/header-units.json b/stl/inc/header-units.json index 74d6d4699e..f4bdd9ce8f 100644 --- a/stl/inc/header-units.json +++ b/stl/inc/header-units.json @@ -13,6 +13,7 @@ "__msvc_formatter.hpp", "__msvc_int128.hpp", "__msvc_iter_core.hpp", + "__msvc_minmax.hpp", "__msvc_print.hpp", "__msvc_sanitizer_annotate_container.hpp", "__msvc_system_error_abi.hpp", From 54481804436261cb62c586810efa47b6b74843c3 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 21:32:44 +0200 Subject: [PATCH 12/36] ADL --- stl/inc/xutility | 4 ++-- 1 file 
changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index ff3ca2bda3..95c95ce4f8 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6887,7 +6887,7 @@ _NODISCARD constexpr _Ty(max)(initializer_list<_Ty> _Ilist, _Pr _Pred) { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { - return static_cast<_Ty>(_STD __std_max(_To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); + return static_cast<_Ty>(_STD __std_max(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); } } #endif // _USE_STD_VECTOR_ALGORITHMS @@ -7109,7 +7109,7 @@ _NODISCARD constexpr _Ty(min)(initializer_list<_Ty> _Ilist, _Pr _Pred) { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { - return static_cast<_Ty>(_STD __std_min(_To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); + return static_cast<_Ty>(_STD __std_min(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); } } #endif // _USE_STD_VECTOR_ALGORITHMS From 0eb53228f7e4a813c5c7dd6a2573c3ffa089f38f Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 10 Feb 2024 22:08:30 +0200 Subject: [PATCH 13/36] check projection --- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 95c95ce4f8..42896cb598 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6931,7 +6931,7 @@ namespace ranges { _STL_ASSERT(_First != _Last, "An initializer_list passed to std::ranges::max must not be empty. 
(N4950 [alg.min.max]/13)"); #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Is_min_max_optimization_safe) { + if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { return static_cast<_Ty>(_STD __std_max(_STD _To_address(_First), _STD _To_address(_Last))); } @@ -7147,7 +7147,7 @@ namespace ranges { _STL_ASSERT(_First != _Last, "An initializer_list passed to std::ranges::min must not be empty. (N4950 [alg.min.max]/5)"); #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Is_min_max_optimization_safe) { + if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { return static_cast<_Ty>(_STD __std_min(_STD _To_address(_First), _STD _To_address(_Last))); } From 939dc3347cd5f6256808beadb8189c16d14c90e1 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 11:14:25 +0200 Subject: [PATCH 14/36] copypasta cleanup --- stl/src/vector_algorithms.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 375b6acf14..68f222d78e 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1439,7 +1439,7 @@ namespace { } // _Minmax has exactly the same signature as the extern "C" functions - // (__std_min_element_N, __std_max_element_N, __std_minmax_element_N), up to calling convention. + // (__std_min_Nn, __std_max_Nn, __std_minmax_Nn), up to calling convention. // This makes sure the template specialization is fused with the extern "C" function. // In optimized builds it avoids an extra call, as this function is too large to inline. 
template <_Min_max_mode _Mode, class _Traits, const bool _Sign> @@ -1454,10 +1454,10 @@ namespace { #ifndef _M_ARM64EC if (_Byte_length(_First, _Last) >= 16 && _Traits::_Sse_plain_min_max_available()) { - size_t _Portion_byte_size = _Byte_length(_First, _Last) & ~size_t{0xF}; + const size_t _Sse_byte_size = _Byte_length(_First, _Last) & ~size_t{0xF}; const void* _Stop_at = _First; - _Advance_bytes(_Stop_at, _Portion_byte_size); + _Advance_bytes(_Stop_at, _Sse_byte_size); auto _Cur_vals = _Traits::_Load(_First); @@ -1472,7 +1472,7 @@ namespace { _Advance_bytes(_First, 16); if (_First == _Stop_at) { - // Reached end or indices wrap around point. + // Reached end. // Compute horizontal min and/or max. Determine horizontal and vertical position of it. if constexpr ((_Mode & _Mode_min) != 0) { @@ -1506,7 +1506,7 @@ namespace { _Cur_min_val += _Correction; } - if constexpr (_Mode & _Mode_max) { + if constexpr ((_Mode & _Mode_max) != 0) { _Cur_max_val += _Correction; } } @@ -1529,7 +1529,7 @@ namespace { } } - if constexpr (_Mode & _Mode_max) { + if constexpr ((_Mode & _Mode_max) != 0) { if constexpr (_Sign || _Sign_correction) { _Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals); // Update the current maximum } else { From be2aa9b333ecad847ffb828ba63f97fe6244e160 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 12:13:31 +0200 Subject: [PATCH 15/36] Improve fallback --- stl/src/vector_algorithms.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 68f222d78e..302a68ee11 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1540,6 +1540,8 @@ namespace { } else { _Cur_min_val = *reinterpret_cast(_First); _Cur_max_val = *reinterpret_cast(_First); + + _Advance_bytes(_First, sizeof(_Ty)); } #endif // !_M_ARM64EC From 1ecc70e938d4e11ca0dc91bf46dc9286124ecc14 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 12:18:12 +0200 Subject: [PATCH 
16/36] format --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 302a68ee11..9e18f1aa61 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1540,7 +1540,7 @@ namespace { } else { _Cur_min_val = *reinterpret_cast(_First); _Cur_max_val = *reinterpret_cast(_First); - + _Advance_bytes(_First, sizeof(_Ty)); } From b5be1e4d48e9dfa3b59e6d7d96dad8663369a81e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 15:29:49 +0200 Subject: [PATCH 17/36] Update stl/inc/__msvc_minmax.hpp Co-authored-by: A. Jiang --- stl/inc/__msvc_minmax.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/__msvc_minmax.hpp b/stl/inc/__msvc_minmax.hpp index 4602e44724..7dce57945c 100644 --- a/stl/inc/__msvc_minmax.hpp +++ b/stl/inc/__msvc_minmax.hpp @@ -7,7 +7,7 @@ #define __MSVC_MINMAX_HPP #include #if _STL_COMPILER_PREPROCESSOR -#include +#include #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) From c01bf7dad9cedad72ac764dd954538c97a6ee8f5 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 15:50:14 +0200 Subject: [PATCH 18/36] turn tails --- stl/src/vector_algorithms.cpp | 67 +++++++++++++---------------------- 1 file changed, 24 insertions(+), 43 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 9e18f1aa61..65d1cf9a1e 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -534,45 +534,6 @@ namespace { return _Res; } - template - _Ty _Min_tail_v(const void* const _First, const void* const _Last, _Ty _Cur) noexcept { - for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { - if (*_Ptr < _Cur) { - _Cur = *_Ptr; - } - } - - return _Cur; - } - - template - _Ty _Max_tail_v(const void* const _First, const void* const _Last, _Ty _Cur) noexcept { - for (auto _Ptr = static_cast(_First); _Ptr != 
_Last; ++_Ptr) { - if (_Cur < *_Ptr) { - _Cur = *_Ptr; - } - } - - return _Cur; - } - - template - _Rx _Both_tail_v(const void* const _First, const void* const _Last, _Ty _Cur_min, _Ty _Cur_max) noexcept { - for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { - if (*_Ptr < _Cur_min) { - _Cur_min = *_Ptr; - } - // Not else! - // * Needed for correctness if start with maximum, as we don't handle specially the first element. - // * Promote branchless code generation. - if (_Cur_max <= *_Ptr) { - _Cur_max = *_Ptr; - } - } - - return {_Cur_min, _Cur_max}; - } - enum _Min_max_mode { _Mode_min = 1 << 0, _Mode_max = 1 << 1, @@ -1546,13 +1507,33 @@ namespace { #endif // !_M_ARM64EC if constexpr (_Mode == _Mode_min) { - return _Min_tail_v(_First, _Last, static_cast<_Ty>(_Cur_min_val)); + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if (*_Ptr < _Cur_min_val) { + _Cur_min_val = *_Ptr; + } + } + return _Cur_min_val; } else if constexpr (_Mode == _Mode_max) { - return _Max_tail_v(_First, _Last, static_cast<_Ty>(_Cur_max_val)); + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if (_Cur_max_val < *_Ptr) { + _Cur_max_val = *_Ptr; + } + } + return _Cur_max_val; } else { + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if (*_Ptr < _Cur_min_val) { + _Cur_min_val = *_Ptr; + } + // Not else! + // * Needed for correctness if start with maximum, as we don't handle specially the first element. + // * Promote branchless code generation. 
+ if (_Cur_max_val <= *_Ptr) { + _Cur_max_val = *_Ptr; + } + } using _Rx = std::conditional_t<_Sign, typename _Traits::_Minmax_i_t, typename _Traits::_Minmax_u_t>; - - return _Both_tail_v<_Rx>(_First, _Last, static_cast<_Ty>(_Cur_min_val), static_cast<_Ty>(_Cur_max_val)); + return _Rx{_Cur_min_val, _Cur_max_val}; } } From 99917b4be8e8ae8654eb9c728f0081478ba268a2 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 22:40:42 +0200 Subject: [PATCH 19/36] Use SSE responsible --- stl/src/vector_algorithms.cpp | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 65d1cf9a1e..a12fd1ba3a 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -620,11 +620,11 @@ namespace { } static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_min_epi8(_First, _Second); + return _mm_min_epi8(_First, _Second); // SSE4.1 } static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_max_epi8(_First, _Second); + return _mm_max_epi8(_First, _Second); // SSE4.1 } static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { @@ -640,7 +640,7 @@ namespace { } static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); + return _Use_sse42(); // _mm_min_epi8, _mm_max_epi8 } #endif // !_M_ARM64EC }; @@ -726,11 +726,11 @@ namespace { } static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_min_epi16(_First, _Second); + return _mm_min_epi16(_First, _Second); // SSE4.1 } static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_max_epi16(_First, _Second); + return _mm_max_epi16(_First, _Second); // SSE4.1 } static __m128i _Min_u(const __m128i _First, const __m128i 
_Second) noexcept { @@ -746,7 +746,7 @@ namespace { } static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); + return _Use_sse42(); // _mm_cmpgt_epi16, _mm_min_epi16 } #endif // !_M_ARM64EC }; @@ -832,11 +832,11 @@ namespace { } static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_min_epi32(_First, _Second); + return _mm_min_epi32(_First, _Second); // SSE4.1 } static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_max_epi32(_First, _Second); + return _mm_max_epi32(_First, _Second); // SSE4.1 } static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { @@ -852,7 +852,7 @@ namespace { } static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); + return _Use_sse42(); // _mm_cmpgt_epi32, _mm_min_epi32 } #endif // !_M_ARM64EC }; @@ -949,11 +949,11 @@ namespace { } static __m128i _Min(const __m128i _First, const __m128i _Second) noexcept { - return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_First, _Second)); + return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_First, _Second)); // SSE4.2 } static __m128i _Max(const __m128i _First, const __m128i _Second) noexcept { - return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_Second, _First)); + return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_Second, _First)); // SSE4.2 } static __m128i _Mask_cast(__m128i _Mask) noexcept { @@ -961,7 +961,7 @@ namespace { } static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse42(); + return _Use_sse42(); // _mm_cmpgt_epi64 } #endif // !_M_ARM64EC }; @@ -1065,7 +1065,7 @@ namespace { } static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); + return _Use_sse2(); // _mm_min_ps, _mm_max_ps, _mm_shuffle_ps } #endif // !_M_ARM64EC }; @@ -1173,7 +1173,7 @@ namespace { } static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); + return _Use_sse2(); // _mm_min_pd, 
_mm_max_pd, _mm_shuffle_pd } #endif // !_M_ARM64EC }; From e55e7f2c43baa41da204866f09768bc20eee6463 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 22:42:33 +0200 Subject: [PATCH 20/36] empty vector check --- tests/std/include/test_min_max_element_support.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/include/test_min_max_element_support.hpp b/tests/std/include/test_min_max_element_support.hpp index 1e89dc7ddb..a717c7ef85 100644 --- a/tests/std/include/test_min_max_element_support.hpp +++ b/tests/std/include/test_min_max_element_support.hpp @@ -108,7 +108,7 @@ void test_case_min_max_element(const std::vector& input) { assert(expected_minmax.first == actual_minmax_sized_range.min); assert(expected_minmax.second == actual_minmax_sized_range.max); - if (input.begin() != input.end()) { + if (!input.empty()) { auto actual_min_value = std::ranges::min(input); auto actual_max_value = std::ranges::max(input); auto actual_minmax_value = std::ranges::minmax(input); From 35626ce4ed3862588aeb5502ab8b24ad4a12934d Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:22:13 +0200 Subject: [PATCH 21/36] Don't mimic `_Minmax_element` fallback --- stl/src/vector_algorithms.cpp | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index a12fd1ba3a..17231cba07 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1506,32 +1506,30 @@ namespace { } #endif // !_M_ARM64EC - if constexpr (_Mode == _Mode_min) { - for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + if constexpr ((_Mode & _Mode_min) != 0) { if (*_Ptr < _Cur_min_val) { _Cur_min_val = *_Ptr; } } - return _Cur_min_val; - } else if constexpr (_Mode == _Mode_max) { - for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { + + if constexpr 
((_Mode & _Mode_max) != 0) { if (_Cur_max_val < *_Ptr) { _Cur_max_val = *_Ptr; } } + + // _Mode_both could have been handled separately with else + // We have _Cur_min_val / _Cur_max_val initialized by processing at least one element, + // so the 'else' would be correct here + // But still separate 'if' statements promote branchless codegen + } + + if constexpr (_Mode == _Mode_min) { + return _Cur_min_val; + } else if constexpr (_Mode == _Mode_max) { return _Cur_max_val; } else { - for (auto _Ptr = static_cast(_First); _Ptr != _Last; ++_Ptr) { - if (*_Ptr < _Cur_min_val) { - _Cur_min_val = *_Ptr; - } - // Not else! - // * Needed for correctness if start with maximum, as we don't handle specially the first element. - // * Promote branchless code generation. - if (_Cur_max_val <= *_Ptr) { - _Cur_max_val = *_Ptr; - } - } using _Rx = std::conditional_t<_Sign, typename _Traits::_Minmax_i_t, typename _Traits::_Minmax_u_t>; return _Rx{_Cur_min_val, _Cur_max_val}; } From 47bc60859d4aa7cc8ae96fa260ef695bef77acaa Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:30:03 +0200 Subject: [PATCH 22/36] clear pointers --- stl/inc/algorithm | 15 ++++++--------- stl/inc/xutility | 24 ++++++++++-------------- 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index a9c9fc7672..bac9b568ea 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -101,15 +101,12 @@ auto __std_minmax(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; if constexpr (_STD is_pointer_v<_Ty>) { - if constexpr (sizeof(_Ty) == 4) { - auto _Result = ::__std_minmax_4u(_First, _Last); - return _Min_max_p{reinterpret_cast(_Result._Min), reinterpret_cast(_Result._Max)}; - } else if constexpr (sizeof(_Ty) == 8) { - auto _Result = ::__std_minmax_8u(_First, _Last); - return _Min_max_p{reinterpret_cast(_Result._Min), reinterpret_cast(_Result._Max)}; - } else { - static_assert(_STD _Always_false<_Ty>, "Unexpected 
size"); - } +#ifdef _WIN64 + auto _Result = ::__std_minmax_8u(_First, _Last); +#else + auto _Result = ::__std_minmax_4u(_First, _Last); +#endif + return _Min_max_p{reinterpret_cast(_Result._Min), reinterpret_cast(_Result._Max)}; } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return ::__std_minmax_f(_First, _Last); } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { diff --git a/stl/inc/xutility b/stl/inc/xutility index 42896cb598..3dcc69a1e2 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -238,13 +238,11 @@ auto __std_min(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; if constexpr (_STD is_pointer_v<_Ty>) { - if constexpr (sizeof(_Ty) == 4) { - return reinterpret_cast(::__std_min_4u(_First, _Last)); - } else if constexpr (sizeof(_Ty) == 8) { - return reinterpret_cast(::__std_min_8u(_First, _Last)); - } else { - static_assert(_STD _Always_false<_Ty>, "Unexpected size"); - } +#ifdef _WIN64 + return reinterpret_cast(::__std_min_8u(_First, _Last)); +#else + return reinterpret_cast(::__std_min_4u(_First, _Last)); +#endif } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return ::__std_min_f(_First, _Last); } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { @@ -283,13 +281,11 @@ auto __std_max(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; if constexpr (_STD is_pointer_v<_Ty>) { - if constexpr (sizeof(_Ty) == 4) { - return reinterpret_cast(::__std_max_4u(_First, _Last)); - } else if constexpr (sizeof(_Ty) == 8) { - return reinterpret_cast(::__std_max_8u(_First, _Last)); - } else { - static_assert(_STD _Always_false<_Ty>, "Unexpected size"); - } +#ifdef _WIN64 + return reinterpret_cast(::__std_max_8u(_First, _Last)); +#else + return reinterpret_cast(::__std_max_4u(_First, _Last)); +#endif } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return 
::__std_max_f(_First, _Last); } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { From c3ba6126238fe77888fd72bdb398d0ee8334f917 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:36:09 +0200 Subject: [PATCH 23/36] no horizontal position --- stl/src/vector_algorithms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 17231cba07..2029188295 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1433,8 +1433,7 @@ namespace { _Advance_bytes(_First, 16); if (_First == _Stop_at) { - // Reached end. - // Compute horizontal min and/or max. Determine horizontal and vertical position of it. + // Reached end. Compute horizontal min and/or max. if constexpr ((_Mode & _Mode_min) != 0) { if constexpr (_Sign || _Sign_correction) { From def1e7acf9fb6f8007358308a515792d983bcea9 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:37:58 +0200 Subject: [PATCH 24/36] unload extra _Load --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 2029188295..cae55f8ff0 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1423,7 +1423,7 @@ namespace { auto _Cur_vals = _Traits::_Load(_First); if constexpr (_Sign_correction) { - _Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), false); + _Cur_vals = _Traits::_Sign_correction(_Cur_vals, false); } auto _Cur_vals_min = _Cur_vals; // vector of vertical minimum values @@ -1478,7 +1478,7 @@ namespace { _Cur_vals = _Traits::_Load(_First); if constexpr (_Sign_correction) { - _Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), false); + _Cur_vals = _Traits::_Sign_correction(_Cur_vals, false); } if constexpr ((_Mode & _Mode_min) != 0) { From f149a831d4d878a9d83f2bb69bc10028e3862848 Mon Sep 17 
00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:42:11 +0200 Subject: [PATCH 25/36] We'll hide Slavic accent --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index cae55f8ff0..1cb658736c 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1410,7 +1410,7 @@ namespace { _Ty _Cur_min_val; // initialized in both of branches below _Ty _Cur_max_val; // initialized in both of branches below - // We don't have unsigned 64-bit stuff, so will use sign correction just for that case + // We don't have unsigned 64-bit stuff, so we'll use sign correction just for that case constexpr bool _Sign_correction = sizeof(_Ty) == 8 && !_Sign; #ifndef _M_ARM64EC From 5a91a03542d07094ad2adbc6986c74d06a88f08b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:44:19 +0200 Subject: [PATCH 26/36] non-type template param is already const enough --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 1cb658736c..af52746cb2 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1403,7 +1403,7 @@ namespace { // (__std_min_Nn, __std_max_Nn, __std_minmax_Nn), up to calling convention. // This makes sure the template specialization is fused with the extern "C" function. // In optimized builds it avoids an extra call, as this function is too large to inline. 
- template <_Min_max_mode _Mode, class _Traits, const bool _Sign> + template <_Min_max_mode _Mode, class _Traits, bool _Sign> auto __stdcall _Minmax(const void* _First, const void* const _Last) noexcept { using _Ty = std::conditional_t<_Sign, typename _Traits::_Signed_t, typename _Traits::_Unsigned_t>; From 459ae03a71c601c64901cc73d0b011e90b249083 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:45:51 +0200 Subject: [PATCH 27/36] constant result --- stl/inc/algorithm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index bac9b568ea..5618d88517 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -102,9 +102,9 @@ auto __std_minmax(_Ty* _First, _Ty* _Last) noexcept { if constexpr (_STD is_pointer_v<_Ty>) { #ifdef _WIN64 - auto _Result = ::__std_minmax_8u(_First, _Last); + const auto _Result = ::__std_minmax_8u(_First, _Last); #else - auto _Result = ::__std_minmax_4u(_First, _Last); + const auto _Result = ::__std_minmax_4u(_First, _Last); #endif return _Min_max_p{reinterpret_cast(_Result._Min), reinterpret_cast(_Result._Max)}; } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { From e9185a9956b165bac645f42dd6ef66a58f575885 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:50:08 +0200 Subject: [PATCH 28/36] range of `is_constant_evaluated()` --- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 3dcc69a1e2..2dc4141738 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6928,7 +6928,7 @@ namespace ranges { "An initializer_list passed to std::ranges::max must not be empty. 
(N4950 [alg.min.max]/13)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { - if (!_Is_constant_evaluated()) { + if (!_STD is_constant_evaluated()) { return static_cast<_Ty>(_STD __std_max(_STD _To_address(_First), _STD _To_address(_Last))); } } @@ -7144,7 +7144,7 @@ namespace ranges { "An initializer_list passed to std::ranges::min must not be empty. (N4950 [alg.min.max]/5)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { - if (!_Is_constant_evaluated()) { + if (!_STD is_constant_evaluated()) { return static_cast<_Ty>(_STD __std_min(_STD _To_address(_First), _STD _To_address(_Last))); } } From cf1e3da1d28b53599078c027bee12eafecc43b77 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:55:34 +0200 Subject: [PATCH 29/36] scope for `_M_ARM64EC` This is a bug fix, not only a change to address a pedantic comment! --- stl/src/vector_algorithms.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index af52746cb2..06720d42cb 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1410,10 +1410,10 @@ namespace { _Ty _Cur_min_val; // initialized in both of branches below _Ty _Cur_max_val; // initialized in both of branches below +#ifndef _M_ARM64EC // We don't have unsigned 64-bit stuff, so we'll use sign correction just for that case constexpr bool _Sign_correction = sizeof(_Ty) == 8 && !_Sign; -#ifndef _M_ARM64EC if (_Byte_length(_First, _Last) >= 16 && _Traits::_Sse_plain_min_max_available()) { const size_t _Sse_byte_size = _Byte_length(_First, _Last) & ~size_t{0xF}; @@ -1497,14 +1497,15 @@ namespace { } } } - } else { + } else +#endif // !_M_ARM64EC + { _Cur_min_val = *reinterpret_cast(_First); _Cur_max_val = *reinterpret_cast(_First); _Advance_bytes(_First, sizeof(_Ty)); } -#endif // !_M_ARM64EC for (auto _Ptr = 
static_cast(_First); _Ptr != _Last; ++_Ptr) { if constexpr ((_Mode & _Mode_min) != 0) { if (*_Ptr < _Cur_min_val) { From 1743e200abc3493655f46d29da07e72c01533d4c Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 11 Feb 2024 23:56:11 +0200 Subject: [PATCH 30/36] the who understands English articles --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 06720d42cb..8df62c706c 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1407,8 +1407,8 @@ namespace { auto __stdcall _Minmax(const void* _First, const void* const _Last) noexcept { using _Ty = std::conditional_t<_Sign, typename _Traits::_Signed_t, typename _Traits::_Unsigned_t>; - _Ty _Cur_min_val; // initialized in both of branches below - _Ty _Cur_max_val; // initialized in both of branches below + _Ty _Cur_min_val; // initialized in both of the branches below + _Ty _Cur_max_val; // initialized in both of the branches below #ifndef _M_ARM64EC // We don't have unsigned 64-bit stuff, so we'll use sign correction just for that case From 1603f0b2e5541508556993bb0d78ae3be572773e Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 11 Feb 2024 14:33:46 -0800 Subject: [PATCH 31/36] Don't need `_To_address` for `initializer_list`. 
--- stl/inc/algorithm | 2 +- stl/inc/xutility | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 5618d88517..e39248cb1b 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -10124,7 +10124,7 @@ _NODISCARD constexpr pair<_Ty, _Ty> minmax(initializer_list<_Ty> _Ilist, _Pr _Pr #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_STD _Is_constant_evaluated()) { - const auto _Result = _STD __std_minmax(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end())); + const auto _Result = _STD __std_minmax(_Ilist.begin(), _Ilist.end()); return {static_cast<_Ty>(_Result._Min), static_cast<_Ty>(_Result._Max)}; } } diff --git a/stl/inc/xutility b/stl/inc/xutility index 2dc4141738..fc5f1773e5 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6883,7 +6883,7 @@ _NODISCARD constexpr _Ty(max)(initializer_list<_Ty> _Ilist, _Pr _Pred) { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { - return static_cast<_Ty>(_STD __std_max(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); + return static_cast<_Ty>(_STD __std_max(_Ilist.begin(), _Ilist.end())); } } #endif // _USE_STD_VECTOR_ALGORITHMS @@ -6929,7 +6929,7 @@ namespace ranges { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { if (!_STD is_constant_evaluated()) { - return static_cast<_Ty>(_STD __std_max(_STD _To_address(_First), _STD _To_address(_Last))); + return static_cast<_Ty>(_STD __std_max(_First, _Last)); } } #endif // _USE_STD_VECTOR_ALGORITHMS @@ -7105,7 +7105,7 @@ _NODISCARD constexpr _Ty(min)(initializer_list<_Ty> _Ilist, _Pr _Pred) { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { - return static_cast<_Ty>(_STD __std_min(_STD _To_address(_Ilist.begin()), _STD _To_address(_Ilist.end()))); + return 
static_cast<_Ty>(_STD __std_min(_Ilist.begin(), _Ilist.end())); } } #endif // _USE_STD_VECTOR_ALGORITHMS @@ -7145,7 +7145,7 @@ namespace ranges { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { if (!_STD is_constant_evaluated()) { - return static_cast<_Ty>(_STD __std_min(_STD _To_address(_First), _STD _To_address(_Last))); + return static_cast<_Ty>(_STD __std_min(_First, _Last)); } } #endif // _USE_STD_VECTOR_ALGORITHMS From 13155b3a0dba198e5372064fa58a3b945a457961 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 11 Feb 2024 14:58:52 -0800 Subject: [PATCH 32/36] `_STL_ASSERT` non-empty inputs, update citations. --- stl/inc/algorithm | 6 ++++-- stl/inc/xutility | 12 ++++++++---- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index e39248cb1b..c6c0fdfc29 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -10121,6 +10121,8 @@ _NODISCARD constexpr pair minmax(const _Ty& _Left _MSVC_ _EXPORT_STD template _NODISCARD constexpr pair<_Ty, _Ty> minmax(initializer_list<_Ty> _Ilist, _Pr _Pred) { // return {leftmost/smallest, rightmost/largest} + _STL_ASSERT( + _Ilist.size() != 0, "An initializer_list passed to std::minmax must not be empty. (N4971 [alg.min.max]/21)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_STD _Is_constant_evaluated()) { @@ -10178,7 +10180,7 @@ namespace ranges { const auto _First = _Range.begin(); const auto _Last = _Range.end(); _STL_ASSERT(_First != _Last, - "An initializer_list passed to std::ranges::minmax must not be empty. (N4950 [alg.min.max]/21)"); + "An initializer_list passed to std::ranges::minmax must not be empty. 
(N4971 [alg.min.max]/21)"); return _Minmax_fwd_unchecked(_First, _Last, _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj)); } @@ -10190,7 +10192,7 @@ namespace ranges { auto _UFirst = _RANGES _Ubegin(_Range); auto _ULast = _RANGES _Uend(_Range); _STL_ASSERT( - _UFirst != _ULast, "A range passed to std::ranges::minmax must not be empty. (N4950 [alg.min.max]/21)"); + _UFirst != _ULast, "A range passed to std::ranges::minmax must not be empty. (N4971 [alg.min.max]/21)"); if constexpr (forward_range<_Rng> && _Prefer_iterator_copies>) { return _Minmax_fwd_unchecked( _STD move(_UFirst), _STD move(_ULast), _STD _Pass_fn(_Pred), _STD _Pass_fn(_Proj)); diff --git a/stl/inc/xutility b/stl/inc/xutility index fc5f1773e5..b9667ebb7a 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6880,6 +6880,8 @@ namespace ranges { _EXPORT_STD template _NODISCARD constexpr _Ty(max)(initializer_list<_Ty> _Ilist, _Pr _Pred) { // return leftmost/largest + _STL_ASSERT( + _Ilist.size() != 0, "An initializer_list passed to std::max must not be empty. (N4971 [alg.min.max]/13)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { @@ -6925,7 +6927,7 @@ namespace ranges { const auto _First = _Range.begin(); const auto _Last = _Range.end(); _STL_ASSERT(_First != _Last, - "An initializer_list passed to std::ranges::max must not be empty. (N4950 [alg.min.max]/13)"); + "An initializer_list passed to std::ranges::max must not be empty. (N4971 [alg.min.max]/13)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { if (!_STD is_constant_evaluated()) { @@ -6944,7 +6946,7 @@ namespace ranges { auto _UFirst = _Ubegin(_Range); auto _ULast = _Uend(_Range); _STL_ASSERT( - _UFirst != _ULast, "A range passed to std::ranges::max must not be empty. (N4950 [alg.min.max]/13)"); + _UFirst != _ULast, "A range passed to std::ranges::max must not be empty. 
(N4971 [alg.min.max]/13)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe && sized_sentinel_for) { @@ -7102,6 +7104,8 @@ namespace ranges { _EXPORT_STD template _NODISCARD constexpr _Ty(min)(initializer_list<_Ty> _Ilist, _Pr _Pred) { // return leftmost/smallest + _STL_ASSERT( + _Ilist.size() != 0, "An initializer_list passed to std::min must not be empty. (N4971 [alg.min.max]/5)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Is_min_max_optimization_safe) { if (!_Is_constant_evaluated()) { @@ -7141,7 +7145,7 @@ namespace ranges { const auto _First = _Range.begin(); const auto _Last = _Range.end(); _STL_ASSERT(_First != _Last, - "An initializer_list passed to std::ranges::min must not be empty. (N4950 [alg.min.max]/5)"); + "An initializer_list passed to std::ranges::min must not be empty. (N4971 [alg.min.max]/5)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe) { if (!_STD is_constant_evaluated()) { @@ -7160,7 +7164,7 @@ namespace ranges { auto _UFirst = _Ubegin(_Range); auto _ULast = _Uend(_Range); _STL_ASSERT( - _UFirst != _ULast, "A range passed to std::ranges::min must not be empty. (N4950 [alg.min.max]/5)"); + _UFirst != _ULast, "A range passed to std::ranges::min must not be empty. (N4971 [alg.min.max]/5)"); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (is_same_v<_Pj, identity> && _Is_min_max_optimization_safe && sized_sentinel_for) { From 7c678db92254c6e3ee572a5e34a9fc5555dcbfb9 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 11 Feb 2024 15:26:21 -0800 Subject: [PATCH 33/36] Cleanup comment punctuation. 
--- stl/src/vector_algorithms.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 8df62c706c..b919364bf4 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1519,10 +1519,10 @@ namespace { } } - // _Mode_both could have been handled separately with else + // _Mode_both could have been handled separately with 'else'. // We have _Cur_min_val / _Cur_max_val initialized by processing at least one element, - // so the 'else' would be correct here - // But still separate 'if' statements promote branchless codegen + // so the 'else' would be correct here. + // But still separate 'if' statements promote branchless codegen. } if constexpr (_Mode == _Mode_min) { From c201a3f9c7022ca2389524a774757b7a1aa51f4a Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 11 Feb 2024 15:43:11 -0800 Subject: [PATCH 34/36] Avoid risk by always guarding with `_Use_sse42()`. --- stl/src/vector_algorithms.cpp | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index b919364bf4..b71a0302e4 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -638,10 +638,6 @@ namespace { static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } - - static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse42(); // _mm_min_epi8, _mm_max_epi8 - } #endif // !_M_ARM64EC }; @@ -744,10 +740,6 @@ namespace { static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } - - static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse42(); // _mm_cmpgt_epi16, _mm_min_epi16 - } #endif // !_M_ARM64EC }; @@ -850,10 +842,6 @@ namespace { static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } - - static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse42(); // _mm_cmpgt_epi32, _mm_min_epi32 - } 
#endif // !_M_ARM64EC }; @@ -959,10 +947,6 @@ namespace { static __m128i _Mask_cast(__m128i _Mask) noexcept { return _Mask; } - - static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse42(); // _mm_cmpgt_epi64 - } #endif // !_M_ARM64EC }; @@ -1063,10 +1047,6 @@ namespace { static __m128i _Mask_cast(__m128 _Mask) noexcept { return _mm_castps_si128(_Mask); } - - static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); // _mm_min_ps, _mm_max_ps, _mm_shuffle_ps - } #endif // !_M_ARM64EC }; @@ -1171,10 +1151,6 @@ namespace { static __m128i _Mask_cast(__m128d _Mask) noexcept { return _mm_castpd_si128(_Mask); } - - static bool _Sse_plain_min_max_available() noexcept { - return _Use_sse2(); // _mm_min_pd, _mm_max_pd, _mm_shuffle_pd - } #endif // !_M_ARM64EC }; @@ -1414,7 +1390,7 @@ namespace { // We don't have unsigned 64-bit stuff, so we'll use sign correction just for that case constexpr bool _Sign_correction = sizeof(_Ty) == 8 && !_Sign; - if (_Byte_length(_First, _Last) >= 16 && _Traits::_Sse_plain_min_max_available()) { + if (_Byte_length(_First, _Last) >= 16 && _Use_sse42()) { const size_t _Sse_byte_size = _Byte_length(_First, _Last) & ~size_t{0xF}; const void* _Stop_at = _First; From 35fd4b73d28ac609727aec8cd256b87ff62618ad Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 11 Feb 2024 16:25:58 -0800 Subject: [PATCH 35/36] Make room for comments by renaming lambda `_First` and `_Second` to `_Val1` and `_Val2`. Regex renamed: `\[\]\((__m128[id]?) 
_First, \1 _Second\) \{ return (\w+)\(_First, _Second\); \}` to: `[]($1 _Val1, $1 _Val2) { return $2(_Val1, _Val2); }` --- stl/src/vector_algorithms.cpp | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index b71a0302e4..9c7037ea4f 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -584,19 +584,19 @@ namespace { } static __m128i _H_min(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epi8(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi8(_Val1, _Val2); }); } static __m128i _H_max(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epi8(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi8(_Val1, _Val2); }); } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epu8(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu8(_Val1, _Val2); }); } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epu8(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu8(_Val1, _Val2); }); } static _Signed_t _Get_any(const __m128i _Cur) noexcept { @@ -683,19 +683,19 @@ namespace { } static __m128i _H_min(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epi16(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi16(_Val1, _Val2); }); } static __m128i _H_max(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epi16(_First, _Second); }); + 
return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi16(_Val1, _Val2); }); } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epu16(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu16(_Val1, _Val2); }); } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epu16(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu16(_Val1, _Val2); }); } static _Signed_t _Get_any(const __m128i _Cur) noexcept { @@ -786,19 +786,19 @@ namespace { } static __m128i _H_min(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epi32(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi32(_Val1, _Val2); }); } static __m128i _H_max(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epi32(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi32(_Val1, _Val2); }); } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epu32(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu32(_Val1, _Val2); }); } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epu32(_First, _Second); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu32(_Val1, _Val2); }); } static _Signed_t _Get_any(const __m128i _Cur) noexcept { @@ -999,19 +999,19 @@ namespace { } static __m128 _H_min(const __m128 _Cur) noexcept { - return _H_func(_Cur, [](__m128 _First, __m128 _Second) { return _mm_min_ps(_First, _Second); }); + return 
_H_func(_Cur, [](__m128 _Val1, __m128 _Val2) { return _mm_min_ps(_Val1, _Val2); }); } static __m128 _H_max(const __m128 _Cur) noexcept { - return _H_func(_Cur, [](__m128 _First, __m128 _Second) { return _mm_max_ps(_First, _Second); }); + return _H_func(_Cur, [](__m128 _Val1, __m128 _Val2) { return _mm_max_ps(_Val1, _Val2); }); } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func_u(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epu32(_First, _Second); }); + return _H_func_u(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu32(_Val1, _Val2); }); } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func_u(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epu32(_First, _Second); }); + return _H_func_u(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu32(_Val1, _Val2); }); } static float _Get_any(const __m128 _Cur) noexcept { @@ -1095,11 +1095,11 @@ namespace { } static __m128d _H_min(const __m128d _Cur) noexcept { - return _H_func(_Cur, [](__m128d _First, __m128d _Second) { return _mm_min_pd(_First, _Second); }); + return _H_func(_Cur, [](__m128d _Val1, __m128d _Val2) { return _mm_min_pd(_Val1, _Val2); }); } static __m128d _H_max(const __m128d _Cur) noexcept { - return _H_func(_Cur, [](__m128d _First, __m128d _Second) { return _mm_max_pd(_First, _Second); }); + return _H_func(_Cur, [](__m128d _Val1, __m128d _Val2) { return _mm_max_pd(_Val1, _Val2); }); } static __m128i _H_min_u(const __m128i _Cur) noexcept { From d604bfd86eb16a86be593c6c81b0e9a6babac784 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Sun, 11 Feb 2024 16:32:26 -0800 Subject: [PATCH 36/36] Comment all `_Minmax_traits_MEOW` intrinsics above SSE2. Remove comments from `_mm_min_epi16` and `_mm_max_epi16` - they're SSE2, not SSE4.1. 
--- stl/src/vector_algorithms.cpp | 60 ++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 29 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 9c7037ea4f..4878e69455 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -578,17 +578,17 @@ namespace { __m128i _H_min_val = _Cur; _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(1, 0, 3, 2))); _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(2, 3, 0, 1))); - _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi8(_H_min_val, _Shuf_words)); - _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi8(_H_min_val, _Shuf_bytes)); + _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi8(_H_min_val, _Shuf_words)); // SSSE3 + _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi8(_H_min_val, _Shuf_bytes)); // SSSE3 return _H_min_val; } static __m128i _H_min(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi8(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi8(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_max(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi8(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi8(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_min_u(const __m128i _Cur) noexcept { @@ -604,6 +604,7 @@ namespace { } static _Unsigned_t _Get_v_pos(const __m128i _Idx, const unsigned long _H_pos) noexcept { + // _mm_shuffle_epi8 is SSSE3: return static_cast<_Unsigned_t>(_mm_cvtsi128_si32(_mm_shuffle_epi8(_Idx, _mm_cvtsi32_si128(_H_pos)))); } @@ -678,7 +679,7 @@ namespace { __m128i _H_min_val = _Cur; _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(1, 0, 3, 2))); _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(2, 3, 0, 1))); - _H_min_val = 
_Funct(_H_min_val, _mm_shuffle_epi8(_H_min_val, _Shuf_words)); + _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi8(_H_min_val, _Shuf_words)); // SSSE3 return _H_min_val; } @@ -691,11 +692,11 @@ namespace { } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu16(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu16(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu16(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu16(_Val1, _Val2); }); // SSE4.1 } static _Signed_t _Get_any(const __m128i _Cur) noexcept { @@ -705,6 +706,7 @@ namespace { static _Unsigned_t _Get_v_pos(const __m128i _Idx, const unsigned long _H_pos) noexcept { static constexpr _Unsigned_t _Shuf[] = {0x0100, 0x0302, 0x0504, 0x0706, 0x0908, 0x0B0A, 0x0D0C, 0x0F0E}; + // _mm_shuffle_epi8 is SSSE3: return static_cast<_Unsigned_t>( _mm_cvtsi128_si32(_mm_shuffle_epi8(_Idx, _mm_cvtsi32_si128(_Shuf[_H_pos >> 1])))); } @@ -722,19 +724,19 @@ namespace { } static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_min_epi16(_First, _Second); // SSE4.1 + return _mm_min_epi16(_First, _Second); } static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i = _mm_undefined_si128()) noexcept { - return _mm_max_epi16(_First, _Second); // SSE4.1 + return _mm_max_epi16(_First, _Second); } static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { - return _mm_min_epu16(_First, _Second); + return _mm_min_epu16(_First, _Second); // SSE4.1 } static __m128i _Max_u(const __m128i _First, const __m128i _Second) noexcept { - return _mm_max_epu16(_First, _Second); + return _mm_max_epu16(_First, _Second); // SSE4.1 } static __m128i _Mask_cast(__m128i _Mask) 
noexcept { @@ -786,19 +788,19 @@ namespace { } static __m128i _H_min(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi32(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epi32(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_max(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi32(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epi32(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu32(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu32(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu32(_Val1, _Val2); }); + return _H_func(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu32(_Val1, _Val2); }); // SSE4.1 } static _Signed_t _Get_any(const __m128i _Cur) noexcept { @@ -832,11 +834,11 @@ namespace { } static __m128i _Min_u(const __m128i _First, const __m128i _Second) noexcept { - return _mm_min_epu32(_First, _Second); + return _mm_min_epu32(_First, _Second); // SSE4.1 } static __m128i _Max_u(const __m128i _First, const __m128i _Second) noexcept { - return _mm_max_epu32(_First, _Second); + return _mm_max_epu32(_First, _Second); // SSE4.1 } static __m128i _Mask_cast(__m128i _Mask) noexcept { @@ -903,7 +905,7 @@ namespace { static _Signed_t _Get_any(const __m128i _Cur) noexcept { #ifdef _M_IX86 return static_cast<_Signed_t>( - (static_cast<_Unsigned_t>(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) + (static_cast<_Unsigned_t>(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) // SSE4.1 | static_cast<_Unsigned_t>(static_cast(_mm_cvtsi128_si32(_Cur)))); #else // ^^^ x86 / x64 vvv return 
static_cast<_Signed_t>(_mm_cvtsi128_si64(_Cur)); @@ -917,31 +919,31 @@ namespace { } static __m128i _Cmp_eq(const __m128i _First, const __m128i _Second) noexcept { - return _mm_cmpeq_epi64(_First, _Second); + return _mm_cmpeq_epi64(_First, _Second); // SSE4.1 } static __m128i _Cmp_gt(const __m128i _First, const __m128i _Second) noexcept { - return _mm_cmpgt_epi64(_First, _Second); + return _mm_cmpgt_epi64(_First, _Second); // SSE4.2 } static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { - return _mm_cmpeq_epi64(_First, _Second); + return _mm_cmpeq_epi64(_First, _Second); // SSE4.1 } static __m128i _Min(const __m128i _First, const __m128i _Second, const __m128i _Mask) noexcept { - return _mm_blendv_epi8(_First, _Second, _Mask); + return _mm_blendv_epi8(_First, _Second, _Mask); // SSE4.1 } static __m128i _Max(const __m128i _First, const __m128i _Second, const __m128i _Mask) noexcept { - return _mm_blendv_epi8(_First, _Second, _Mask); + return _mm_blendv_epi8(_First, _Second, _Mask); // SSE4.1 } static __m128i _Min(const __m128i _First, const __m128i _Second) noexcept { - return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_First, _Second)); // SSE4.2 + return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_First, _Second)); // _Cmp_gt is SSE4.2 } static __m128i _Max(const __m128i _First, const __m128i _Second) noexcept { - return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_Second, _First)); // SSE4.2 + return _mm_blendv_epi8(_First, _Second, _Cmp_gt(_Second, _First)); // _Cmp_gt is SSE4.2 } static __m128i _Mask_cast(__m128i _Mask) noexcept { @@ -1007,11 +1009,11 @@ namespace { } static __m128i _H_min_u(const __m128i _Cur) noexcept { - return _H_func_u(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu32(_Val1, _Val2); }); + return _H_func_u(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_min_epu32(_Val1, _Val2); }); // SSE4.1 } static __m128i _H_max_u(const __m128i _Cur) noexcept { - return _H_func_u(_Cur, [](__m128i _Val1, __m128i 
_Val2) { return _mm_max_epu32(_Val1, _Val2); }); + return _H_func_u(_Cur, [](__m128i _Val1, __m128i _Val2) { return _mm_max_epu32(_Val1, _Val2); }); // SSE4.1 } static float _Get_any(const __m128 _Cur) noexcept { @@ -1115,7 +1117,7 @@ namespace { static uint64_t _Get_any_u(const __m128i _Cur) noexcept { #ifdef _M_IX86 - return (static_cast(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) + return (static_cast(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) // SSE4.1 | static_cast(static_cast(_mm_cvtsi128_si32(_Cur))); #else // ^^^ x86 / x64 vvv return static_cast(_mm_cvtsi128_si64(_Cur)); @@ -1137,7 +1139,7 @@ namespace { } static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { - return _mm_cmpeq_epi64(_First, _Second); + return _mm_cmpeq_epi64(_First, _Second); // SSE4.1 } static __m128d _Min(const __m128d _First, const __m128d _Second, __m128d = _mm_undefined_pd()) noexcept {