From deca7cd56078ea532c18e4934e294375674dbf88 Mon Sep 17 00:00:00 2001 From: wjr <1966336874@qq.com> Date: Fri, 14 Jun 2024 15:52:00 +0800 Subject: [PATCH] opt tuple compile time --- godbolt/wjr.hpp | 19439 ++++++++++----------- include/wjr/assert.hpp | 49 +- include/wjr/container/intrusive/list.hpp | 24 +- include/wjr/math/convert.hpp | 1 + include/wjr/math/details.hpp | 30 +- include/wjr/math/div-impl.hpp | 13 +- include/wjr/math/div.hpp | 22 +- include/wjr/math/mul.hpp | 3 +- include/wjr/math/uint128_t.hpp | 98 + include/wjr/memory/memory_pool.hpp | 14 +- include/wjr/tuple.hpp | 14 +- include/wjr/type_traits.hpp | 8 - src/wjr/math/div.cpp | 1 + src/wjr/math/mul.cpp | 1 + src/wjr/x86/math/mul.cpp | 3 - 15 files changed, 9246 insertions(+), 10474 deletions(-) create mode 100644 include/wjr/math/uint128_t.hpp diff --git a/godbolt/wjr.hpp b/godbolt/wjr.hpp index 07b4d360..95d630fb 100644 --- a/godbolt/wjr.hpp +++ b/godbolt/wjr.hpp @@ -2508,45 +2508,32 @@ WJR_NORETURN extern void __assert_failed(const char *expr, const char *file, // LCOV_EXCL_START /// @private -class __assert_handler_t { -private: - WJR_NORETURN static void fn(const char *expr, const char *file, const char *func, - int line) noexcept { - __assert_failed(expr, file, func, line); - } - - template - WJR_NORETURN WJR_NOINLINE static void fn(const char *expr, const char *file, - const char *func, int line, - Args &&...args) noexcept { - std::cerr << "Additional information: "; - (void)(std::cerr << ... << std::forward(args)); - std::cerr << '\n'; - __assert_failed(expr, file, func, line); - } - -public: - template - WJR_NORETURN void operator()(const char *expr, const char *file, const char *func, - int line, Args &&...args) const noexcept { - fn(expr, file, func, line, std::forward(args)...); - } -}; +template +WJR_NORETURN WJR_NOINLINE void __assert_handler(const char *expr, const char *file, + const char *func, int line, + Args &&...args) noexcept { + std::cerr << "Additional information: "; + (void)(std::cerr << ... << std::forward(args)); + std::cerr << '\n'; + __assert_failed(expr, file, func, line); +} -inline constexpr __assert_handler_t __assert_handler{}; +/// @private +WJR_NORETURN inline void __assert_handler(const char *expr, const char *file, + const char *func, int line) noexcept { + __assert_failed(expr, file, func, line); +} // LCOV_EXCL_STOP -#define WJR_ASSERT_CHECK_I_HANDLER(handler, expr, ...) \ +#define WJR_ASSERT_CHECK_I(expr, ...) \ do { \ if (WJR_UNLIKELY(!(expr))) { \ - handler(#expr, WJR_FILE, WJR_CURRENT_FUNCTION, WJR_LINE, ##__VA_ARGS__); \ + ::wjr::__assert_handler(#expr, WJR_FILE, WJR_CURRENT_FUNCTION, WJR_LINE, \ + ##__VA_ARGS__); \ } \ } while (0) -#define WJR_ASSERT_CHECK_I(...) \ - WJR_ASSERT_CHECK_I_HANDLER(::wjr::__assert_handler, __VA_ARGS__) - // do nothing #define WJR_ASSERT_UNCHECK_I(expr, ...) \ do { \ @@ -3040,14 +3027,6 @@ using uint_t = typename __uint_selector::type; template using int_t = typename __int_selector::type; -#if WJR_HAS_FEATURE(INT128) -using int128_t = int_t<128>; -#endif - -#if WJR_HAS_FEATURE(INT128) -using uint128_t = uint_t<128>; -#endif - template using usint_t = std::conditional_t<__s, int_t, uint_t>; @@ -6156,12 +6135,8 @@ class de_bruijn { constexpr static uint8_t mv = digits == 32 ? 27 : 58; constexpr de_bruijn() noexcept : lookup(), lookupr() { initialize(); } - WJR_INTRINSIC_CONSTEXPR int get(T idx) const noexcept { - return lookup[(idx * seed) >> mv]; - } - WJR_INTRINSIC_CONSTEXPR int getr(T idx) const noexcept { - return lookupr[(idx * seed) >> mv]; - } + constexpr int get(T idx) const noexcept { return lookup[(idx * seed) >> mv]; } + constexpr int getr(T idx) const noexcept { return lookupr[(idx * seed) >> mv]; } private: constexpr void initialize() noexcept { @@ -6184,7 +6159,7 @@ inline constexpr de_bruijn de_bruijn64 = {}; // preview ... template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR bool is_zero_or_single_bit(T n) noexcept { +WJR_CONST constexpr bool is_zero_or_single_bit(T n) noexcept { return (n & (n - 1)) == 0; } @@ -6194,12 +6169,12 @@ WJR_CONST WJR_INTRINSIC_CONSTEXPR bool is_zero_or_single_bit(T n) noexcept { * @note `n & -n` is the lowest bit of n. */ template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T lowbit(T n) noexcept { +WJR_CONST constexpr T lowbit(T n) noexcept { return n & -n; } template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T clear_lowbit(T n) noexcept { +WJR_CONST constexpr T clear_lowbit(T n) noexcept { return n & (n - 1); } @@ -6216,39 +6191,35 @@ WJR_CONST constexpr decltype(auto) to_unsigned(Value value) noexcept { // preview : template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR bool __has_high_bit(T n) noexcept { +WJR_CONST constexpr bool __has_high_bit(T n) noexcept { return n >> (std::numeric_limits::digits - 1); } template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T __ceil_div(T n, type_identity_t div) noexcept { +WJR_CONST constexpr T __ceil_div(T n, type_identity_t div) noexcept { return (n + div - 1) / div; } template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T __align_down(T n, - type_identity_t alignment) noexcept { +WJR_CONST constexpr T __align_down(T n, type_identity_t alignment) noexcept { WJR_ASSERT_ASSUME_L2(is_zero_or_single_bit(alignment)); return n & (-alignment); } template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T -__align_down_offset(T n, type_identity_t alignment) noexcept { +WJR_CONST constexpr T __align_down_offset(T n, type_identity_t alignment) noexcept { WJR_ASSERT_ASSUME_L2(is_zero_or_single_bit(alignment)); return n & (alignment - 1); } template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T __align_up(T n, - type_identity_t alignment) noexcept { +WJR_CONST constexpr T __align_up(T n, type_identity_t alignment) noexcept { WJR_ASSERT_ASSUME_L2(is_zero_or_single_bit(alignment)); return (n + alignment - 1) & (-alignment); } template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR T -__align_up_offset(T n, type_identity_t alignment) noexcept { +WJR_CONST constexpr T __align_up_offset(T n, type_identity_t alignment) noexcept { WJR_ASSERT_ASSUME_L2(is_zero_or_single_bit(alignment)); return (-n) & (alignment - 1); } @@ -6944,12 +6915,6 @@ inline constexpr bool is_list_tag_v = is_list_tag::value; template using list_obj_t = typename Tag::obj_type; -template )> -constexpr list_obj_t *get(list_node *node) noexcept; - -template )> -constexpr const list_obj_t *get(const list_node *node) noexcept; - template class list_node_const_iterator { using node_type = list_node; @@ -7114,22 +7079,22 @@ struct list_node { template )> constexpr list_obj_t *operator->() noexcept { - return wjr::get(this); + return static_cast *>(this); } template )> constexpr const list_obj_t *operator->() const noexcept { - return wjr::get(this); + return static_cast *>(this); } template )> constexpr list_obj_t &operator*() noexcept { - return *wjr::get(this); + return *operator->(); } template )> constexpr const list_obj_t &operator*() const noexcept { - return *wjr::get(this); + return *operator->(); } list_node *m_prev; @@ -7200,579 +7165,364 @@ constexpr void replace_uninit(list_node *from, list_node *to) noexcept { from->m_next->m_prev = to; } -template )> -constexpr list_obj_t *get(list_node *node) noexcept { - return static_cast *>(node); -} - -template )> -constexpr const list_obj_t *get(const list_node *node) noexcept { - return static_cast *>(node); -} - } // namespace wjr #endif // WJR_CONTAINER_INTRUSIVE_LIST_HPP__ -#ifndef WJR_CRTP_TRIVIALLY_ALLOCATOR_BASE_HPP__ -#define WJR_CRTP_TRIVIALLY_ALLOCATOR_BASE_HPP__ - // Already included namespace wjr { -WJR_REGISTER_HAS_TYPE(is_trivially_allocator, - std::declval(), Alloc); -WJR_REGISTER_HAS_TYPE( - is_trivially_allocator_constructible, - std::declval(), Alloc); +namespace memory_pool_details { -WJR_REGISTER_HAS_TYPE(is_trivially_allocator_destructible, - std::declval(), - Alloc); +static constexpr uint8_t __ctz_table[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +}; -/// @private -template -struct __is_trivially_allocator_impl : std::false_type {}; +} // namespace memory_pool_details -/// @private -template -struct __is_trivially_allocator_impl< - Alloc, std::enable_if_t>> - : Alloc::is_trivially_allocator {}; +class __default_alloc_template__ { +private: + union obj { + union obj *free_list_link; + char client_data[1]; + }; -/** - * @brief Default construct, destruct allocator. - * - * @details If `Alloc::is_trivially_allocator` is not defined or - * `Alloc::is_trivially_allocator` is `std::false_type`, derive from `std::false_type`. \n - * If is_trivially_allocator_v is true, then `construct_at_using_allocator` and - * `destroy_at_using_allocator` are same as `construct_at` and `destroy_at`. - * - */ -template -struct is_trivially_allocator : __is_trivially_allocator_impl {}; + struct malloc_chunk { + struct __list_node : list_node<> {}; -template -struct is_trivially_allocator> : std::true_type {}; + malloc_chunk() noexcept { init(&head); } + ~malloc_chunk() noexcept { + for (auto iter = head.begin(); iter != head.end();) { + auto now = iter++; + auto node = static_cast<__list_node *>(&*now); + free(node); + } + } -template -inline constexpr bool is_trivially_allocator_v = is_trivially_allocator::value; + WJR_MALLOC void *allocate(size_t n) noexcept { + __list_node *ptr = (__list_node *)malloc(n + sizeof(__list_node)); + push_back(&head, ptr); + return (char *)(ptr) + sizeof(__list_node); + } -/// @private -template -struct __is_trivially_allocator_constructible_impl : std::false_type {}; + void deallocate(void *ptr) noexcept { + auto node = (__list_node *)((char *)(ptr) - sizeof(__list_node)); + remove_uninit(node); + free(node); + } -/// @private -template -struct __is_trivially_allocator_constructible_impl< - Alloc, std::enable_if_t>> - : Alloc::is_trivially_allocator_constructible {}; + __list_node head; + }; -template -struct is_trivially_allocator_constructible - : std::disjunction<__is_trivially_allocator_constructible_impl, - is_trivially_allocator> {}; + WJR_CONST static constexpr size_t __round_up(size_t bytes) noexcept { + return (((bytes) + 2048 - 1) & ~(2048 - 1)); + } -template -inline constexpr bool is_trivially_allocator_constructible_v = - is_trivially_allocator_constructible::value; + WJR_CONST static constexpr uint8_t __get_index(uint16_t bytes) noexcept { + if (bytes <= 256) { + return memory_pool_details::__ctz_table[(bytes - 1) >> 3]; + } -/// @private -template -struct __is_trivially_allocator_destructible_impl : std::false_type {}; + return memory_pool_details::__ctz_table[(bytes - 1) >> 9] + 6; + } -/// @private -template -struct __is_trivially_allocator_destructible_impl< - Alloc, std::enable_if_t>> - : Alloc::is_trivially_allocator_destructible {}; + WJR_CONST static constexpr uint16_t __get_size(uint8_t idx) noexcept { + return (uint16_t)(1) << (idx + 3); + } -template -struct is_trivially_allocator_destructible - : std::disjunction<__is_trivially_allocator_destructible_impl, - is_trivially_allocator> {}; + static malloc_chunk &get_chunk() noexcept { + static thread_local malloc_chunk chunk; + return chunk; + } -template -inline constexpr bool is_trivially_allocator_destructible_v = - is_trivially_allocator_destructible::value; +public: + struct object { -template -struct trivially_allocator_traits { - using is_trivially = is_trivially_allocator; - using is_trivially_constructible = is_trivially_allocator_constructible; - using is_trivially_destructible = is_trivially_allocator_destructible; -}; + WJR_INTRINSIC_INLINE allocation_result + __small_allocate(size_t n) noexcept { + const size_t idx = __get_index(n); + const size_t size = __get_size(idx); + obj *volatile *my_free_list = free_list + idx; + obj *result = *my_free_list; + if (WJR_LIKELY(result != nullptr)) { + *my_free_list = result->free_list_link; + return {result, size}; + } -} // namespace wjr + return {refill(idx), size}; + } -#endif // WJR_CRTP_TRIVIALLY_ALLOCATOR_BASE_HPP__ -#ifndef WJR_MATH_CLZ_HPP__ -#define WJR_MATH_CLZ_HPP__ + WJR_INTRINSIC_INLINE void __small_deallocate(void *p, size_t n) noexcept { + obj *q = (obj *)p; + obj *volatile *my_free_list = free_list + __get_index(n); + q->free_list_link = *my_free_list; + *my_free_list = q; + } -// Already included -#ifndef WJR_MATH_POPCOUNT_HPP__ -#define WJR_MATH_POPCOUNT_HPP__ + // n must be > 0 + WJR_INTRINSIC_INLINE allocation_result allocate(size_t n) noexcept { + if (WJR_LIKELY(n <= 16384)) { + return __small_allocate(n); + } -// Already included + return {malloc(n), n}; + } -namespace wjr { + // p must not be 0 + WJR_INTRINSIC_INLINE void deallocate(void *p, size_t n) noexcept { + if (WJR_LIKELY(n <= 16384)) { + return __small_deallocate(p, n); + } -template -WJR_CONST WJR_INTRINSIC_CONSTEXPR int fallback_popcount(T x) noexcept { - constexpr auto nd = std::numeric_limits::digits; - if constexpr (nd < 32) { - return fallback_popcount(static_cast(x)); - } else { - if constexpr (nd == 32) { - x -= (x >> 1) & 0x5555'5555; - x = (x & 0x3333'3333) + ((x >> 2) & 0x3333'3333); - x = (x + (x >> 4)) & 0x0f0f'0f0f; - return (x * 0x0101'0101) >> 24; - } else { - x -= (x >> 1) & 0x5555'5555'5555'5555; - x = (x & 0x3333'3333'3333'3333) + ((x >> 2) & 0x3333'3333'3333'3333); - x = (x + (x >> 4)) & 0x0f0f'0f0f'0f0f'0f0f; - return (x * 0x0101'0101'0101'0101) >> 56; + free(p); } - } -} - -#if WJR_HAS_BUILTIN(__builtin_popcount) -#define WJR_HAS_BUILTIN_POPCOUNT WJR_HAS_DEF -#endif -#if WJR_HAS_BUILTIN(POPCOUNT) + allocation_result chunk_allocate(size_t n) noexcept { + if (WJR_LIKELY(n <= 16384)) { + return __small_allocate(n); + } -template -WJR_CONST WJR_INTRINSIC_INLINE int builtin_popcount(T x) noexcept { - constexpr auto nd = std::numeric_limits::digits; - if constexpr (nd < 32) { - return builtin_popcount(static_cast(x)); - } else { - if constexpr (nd <= std::numeric_limits::digits) { - return __builtin_popcount(x); - } else if constexpr (nd <= std::numeric_limits::digits) { - return __builtin_popcountl(x); - } - if constexpr (nd <= std::numeric_limits::digits) { - return __builtin_popcountll(x); - } else { - static_assert(nd <= 64, "not support yet"); + return {get_chunk().allocate(n), n}; } - } -} -#endif // WJR_HAS_BUILTIN(POPCOUNT) + // p must not be 0 + WJR_INTRINSIC_INLINE void chunk_deallocate(void *p, size_t n) noexcept { + if (WJR_LIKELY(n <= 16384)) { + return __small_deallocate(p, n); + } -template -WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int popcount_impl(T x) noexcept { - if (WJR_BUILTIN_CONSTANT_P(is_zero_or_single_bit(x)) && is_zero_or_single_bit(x)) { - return x != 0; - } + get_chunk().deallocate(p); + } -#if WJR_HAS_BUILTIN(POPCOUNT) - if (is_constant_evaluated() || WJR_BUILTIN_CONSTANT_P(x)) { - return fallback_popcount(x); - } + private: + // Allocates a chunk for nobjs of size "size". nobjs may be reduced + // if it is inconvenient to allocate the requested number. + WJR_MALLOC char *chunk_alloc(uint8_t idx, int &nobjs) noexcept; - return builtin_popcount(x); -#else - return fallback_popcount(x); -#endif -} + // Returns an object of size n, and optionally adds to size n free list. + WJR_MALLOC void *refill(uint8_t idx) noexcept; -template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int popcount(T x) noexcept { - const int ret = popcount_impl(x); - WJR_ASSUME(0 <= ret && ret <= std::numeric_limits::digits); - return ret; -} + obj *volatile free_list[12] = {nullptr}; + char *start_free = nullptr; + char *end_free = nullptr; + size_t heap_size = 0; + }; -} // namespace wjr + static object &get_instance() noexcept { + static thread_local object instance; + return instance; + } -#endif // WJR_MATH_POPCOUNT_HPP__ + // n must be > 0 + static allocation_result allocate(size_t n) noexcept { + return get_instance().allocate(n); + } -namespace wjr { + // p must not be 0 + WJR_INTRINSIC_INLINE static void deallocate(void *p, size_t n) noexcept { + get_instance().deallocate(p, n); + } -template -WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int fallback_clz_impl(T x) noexcept { - constexpr auto nd = std::numeric_limits::digits; + // n must be > 0 + static allocation_result chunk_allocate(size_t n) noexcept { + return get_instance().chunk_allocate(n); + } -#if !(WJR_HAS_BUILTIN(POPCOUNT) && WJR_HAS_SIMD(POPCNT)) - if constexpr (nd >= 32) { -#endif - x |= (x >> 1); - x |= (x >> 2); - x |= (x >> 4); + // p must not be 0 + WJR_INTRINSIC_INLINE static void chunk_deallocate(void *p, size_t n) noexcept { + get_instance().chunk_deallocate(p, n); + } +}; - if constexpr (nd >= 16) { - x |= (x >> 8); - } +template +class memory_pool { +private: + using allocator_type = __default_alloc_template__; - if constexpr (nd >= 32) { - x |= (x >> 16); - } +public: + using value_type = Ty; + using size_type = size_t; + using difference_type = ptrdiff_t; + using propagate_on_container_move_assignment = std::true_type; + using is_always_equal = std::true_type; + using is_trivially_allocator = std::true_type; - if constexpr (nd >= 64) { - x |= (x >> 32); - } -#if !(WJR_HAS_BUILTIN(POPCOUNT) && WJR_HAS_SIMD(POPCNT)) - } -#endif + template + struct rebind { + using other = memory_pool; + }; -#if WJR_HAS_BUILTIN(POPCOUNT) && WJR_HAS_SIMD(POPCNT) - return popcount(~x); -#else - if constexpr (nd < 32) { - return fallback_clz_impl(static_cast(x)) - (32 - nd); - } else { - ++x; + constexpr memory_pool() noexcept = default; + constexpr memory_pool(const memory_pool &) noexcept = default; + template + constexpr memory_pool(const memory_pool &) noexcept {} + ~memory_pool() = default; + memory_pool &operator=(const memory_pool &) noexcept = default; - if constexpr (nd <= 32) { - return math_details::de_bruijn32.getr(x); - } else if constexpr (nd <= 64) { - return math_details::de_bruijn64.getr(x); - } else { - static_assert(nd <= 64, "not support yet"); - } + WJR_NODISCARD WJR_CONSTEXPR20 allocation_result + allocate_at_least(size_type n) const noexcept { + const auto ret = allocator_type::allocate(n * sizeof(Ty)); + return {static_cast(ret.ptr), ret.count}; } -#endif -} -template -WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int fallback_clz(T x) noexcept { - return fallback_clz_impl(x); -} + WJR_NODISCARD WJR_CONSTEXPR20 allocation_result + chunk_allocate_at_least(size_type n) const noexcept { + const auto ret = allocator_type::chunk_allocate(n * sizeof(Ty)); + return {static_cast(ret.ptr), ret.count}; + } -#if WJR_HAS_BUILTIN(__builtin_clz) -#define WJR_HAS_BUILTIN_CLZ WJR_HAS_DEF -#endif + WJR_NODISCARD WJR_CONSTEXPR20 WJR_MALLOC Ty *allocate(size_type n) const noexcept { + return allocate_at_least(n).ptr; + } -#if WJR_HAS_BUILTIN(CLZ) + WJR_CONSTEXPR20 void deallocate(Ty *ptr, size_type n) const noexcept { + return allocator_type::deallocate(static_cast(ptr), sizeof(Ty) * n); + } -template -WJR_CONST WJR_INTRINSIC_INLINE int builtin_clz_impl(T x) noexcept { - constexpr auto nd = std::numeric_limits::digits; + /** + * @details Allocate memory, don't need to deallocate it until the thread exits. \n + * Automatically deallocate memory when the thread exits. \n + * Used in thread_local memory pool that only needs to allocate memory once and \n + * deallocate it when the thread exits. \n + * + */ + WJR_NODISCARD WJR_CONSTEXPR20 WJR_MALLOC Ty * + chunk_allocate(size_type n) const noexcept { + return chunk_allocate_at_least(n).ptr; + } - if constexpr (nd < 32) { - return builtin_clz_impl(static_cast(x)) - (32 - nd); - } else { - if constexpr (nd <= std::numeric_limits::digits) { - constexpr auto delta = std::numeric_limits::digits - nd; - return __builtin_clz(static_cast(x)) - delta; - } else if constexpr (nd <= std::numeric_limits::digits) { - constexpr auto delta = std::numeric_limits::digits - nd; - return __builtin_clzl(static_cast(x)) - delta; - } else if constexpr (nd <= std::numeric_limits::digits) { - constexpr auto delta = std::numeric_limits::digits - nd; - return __builtin_clzll(static_cast(x)) - delta; - } else { - static_assert(nd <= 64, "not supported yet"); - } + WJR_CONSTEXPR20 void chunk_deallocate(Ty *ptr, size_type n) const noexcept { + return allocator_type::chunk_deallocate(static_cast(ptr), sizeof(Ty) * n); } + + constexpr size_t max_size() const noexcept { + return static_cast(-1) / sizeof(Ty); + } +}; + +template +constexpr bool operator==(const memory_pool &, const memory_pool &) noexcept { + return true; } -template -WJR_CONST WJR_INTRINSIC_INLINE int builtin_clz(T x) noexcept { - return builtin_clz_impl(x); +template +constexpr bool operator!=(const memory_pool &, const memory_pool &) noexcept { + return false; } -#endif +} // namespace wjr -template -WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int clz_impl(T x) noexcept { -#if WJR_HAS_BUILTIN(CLZ) - if (is_constant_evaluated() || WJR_BUILTIN_CONSTANT_P(x)) { - return fallback_clz(x); - } +#endif // WJR_MEMORY_MEMORY_POOL_HPP__ +#ifndef WJR_MEMORY_TEMPORARY_VALUE_ALLOCATOR_HPP__ +#define WJR_MEMORY_TEMPORARY_VALUE_ALLOCATOR_HPP__ - return builtin_clz(x); -#else - return fallback_clz(x); -#endif -} +#ifndef WJR_MEMORY_UNINITIALIZED_HPP__ +#define WJR_MEMORY_UNINITIALIZED_HPP__ /** - * @brief Fast count leading zeros + * @file uninitialized.hpp + * @brief The header file for uninitialized memory operations using allocator. + * + * @version 0.0.1 + * @date 2024-03-18 * - * @tparam T Must be an unsigned integral type */ -template )> -WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int clz(T x) noexcept { - WJR_ASSERT_ASSUME_L2(x != 0); - const int ret = clz_impl(x); - WJR_ASSUME(0 <= ret && ret < std::numeric_limits::digits); - return ret; -} -} // namespace wjr +// Already included +#ifndef WJR_CRTP_TRIVIALLY_ALLOCATOR_BASE_HPP__ +#define WJR_CRTP_TRIVIALLY_ALLOCATOR_BASE_HPP__ -#endif // WJR_MATH_CLZ_HPP__ // Already included namespace wjr { -namespace memory_pool_details { +WJR_REGISTER_HAS_TYPE(is_trivially_allocator, + std::declval(), Alloc); +WJR_REGISTER_HAS_TYPE( + is_trivially_allocator_constructible, + std::declval(), Alloc); -static constexpr uint8_t __ctz_table[32] = { - 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, -}; +WJR_REGISTER_HAS_TYPE(is_trivially_allocator_destructible, + std::declval(), + Alloc); -} // namespace memory_pool_details +/// @private +template +struct __is_trivially_allocator_impl : std::false_type {}; -class __default_alloc_template__ { -private: - union obj { - union obj *free_list_link; - char client_data[1]; - }; +/// @private +template +struct __is_trivially_allocator_impl< + Alloc, std::enable_if_t>> + : Alloc::is_trivially_allocator {}; - struct __list_node : list_node<> {}; +/** + * @brief Default construct, destruct allocator. + * + * @details If `Alloc::is_trivially_allocator` is not defined or + * `Alloc::is_trivially_allocator` is `std::false_type`, derive from `std::false_type`. \n + * If is_trivially_allocator_v is true, then `construct_at_using_allocator` and + * `destroy_at_using_allocator` are same as `construct_at` and `destroy_at`. + * + */ +template +struct is_trivially_allocator : __is_trivially_allocator_impl {}; - struct malloc_chunk { +template +struct is_trivially_allocator> : std::true_type {}; - malloc_chunk() noexcept { init(&head); } - ~malloc_chunk() noexcept { - for (auto iter = head.begin(); iter != head.end();) { - auto now = iter++; - auto node = static_cast<__list_node *>(&*now); - free(node); - } - } +template +inline constexpr bool is_trivially_allocator_v = is_trivially_allocator::value; - WJR_MALLOC void *allocate(size_t n) noexcept { - __list_node *ptr = (__list_node *)malloc(n + sizeof(__list_node)); - push_back(&head, ptr); - return (char *)(ptr) + sizeof(__list_node); - } - - void deallocate(void *ptr) noexcept { - auto node = (__list_node *)((char *)(ptr) - sizeof(__list_node)); - remove_uninit(node); - free(node); - } - - __list_node head; - }; - - static WJR_INTRINSIC_CONSTEXPR WJR_CONST size_t __round_up(size_t bytes) noexcept { - return (((bytes) + 2048 - 1) & ~(2048 - 1)); - } - - static WJR_INTRINSIC_CONSTEXPR WJR_CONST uint8_t - __get_index(uint16_t bytes) noexcept { - if (bytes <= 256) { - return memory_pool_details::__ctz_table[(bytes - 1) >> 3]; - } - - return memory_pool_details::__ctz_table[(bytes - 1) >> 9] + 6; - } - - static WJR_INTRINSIC_CONSTEXPR WJR_CONST uint16_t __get_size(uint8_t idx) noexcept { - return (uint16_t)(1) << (idx + 3); - } - - static malloc_chunk &get_chunk() noexcept { - static thread_local malloc_chunk chunk; - return chunk; - } - -public: - struct object { - - WJR_INTRINSIC_INLINE allocation_result - __small_allocate(size_t n) noexcept { - const size_t idx = __get_index(n); - const size_t size = __get_size(idx); - obj *volatile *my_free_list = free_list + idx; - obj *result = *my_free_list; - if (WJR_LIKELY(result != nullptr)) { - *my_free_list = result->free_list_link; - return {result, size}; - } - - return {refill(idx), size}; - } - - WJR_INTRINSIC_INLINE void __small_deallocate(void *p, size_t n) noexcept { - obj *q = (obj *)p; - obj *volatile *my_free_list = free_list + __get_index(n); - q->free_list_link = *my_free_list; - *my_free_list = q; - } - - // n must be > 0 - allocation_result allocate(size_t n) noexcept { - if (WJR_LIKELY(n <= 16384)) { - return __small_allocate(n); - } - - return {malloc(n), n}; - } - - // p must not be 0 - WJR_INTRINSIC_INLINE void deallocate(void *p, size_t n) noexcept { - if (WJR_LIKELY(n <= 16384)) { - return __small_deallocate(p, n); - } - - free(p); - } - - allocation_result chunk_allocate(size_t n) noexcept { - if (WJR_LIKELY(n <= 16384)) { - return __small_allocate(n); - } - - return {get_chunk().allocate(n), n}; - } - - // p must not be 0 - WJR_INTRINSIC_INLINE void chunk_deallocate(void *p, size_t n) noexcept { - if (WJR_LIKELY(n <= 16384)) { - return __small_deallocate(p, n); - } - - get_chunk().deallocate(p); - } - - private: - // Allocates a chunk for nobjs of size "size". nobjs may be reduced - // if it is inconvenient to allocate the requested number. - WJR_MALLOC char *chunk_alloc(uint8_t idx, int &nobjs) noexcept; - - // Returns an object of size n, and optionally adds to size n free list. - WJR_MALLOC void *refill(uint8_t idx) noexcept; - - obj *volatile free_list[12] = {nullptr}; - char *start_free = nullptr; - char *end_free = nullptr; - size_t heap_size = 0; - }; - - static object &get_instance() noexcept { - static thread_local object instance; - return instance; - } - - // n must be > 0 - static allocation_result allocate(size_t n) noexcept { - return get_instance().allocate(n); - } - - // p must not be 0 - WJR_INTRINSIC_INLINE static void deallocate(void *p, size_t n) noexcept { - get_instance().deallocate(p, n); - } - - // n must be > 0 - static allocation_result chunk_allocate(size_t n) noexcept { - return get_instance().chunk_allocate(n); - } - - // p must not be 0 - WJR_INTRINSIC_INLINE static void chunk_deallocate(void *p, size_t n) noexcept { - get_instance().chunk_deallocate(p, n); - } -}; - -template -class memory_pool { -private: - using allocator_type = __default_alloc_template__; - -public: - using value_type = Ty; - using size_type = size_t; - using difference_type = ptrdiff_t; - using propagate_on_container_move_assignment = std::true_type; - using is_always_equal = std::true_type; - using is_trivially_allocator = std::true_type; - - template - struct rebind { - using other = memory_pool; - }; +/// @private +template +struct __is_trivially_allocator_constructible_impl : std::false_type {}; - constexpr memory_pool() noexcept = default; - constexpr memory_pool(const memory_pool &) noexcept = default; - template - constexpr memory_pool(const memory_pool &) noexcept {} - ~memory_pool() = default; - memory_pool &operator=(const memory_pool &) noexcept = default; +/// @private +template +struct __is_trivially_allocator_constructible_impl< + Alloc, std::enable_if_t>> + : Alloc::is_trivially_allocator_constructible {}; - WJR_NODISCARD WJR_CONSTEXPR20 allocation_result - allocate_at_least(size_type n) const noexcept { - const auto ret = allocator_type::allocate(n * sizeof(Ty)); - return {static_cast(ret.ptr), ret.count}; - } +template +struct is_trivially_allocator_constructible + : std::disjunction<__is_trivially_allocator_constructible_impl, + is_trivially_allocator> {}; - WJR_NODISCARD WJR_CONSTEXPR20 allocation_result - chunk_allocate_at_least(size_type n) const noexcept { - const auto ret = allocator_type::chunk_allocate(n * sizeof(Ty)); - return {static_cast(ret.ptr), ret.count}; - } +template +inline constexpr bool is_trivially_allocator_constructible_v = + is_trivially_allocator_constructible::value; - WJR_NODISCARD WJR_CONSTEXPR20 WJR_MALLOC Ty *allocate(size_type n) const noexcept { - return allocate_at_least(n).ptr; - } +/// @private +template +struct __is_trivially_allocator_destructible_impl : std::false_type {}; - WJR_CONSTEXPR20 void deallocate(Ty *ptr, size_type n) const noexcept { - return allocator_type::deallocate(static_cast(ptr), sizeof(Ty) * n); - } +/// @private +template +struct __is_trivially_allocator_destructible_impl< + Alloc, std::enable_if_t>> + : Alloc::is_trivially_allocator_destructible {}; - /** - * @details Allocate memory, don't need to deallocate it until the thread exits. \n - * Automatically deallocate memory when the thread exits. \n - * Used in thread_local memory pool that only needs to allocate memory once and \n - * deallocate it when the thread exits. \n - * - */ - WJR_NODISCARD WJR_CONSTEXPR20 WJR_MALLOC Ty * - chunk_allocate(size_type n) const noexcept { - return chunk_allocate_at_least(n).ptr; - } +template +struct is_trivially_allocator_destructible + : std::disjunction<__is_trivially_allocator_destructible_impl, + is_trivially_allocator> {}; - WJR_CONSTEXPR20 void chunk_deallocate(Ty *ptr, size_type n) const noexcept { - return allocator_type::chunk_deallocate(static_cast(ptr), sizeof(Ty) * n); - } +template +inline constexpr bool is_trivially_allocator_destructible_v = + is_trivially_allocator_destructible::value; - constexpr size_t max_size() const noexcept { - return static_cast(-1) / sizeof(Ty); - } +template +struct trivially_allocator_traits { + using is_trivially = is_trivially_allocator; + using is_trivially_constructible = is_trivially_allocator_constructible; + using is_trivially_destructible = is_trivially_allocator_destructible; }; -template -constexpr bool operator==(const memory_pool &, const memory_pool &) noexcept { - return true; -} - -template -constexpr bool operator!=(const memory_pool &, const memory_pool &) noexcept { - return false; -} - } // namespace wjr -#endif // WJR_MEMORY_MEMORY_POOL_HPP__ -#ifndef WJR_MEMORY_TEMPORARY_VALUE_ALLOCATOR_HPP__ -#define WJR_MEMORY_TEMPORARY_VALUE_ALLOCATOR_HPP__ - -#ifndef WJR_MEMORY_UNINITIALIZED_HPP__ -#define WJR_MEMORY_UNINITIALIZED_HPP__ - -/** - * @file uninitialized.hpp - * @brief The header file for uninitialized memory operations using allocator. - * - * @version 0.0.1 - * @date 2024-03-18 - * - */ - -// Already included -// Already included +#endif // WJR_CRTP_TRIVIALLY_ALLOCATOR_BASE_HPP__ // Already included namespace wjr { @@ -10618,9 +10368,254 @@ struct container_traits> #ifndef WJR_MATH_BIT_HPP__ #define WJR_MATH_BIT_HPP__ -// Already included -#ifndef WJR_MATH_CTZ_HPP__ -#define WJR_MATH_CTZ_HPP__ +#ifndef WJR_MATH_CLZ_HPP__ +#define WJR_MATH_CLZ_HPP__ + +// Already included +#ifndef WJR_MATH_POPCOUNT_HPP__ +#define WJR_MATH_POPCOUNT_HPP__ + +// Already included + +namespace wjr { + +template +WJR_CONST WJR_INTRINSIC_CONSTEXPR int fallback_popcount(T x) noexcept { + constexpr auto nd = std::numeric_limits::digits; + if constexpr (nd < 32) { + return fallback_popcount(static_cast(x)); + } else { + if constexpr (nd == 32) { + x -= (x >> 1) & 0x5555'5555; + x = (x & 0x3333'3333) + ((x >> 2) & 0x3333'3333); + x = (x + (x >> 4)) & 0x0f0f'0f0f; + return (x * 0x0101'0101) >> 24; + } else { + x -= (x >> 1) & 0x5555'5555'5555'5555; + x = (x & 0x3333'3333'3333'3333) + ((x >> 2) & 0x3333'3333'3333'3333); + x = (x + (x >> 4)) & 0x0f0f'0f0f'0f0f'0f0f; + return (x * 0x0101'0101'0101'0101) >> 56; + } + } +} + +#if WJR_HAS_BUILTIN(__builtin_popcount) +#define WJR_HAS_BUILTIN_POPCOUNT WJR_HAS_DEF +#endif + +#if WJR_HAS_BUILTIN(POPCOUNT) + +template +WJR_CONST WJR_INTRINSIC_INLINE int builtin_popcount(T x) noexcept { + constexpr auto nd = std::numeric_limits::digits; + if constexpr (nd < 32) { + return builtin_popcount(static_cast(x)); + } else { + if constexpr (nd <= std::numeric_limits::digits) { + return __builtin_popcount(x); + } else if constexpr (nd <= std::numeric_limits::digits) { + return __builtin_popcountl(x); + } + if constexpr (nd <= std::numeric_limits::digits) { + return __builtin_popcountll(x); + } else { + static_assert(nd <= 64, "not support yet"); + } + } +} + +#endif // WJR_HAS_BUILTIN(POPCOUNT) + +template +WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int popcount_impl(T x) noexcept { + if (WJR_BUILTIN_CONSTANT_P(is_zero_or_single_bit(x)) && is_zero_or_single_bit(x)) { + return x != 0; + } + +#if WJR_HAS_BUILTIN(POPCOUNT) + if (is_constant_evaluated() || WJR_BUILTIN_CONSTANT_P(x)) { + return fallback_popcount(x); + } + + return builtin_popcount(x); +#else + return fallback_popcount(x); +#endif +} + +template )> +WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int popcount(T x) noexcept { + const int ret = popcount_impl(x); + WJR_ASSUME(0 <= ret && ret <= std::numeric_limits::digits); + return ret; +} + +} // namespace wjr + +#endif // WJR_MATH_POPCOUNT_HPP__ + +namespace wjr { + +template +WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int fallback_clz_impl(T x) noexcept { + constexpr auto nd = std::numeric_limits::digits; + +#if !(WJR_HAS_BUILTIN(POPCOUNT) && WJR_HAS_SIMD(POPCNT)) + if constexpr (nd >= 32) { +#endif + x |= (x >> 1); + x |= (x >> 2); + x |= (x >> 4); + + if constexpr (nd >= 16) { + x |= (x >> 8); + } + + if constexpr (nd >= 32) { + x |= (x >> 16); + } + + if constexpr (nd >= 64) { + x |= (x >> 32); + } +#if !(WJR_HAS_BUILTIN(POPCOUNT) && WJR_HAS_SIMD(POPCNT)) + } +#endif + +#if WJR_HAS_BUILTIN(POPCOUNT) && WJR_HAS_SIMD(POPCNT) + return popcount(~x); +#else + if constexpr (nd < 32) { + return fallback_clz_impl(static_cast(x)) - (32 - nd); + } else { + ++x; + + if constexpr (nd <= 32) { + return math_details::de_bruijn32.getr(x); + } else if constexpr (nd <= 64) { + return math_details::de_bruijn64.getr(x); + } else { + static_assert(nd <= 64, "not support yet"); + } + } +#endif +} + +template +WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int fallback_clz(T x) noexcept { + return fallback_clz_impl(x); +} + +#if WJR_HAS_BUILTIN(__builtin_clz) +#define WJR_HAS_BUILTIN_CLZ WJR_HAS_DEF +#endif + +#if WJR_HAS_BUILTIN(CLZ) + +template +WJR_CONST WJR_INTRINSIC_INLINE int builtin_clz_impl(T x) noexcept { + constexpr auto nd = std::numeric_limits::digits; + + if constexpr (nd < 32) { + return builtin_clz_impl(static_cast(x)) - (32 - nd); + } else { + if constexpr (nd <= std::numeric_limits::digits) { + constexpr auto delta = std::numeric_limits::digits - nd; + return __builtin_clz(static_cast(x)) - delta; + } else if constexpr (nd <= std::numeric_limits::digits) { + constexpr auto delta = std::numeric_limits::digits - nd; + return __builtin_clzl(static_cast(x)) - delta; + } else if constexpr (nd <= std::numeric_limits::digits) { + constexpr auto delta = std::numeric_limits::digits - nd; + return __builtin_clzll(static_cast(x)) - delta; + } else { + static_assert(nd <= 64, "not supported yet"); + } + } +} + +template +WJR_CONST WJR_INTRINSIC_INLINE int builtin_clz(T x) noexcept { + return builtin_clz_impl(x); +} + +#endif + +#if !WJR_HAS_BUILTIN(CLZ) + +#if defined(WJR_MSVC) +#define WJR_HAS_BUILTIN_MSVC_CLZ WJR_HAS_DEF +#endif + +#if WJR_HAS_BUILTIN(MSVC_CLZ) + +template +WJR_CONST WJR_INTRINSIC_INLINE int builtin_msvc_clz_impl(T x) noexcept { + constexpr auto nd = std::numeric_limits::digits; + + if constexpr (nd < 32) { + return builtin_msvc_clz_impl(static_cast(x)) - (32 - nd); + } else { + if constexpr (nd == 32) { + unsigned long result; + if (_BitScanReverse(&result, x)) { + return 31 - result; + } + return 0; + } else { + unsigned long result; + if (_BitScanReverse64(&result, x)) { + return 63 - result; + } + return 0; + } + } +} + +template +WJR_CONST WJR_INTRINSIC_INLINE int builtin_msvc_clz(T x) noexcept { + return builtin_msvc_clz_impl(x); +} + +#endif + +#endif + +template +WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int clz_impl(T x) noexcept { +#if WJR_HAS_BUILTIN(CLZ) || WJR_HAS_BUILTIN(MSVC_CLZ) + if (is_constant_evaluated() || WJR_BUILTIN_CONSTANT_P(x)) { + return fallback_clz(x); + } + +#if WJR_HAS_BUILTIN(CLZ) + return builtin_clz(x); +#else + return builtin_msvc_clz(x); +#endif +#else + return fallback_clz(x); +#endif +} + +/** + * @brief Fast count leading zeros + * + * @tparam T Must be an unsigned integral type + */ +template )> +WJR_CONST WJR_INTRINSIC_CONSTEXPR_E int clz(T x) noexcept { + WJR_ASSERT_ASSUME_L2(x != 0); + const int ret = clz_impl(x); + WJR_ASSUME(0 <= ret && ret < std::numeric_limits::digits); + return ret; +} + +} // namespace wjr + +#endif // WJR_MATH_CLZ_HPP__ +#ifndef WJR_MATH_CTZ_HPP__ +#define WJR_MATH_CTZ_HPP__ // Already included // Already included @@ -18835,3372 +18830,3035 @@ __greater_equal_128(uint64_t lo0, uint64_t hi0, uint64_t lo1, uint64_t hi1) noex #include -#ifndef WJR_TUPLE_HPP__ -#define WJR_TUPLE_HPP__ - -#include - // Already included +#ifndef WJR_MATH_UINT128_T_HPP__ +#define WJR_MATH_UINT128_T_HPP__ + // Already included namespace wjr { -template -class tuple; - -} // namespace wjr +/** + * @brief temporary uint128_t for divide 128 + * + * @todo implement more functions + * + */ +struct uint128_t { + WJR_CONSTEXPR20 uint128_t() noexcept = default; + constexpr uint128_t(const uint128_t &) noexcept = default; + constexpr uint128_t(uint128_t &&) noexcept = default; + constexpr uint128_t &operator=(const uint128_t &) noexcept = default; + constexpr uint128_t &operator=(uint128_t &&) noexcept = default; + ~uint128_t() noexcept = default; -namespace std { + constexpr uint128_t(uint64_t lo, uint64_t hi) noexcept : lo(lo), hi(hi) {} -template -struct tuple_size> : std::integral_constant { -}; + template + constexpr uint64_t &get() & noexcept { + if constexpr (I == 0) { + return lo; + } else { + return hi; + } + } -template -struct tuple_element> { - using type = wjr::tp_at_t, I>; -}; + template + constexpr const uint64_t &get() const & noexcept { + if constexpr (I == 0) { + return lo; + } else { + return hi; + } + } -template ...>)> -constexpr void swap(wjr::tuple &lhs, - wjr::tuple &rhs) noexcept(noexcept(lhs.swap(rhs))); + template + constexpr uint64_t &&get() && noexcept { + if constexpr (I == 0) { + return std::move(lo); + } else { + return std::move(hi); + } + } -template -constexpr tuple_element_t> &get(wjr::tuple &t) noexcept; + template + constexpr const uint64_t &&get() const && noexcept { + if constexpr (I == 0) { + return std::move(lo); + } else { + return std::move(hi); + } + } -template -constexpr tuple_element_t> & -get(const wjr::tuple &t) noexcept; + uint64_t lo; + uint64_t hi; +}; -template -constexpr tuple_element_t> &&get(wjr::tuple &&t) noexcept; +} // namespace wjr -template -constexpr tuple_element_t> && -get(const wjr::tuple &&t) noexcept; +namespace std { -template -constexpr T &get(wjr::tuple &t) noexcept; +template <> +struct tuple_size : std::integral_constant {}; -template -constexpr T &get(const wjr::tuple &t) noexcept; +template +struct tuple_element { + using type = uint64_t; +}; -template -constexpr T &&get(wjr::tuple &&t) noexcept; +template +WJR_NODISCARD constexpr uint64_t &get(wjr::uint128_t &u) noexcept { + return u.get(); +} -template -constexpr T &&get(const wjr::tuple &&t) noexcept; +template +WJR_NODISCARD constexpr const uint64_t &get(const wjr::uint128_t &u) noexcept { + return u.get(); +} + +template +WJR_NODISCARD constexpr uint64_t &&get(wjr::uint128_t &&u) noexcept { + return std::move(u).get(); +} + +template +WJR_NODISCARD constexpr const uint64_t &&get(const wjr::uint128_t &&u) noexcept { + return std::move(u).get(); +} } // namespace std -namespace wjr { +#endif // WJR_MATH_UINT128_T_HPP__ -template -class tuple_impl; +namespace wjr { -template -class WJR_EMPTY_BASES tuple_impl, Args...> - : capture_leaf>, - enable_base_identity_t< - Indexs, tuple_impl, Args...>>>..., - enable_special_members_of_args_base< - tuple_impl, Args...>, - capture_leaf< - std::tuple_element_t>, - enable_base_identity_t< - Indexs, tuple_impl, Args...>>>...> { - using Sequence = std::index_sequence; +template +class div2by1_divider; - template - using Mybase = capture_leaf>, - enable_base_identity_t>; +template +class div3by2_divider; - using Mybase2 = enable_special_members_of_args_base< - tuple_impl, Args...>, - capture_leaf>, - enable_base_identity_t>...>; +template +class divexact1_divider; - constexpr static size_t Size = sizeof...(Args); +WJR_INLINE_CONSTEXPR20 uint64_t +div128by64to64(uint64_t &rem, uint64_t lo, uint64_t hi, + const div2by1_divider ÷r) noexcept; -public: - template , - std::is_default_constructible>...>)> - constexpr tuple_impl() noexcept( - std::conjunction_v...>) - : Mybase2(enable_default_constructor) {} +WJR_INLINE_CONSTEXPR20 uint64_t div128by64to64(uint64_t &rem, uint64_t lo, uint64_t hi, + uint64_t div) noexcept; - template , _Args>...>)> - constexpr tuple_impl(std::index_sequence<_Indexs...>, _Args &&...args) noexcept( - std::conjunction_v...>) - : Mybase<_Indexs>(std::forward<_Args>(args))..., - Mybase2(enable_default_constructor) {} +inline uint128_t div128by64to128(uint64_t &rem, uint64_t lo, uint64_t hi, + const div2by1_divider ÷r) noexcept; - template - constexpr auto &get() & noexcept { - return Mybase::get(); - } +inline uint128_t div128by64to128(uint64_t &rem, uint64_t lo, uint64_t hi, + uint64_t div) noexcept; - template - constexpr const auto &get() const & noexcept { - return Mybase::get(); - } +WJR_INTRINSIC_CONSTEXPR20 void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t *src, + size_t n, + const div2by1_divider &div) noexcept; - template - constexpr auto &&get() && noexcept { - return std::move(Mybase::get()); - } +WJR_INTRINSIC_CONSTEXPR20 void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t *src, + size_t n, uint64_t div) noexcept; - template - constexpr const auto &&get() const && noexcept { - return std::move(Mybase::get()); - } -}; +WJR_INTRINSIC_CONSTEXPR20 void div_qr_2(uint64_t *dst, uint64_t *rem, const uint64_t *src, + size_t n, + const div3by2_divider &div) noexcept; -template -struct __tuple_like; +WJR_INTRINSIC_CONSTEXPR20 void div_qr_2(uint64_t *dst, uint64_t *rem, const uint64_t *src, + size_t n, const uint64_t *div) noexcept; -template