diff --git a/godbolt/wjr.hpp b/godbolt/wjr.hpp index 84495186..c08377a7 100644 --- a/godbolt/wjr.hpp +++ b/godbolt/wjr.hpp @@ -3524,13 +3524,13 @@ constexpr auto to_address(const std::move_iterator &p) noexcept { */ template constexpr decltype(auto) to_contiguous_address(T &&t) noexcept { -#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) if constexpr (is_contiguous_iterator_v>) { return wjr::to_address(std::forward(t)); } else { #endif return std::forward(t); -#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) } #endif } @@ -7395,6 +7395,7 @@ WJR_INTRINSIC_CONSTEXPR20 T prefix_xor(T x) noexcept { } // namespace wjr #endif // WJR_MATH_PREFIX_XOR_HPP__ + #ifndef WJR_BIGINTEGER_HPP__ #define WJR_BIGINTEGER_HPP__ @@ -10509,7 +10510,7 @@ class contiguous_const_iterator_adapter { std::is_nothrow_move_assignable_v<__pointer>) = default; WJR_NODISCARD WJR_PURE WJR_CONSTEXPR20 pointer operator->() const noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container != nullptr, "Can't dereference an value-initialized iterator."); WJR_ASSERT_L0(m_ptr != nullptr, "Can't dereference an invalid iterator."); @@ -10524,7 +10525,7 @@ class contiguous_const_iterator_adapter { } WJR_CONSTEXPR20 contiguous_const_iterator_adapter &operator++() noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container != nullptr, "Can't increment an value-initialized iterator."); WJR_ASSERT_L0(m_ptr != nullptr, "Can't increment an invalid iterator."); @@ -10542,7 +10543,7 @@ class contiguous_const_iterator_adapter { } WJR_CONSTEXPR20 contiguous_const_iterator_adapter &operator--() noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container != nullptr, "Can't decrement an value-initialized iterator."); WJR_ASSERT_L0(m_ptr != nullptr, "Can't decrement an invalid iterator."); @@ -10636,7 +10637,7 @@ class contiguous_const_iterator_adapter { WJR_CONSTEXPR20 void check_same_container(WJR_MAYBE_UNUSED const Container *cont) const noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container == cont, "Can't compare iterators from different containers."); #else @@ -10645,7 +10646,7 @@ class contiguous_const_iterator_adapter { } private: -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) /// @private WJR_CONSTEXPR20 void __set_container(const Container *container) noexcept { m_container = container; @@ -10700,7 +10701,7 @@ class contiguous_const_iterator_adapter { #endif __pointer m_ptr; -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) const Container *m_container; #endif }; @@ -10809,7 +10810,7 @@ struct pointer_traits> using difference_type = typename pointer::difference_type; WJR_NODISCARD constexpr static element_type *to_address(const pointer &ptr) noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) const auto cont = ptr.m_container; if (cont) { WJR_ASSERT_L0(ptr.m_ptr >= ptr.__begin() && ptr.m_ptr <= ptr.__end(), @@ -10830,7 +10831,7 @@ struct pointer_traits> { using difference_type = typename pointer::difference_type; WJR_NODISCARD constexpr static element_type *to_address(const pointer &ptr) noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) const auto cont = ptr.m_container; if (cont) { WJR_ASSERT_L0(ptr.m_ptr >= ptr.__begin() && ptr.m_ptr <= ptr.__end(), @@ -23332,9 +23333,8 @@ WJR_INTRINSIC_INLINE void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t dst[n - 1] = div_qr_1_impl(dst, rem, src, n, div); } -WJR_INTRINSIC_CONSTEXPR20 void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t *src, - size_t n, - type_identity_t div) noexcept { +WJR_INTRINSIC_INLINE void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t *src, + size_t n, type_identity_t div) noexcept { WJR_ASSERT_ASSUME(n >= 1); WJR_ASSERT_ASSUME(div != 0); @@ -25122,11 +25122,14 @@ inline constexpr bool __is_fast_convert_iterator_v = __is_fast_convert_iterator::value; template -struct __is_valid_converter - : std::disjunction, - is_nonbool_integral>, - std::conjunction, - is_nonbool_unsigned_integral>> {}; +struct __is_valid_converter : std::false_type {}; + +template +struct __is_valid_converter : is_nonbool_integral {}; + +template +struct __is_valid_converter + : is_nonbool_unsigned_integral {}; template inline constexpr bool __is_valid_converter_v = @@ -27453,8 +27456,7 @@ class __unsigned_from_chars_unchecked_fn<10> { } }; -template )> +template void __fast_from_chars_unchecked_impl(const uint8_t *first, const uint8_t *last, Value &val, IBase ibase, Converter conv) noexcept { int sign = 0; @@ -27502,8 +27504,7 @@ void __fast_from_chars_unchecked_impl(const uint8_t *first, const uint8_t *last, } } -template )> +template void __from_chars_unchecked_impl(Iter first, Iter last, Value &val, IBase ibase, Converter conv) noexcept { const auto __first = reinterpret_cast(wjr::to_address(first)); @@ -27676,8 +27677,7 @@ struct __unsigned_from_chars_fn<10> { } }; -template )> +template from_chars_result __fast_from_chars_impl(const uint8_t *first, const uint8_t *last, Value &val, IBase ibase, Converter conv) noexcept { @@ -27746,8 +27746,7 @@ __fast_from_chars_impl(const uint8_t *first, const uint8_t *last, Value &val, IB return ret; } -template )> +template from_chars_result __from_chars_impl(const char *first, const char *last, Value &val, IBase ibase, Converter conv) noexcept { @@ -31940,4 +31939,2078 @@ std::basic_ostream &operator<<(std::basic_ostream &o #endif -#endif // WJR_BIGINTEGER_HPP__ \ No newline at end of file +#endif // WJR_BIGINTEGER_HPP__ +#ifndef WJR_CONTAINER_GENERIC_BPLUS_TREE_HPP__ +#define WJR_CONTAINER_GENERIC_BPLUS_TREE_HPP__ + +/** + * @file bplus_tree.hpp + * @brief B+ tree implementation. + * + * @details The multiset/multimap/set/map adapter has not been implemented yet. The + * node_size should be set to 16 by default, and optimization has been made for queries + * less than or equal to 16. \n + * After improvement, the number of queries for the i-th query is + * [1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10], the average number of queries + * is 6.56 times. In fact, the probability of querying smaller nodes is slightly greater + * than that of larger nodes, so the actual number of queries will be less. If the + * comparison operation of key_type is more complex, it is not recommended to use B+ tree, + * because the number of queries of B+ tree will be more, thus offsetting the advantages + * of B+ tree. + * + * @todo + * + * @version 0.1 + * @date 2024-05-06 + * + */ + +// Already included +// Already included +#ifndef WJR_CONTAINER_GENERIC_CONTAINER_TRAITS_HPP__ +#define WJR_CONTAINER_GENERIC_CONTAINER_TRAITS_HPP__ + +#include +#include + +// Already included + +namespace wjr { + +/** + * @class container_fn + * @brief The same characteristics and behavior of all allocator containers + * + * @details container must have the following member functions: + * -# auto& __get_allocator() noexcept + * -# void __destroy() noexcept + * -# void __destroy_and_deallocate() noexcept + * -# void __copy_element(const container& other) + * -# void __take_storage(container&& other) + * -# void __move_element(container&& other) + * -# void __swap_storage(container& other) + * + * 1 : is used to manage the allocator of the container. \n + * 2-3 : is used to destroy the container and deallocate the memory. \n + * 4-7 : is used to assign the container data. Shouldn't change the allocator. + * + */ +template +class container_fn { +private: + using allocator_type = Alloc; + using allocator_traits = std::allocator_traits; + using is_always_equal = typename allocator_traits::is_always_equal; + using propagate_on_container_copy_assignment = + typename allocator_traits::propagate_on_container_copy_assignment; + using propagate_on_container_move_assignment = + typename allocator_traits::propagate_on_container_move_assignment; + using propagate_on_container_swap = + typename allocator_traits::propagate_on_container_swap; + +public: + template + WJR_CONSTEXPR20 static void + copy_assign(Container &lhs, const Container &rhs) noexcept( + noexcept(lhs.__copy_element(rhs)) && + !propagate_on_container_copy_assignment::value + ? true + : (noexcept(lhs.__get_allocator() = rhs.__get_allocator()) && + is_always_equal::value + ? true + : noexcept(lhs.__destroy_and_deallocate()))) { + if constexpr (propagate_on_container_copy_assignment::value) { + auto &lhs_allocator = lhs.__get_allocator(); + auto &rhs_allocator = rhs.__get_allocator(); + if constexpr (!is_always_equal::value) { + if (lhs_allocator != rhs_allocator) { + lhs.__destroy_and_deallocate(); + } + } + + lhs_allocator = rhs_allocator; + } + + lhs.__copy_element(rhs); + } + + template + WJR_CONSTEXPR20 static void move_assign(Container &lhs, Container &&rhs) noexcept( + noexcept(lhs.__destroy_and_deallocate()) && noexcept( + lhs.__take_storage(std::move(rhs))) && + std::disjunction_v + ? (!propagate_on_container_move_assignment::value + ? true + : noexcept(lhs.__get_allocator() = std::move(rhs.__get_allocator()))) + : (noexcept(lhs.__destroy()) && noexcept( + lhs.__move_element(std::move(rhs))))) { + if constexpr (std::disjunction_v) { + lhs.__destroy_and_deallocate(); + if constexpr (propagate_on_container_move_assignment::value) { + lhs.__get_allocator() = std::move(rhs.__get_allocator()); + } + lhs.__take_storage(std::move(rhs)); + } else { + if (lhs.__get_allocator() != rhs.__get_allocator()) { + lhs.__destroy(); + lhs.__move_element(std::move(rhs)); + } else { + lhs.__destroy_and_deallocate(); + lhs.__take_storage(std::move(rhs)); + } + } + } + + template + WJR_CONSTEXPR20 static void swap(Container &lhs, Container &rhs) noexcept( + noexcept(lhs.__swap_storage(rhs)) && + !std::conjunction_v> + ? true + : noexcept(std::swap(lhs.__get_allocator(), rhs.__get_allocator()))) { + if constexpr (std::conjunction_v>) { + auto &lhs_allocator = lhs.__get_allocator(); + auto &rhs_allocator = rhs.__get_allocator(); + if (lhs_allocator != rhs_allocator) { + std::swap(lhs_allocator, rhs_allocator); + } + } + + lhs.__swap_storage(rhs); + } +}; + +} // namespace wjr + +#endif // WJR_CONTAINER_GENERIC_CONTAINER_TRAITS_HPP__ +// Already included +// Already included + +#if defined(WJR_X86) +#ifndef WJR_X86_CONTAINER_GENERIC_BPLUS_TREE_HPP__ +#define WJR_X86_CONTAINER_GENERIC_BPLUS_TREE_HPP__ + +#include + +// Already included + +namespace wjr { + +#if WJR_HAS_SIMD(SSE2) +#define WJR_HAS_BUILTIN_BPLUS_TREE_COPY WJR_HAS_DEF +#endif + +#if WJR_HAS_BUILTIN(BPLUS_TREE_COPY) + +template +WJR_INTRINSIC_INLINE void __builtin_bplus_tree_copy_impl(const uint8_t *first, + const uint8_t *last, + uint8_t *dest) noexcept { + const size_t n = last - first; + WJR_ASSERT_ASSUME_L2(n >= Min * size && n <= Max * size); + + if (WJR_UNLIKELY(n == 0)) { + return; + } + + if (WJR_UNLIKELY(n == size)) { + reinterpret_cast *>(dest)[0] = + reinterpret_cast *>(first)[0]; + return; + } + + if constexpr (size <= 1) { + do { + if (n >= 4) { + break; + } + + const auto x0 = read_memory(first, endian::native); + const auto x1 = read_memory(last - 2, endian::native); + write_memory(dest, x0, endian::native); + write_memory(dest + n - 2, x1, endian::native); + return; + } while (0); + } + + if constexpr (size <= 2) { + do { + if (n >= 8) { + break; + } + + const auto x0 = read_memory(first, endian::native); + const auto x1 = read_memory(last - 4, endian::native); + write_memory(dest, x0, endian::native); + write_memory(dest + n - 4, x1, endian::native); + return; + } while (0); + } + + if constexpr (size <= 4) { + do { + if constexpr (size >= 2) { + if (n >= 16) { + break; + } + } + + const auto x0 = read_memory(first, endian::native); + const auto x1 = read_memory(last - 8, endian::native); + write_memory(dest, x0, endian::native); + write_memory(dest + n - 8, x1, endian::native); + return; + } while (0); + } + + if constexpr (size >= 2) { + do { + if constexpr (size >= 4) { + if (n >= 32) { + break; + } + } + + const auto x0 = sse::loadu((__m128i *)first); + const auto x1 = sse::loadu((__m128i *)(last - 16)); + sse::storeu((__m128i *)(dest), x0); + sse::storeu((__m128i *)(dest + n - 16), x1); + return; + } while (0); + } + + if constexpr (size >= 4) { + do { + if constexpr (size >= 8) { + if (n >= 64) { + break; + } + } + +#if WJR_HAS_SIMD(AVX2) + const auto x0 = avx::loadu((__m256i *)first); + const auto x1 = avx::loadu((__m256i *)(last - 32)); + avx::storeu((__m256i *)(dest), x0); + avx::storeu((__m256i *)(dest + n - 32), x1); +#else + const auto x0 = sse::loadu((__m128i *)first); + const auto x1 = sse::loadu((__m128i *)(first + 16)); + const auto x2 = sse::loadu((__m128i *)(last - 32)); + const auto x3 = sse::loadu((__m128i *)(last - 16)); + sse::storeu((__m128i *)(dest), x0); + sse::storeu((__m128i *)(dest + 16), x1); + sse::storeu((__m128i *)(dest + n - 32), x2); + sse::storeu((__m128i *)(dest + n - 16), x3); +#endif + return; + } while (0); + } + + if constexpr (size == 8) { +#if WJR_HAS_SIMD(AVX2) + const auto x0 = avx::loadu((__m256i *)first); + const auto x1 = avx::loadu((__m256i *)(first + 32)); + const auto x2 = avx::loadu((__m256i *)(last - 64)); + const auto x3 = avx::loadu((__m256i *)(last - 32)); + avx::storeu((__m256i *)(dest), x0); + avx::storeu((__m256i *)(dest + 32), x1); + avx::storeu((__m256i *)(dest + n - 64), x2); + avx::storeu((__m256i *)(dest + n - 32), x3); +#else + const auto x0 = sse::loadu((__m128i *)first); + const auto x1 = sse::loadu((__m128i *)(first + 16)); + const auto x2 = sse::loadu((__m128i *)(first + 32)); + const auto x3 = sse::loadu((__m128i *)(first + 48)); + const auto x4 = sse::loadu((__m128i *)(last - 64)); + const auto x5 = sse::loadu((__m128i *)(last - 48)); + const auto x6 = sse::loadu((__m128i *)(last - 32)); + const auto x7 = sse::loadu((__m128i *)(last - 16)); + sse::storeu((__m128i *)(dest), x0); + sse::storeu((__m128i *)(dest + 16), x1); + sse::storeu((__m128i *)(dest + 32), x2); + sse::storeu((__m128i *)(dest + 48), x3); + sse::storeu((__m128i *)(dest + n - 64), x4); + sse::storeu((__m128i *)(dest + n - 48), x5); + sse::storeu((__m128i *)(dest + n - 32), x6); + sse::storeu((__m128i *)(dest + n - 16), x7); +#endif + } +} + +template +WJR_INTRINSIC_INLINE void builtin_bplus_tree_copy(const Other *first, const Other *last, + Other *dest) noexcept { + __builtin_bplus_tree_copy_impl( + reinterpret_cast(first), reinterpret_cast(last), + reinterpret_cast(dest)); +} + +template +WJR_INTRINSIC_INLINE void +__builtin_bplus_tree_copy_backward_impl(const uint8_t *first, const uint8_t *last, + uint8_t *dest) noexcept { + const size_t n = last - first; + WJR_ASSERT_ASSUME_L2(n >= Min * size && n <= Max * size); + + if (WJR_UNLIKELY(n == 0)) { + return; + } + + if (WJR_UNLIKELY(n == size)) { + reinterpret_cast *>(dest)[-1] = + reinterpret_cast *>(first)[0]; + return; + } + + if constexpr (size <= 1) { + do { + if (n >= 4) { + break; + } + + const auto x0 = read_memory(first, endian::native); + const auto x1 = read_memory(last - 2, endian::native); + write_memory(dest - n, x0, endian::native); + write_memory(dest - 2, x1, endian::native); + return; + } while (0); + } + + if constexpr (size <= 2) { + do { + if (n >= 8) { + break; + } + + const auto x0 = read_memory(first, endian::native); + const auto x1 = read_memory(last - 4, endian::native); + write_memory(dest - n, x0, endian::native); + write_memory(dest - 4, x1, endian::native); + return; + } while (0); + } + + if constexpr (size <= 4) { + do { + if constexpr (size >= 2) { + if (n >= 16) { + break; + } + } + + const auto x0 = read_memory(first, endian::native); + const auto x1 = read_memory(last - 8, endian::native); + write_memory(dest - n, x0, endian::native); + write_memory(dest - 8, x1, endian::native); + return; + } while (0); + } + + if constexpr (size >= 2) { + do { + if constexpr (size >= 4) { + if (n >= 32) { + break; + } + } + + const auto x0 = sse::loadu((__m128i *)first); + const auto x1 = sse::loadu((__m128i *)(last - 16)); + sse::storeu((__m128i *)(dest - n), x0); + sse::storeu((__m128i *)(dest - 16), x1); + return; + } while (0); + } + + if constexpr (size >= 4) { + do { + if constexpr (size >= 8) { + if (n >= 64) { + break; + } + } + +#if WJR_HAS_SIMD(AVX2) + const auto x0 = avx::loadu((__m256i *)first); + const auto x1 = avx::loadu((__m256i *)(last - 32)); + avx::storeu((__m256i *)(dest - n), x0); + avx::storeu((__m256i *)(dest - 32), x1); +#else + const auto x0 = sse::loadu((__m128i *)first); + const auto x1 = sse::loadu((__m128i *)(first + 16)); + const auto x2 = sse::loadu((__m128i *)(last - 32)); + const auto x3 = sse::loadu((__m128i *)(last - 16)); + sse::storeu((__m128i *)(dest - n), x0); + sse::storeu((__m128i *)(dest - n + 16), x1); + sse::storeu((__m128i *)(dest - 32), x2); + sse::storeu((__m128i *)(dest - 16), x3); +#endif + return; + } while (0); + } + + if constexpr (size == 8) { +#if WJR_HAS_SIMD(AVX2) + const auto x0 = avx::loadu((__m256i *)first); + const auto x1 = avx::loadu((__m256i *)(first + 32)); + const auto x2 = avx::loadu((__m256i *)(last - 64)); + const auto x3 = avx::loadu((__m256i *)(last - 32)); + avx::storeu((__m256i *)(dest - n), x0); + avx::storeu((__m256i *)(dest - n + 32), x1); + avx::storeu((__m256i *)(dest - 64), x2); + avx::storeu((__m256i *)(dest - 32), x3); +#else + const auto x0 = sse::loadu((__m128i *)first); + const auto x1 = sse::loadu((__m128i *)(first + 16)); + const auto x2 = sse::loadu((__m128i *)(first + 32)); + const auto x3 = sse::loadu((__m128i *)(first + 48)); + const auto x4 = sse::loadu((__m128i *)(last - 64)); + const auto x5 = sse::loadu((__m128i *)(last - 48)); + const auto x6 = sse::loadu((__m128i *)(last - 32)); + const auto x7 = sse::loadu((__m128i *)(last - 16)); + sse::storeu((__m128i *)(dest - n), x0); + sse::storeu((__m128i *)(dest - n + 16), x1); + sse::storeu((__m128i *)(dest - n + 32), x2); + sse::storeu((__m128i *)(dest - n + 48), x3); + sse::storeu((__m128i *)(dest - 64), x4); + sse::storeu((__m128i *)(dest - 48), x5); + sse::storeu((__m128i *)(dest - 32), x6); + sse::storeu((__m128i *)(dest - 16), x7); +#endif + } +} + +template +WJR_INTRINSIC_INLINE void builtin_bplus_tree_copy_backward(const Other *first, + const Other *last, + Other *dest) noexcept { + __builtin_bplus_tree_copy_backward_impl( + reinterpret_cast(first), reinterpret_cast(last), + reinterpret_cast(dest)); +} + +#endif + +} // namespace wjr + +#endif // WJR_X86_CONTAINER_GENERIC_BPLUS_TREE_HPP__ + +#endif + +namespace wjr { + +template +struct bplus_tree_node; + +template +struct bplus_tree_inner_node; + +template +struct bplus_tree_leaf_node; + +namespace bplus_tree_details { + +template +class inline_key { +public: + static_assert(!std::is_const_v, ""); + + using value_type = T; + using reference = std::add_const_t &; + using pointer = std::add_const_t *; + + constexpr inline_key() noexcept = default; + constexpr inline_key(const inline_key &other) noexcept = default; + constexpr inline_key(inline_key &&other) noexcept = default; + constexpr inline_key &operator=(const inline_key &other) noexcept = default; + constexpr inline_key &operator=(inline_key &&other) noexcept = default; + ~inline_key() noexcept = default; + + constexpr inline_key(reference value) noexcept( + std::is_nothrow_constructible_v, reference>) + : m_storage(value) {} + + constexpr reference operator*() const noexcept { return *m_storage; } + constexpr reference get() const noexcept { return m_storage.get(); } + constexpr pointer operator->() const noexcept { return get(); } + +private: + // no need to check + algined_storage m_storage; +}; + +template +class inline_key { +public: + static_assert(!std::is_const_v, ""); + + using value_type = T; + using reference = std::add_const_t &; + using pointer = std::add_const_t *; + + constexpr inline_key() noexcept = default; + constexpr inline_key(const inline_key &other) noexcept = default; + constexpr inline_key(inline_key &&other) noexcept = default; + constexpr inline_key &operator=(const inline_key &other) noexcept = default; + constexpr inline_key &operator=(inline_key &&other) noexcept = default; + ~inline_key() noexcept = default; + + constexpr inline_key(reference value) noexcept : m_ptr(std::addressof(value)) {} + + constexpr reference operator*() const noexcept { return *m_ptr; } + constexpr pointer operator->() const noexcept { return m_ptr; } + constexpr reference get() const noexcept { return *m_ptr; } + +private: + pointer m_ptr; +}; + +template +struct is_possible_inline_key : std::is_trivially_copyable> {}; + +template +inline constexpr bool is_possible_inline_key_v = is_possible_inline_key::value; + +template +WJR_INTRINSIC_INLINE static void copy(Other *first, Other *last, Other *dest) noexcept { +#if WJR_HAS_BUILTIN(BPLUS_TREE_COPY) + if constexpr (Max <= 16 && std::is_trivially_copyable_v) { + builtin_bplus_tree_copy(first, last, dest); + } else { +#endif + (void)std::copy(first, last, dest); +#if WJR_HAS_BUILTIN(BPLUS_TREE_COPY) + } +#endif +} + +template +WJR_INTRINSIC_INLINE static void copy_backward(Other *first, Other *last, + Other *dest) noexcept { +#if WJR_HAS_BUILTIN(BPLUS_TREE_COPY) + if constexpr (Max <= 16 && std::is_trivially_copyable_v) { + builtin_bplus_tree_copy_backward(first, last, dest); + } else { +#endif + (void)std::copy_backward(first, last, dest); +#if WJR_HAS_BUILTIN(BPLUS_TREE_COPY) + } +#endif +} + +} // namespace bplus_tree_details + +template +struct bplus_tree_traits { + using key_type = Key; + using mapped_type = Value; + static constexpr bool is_map = !std::is_same_v; + using value_type = + std::conditional_t, key_type>; + using key_compare = Compare; + + static constexpr size_t node_size = Size; + static constexpr bool is_inline_key = + bplus_tree_details::is_possible_inline_key_v> && + sizeof(key_type) <= 8; + static constexpr bool is_inline_value = + bplus_tree_details::is_possible_inline_key_v> && + sizeof(value_type) <= 8; + + using InlineKey = + bplus_tree_details::inline_key, is_inline_key>; + using InlineValue = std::conditional_t< + is_inline_value, + bplus_tree_details::inline_key, true>, + value_type *>; + + using node_type = bplus_tree_node; + using inner_node_type = bplus_tree_inner_node; + using leaf_node_type = + bplus_tree_leaf_node; + static constexpr bool multi = Multi; + + WJR_INTRINSIC_INLINE static const key_type & + get_key(const value_type &value) noexcept { + if constexpr (is_map) { + return value.first; + } else { + return value; + } + } + +public: + template + WJR_INTRINSIC_INLINE static void copy(Other *first, Other *last, + Other *dest) noexcept { + return bplus_tree_details::copy(first, last, dest); + } + + template + WJR_INTRINSIC_INLINE static void copy_backward(Other *first, Other *last, + Other *dest) noexcept { + return bplus_tree_details::copy_backward(first, last, dest); + } +}; + +template +struct bplus_tree_node { + using key_type = typename Traits::key_type; + using value_type = typename Traits::value_type; + constexpr static size_t node_size = Traits::node_size; + using InlineKey = typename Traits::InlineKey; + using inner_node_type = typename Traits::inner_node_type; + using leaf_node_type = typename Traits::leaf_node_type; + + constexpr inner_node_type *as_inner() noexcept; + constexpr const inner_node_type *as_inner() const noexcept; + + constexpr leaf_node_type *as_leaf() noexcept; + constexpr const leaf_node_type *as_leaf() const noexcept; + + int m_size; + unsigned int m_pos; + bplus_tree_node *m_parent; +}; + +template +struct bplus_tree_inner_node : bplus_tree_node { + using key_type = typename Traits::key_type; + using value_type = typename Traits::value_type; + constexpr static size_t node_size = Traits::node_size; + using InlineKey = typename Traits::InlineKey; + + alignas(16) InlineKey m_keys[node_size]; + alignas(16) bplus_tree_node *m_sons[node_size + 1]; +}; + +template +struct bplus_tree_leaf_node : bplus_tree_node, list_node<> { + using key_type = typename Traits::key_type; + using value_type = typename Traits::value_type; + constexpr static size_t node_size = Traits::node_size; + using InlineKey = typename Traits::InlineKey; + using ListNode = list_node<>; + + const key_type &__get_key(unsigned int pos) const noexcept { return *m_keys[pos]; } + + template + WJR_INTRINSIC_INLINE void __copy(unsigned int start, unsigned int end, + unsigned int dst_start, + bplus_tree_leaf_node *dst) noexcept { + Traits::template copy(m_keys + start, m_keys + end, + dst->m_keys + dst_start); + Traits::template copy(m_values + start, m_values + end, + dst->m_values + dst_start); + } + + template + WJR_INTRINSIC_INLINE void __copy_backward(unsigned int start, unsigned int end, + unsigned int dst_end, + bplus_tree_leaf_node *dst) noexcept { + Traits::template copy_backward(m_keys + start, m_keys + end, + dst->m_keys + dst_end); + Traits::template copy_backward(m_values + start, m_values + end, + dst->m_values + dst_end); + } + + WJR_INTRINSIC_INLINE void __assign(unsigned int idx, + value_type *const value) noexcept { + m_keys[idx] = Traits::get_key(*value); + m_values[idx] = value; + } + + constexpr ListNode *__get_list() noexcept { return this; } + constexpr const ListNode *__get_list() const noexcept { return this; } + + alignas(16) InlineKey m_keys[node_size]; + alignas(16) value_type *m_values[node_size]; +}; + +template +struct bplus_tree_leaf_node : bplus_tree_node, list_node<> { + using key_type = typename Traits::key_type; + using value_type = typename Traits::value_type; + constexpr static size_t node_size = Traits::node_size; + constexpr static bool is_inline_value = Traits::is_inline_value; + using InlineValue = typename Traits::InlineValue; + using ListNode = list_node<>; + + const key_type &__get_key(unsigned int pos) const noexcept { + return Traits::get_key(*m_values[pos]); + } + + template + WJR_INTRINSIC_INLINE void __copy(unsigned int start, unsigned int end, + unsigned int dst_start, + bplus_tree_leaf_node *dst) noexcept { + Traits::template copy(m_values + start, m_values + end, + dst->m_values + dst_start); + } + + template + WJR_INTRINSIC_INLINE void __copy_backward(unsigned int start, unsigned int end, + unsigned int dst_end, + bplus_tree_leaf_node *dst) noexcept { + Traits::template copy_backward(m_values + start, m_values + end, + dst->m_values + dst_end); + } + + WJR_INTRINSIC_INLINE void __assign(unsigned int idx, InlineValue value) noexcept { + m_values[idx] = value; + } + + constexpr ListNode *__get_list() noexcept { return this; } + constexpr const ListNode *__get_list() const noexcept { return this; } + + alignas(16) InlineValue m_values[node_size]; +}; + +template +constexpr typename bplus_tree_node::inner_node_type * +bplus_tree_node::as_inner() noexcept { + return static_cast(this); +} + +template +constexpr const typename bplus_tree_node::inner_node_type * +bplus_tree_node::as_inner() const noexcept { + return static_cast(this); +} + +template +constexpr typename bplus_tree_node::leaf_node_type * +bplus_tree_node::as_leaf() noexcept { + return static_cast(this); +} + +template +constexpr const typename bplus_tree_node::leaf_node_type * +bplus_tree_node::as_leaf() const noexcept { + return static_cast(this); +} + +template +class basic_bplus_tree; + +template +class bplus_tree_const_iterator { + using node_type = typename Traits::node_type; + using inner_node_type = typename Traits::inner_node_type; + using leaf_node_type = typename Traits::leaf_node_type; + + template + friend class basic_bplus_tree; + + using ListNode = list_node<>; + +public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = typename Traits::value_type; + using difference_type = std::ptrdiff_t; + using pointer = const value_type *; + using reference = const value_type &; + + bplus_tree_const_iterator() noexcept = default; + bplus_tree_const_iterator(const bplus_tree_const_iterator &) noexcept = default; + bplus_tree_const_iterator(bplus_tree_const_iterator &&) noexcept = default; + bplus_tree_const_iterator & + operator=(const bplus_tree_const_iterator &) noexcept = default; + bplus_tree_const_iterator &operator=(bplus_tree_const_iterator &&) noexcept = default; + ~bplus_tree_const_iterator() noexcept = default; + +protected: + bplus_tree_const_iterator(const ListNode *list_node, unsigned int pos) noexcept + : m_node(const_cast(list_node)), m_pos(pos) {} + + bplus_tree_const_iterator(const leaf_node_type *leaf, unsigned int pos) noexcept + : bplus_tree_const_iterator(leaf->__get_list(), pos) {} + +public: + reference operator*() const noexcept { return *get_leaf()->m_values[m_pos]; } + + pointer operator->() const noexcept { return get_leaf()->m_values[m_pos]; } + + bplus_tree_const_iterator &operator++() noexcept { + ++m_pos; + return __adjust_next(); + } + + bplus_tree_const_iterator operator++(int) noexcept { + bplus_tree_const_iterator tmp = *this; + ++*this; + return tmp; + } + + bplus_tree_const_iterator &operator--() noexcept { + if (m_pos != 0) { + --m_pos; + } else { + m_node = prev(m_node); + m_pos = -get_leaf()->m_size - 1; + } + + return *this; + } + + bplus_tree_const_iterator operator--(int) noexcept { + bplus_tree_const_iterator tmp = *this; + --*this; + return tmp; + } + + bool operator==(const bplus_tree_const_iterator &other) const noexcept { + return m_node == other.m_node && m_pos == other.m_pos; + } + + bool operator!=(const bplus_tree_const_iterator &other) const noexcept { + return !(*this == other); + } + + leaf_node_type *get_leaf() const noexcept { + return static_cast(m_node); + } + + ListNode *get_node() const noexcept { return m_node; } + unsigned int get_pos() const noexcept { return m_pos; } + +protected: + bplus_tree_const_iterator &__adjust_next() noexcept { + if (m_pos == static_cast(-get_leaf()->m_size)) { + m_node = next(m_node); + m_pos = 0; + } + + return *this; + } + +private: + ListNode *m_node = nullptr; + unsigned int m_pos = 0; +}; + +template +class bplus_tree_iterator : public bplus_tree_const_iterator { + using Mybase = bplus_tree_const_iterator; + using leaf_node_type = typename Traits::leaf_node_type; + + template + friend class basic_bplus_tree; + + using ListNode = list_node<>; + +public: + using Mybase::Mybase; + + using iterator_category = typename Mybase::iterator_category; + using value_type = typename Mybase::value_type; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + + bplus_tree_iterator(const Mybase &other) noexcept : Mybase(other) {} + +protected: + bplus_tree_iterator(const ListNode *list_node, unsigned int pos) noexcept + : Mybase(list_node, pos) {} + + bplus_tree_iterator(const leaf_node_type *leaf, unsigned int pos) noexcept + : Mybase(leaf, pos) {} + +public: + value_type &operator*() const noexcept { + return const_cast(Mybase::operator*()); + } + + value_type *operator->() const noexcept { + return const_cast(Mybase::operator->()); + } + + bplus_tree_iterator &operator++() noexcept { + Mybase::operator++(); + return *this; + } + + bplus_tree_iterator operator++(int) noexcept { + bplus_tree_iterator tmp = *this; + ++*this; + return tmp; + } + + bplus_tree_iterator &operator--() noexcept { + Mybase::operator--(); + return *this; + } + + bplus_tree_iterator operator--(int) noexcept { + bplus_tree_iterator tmp = *this; + --*this; + return tmp; + } + + bool operator==(const bplus_tree_iterator &other) const noexcept { + return Mybase::operator==(other); + } + + bool operator!=(const bplus_tree_iterator &other) const noexcept { + return Mybase::operator!=(other); + } + +protected: + bplus_tree_iterator &__adjust_next() noexcept { + Mybase::__adjust_next(); + return *this; + } +}; + +template +class basic_bplus_tree { + using _Alty = typename std::allocator_traits::template rebind_alloc; + using _Alty_traits = std::allocator_traits<_Alty>; + using storage_fn_type = container_fn<_Alty>; + + friend class container_fn<_Alty>; + + using mapped_type = typename Traits::mapped_type; + static constexpr size_t node_size = Traits::node_size; + static constexpr bool is_inline_key = Traits::is_inline_key; + static constexpr bool is_inline_value = Traits::is_inline_value; + using InlineKey = typename Traits::InlineKey; + using InlineValue = typename Traits::InlineValue; + static constexpr size_t floor_half = node_size / 2; + static constexpr size_t ceil_half = node_size - floor_half; + static constexpr bool Multi = Traits::Multi; + + using node_type = typename Traits::node_type; + using inner_node_type = typename Traits::inner_node_type; + using leaf_node_type = typename Traits::leaf_node_type; + + using ListNode = list_node<>; + +public: + using key_type = typename Traits::key_type; + using value_type = typename Traits::value_type; + using key_compare = typename Traits::key_compare; + using allocator_type = Alloc; + using size_type = typename _Alty_traits::size_type; + using difference_type = typename _Alty_traits::difference_type; + using iterator = bplus_tree_iterator; + using const_iterator = bplus_tree_const_iterator; + using reverse_iterator = std::reverse_iterator; + using const_reverse_iterator = std::reverse_iterator; + + static_assert(node_size >= 3, "node_size must be greater than or equal to 3."); + static_assert(node_size <= 255, "node_size must be less than or equal to 255."); + + basic_bplus_tree() noexcept(std::is_nothrow_default_constructible_v<_Alty>) { + init(&m_sentry); + } + + // not implemented currently + basic_bplus_tree(const basic_bplus_tree &) = delete; + + basic_bplus_tree(basic_bplus_tree &&other) noexcept( + std::is_nothrow_move_constructible_v + &&std::is_nothrow_move_constructible_v<_Alty>) + : m_pair(std::piecewise_construct, + wjr::forward_as_tuple(std::move(other.key_comp())), + wjr::forward_as_tuple( + std::piecewise_construct, + wjr::forward_as_tuple(std::move(other.__get_allocator())), + wjr::forward_as_tuple())) { + __take_tree(std::move(other)); + } + + basic_bplus_tree &operator=(const basic_bplus_tree &) = delete; + + basic_bplus_tree &operator=(basic_bplus_tree &&other) noexcept( + noexcept(storage_fn_type::move_assign(*this, std::move(other)))) { + if (WJR_LIKELY(this != std::addressof(other))) { + storage_fn_type::move_assign(*this, std::move(other)); + } + + return *this; + } + + ~basic_bplus_tree() noexcept { __destroy_and_deallocate(); } + + constexpr key_compare &key_comp() noexcept { return m_pair.first(); } + constexpr const key_compare &key_comp() const noexcept { return m_pair.first(); } + + iterator begin() noexcept { return iterator(m_sentry.next(), 0); } + const_iterator begin() const noexcept { return const_iterator(m_sentry.next(), 0); } + const_iterator cbegin() const noexcept { return const_iterator(m_sentry.next(), 0); } + + iterator end() noexcept { return iterator(&m_sentry, 0); } + const_iterator end() const noexcept { return const_iterator(&m_sentry, 0); } + const_iterator cend() const noexcept { return const_iterator(&m_sentry, 0); } + + reverse_iterator rbegin() noexcept { return reverse_iterator(end()); } + const_reverse_iterator rbegin() const noexcept { + return const_reverse_iterator(end()); + } + + const_reverse_iterator crbegin() const noexcept { + return const_reverse_iterator(cend()); + } + + reverse_iterator rend() noexcept { return reverse_iterator(begin()); } + const_reverse_iterator rend() const noexcept { + return const_reverse_iterator(begin()); + } + + const_reverse_iterator crend() const noexcept { + return const_reverse_iterator(cbegin()); + } + +private: + template + InlineValue __create_node(Args &&...args) noexcept { + if constexpr (is_inline_value) { + InlineValue ret(std::forward(args)...); + return ret; + } else { + auto &al = __get_allocator(); + value_type *const xval = + (value_type *)_Alty_traits::allocate(al, sizeof(value_type)); + uninitialized_construct_using_allocator(xval, al, + std::forward(args)...); + return xval; + } + } + + template + void __drop_node(InlineValue xval) noexcept { + if constexpr (!is_inline_value) { + auto &al = __get_allocator(); + _Alty_traits::destroy(al, xval); + _Alty_traits::deallocate(al, (uint8_t *)xval, sizeof(value_type)); + } + } + + const_iterator __get_insert_multi_pos(const key_type &key) const noexcept { + return __search(key); + } + + std::pair + __get_insert_unique_pos(const key_type &key) const noexcept { + const const_iterator iter = __search(key); + const auto pos = iter.get_pos(); + const bool inserted = + pos == 0 || key_comp()(*iter.get_leaf()->m_values[pos - 1], key); + return {iter, inserted}; + } + +public: + template + iterator __emplace_multi(Args &&...args) noexcept { + const auto xval = __create_node(std::forward(args)...); + const auto iter = __get_insert_multi_pos(Traits::get_key(*xval)); + return __insert_iter(iter, xval); + } + + template + std::pair __emplace_unique(Args &&...args) noexcept { + const auto xval = __create_node(std::forward(args)...); + const auto [iter, inserted] = __get_insert_unique_pos(Traits::get_key(*xval)); + + if (inserted) { + return {__insert_iter(iter, xval), true}; + } + + __drop_node(xval); + return {iterator(iter).__adjust_next(), false}; + } + + const_iterator __insert_multi(const value_type &val) noexcept { + return __emplace_multi(val); + } + + const_iterator __insert_multi(value_type &&val) noexcept { + return __emplace_multi(std::move(val)); + } + + std::pair __insert_unique(const value_type &val) noexcept { + return __emplace_unique(val); + } + + std::pair __insert_unique(value_type &&val) noexcept { + return __emplace_unique(std::move(val)); + } + + iterator lower_bound(const key_type &key) noexcept { + return __search(key).__adjust_next(); + } + + const_iterator lower_bound(const key_type &key) const noexcept { + return __search(key).__adjust_next(); + } + + iterator upper_bound(const key_type &key) noexcept { + return __search(key).__adjust_next(); + } + + const_iterator upper_bound(const key_type &key) const noexcept { + return __search(key).__adjust_next(); + } + + iterator erase(const_iterator iter) noexcept { + return __erase_iter(iter).__adjust_next(); + } + +private: + void __take_tree(basic_bplus_tree &&other) noexcept { + const auto root = other.__get_root(); + if (root == nullptr) { + init(&m_sentry); + return; + } + + __get_root() = root; + other.__get_root() = nullptr; + replace_uninit(&other.m_sentry, &m_sentry); + init(&other.m_sentry); + } + + // member function for container_fn (START) + + WJR_PURE WJR_INTRINSIC_CONSTEXPR _Alty &__get_allocator() noexcept { + return m_pair.second().first(); + } + + WJR_PURE WJR_INTRINSIC_CONSTEXPR const _Alty &__get_allocator() const noexcept { + return m_pair.second().first(); + } + + void __destroy_and_deallocate() noexcept { + node_type *current = __get_root(); + + if (WJR_UNLIKELY(current == nullptr)) { + return; + } + + auto &al = __get_allocator(); + int cur_size = current->m_size; + + // If root is leaf + if (cur_size < 0) { + const auto leaf = current->as_leaf(); + const unsigned int cur_usize = -cur_size; + + for (unsigned int i = 0; i < cur_usize; ++i) { + __drop_node(leaf->m_values[i]); + } + + _Alty_traits::deallocate(al, (uint8_t *)leaf, sizeof(leaf_node_type)); + return; + } + + // skip to the leftmost leaf + current = begin().get_leaf(); + cur_size = -current->m_size; + + // cache of parent and parent's size + node_type *parent = current->m_parent; + unsigned int par_size = parent->m_size; + + // cache of `current' node's position in parent + unsigned int pos = 0; + + do { + const auto leaf = current->as_leaf(); + const unsigned int cur_usize = cur_size; + + for (unsigned int i = 0; i < cur_usize; ++i) { + __drop_node(leaf->m_values[i]); + } + + ListNode *next = wjr::next(leaf); + _Alty_traits::deallocate(al, (uint8_t *)leaf, sizeof(leaf_node_type)); + + // if `current' is the last child of parent + if (WJR_UNLIKELY(pos++ == par_size)) { + do { + current = parent; + parent = current->m_parent; + pos = current->m_pos; + _Alty_traits::deallocate(al, (uint8_t *)current, + sizeof(inner_node_type)); + + // if `current' is the rightmost leaf + if (parent == nullptr) { + return; + } + + // if `current' is the last child of parent + } while (pos == (unsigned int)parent->m_size); + + // update cache of parent and parent's size + parent = static_cast(next)->m_parent; + par_size = parent->m_size; + pos = 0; + } + + WJR_ASSERT(next != &m_sentry); + + current = static_cast(next); + cur_size = -current->m_size; + } while (true); + } + + void __take_storage(basic_bplus_tree &&other) noexcept { + key_comp() = std::move(other.key_comp()); + __take_tree(std::move(other)); + } + + // member function for container_fn (END) + + WJR_NOINLINE void __rec_insert_iter(node_type *current, node_type *inst) noexcept { + auto &al = __get_allocator(); + + node_type *parent = current->m_parent; + InlineKey key = inst->as_leaf()->__get_key(0); + + while (parent != nullptr) { + inst->m_parent = parent; + unsigned int pos = current->m_pos + 1; + current = parent; + const auto inner = current->as_inner(); + + unsigned int cur_size = inner->m_size + 1; + InlineKey *const keys = inner->m_keys; + node_type **const sons = inner->m_sons; + + // non-full inner + if (WJR_LIKELY(cur_size != node_size + 1)) { + Traits::copy_backward(keys + pos - 1, keys + cur_size - 1, + keys + cur_size); + Traits::copy_backward(sons + pos, sons + cur_size, sons + cur_size + 1); + + inner->m_size = cur_size; + keys[pos - 1] = key; + sons[pos] = inst; + + inst->m_pos = pos; + for (unsigned int i = pos + 1; i <= cur_size; ++i) { + sons[i]->m_pos = i; + } + + return; + } + + parent = inner->m_parent; + + const auto tmp_inst = + (inner_node_type *)_Alty_traits::allocate(al, sizeof(inner_node_type)); + + inner->m_size = (int)(ceil_half); + tmp_inst->m_size = (int)(floor_half); + + InlineKey next_key; + + if (pos <= ceil_half) { + next_key = keys[ceil_half - 1]; + + Traits::template copy( + keys + ceil_half, keys + node_size, tmp_inst->m_keys); + Traits::template copy( + sons + ceil_half, sons + node_size + 1, tmp_inst->m_sons); + Traits::template copy_backward<0, ceil_half>( + keys + pos - 1, keys + ceil_half - 1, keys + ceil_half); + Traits::template copy_backward<0, ceil_half>(sons + pos, sons + ceil_half, + sons + ceil_half + 1); + + keys[pos - 1] = key; + sons[pos] = inst; + + inst->m_pos = pos; + for (unsigned int i = pos + 1; i <= ceil_half; ++i) { + sons[i]->m_pos = i; + } + } else { + if (pos == ceil_half + 1) { + next_key = key; + + Traits::template copy( + keys + ceil_half, keys + node_size, tmp_inst->m_keys); + Traits::template copy( + sons + ceil_half + 1, sons + node_size + 1, tmp_inst->m_sons + 1); + + tmp_inst->m_sons[0] = inst; + } else { + next_key = keys[ceil_half]; + + Traits::template copy<0, floor_half - 1>( + keys + ceil_half + 1, keys + pos - 1, tmp_inst->m_keys); + Traits::template copy<1, floor_half>(sons + ceil_half + 1, sons + pos, + tmp_inst->m_sons); + + const unsigned int rpos = pos - ceil_half - 1; + + Traits::template copy<0, floor_half - 1>( + keys + pos - 1, keys + node_size, tmp_inst->m_keys + rpos); + Traits::template copy<0, floor_half>(sons + pos, sons + node_size + 1, + tmp_inst->m_sons + rpos + 1); + + tmp_inst->m_keys[rpos - 1] = key; + tmp_inst->m_sons[rpos] = inst; + } + } + + for (unsigned int i = 0; i <= floor_half; ++i) { + tmp_inst->m_sons[i]->m_parent = tmp_inst; + tmp_inst->m_sons[i]->m_pos = i; + } + + key = next_key; + inst = tmp_inst; + } + + const auto new_root = + (inner_node_type *)_Alty_traits::allocate(al, sizeof(inner_node_type)); + new_root->m_size = 1; + new_root->m_parent = nullptr; + new_root->m_keys[0] = key; + new_root->m_sons[0] = current; + new_root->m_sons[1] = inst; + current->m_pos = 0; + inst->m_pos = 1; + + current->m_parent = new_root; + inst->m_parent = new_root; + + __get_root() = new_root; + return; + } + + WJR_NODISCARD iterator __insert_iter(const_iterator iter, InlineValue xval) noexcept { + auto &al = __get_allocator(); + + leaf_node_type *leaf; + do { + ListNode *const node = iter.get_node(); + + // empty + if (WJR_UNLIKELY(node == &m_sentry)) { + leaf = (leaf_node_type *)_Alty_traits::allocate(__get_allocator(), + sizeof(leaf_node_type)); + + __get_root() = leaf; + + leaf->m_size = -1; + leaf->m_parent = nullptr; + leaf->__assign(0, xval); + wjr::push_front(&m_sentry, leaf); + return iterator(leaf, 0); + } + + leaf = static_cast(node); + } while (0); + + unsigned int pos = iter.get_pos(); + unsigned int cur_size = -leaf->m_size; + + // non-full leaf + if (WJR_LIKELY(cur_size != node_size)) { + WJR_ASSERT_ASSUME(pos <= cur_size); + + leaf->__copy_backward(pos, cur_size, cur_size + 1, leaf); + + leaf->m_size = -(cur_size + 1); + leaf->__assign(pos, xval); + return iterator(leaf, pos); + } + + const auto inst = + (leaf_node_type *)_Alty_traits::allocate(al, sizeof(leaf_node_type)); + push_front(leaf, inst); + + leaf->m_size = -(int)(floor_half + 1); + inst->m_size = -(int)(node_size - floor_half); + + leaf_node_type *result; + + if (pos <= floor_half) { + leaf->template __copy(floor_half, node_size, 0, inst); + leaf->template __copy_backward<0, floor_half>(pos, floor_half, floor_half + 1, + leaf); + leaf->__assign(pos, xval); + result = leaf; + } else { + // pos in inst + const unsigned int rpos = pos - floor_half - 1; + leaf->template __copy<0, ceil_half - 1>(floor_half + 1, pos, 0, inst); + leaf->template __copy<0, ceil_half - 1>(pos, node_size, rpos + 1, inst); + inst->__assign(rpos, xval); + result = inst; + pos = rpos; + } + + __rec_insert_iter(leaf, inst); + return iterator(result, pos); + } + + template + WJR_PURE WJR_INTRINSIC_INLINE static bool + __compare(const key_type &a, const key_type &key, const key_compare &comp) noexcept { + if constexpr (Upper) { + return comp(key, a); + } else { + return !comp(a, key); + } + } + + template + WJR_PURE WJR_NOINLINE const_iterator __search(const key_type &key) const noexcept { + const node_type *current = __get_root(); + + if (WJR_UNLIKELY(current == nullptr)) { + return cend(); + } + + unsigned int pos; + + int cur_size = current->m_size; + const auto &comp = key_comp(); + + // root search + if (WJR_UNLIKELY(cur_size < 0)) { + pos = __search(current->as_leaf(), -cur_size, key, + comp); + return const_iterator(current->as_leaf(), pos); + } + + if (!__compare(*current->as_inner()->m_keys[0], key, comp)) { + goto NOT_LEFTMOST_AT_ROOT; + } + + current = current->as_inner()->m_sons[0]; + cur_size = current->m_size; + + while (cur_size >= 0) { + if (!__compare(*current->as_inner()->m_keys[0], key, comp)) { + goto NOT_LEFTMOST_AT_INNER; + } + + current = current->as_inner()->m_sons[0]; + cur_size = current->m_size; + } + + // leftmost leaf need to check first key + if (__compare(current->as_leaf()->__get_key(0), key, comp)) { + return const_iterator(current->as_leaf(), 0); + } + + goto LEAF; + + do { + + NOT_LEFTMOST_AT_ROOT: + pos = __search(current->as_inner(), cur_size, key, + comp); + + break; + + NOT_LEFTMOST_AT_INNER: + pos = __search(current->as_inner(), cur_size, + key, comp); + } while (0); + + current = current->as_inner()->m_sons[pos]; + cur_size = current->m_size; + + if (cur_size < 0) { + goto LEAF; + } + + do { + pos = __search(current->as_inner(), cur_size, + key, comp); + + current = current->as_inner()->m_sons[pos]; + cur_size = current->m_size; + } while (cur_size >= 0); + + LEAF: + pos = __search(current->as_leaf(), -cur_size, + key, comp); + return const_iterator(current->as_leaf(), pos); + } + + template + WJR_PURE WJR_INTRINSIC_INLINE static unsigned int + __search(const node_type *current, unsigned int size, Compare &&comp) noexcept { + static_assert(Offset == 0 || Offset == 1, ""); + static_assert(Min != 0, ""); + + WJR_ASSERT_ASSUME(size >= Min); + WJR_ASSERT_ASSUME(size <= Max); + + if constexpr (Min == 1 && Offset == 1) { + if (WJR_UNLIKELY(size == 1)) { + return 1; + } + } + + if constexpr (Max <= 16) { + if constexpr (Offset == 0) { + if (comp(current, 0)) { + return 0; + } + } + +#define WJR_REGISTER_BLPUS_SEARCH_2(A, B, C) \ + do { \ + if constexpr (A == Max) { \ + return A; \ + } else if constexpr (B == Max) { \ + if (size == A || comp(current, A)) { \ + return A; \ + } \ + return B; \ + } else if constexpr (C <= Max) { \ + if constexpr (Min < C) { \ + if (size < C) { \ + if constexpr (Min <= A) { \ + if (size == A || comp(current, A)) { \ + return A; \ + } \ + } else { \ + if (comp(current, A)) { \ + return A; \ + } \ + } \ + return B; \ + } \ + } \ + if (comp(current, B)) { \ + if (comp(current, A)) { \ + return A; \ + } \ + return B; \ + } \ + } \ + } while (0) +#define WJR_REGISTER_BLPUS_SEARCH_4(A, B, C, D, E) \ + do { \ + if constexpr (E > Max) { \ + WJR_REGISTER_BLPUS_SEARCH_2(A, B, C); \ + WJR_REGISTER_BLPUS_SEARCH_2(C, D, E); \ + } else { \ + if constexpr (Min < E) { \ + if (size < E) { \ + WJR_REGISTER_BLPUS_SEARCH_2(A, B, C); \ + if constexpr (Min <= C) { \ + if (size == C || comp(current, C)) { \ + return C; \ + } \ + } else { \ + if (comp(current, C)) { \ + return C; \ + } \ + } \ + return D; \ + } \ + } \ + if (comp(current, D)) { \ + if (comp(current, B)) { \ + if (comp(current, A)) { \ + return A; \ + } \ + return B; \ + } \ + if (comp(current, C)) { \ + return C; \ + } \ + return D; \ + } \ + } \ + } while (0) + + WJR_REGISTER_BLPUS_SEARCH_2(1, 2, 3); + WJR_REGISTER_BLPUS_SEARCH_2(3, 4, 5); + WJR_REGISTER_BLPUS_SEARCH_2(5, 6, 7); + WJR_REGISTER_BLPUS_SEARCH_2(7, 8, 9); + WJR_REGISTER_BLPUS_SEARCH_2(9, 10, 11); + WJR_REGISTER_BLPUS_SEARCH_2(11, 12, 13); + WJR_REGISTER_BLPUS_SEARCH_2(13, 14, 15); + + if constexpr (Max == 15) { + return 15; + } else if constexpr (Max == 16) { + if (size == 15 || comp(current, 15)) { + return 15; + } + + return 16; + } + +#undef WJR_REGISTER_BLPUS_SEARCH_4 +#undef WJR_REGISTER_BLPUS_SEARCH_2 + } else { + unsigned int pos = Offset; + do { + if (comp(current, pos)) { + break; + } + } while (++pos != size); + return pos; + } + } + + template + WJR_PURE WJR_INTRINSIC_INLINE static unsigned int + __search(const inner_node_type *current, unsigned int size, const key_type &key, + const key_compare &comp) noexcept { + return __search( + current, size, [&key, &comp](const node_type *current, unsigned int pos) { + return __compare(*current->as_inner()->m_keys[pos], key, comp); + }); + } + + template + WJR_PURE WJR_INTRINSIC_INLINE static unsigned int + __search(const leaf_node_type *current, unsigned int size, const key_type &key, + const key_compare &comp) noexcept { + return __search( + current, size, [&key, &comp](const node_type *current, unsigned int pos) { + return __compare(current->as_leaf()->__get_key(pos), key, comp); + }); + } + + template + WJR_INTRINSIC_INLINE static unsigned int + __init_remove_rotate(const inner_node_type *parent, unsigned int pos, + unsigned int par_size, T *&lhs, T *&rhs) noexcept { + unsigned int size; + + do { + if (pos != par_size) { + const auto tmp = static_cast(parent->m_sons[pos + 1]); + unsigned int tmp_size; + + if constexpr (std::is_same_v) { + tmp_size = -tmp->m_size; + } else { + tmp_size = tmp->m_size; + } + + WJR_ASSERT_ASSUME(tmp_size >= floor_half); + + rhs = tmp; + size = tmp_size; + } else { + auto tmp = static_cast(parent->m_sons[pos - 1]); + lhs = tmp; + + if constexpr (std::is_same_v) { + return -tmp->m_size; + } else { + return tmp->m_size; + } + } + } while (0); + + do { + if (pos != 0) { + const auto tmp = static_cast(parent->m_sons[pos - 1]); + unsigned int tmp_size; + + if constexpr (std::is_same_v) { + tmp_size = -tmp->m_size; + } else { + tmp_size = tmp->m_size; + } + + if (tmp_size >= size) { + lhs = tmp; + size = tmp_size; + break; + } + } + + lhs = nullptr; + } while (0); + + return size; + } + + /** + * @todo use to optimize + * + */ + WJR_NOINLINE void __rec_erase_iter(node_type *parent, unsigned int par_pos, + unsigned int par_size) noexcept { + constexpr unsigned int merge_size = floor_half * 2; + + unsigned int pos; + unsigned int cur_size; + node_type *current; + + current = parent; + pos = par_pos; + cur_size = par_size; + parent = current->m_parent; + + while (parent != nullptr) { + WJR_ASSERT_ASSUME(pos > 0); + + const auto inner = current->as_inner(); + + InlineKey *const keys = inner->m_keys; + node_type **const sons = inner->m_sons; + + if (cur_size > floor_half) { + Traits::copy(keys + pos, keys + cur_size, keys + pos - 1); + Traits::copy(sons + pos + 1, sons + cur_size + 1, sons + pos); + + for (unsigned int i = pos; i < cur_size; ++i) { + sons[i]->m_pos = i; + } + + inner->m_size = cur_size - 1; + return; + } + + WJR_ASSERT_ASSUME(cur_size == floor_half); + + const auto par_inner = parent->as_inner(); + par_pos = inner->m_pos; + par_size = par_inner->m_size; + inner_node_type *lhs; + inner_node_type *rhs; + + unsigned int next_size = + __init_remove_rotate(par_inner, par_pos, par_size, lhs, rhs); + + do { + if (lhs != nullptr) { + rhs = inner; + + if (next_size == floor_half) { + Traits::copy(keys, keys + pos - 1, lhs->m_keys + floor_half + 1); + Traits::copy(sons, sons + pos, lhs->m_sons + floor_half + 1); + Traits::copy(keys + pos, keys + floor_half, + lhs->m_keys + floor_half + pos); + Traits::copy(sons + pos + 1, sons + floor_half + 1, + lhs->m_sons + floor_half + pos + 1); + + for (unsigned int i = floor_half; i <= merge_size; ++i) { + lhs->m_sons[i]->m_parent = lhs; + lhs->m_sons[i]->m_pos = i; + } + + lhs->m_keys[floor_half] = par_inner->m_keys[par_pos - 1]; + break; + } + + const unsigned int moved_elements = (next_size - floor_half + 1) / 2; + + InlineKey key = lhs->m_keys[next_size - moved_elements]; + + if (moved_elements != 1) { + Traits::copy_backward(keys + pos, keys + floor_half, + keys + floor_half + moved_elements - 1); + Traits::copy_backward(sons + pos + 1, sons + floor_half + 1, + sons + floor_half + moved_elements); + for (unsigned int i = pos + moved_elements; + i < floor_half + moved_elements; ++i) { + sons[i]->m_pos = i; + } + } + + Traits::copy_backward(keys, keys + pos - 1, + keys + pos + moved_elements - 1); + Traits::copy_backward(sons, sons + pos, sons + pos + moved_elements); + Traits::copy(lhs->m_keys + next_size - moved_elements + 1, + lhs->m_keys + next_size, keys); + Traits::copy(lhs->m_sons + next_size - moved_elements + 1, + lhs->m_sons + next_size + 1, sons); + + keys[moved_elements - 1] = par_inner->m_keys[par_pos - 1]; + par_inner->m_keys[par_pos - 1] = key; + + for (unsigned int i = 0; i < moved_elements; ++i) { + sons[i]->m_parent = inner; + sons[i]->m_pos = i; + } + + for (unsigned int i = moved_elements; i < pos + moved_elements; ++i) { + sons[i]->m_pos = i; + } + + lhs->m_size = next_size - moved_elements; + inner->m_size = floor_half + moved_elements - 1; + } else { + WJR_ASSERT_ASSUME(rhs != nullptr); + + lhs = inner; + + if (next_size == floor_half) { + Traits::copy(keys + pos, keys + floor_half, keys + pos - 1); + Traits::copy(sons + pos + 1, sons + floor_half + 1, sons + pos); + Traits::copy(rhs->m_keys, rhs->m_keys + floor_half, + keys + floor_half); + Traits::copy(rhs->m_sons, rhs->m_sons + floor_half + 1, + sons + floor_half); + + for (unsigned int i = pos; i < floor_half; ++i) { + inner->m_sons[i]->m_pos = i; + } + + for (unsigned int i = floor_half; i <= merge_size; ++i) { + inner->m_sons[i]->m_parent = inner; + inner->m_sons[i]->m_pos = i; + } + + lhs->m_keys[floor_half - 1] = par_inner->m_keys[par_pos]; + ++par_pos; + break; + } + + const unsigned int moved_elements = (next_size - floor_half + 1) / 2; + + InlineKey key = rhs->m_keys[moved_elements - 1]; + + Traits::copy(keys + pos, keys + floor_half, keys + pos - 1); + Traits::copy(sons + pos + 1, sons + floor_half + 1, sons + pos); + Traits::copy(rhs->m_keys, rhs->m_keys + moved_elements - 1, + keys + floor_half); + Traits::copy(rhs->m_sons, rhs->m_sons + moved_elements, + sons + floor_half); + Traits::copy(rhs->m_keys + moved_elements, rhs->m_keys + next_size, + rhs->m_keys); + Traits::copy(rhs->m_sons + moved_elements, + rhs->m_sons + next_size + 1, rhs->m_sons); + + keys[floor_half - 1] = par_inner->m_keys[par_pos]; + par_inner->m_keys[par_pos] = key; + + for (unsigned int i = pos; i < floor_half; ++i) { + sons[i]->m_pos = i; + } + + for (unsigned int i = floor_half; i < floor_half + moved_elements; + ++i) { + sons[i]->m_parent = inner; + sons[i]->m_pos = i; + } + + for (unsigned int i = 0; i <= next_size - moved_elements; ++i) { + rhs->m_sons[i]->m_pos = i; + } + + rhs->m_size = next_size - moved_elements; + inner->m_size = floor_half + moved_elements - 1; + } + + return; + } while (0); + + lhs->m_size = merge_size; + _Alty_traits::deallocate(__get_allocator(), (uint8_t *)rhs, + sizeof(inner_node_type)); + + pos = par_pos; + cur_size = par_size; + current = parent; + parent = current->m_parent; + } + + const auto inner = current->as_inner(); + + if (cur_size == 1) { + _Alty_traits::deallocate(__get_allocator(), (uint8_t *)inner, + sizeof(inner_node_type)); + node_type *root = inner->m_sons[0]; + __get_root() = root; + root->m_parent = nullptr; + return; + } + + Traits::copy(inner->m_keys + pos, inner->m_keys + cur_size, + inner->m_keys + pos - 1); + Traits::copy(inner->m_sons + pos + 1, inner->m_sons + cur_size + 1, + inner->m_sons + pos); + + for (unsigned int i = pos; i < cur_size; ++i) { + inner->m_sons[i]->m_pos = i; + } + + inner->m_size = cur_size - 1; + } + + iterator __erase_iter(const_iterator iter) noexcept { + constexpr unsigned int merge_size = floor_half * 2; + + leaf_node_type *leaf = iter.get_leaf(); + unsigned int pos = iter.get_pos(); + unsigned int cur_size = -leaf->m_size; + node_type *parent = leaf->m_parent; + + __drop_node(leaf->m_values[pos]); + + if (WJR_LIKELY(cur_size > floor_half)) { + leaf->__copy(pos + 1, cur_size, pos, leaf); + leaf->m_size = -(cur_size - 1); + + // first key in leaf is changed + if (pos == 0 && parent != nullptr) { + node_type *current = leaf; + unsigned int tmp_pos; + + do { + tmp_pos = current->m_pos; + current = parent; + parent = current->m_parent; + } while (tmp_pos == 0 && parent != nullptr); + + if (tmp_pos != 0) { + current->as_inner()->m_keys[tmp_pos - 1] = leaf->__get_key(0); + } + } + + return iterator(leaf, pos); + } + + if (parent == nullptr) { + if (cur_size == 1) { + _Alty_traits::deallocate(__get_allocator(), (uint8_t *)leaf, + sizeof(leaf_node_type)); + __get_root() = nullptr; + init(&m_sentry); + return cend(); + } + + leaf->__copy(pos + 1, cur_size, pos, leaf); + leaf->m_size = -(cur_size - 1); + return iterator(leaf, pos); + } + + WJR_ASSERT_ASSUME(cur_size == floor_half); + + const auto inner = parent->as_inner(); + unsigned int par_pos = leaf->m_pos; + cur_size = inner->m_size; + leaf_node_type *lhs; + leaf_node_type *rhs; + + const unsigned int next_size = + __init_remove_rotate(inner, par_pos, cur_size, lhs, rhs); + + do { + constexpr unsigned int max_moved_elements = (ceil_half + 1) / 2; + + if (lhs != nullptr) { + rhs = leaf; + + if (next_size == floor_half) { + leaf->template __copy<0, floor_half>(0, pos, floor_half, lhs); + leaf->template __copy<0, floor_half>(pos + 1, floor_half, + pos + floor_half, lhs); + + leaf = lhs; + pos += floor_half; + break; + } + + const unsigned int moved_elements = (next_size - floor_half + 1) / 2; + + if (moved_elements != 1) { + leaf->template __copy_backward<0, floor_half>( + pos + 1, floor_half, floor_half + moved_elements - 1, leaf); + } + + leaf->template __copy_backward<0, floor_half>(0, pos, + pos + moved_elements, leaf); + lhs->template __copy<1, max_moved_elements>(next_size - moved_elements, + next_size, 0, leaf); + + lhs->m_size = -(next_size - moved_elements); + leaf->m_size = -(floor_half + moved_elements - 1); + + pos += moved_elements; + } else { + WJR_ASSERT_ASSUME(rhs != nullptr); + + lhs = leaf; + + leaf->template __copy<0, floor_half>(pos + 1, floor_half, pos, leaf); + + // merge rhs to leaf, and pos of iter is zero, then + // need to update key in parent + if (pos == 0) { + node_type *current = leaf; + + unsigned int tmp_pos; + node_type *tmp_parent = parent; + + do { + tmp_pos = current->m_pos; + current = tmp_parent; + tmp_parent = current->m_parent; + } while (tmp_pos == 0 && tmp_parent != nullptr); + + if (tmp_pos != 0) { + current->as_inner()->m_keys[tmp_pos - 1] = leaf->__get_key(0); + } + } + + if (next_size == floor_half) { + rhs->template __copy<0, floor_half>(0, floor_half, floor_half - 1, + leaf); + + ++par_pos; + break; + } + + const unsigned int moved_elements = (next_size - floor_half + 1) / 2; + + rhs->template __copy<1, max_moved_elements>(0, moved_elements, + floor_half - 1, leaf); + rhs->template __copy<1, node_size - max_moved_elements>( + moved_elements, next_size, 0, rhs); + + rhs->m_size = -(next_size - moved_elements); + leaf->m_size = -(floor_half + moved_elements - 1); + } + + node_type *current = rhs; + + unsigned int tmp_pos = current->m_pos; + current = parent; + parent = current->m_parent; + + current->as_inner()->m_keys[tmp_pos - 1] = rhs->__get_key(0); + + return iterator(leaf, pos); + } while (0); + + lhs->m_size = -(merge_size - 1); + remove_uninit(rhs); + _Alty_traits::deallocate(__get_allocator(), (uint8_t *)rhs, + sizeof(leaf_node_type)); + + __rec_erase_iter(parent, par_pos, cur_size); + + return iterator(leaf, pos); + } + + WJR_INTRINSIC_CONSTEXPR node_type *&__get_root() noexcept { + return m_pair.second().second(); + } + + WJR_INTRINSIC_CONSTEXPR const node_type *__get_root() const noexcept { + return m_pair.second().second(); + } + + compressed_pair> m_pair; + ListNode m_sentry; +}; + +} // namespace wjr + +#endif // WJR_CONTAINER_GENERIC_BPLUS_TREE_HPP__ diff --git a/include/wjr/container/generic/bplus_tree.hpp b/include/wjr/container/generic/bplus_tree.hpp index db492134..4f970a0b 100644 --- a/include/wjr/container/generic/bplus_tree.hpp +++ b/include/wjr/container/generic/bplus_tree.hpp @@ -5,21 +5,18 @@ * @file bplus_tree.hpp * @brief B+ tree implementation. * - * @details The addition, deletion, query, iterator and other functions have been - * implemented. The multiset/multimap/set/map adapter has not been implemented yet. The + * @details The multiset/multimap/set/map adapter has not been implemented yet. The * node_size should be set to 16 by default, and optimization has been made for queries - * less than or equal to - * 16. The general B+ tree query is proportional to node_size. For example, when node_size - * is 16, the number of queries per bit is [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, - * 14, 15, 16, 16], and the average number of queries is 8.9 times. After improvement, the - * number of queries for the i-th query is [1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, - * 10, 10], and the average number of queries is 6.56 times. In fact, the probability of - * querying smaller nodes is slightly greater than that of larger nodes, so the actual - * number of queries will be less. If the comparison operation of key_type is more - * complex, it is not recommended to use B+ tree, because the number of queries of B+ tree - * will be more, thus offsetting the advantages of B+ tree. + * less than or equal to 16. \n + * After improvement, the number of queries for the i-th query is + * [1, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10], the average number of queries + * is 6.56 times. In fact, the probability of querying smaller nodes is slightly greater + * than that of larger nodes, so the actual number of queries will be less. If the + * comparison operation of key_type is more complex, it is not recommended to use B+ tree, + * because the number of queries of B+ tree will be more, thus offsetting the advantages + * of B+ tree. * - * @note Currently not needed for use, and some bugs exists, don't use it. + * @todo * * @version 0.1 * @date 2024-05-06 @@ -30,7 +27,6 @@ #include #include #include -#include #include #if defined(WJR_X86) @@ -50,6 +46,67 @@ struct bplus_tree_leaf_node; namespace bplus_tree_details { +template +class inline_key { +public: + static_assert(!std::is_const_v, ""); + + using value_type = T; + using reference = std::add_const_t &; + using pointer = std::add_const_t *; + + constexpr inline_key() noexcept = default; + constexpr inline_key(const inline_key &other) noexcept = default; + constexpr inline_key(inline_key &&other) noexcept = default; + constexpr inline_key &operator=(const inline_key &other) noexcept = default; + constexpr inline_key &operator=(inline_key &&other) noexcept = default; + ~inline_key() noexcept = default; + + constexpr inline_key(reference value) noexcept( + std::is_nothrow_constructible_v, reference>) + : m_storage(value) {} + + constexpr reference operator*() const noexcept { return *m_storage; } + constexpr reference get() const noexcept { return m_storage.get(); } + constexpr pointer operator->() const noexcept { return get(); } + +private: + // no need to check + algined_storage m_storage; +}; + +template +class inline_key { +public: + static_assert(!std::is_const_v, ""); + + using value_type = T; + using reference = std::add_const_t &; + using pointer = std::add_const_t *; + + constexpr inline_key() noexcept = default; + constexpr inline_key(const inline_key &other) noexcept = default; + constexpr inline_key(inline_key &&other) noexcept = default; + constexpr inline_key &operator=(const inline_key &other) noexcept = default; + constexpr inline_key &operator=(inline_key &&other) noexcept = default; + ~inline_key() noexcept = default; + + constexpr inline_key(reference value) noexcept : m_ptr(std::addressof(value)) {} + + constexpr reference operator*() const noexcept { return *m_ptr; } + constexpr pointer operator->() const noexcept { return m_ptr; } + constexpr reference get() const noexcept { return *m_ptr; } + +private: + pointer m_ptr; +}; + +template +struct is_possible_inline_key : std::is_trivially_copyable> {}; + +template +inline constexpr bool is_possible_inline_key_v = is_possible_inline_key::value; + template WJR_INTRINSIC_INLINE static void copy(Other *first, Other *last, Other *dest) noexcept { #if WJR_HAS_BUILTIN(BPLUS_TREE_COPY) @@ -89,15 +146,24 @@ struct bplus_tree_traits { using key_compare = Compare; static constexpr size_t node_size = Size; - using InlineKey = auto_key, 8>; - static constexpr bool is_inline_key = InlineKey::is_inlined; + static constexpr bool is_inline_key = + bplus_tree_details::is_possible_inline_key_v> && + sizeof(key_type) <= 8; static constexpr bool is_inline_value = - std::is_trivially_copyable_v && sizeof(value_type) <= 16; - using InlineValue = std::conditional_t; + bplus_tree_details::is_possible_inline_key_v> && + sizeof(value_type) <= 8; + + using InlineKey = + bplus_tree_details::inline_key, is_inline_key>; + using InlineValue = std::conditional_t< + is_inline_value, + bplus_tree_details::inline_key, true>, + value_type *>; using node_type = bplus_tree_node; using inner_node_type = bplus_tree_inner_node; - using leaf_node_type = bplus_tree_leaf_node; + using leaf_node_type = + bplus_tree_leaf_node; static constexpr bool multi = Multi; WJR_INTRINSIC_INLINE static const key_type & @@ -110,13 +176,13 @@ struct bplus_tree_traits { } public: - template + template WJR_INTRINSIC_INLINE static void copy(Other *first, Other *last, Other *dest) noexcept { return bplus_tree_details::copy(first, last, dest); } - template + template WJR_INTRINSIC_INLINE static void copy_backward(Other *first, Other *last, Other *dest) noexcept { return bplus_tree_details::copy_backward(first, last, dest); @@ -202,6 +268,8 @@ struct bplus_tree_leaf_node : bplus_tree_node, list_node< using key_type = typename Traits::key_type; using value_type = typename Traits::value_type; constexpr static size_t node_size = Traits::node_size; + constexpr static bool is_inline_value = Traits::is_inline_value; + using InlineValue = typename Traits::InlineValue; using ListNode = list_node<>; const key_type &__get_key(unsigned int pos) const noexcept { @@ -224,15 +292,14 @@ struct bplus_tree_leaf_node : bplus_tree_node, list_node< dst->m_values + dst_end); } - WJR_INTRINSIC_INLINE void __assign(unsigned int idx, - value_type *const value) noexcept { + WJR_INTRINSIC_INLINE void __assign(unsigned int idx, InlineValue value) noexcept { m_values[idx] = value; } constexpr ListNode *__get_list() noexcept { return this; } constexpr const ListNode *__get_list() const noexcept { return this; } - alignas(16) value_type *m_values[node_size]; + alignas(16) InlineValue m_values[node_size]; }; template @@ -443,7 +510,9 @@ class basic_bplus_tree { using mapped_type = typename Traits::mapped_type; static constexpr size_t node_size = Traits::node_size; static constexpr bool is_inline_key = Traits::is_inline_key; + static constexpr bool is_inline_value = Traits::is_inline_value; using InlineKey = typename Traits::InlineKey; + using InlineValue = typename Traits::InlineValue; static constexpr size_t floor_half = node_size / 2; static constexpr size_t ceil_half = node_size - floor_half; static constexpr bool Multi = Traits::Multi; @@ -532,19 +601,27 @@ class basic_bplus_tree { private: template - value_type *__create_node(Args &&...args) noexcept { - auto &al = __get_allocator(); - value_type *const xval = - (value_type *)_Alty_traits::allocate(al, sizeof(value_type)); - uninitialized_construct_using_allocator(xval, al, std::forward(args)...); - return xval; + InlineValue __create_node(Args &&...args) noexcept { + if constexpr (is_inline_value) { + InlineValue ret(std::forward(args)...); + return ret; + } else { + auto &al = __get_allocator(); + value_type *const xval = + (value_type *)_Alty_traits::allocate(al, sizeof(value_type)); + uninitialized_construct_using_allocator(xval, al, + std::forward(args)...); + return xval; + } } template - void __drop_node(value_type *xval) noexcept { - auto &al = __get_allocator(); - _Alty_traits::destroy(al, xval); - _Alty_traits::deallocate(al, (uint8_t *)xval, sizeof(value_type)); + void __drop_node(InlineValue xval) noexcept { + if constexpr (!is_inline_value) { + auto &al = __get_allocator(); + _Alty_traits::destroy(al, xval); + _Alty_traits::deallocate(al, (uint8_t *)xval, sizeof(value_type)); + } } const_iterator __get_insert_multi_pos(const key_type &key) const noexcept { @@ -842,7 +919,7 @@ class basic_bplus_tree { return; } - WJR_NODISCARD iterator __insert_iter(const_iterator iter, value_type *xval) noexcept { + WJR_NODISCARD iterator __insert_iter(const_iterator iter, InlineValue xval) noexcept { auto &al = __get_allocator(); leaf_node_type *leaf; @@ -1006,7 +1083,7 @@ class basic_bplus_tree { WJR_ASSERT_ASSUME(size <= Max); if constexpr (Min == 1 && Offset == 1) { - if (size == 1) { + if (WJR_UNLIKELY(size == 1)) { return 1; } } diff --git a/include/wjr/iterator/contiguous_iterator_adpater.hpp b/include/wjr/iterator/contiguous_iterator_adpater.hpp index d80b7c2e..4d7dee5e 100644 --- a/include/wjr/iterator/contiguous_iterator_adpater.hpp +++ b/include/wjr/iterator/contiguous_iterator_adpater.hpp @@ -44,7 +44,7 @@ class contiguous_const_iterator_adapter { std::is_nothrow_move_assignable_v<__pointer>) = default; WJR_NODISCARD WJR_PURE WJR_CONSTEXPR20 pointer operator->() const noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container != nullptr, "Can't dereference an value-initialized iterator."); WJR_ASSERT_L0(m_ptr != nullptr, "Can't dereference an invalid iterator."); @@ -59,7 +59,7 @@ class contiguous_const_iterator_adapter { } WJR_CONSTEXPR20 contiguous_const_iterator_adapter &operator++() noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container != nullptr, "Can't increment an value-initialized iterator."); WJR_ASSERT_L0(m_ptr != nullptr, "Can't increment an invalid iterator."); @@ -77,7 +77,7 @@ class contiguous_const_iterator_adapter { } WJR_CONSTEXPR20 contiguous_const_iterator_adapter &operator--() noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container != nullptr, "Can't decrement an value-initialized iterator."); WJR_ASSERT_L0(m_ptr != nullptr, "Can't decrement an invalid iterator."); @@ -171,7 +171,7 @@ class contiguous_const_iterator_adapter { WJR_CONSTEXPR20 void check_same_container(WJR_MAYBE_UNUSED const Container *cont) const noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) WJR_ASSERT_L0(m_container == cont, "Can't compare iterators from different containers."); #else @@ -180,7 +180,7 @@ class contiguous_const_iterator_adapter { } private: -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) /// @private WJR_CONSTEXPR20 void __set_container(const Container *container) noexcept { m_container = container; @@ -235,7 +235,7 @@ class contiguous_const_iterator_adapter { #endif __pointer m_ptr; -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) const Container *m_container; #endif }; @@ -344,7 +344,7 @@ struct pointer_traits> using difference_type = typename pointer::difference_type; WJR_NODISCARD constexpr static element_type *to_address(const pointer &ptr) noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) const auto cont = ptr.m_container; if (cont) { WJR_ASSERT_L0(ptr.m_ptr >= ptr.__begin() && ptr.m_ptr <= ptr.__end(), @@ -365,7 +365,7 @@ struct pointer_traits> { using difference_type = typename pointer::difference_type; WJR_NODISCARD constexpr static element_type *to_address(const pointer &ptr) noexcept { -#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) const auto cont = ptr.m_container; if (cont) { WJR_ASSERT_L0(ptr.m_ptr >= ptr.__begin() && ptr.m_ptr <= ptr.__end(), diff --git a/include/wjr/math/convert.hpp b/include/wjr/math/convert.hpp index ee9ae2af..c97f796f 100644 --- a/include/wjr/math/convert.hpp +++ b/include/wjr/math/convert.hpp @@ -74,11 +74,14 @@ inline constexpr bool __is_fast_convert_iterator_v = __is_fast_convert_iterator::value; template -struct __is_valid_converter - : std::disjunction, - is_nonbool_integral>, - std::conjunction, - is_nonbool_unsigned_integral>> {}; +struct __is_valid_converter : std::false_type {}; + +template +struct __is_valid_converter : is_nonbool_integral {}; + +template +struct __is_valid_converter + : is_nonbool_unsigned_integral {}; template inline constexpr bool __is_valid_converter_v = @@ -2405,8 +2408,7 @@ class __unsigned_from_chars_unchecked_fn<10> { } }; -template )> +template void __fast_from_chars_unchecked_impl(const uint8_t *first, const uint8_t *last, Value &val, IBase ibase, Converter conv) noexcept { int sign = 0; @@ -2454,8 +2456,7 @@ void __fast_from_chars_unchecked_impl(const uint8_t *first, const uint8_t *last, } } -template )> +template void __from_chars_unchecked_impl(Iter first, Iter last, Value &val, IBase ibase, Converter conv) noexcept { const auto __first = reinterpret_cast(wjr::to_address(first)); @@ -2628,8 +2629,7 @@ struct __unsigned_from_chars_fn<10> { } }; -template )> +template from_chars_result __fast_from_chars_impl(const uint8_t *first, const uint8_t *last, Value &val, IBase ibase, Converter conv) noexcept { @@ -2698,8 +2698,7 @@ __fast_from_chars_impl(const uint8_t *first, const uint8_t *last, Value &val, IB return ret; } -template )> +template from_chars_result __from_chars_impl(const char *first, const char *last, Value &val, IBase ibase, Converter conv) noexcept { diff --git a/include/wjr/math/div.hpp b/include/wjr/math/div.hpp index 20a30ee8..7e9cafff 100644 --- a/include/wjr/math/div.hpp +++ b/include/wjr/math/div.hpp @@ -205,9 +205,8 @@ WJR_INTRINSIC_INLINE void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t dst[n - 1] = div_qr_1_impl(dst, rem, src, n, div); } -WJR_INTRINSIC_CONSTEXPR20 void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t *src, - size_t n, - type_identity_t div) noexcept { +WJR_INTRINSIC_INLINE void div_qr_1(uint64_t *dst, uint64_t &rem, const uint64_t *src, + size_t n, type_identity_t div) noexcept { WJR_ASSERT_ASSUME(n >= 1); WJR_ASSERT_ASSUME(div != 0); diff --git a/include/wjr/math/div1by1.hpp b/include/wjr/math/div1by1.hpp new file mode 100644 index 00000000..54784d7a --- /dev/null +++ b/include/wjr/math/div1by1.hpp @@ -0,0 +1,228 @@ +#ifndef WJR_MATH_DIV1BY1_HPP__ +#define WJR_MATH_DIV1BY1_HPP__ + +#include +#include +#include +#include + +namespace wjr { + +#pragma pack(push, 1) + +template +struct div1by1_uint_t_impl { + T magic; + uint8_t more; +}; + +template +struct div1by1_uint_branchfree_t_impl { + T magic; + uint8_t more; +}; + +template +using div1by1_uint_t = std::conditional_t, + div1by1_uint_t_impl>; + +#pragma pack(pop) + +enum { + DIV1BY1_16_SHIFT_MASK = 0x1F, + DIV1BY1_32_SHIFT_MASK = 0x1F, + DIV1BY1_64_SHIFT_MASK = 0x3F, + DIV1BY1_ADD_MARKER = 0x40, +}; + +template +WJR_CONST WJR_INTRINSIC_INLINE div1by1_uint_t +div1by1_internal_uint_gen(T d) noexcept; + +template +WJR_CONST WJR_INTRINSIC_INLINE div1by1_uint_t +div1by1_uint_gen(T d) noexcept; + +template +WJR_CONST WJR_INTRINSIC_INLINE T +div1by1_uint_do(T d, const div1by1_uint_t &denom) noexcept; + +template +WJR_INTRINSIC_INLINE T div1by1_div_half(T hi, T lo, T den, T &rem) noexcept { + constexpr auto nd = std::numeric_limits::digits; + using U = uint_t; + const U n = (static_cast(hi) << nd) | lo; + const auto result = static_cast(n / den); + rem = n - result * static_cast(den); + return result; +} + +template <> +WJR_INTRINSIC_INLINE uint64_t div1by1_div_half(uint64_t hi, uint64_t lo, + uint64_t den, + uint64_t &rem) noexcept { + return div128by64to64(rem, lo, hi, den); +} + +////////// UINT16 + +template +WJR_CONST WJR_INTRINSIC_INLINE div1by1_uint_t +div1by1_internal_uint_gen(T d) noexcept { + WJR_ASSERT(d != 0, "div1by1_divider must be != 0"); + + constexpr auto nd = std::numeric_limits::digits; + + div1by1_uint_t result; + uint8_t floor_log_2_d = static_cast(nd - 1 - clz(d)); + + // Power of 2 + if (is_zero_or_single_bit(d)) { + // We need to subtract 1 from the shift value in case of an unsigned + // branchfree div1by1_divider because there is a hardcoded right shift by 1 + // in its division algorithm. Because of this we also need to add back + // 1 in its recovery algorithm. + result.magic = 0; + result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); + } else { + uint8_t more; + T rem, proposed_m; + proposed_m = div1by1_div_half(static_cast(1) << floor_log_2_d, 0, d, rem); + + WJR_ASSERT(rem > 0 && rem < d); + const T e = d - rem; + + if (!branchfree && (e < (static_cast(1) << floor_log_2_d))) { + more = floor_log_2_d; + } else { + proposed_m += proposed_m; + const T twice_rem = rem + rem; + if (twice_rem >= d || twice_rem < rem) { + ++proposed_m; + } + + more = floor_log_2_d | DIV1BY1_ADD_MARKER; + } + + result.magic = 1 + proposed_m; + result.more = more; + } + + return result; +} + +template +struct div1by1_shift_mask { + static const uint8_t value = 0; +}; + +#define WJR_REGISTER_SHIFT_MASK(nd) \ + template <> \ + struct div1by1_shift_mask { \ + static const uint8_t value = DIV1BY1_##nd##_SHIFT_MASK; \ + } + +WJR_REGISTER_SHIFT_MASK(16); +WJR_REGISTER_SHIFT_MASK(32); +WJR_REGISTER_SHIFT_MASK(64); + +#undef WJR_REGISTER_SHIFT_MASK + +template +WJR_CONST WJR_INTRINSIC_INLINE div1by1_uint_t +div1by1_uint_gen(T d) noexcept { + if constexpr (!branchfree) { + return div1by1_internal_uint_gen(d); + } else { + WJR_ASSERT(d != 1, "div1by1_divider must be != 1"); + const auto tmp = div1by1_internal_uint_gen(d); + div1by1_uint_t ret = { + tmp.magic, (uint8_t)(tmp.more & div1by1_shift_mask::value)}; + return ret; + } +} + +template +WJR_CONST WJR_INTRINSIC_INLINE T +div1by1_uint_do_impl(T d, const div1by1_uint_t &denom) noexcept { + const T magic = denom.magic; + const uint8_t more = denom.more; + if (!magic) { + return d >> more; + } else { + T q = mulhi(magic, d); + if (more & DIV1BY1_ADD_MARKER) { + T t = ((d - q) >> 1) + q; + return t >> (more & div1by1_shift_mask::value); + } else { + return q >> more; + } + } +} + +template +WJR_CONST WJR_INTRINSIC_INLINE T +div1by1_uint_branchfree_do_impl(T d, const div1by1_uint_t &denom) noexcept { + const T q = mulhi(denom.magic, d); + const T t = ((d - q) >> 1) + q; + return t >> denom.more; +} + +template +WJR_CONST WJR_INTRINSIC_INLINE T +div1by1_uint_do(T d, const div1by1_uint_t &denom) noexcept { + if constexpr (!branchfree) { + return div1by1_uint_do_impl(d, denom); + } else { + return div1by1_uint_branchfree_do_impl(d, denom); + } +} + +template +class div1by1_divider { +private: + using dispatcher_t = div1by1_uint_t; + +public: + constexpr div1by1_divider() = default; + constexpr div1by1_divider(const div1by1_divider &) = default; + constexpr div1by1_divider(div1by1_divider &&) = default; + constexpr div1by1_divider &operator=(const div1by1_divider &) = default; + constexpr div1by1_divider &operator=(div1by1_divider &&) = default; + ~div1by1_divider() = default; + + WJR_INTRINSIC_INLINE div1by1_divider(T d) { + div = div1by1_uint_gen(d); + } + + WJR_INTRINSIC_INLINE T divide(T n) const { return div1by1_uint_do(n, div); } + + WJR_CONST bool operator==(const div1by1_divider &other) const { + return div.denom.magic == other.denom.magic && div.denom.more == other.denom.more; + } + + WJR_CONST bool operator!=(const div1by1_divider &other) const { + return !(*this == other); + } + +private: + dispatcher_t div; +}; + +template +WJR_INTRINSIC_INLINE T operator/(T n, const div1by1_divider &div) { + return div.divide(n); +} + +template +WJR_INTRINSIC_INLINE T &operator/=(T &n, const div1by1_divider &div) { + n = div.divide(n); + return n; +} + +template +using branchfree_divider = div1by1_divider; + +} // namespace wjr + +#endif // WJR_MATH_DIV1BY1_HPP__ \ No newline at end of file diff --git a/include/wjr/math/libdivide.hpp b/include/wjr/math/libdivide.hpp deleted file mode 100644 index cf777dcc..00000000 --- a/include/wjr/math/libdivide.hpp +++ /dev/null @@ -1,302 +0,0 @@ -// libdivide.h - Optimized integer division -// https://libdivide.com -// -// Copyright (C) 2010 - 2022 ridiculous_fish, -// Copyright (C) 2016 - 2022 Kim Walisch, -// -// libdivide is dual-licensed under the Boost or zlib licenses. -// You may use libdivide under the terms of either of these. -// See LICENSE.txt for more details. - -#ifndef WJR_LIBDIVIDE_HPP__ -#define WJR_LIBDIVIDE_HPP__ - -#include -#include -#include -#include - -namespace wjr { - -// pack div1by1_divider structs to prevent compilers from padding. -// This reduces memory usage by up to 43% when using a large -// array of libdivide dividers and improves performance -// by up to 10% because of reduced memory bandwidth. -#pragma pack(push, 1) - -template -struct libdivide_uint_t_impl { - T magic; - uint8_t more; -}; - -template -struct libdivide_uint_branchfree_t_impl { - T magic; - uint8_t more; -}; - -template -using libdivide_uint_t = - std::conditional_t, - libdivide_uint_t_impl>; - -#pragma pack(pop) - -// Explanation of the "more" field: -// -// * Bits 0-5 is the shift value (for shift path or mult path). -// * Bit 6 is the add indicator for mult path. -// * Bit 7 is set if the divisor is negative. We use bit 7 as the negative -// divisor indicator so that we can efficiently use sign extension to -// create a bitmask with all bits set to 1 (if the divisor is negative) -// or 0 (if the divisor is positive). -// -// u32: [0-4] shift value -// [5] ignored -// [6] add indicator -// magic number of 0 indicates shift path -// -// s32: [0-4] shift value -// [5] ignored -// [6] add indicator -// [7] indicates negative divisor -// magic number of 0 indicates shift path -// -// u64: [0-5] shift value -// [6] add indicator -// magic number of 0 indicates shift path -// -// s64: [0-5] shift value -// [6] add indicator -// [7] indicates negative divisor -// magic number of 0 indicates shift path -// -// In s32 and s64 branchfree modes, the magic number is negated according to -// whether the divisor is negated. In branchfree strategy, it is not negated. - -enum { - LIBDIVIDE_16_SHIFT_MASK = 0x1F, - LIBDIVIDE_32_SHIFT_MASK = 0x1F, - LIBDIVIDE_64_SHIFT_MASK = 0x3F, - LIBDIVIDE_ADD_MARKER = 0x40, - LIBDIVIDE_NEGATIVE_DIVISOR = 0x80 -}; - -template -WJR_CONST WJR_INTRINSIC_INLINE libdivide_uint_t -libdivide_internal_uint_gen(T d) noexcept; - -template -WJR_CONST WJR_INTRINSIC_INLINE libdivide_uint_t -libdivide_uint_gen(T d) noexcept; - -template -WJR_CONST WJR_INTRINSIC_INLINE T -libdivide_uint_do(T d, const libdivide_uint_t &denom) noexcept; - -//////// Internal Utility Functions - -template -WJR_INTRINSIC_INLINE T libdivide_div_half(T hi, T lo, T den, T &rem) noexcept { - constexpr auto nd = std::numeric_limits::digits; - using U = uint_t; - const U n = (static_cast(hi) << nd) | lo; - const auto result = static_cast(n / den); - rem = n - result * static_cast(den); - return result; -} - -template <> -WJR_INTRINSIC_INLINE uint64_t libdivide_div_half(uint64_t hi, uint64_t lo, - uint64_t den, - uint64_t &rem) noexcept { - return div128by64to64(rem, lo, hi, den); -} - -////////// UINT16 - -template -WJR_CONST WJR_INTRINSIC_INLINE libdivide_uint_t -libdivide_internal_uint_gen(T d) noexcept { - constexpr auto nd = std::numeric_limits::digits; - - WJR_ASSERT(d != 0, "div1by1_divider must be != 0"); - - libdivide_uint_t result; - uint8_t floor_log_2_d = static_cast(nd - 1 - clz(d)); - - // Power of 2 - if (is_zero_or_single_bit(d)) { - // We need to subtract 1 from the shift value in case of an unsigned - // branchfree div1by1_divider because there is a hardcoded right shift by 1 - // in its division algorithm. Because of this we also need to add back - // 1 in its recovery algorithm. - result.magic = 0; - result.more = (uint8_t)(floor_log_2_d - (branchfree != 0)); - } else { - uint8_t more; - T rem, proposed_m; - proposed_m = libdivide_div_half(static_cast(1) << floor_log_2_d, 0, d, rem); - - WJR_ASSERT(rem > 0 && rem < d); - const T e = d - rem; - - // This power works if e < 2**floor_log_2_d. - if (!branchfree && (e < (static_cast(1) << floor_log_2_d))) { - // This power works - more = floor_log_2_d; - } else { - // We have to use the general 17-bit algorithm. We need to compute - // (2**power) / d. However, we already have (2**(power-1))/d and - // its remainder. By doubling both, and then correcting the - // remainder, we can compute the larger division. - // don't care about overflow here - in fact, we expect it - proposed_m += proposed_m; - const T twice_rem = rem + rem; - if (twice_rem >= d || twice_rem < rem) - proposed_m += 1; - more = floor_log_2_d | LIBDIVIDE_ADD_MARKER; - } - result.magic = 1 + proposed_m; - result.more = more; - // result.more's shift should in general be ceil_log_2_d. But if we - // used the smaller power, we subtract one from the shift because we're - // using the smaller power. If we're using the larger power, we - // subtract one from the shift because it's taken care of by the add - // indicator. So floor_log_2_d happens to be correct in both cases. - } - - return result; -} - -template -struct libdivide_shift_mask { - static const uint8_t value = 0; -}; - -#define WJR_REGISTER_SHIFT_MASK(nd) \ - template <> \ - struct libdivide_shift_mask { \ - static const uint8_t value = LIBDIVIDE_##nd##_SHIFT_MASK; \ - } - -WJR_REGISTER_SHIFT_MASK(16); -WJR_REGISTER_SHIFT_MASK(32); -WJR_REGISTER_SHIFT_MASK(64); - -#undef WJR_REGISTER_SHIFT_MASK - -template -WJR_CONST WJR_INTRINSIC_INLINE libdivide_uint_t -libdivide_uint_gen(T d) noexcept { - if constexpr (!branchfree) { - return libdivide_internal_uint_gen(d); - } else { - WJR_ASSERT(d != 1, "div1by1_divider must be != 1"); - const auto tmp = libdivide_internal_uint_gen(d); - libdivide_uint_t ret = { - tmp.magic, (uint8_t)(tmp.more & libdivide_shift_mask::value)}; - return ret; - } -} - -template -WJR_CONST WJR_INTRINSIC_INLINE T -libdivide_uint_do_impl(T d, const libdivide_uint_t &denom) noexcept { - const T magic = denom.magic; - const uint8_t more = denom.more; - if (!magic) { - return d >> more; - } else { - T q = mulhi(magic, d); - if (more & LIBDIVIDE_ADD_MARKER) { - T t = ((d - q) >> 1) + q; - return t >> (more & libdivide_shift_mask::value); - } else { - // All upper bits are 0, - // don't need to mask them off. - return q >> more; - } - } -} - -template -WJR_CONST WJR_INTRINSIC_INLINE T -libdivide_uint_branchfree_do_impl(T d, const libdivide_uint_t &denom) noexcept { - const T q = mulhi(denom.magic, d); - const T t = ((d - q) >> 1) + q; - return t >> denom.more; -} - -template -WJR_CONST WJR_INTRINSIC_INLINE T -libdivide_uint_do(T d, const libdivide_uint_t &denom) noexcept { - if constexpr (!branchfree) { - return libdivide_uint_do_impl(d, denom); - } else { - return libdivide_uint_branchfree_do_impl(d, denom); - } -} - -/////////// C++ stuff - -// This is the main div1by1_divider class for use by the user (C++ API). -// The actual division algorithm is selected using the dispatcher struct -// based on the integer width and algorithm template parameters. -template -class div1by1_divider { -private: - using dispatcher_t = libdivide_uint_t; - -public: - // We leave the default constructor empty so that creating - // an array of dividers and then initializing them - // later doesn't slow us down. - constexpr div1by1_divider() = default; - constexpr div1by1_divider(const div1by1_divider &) = default; - constexpr div1by1_divider(div1by1_divider &&) = default; - constexpr div1by1_divider &operator=(const div1by1_divider &) = default; - constexpr div1by1_divider &operator=(div1by1_divider &&) = default; - ~div1by1_divider() = default; - - // Constructor that takes the divisor as a parameter - WJR_INTRINSIC_INLINE div1by1_divider(T d) { - div = libdivide_uint_gen(d); - } - - // Divides n by the divisor - WJR_INTRINSIC_INLINE T divide(T n) const { return libdivide_uint_do(n, div); } - - WJR_CONST bool operator==(const div1by1_divider &other) const { - return div.denom.magic == other.denom.magic && div.denom.more == other.denom.more; - } - - WJR_CONST bool operator!=(const div1by1_divider &other) const { - return !(*this == other); - } - -private: - // Storage for the actual divisor - dispatcher_t div; -}; - -// Overload of operator / for scalar division -template -WJR_INTRINSIC_INLINE T operator/(T n, const div1by1_divider &div) { - return div.divide(n); -} - -// Overload of operator /= for scalar division -template -WJR_INTRINSIC_INLINE T &operator/=(T &n, const div1by1_divider &div) { - n = div.divide(n); - return n; -} - -template -using branchfree_divider = div1by1_divider; - -} // namespace wjr - -#endif // WJR_LIBDIVIDE_HPP__ diff --git a/include/wjr/memory/details.hpp b/include/wjr/memory/details.hpp index 4889007b..4f154b70 100644 --- a/include/wjr/memory/details.hpp +++ b/include/wjr/memory/details.hpp @@ -49,13 +49,13 @@ constexpr auto to_address(const std::move_iterator &p) noexcept { */ template constexpr decltype(auto) to_contiguous_address(T &&t) noexcept { -#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) if constexpr (is_contiguous_iterator_v>) { return wjr::to_address(std::forward(t)); } else { #endif return std::forward(t); -#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECK) +#if !WJR_HAS_DEBUG(CONTIGUOUS_ITERATOR_CHECKER) } #endif } diff --git a/src/libdivide_LICENSE.txt b/src/libdivide_LICENSE.txt deleted file mode 100644 index 56f6590c..00000000 --- a/src/libdivide_LICENSE.txt +++ /dev/null @@ -1,55 +0,0 @@ - libdivide is made available under two licenses. You may choose either - of the following licenses when using libdivide. - - zlib License - ------------ - - Copyright (C) 2010 - 2019 ridiculous_fish, - Copyright (C) 2016 - 2019 Kim Walisch, - - This software is provided 'as-is', without any express or implied - warranty. In no event will the authors be held liable for any damages - arising from the use of this software. - - Permission is granted to anyone to use this software for any purpose, - including commercial applications, and to alter it and redistribute it - freely, subject to the following restrictions: - - 1. The origin of this software must not be misrepresented; you must not - claim that you wrote the original software. If you use this software - in a product, an acknowledgment in the product documentation would be - appreciated but is not required. - 2. Altered source versions must be plainly marked as such, and must not be - misrepresented as being the original software. - 3. This notice may not be removed or altered from any source distribution. - - - Boost License - ------------- - - Copyright (C) 2010 - 2019 ridiculous_fish, - Copyright (C) 2016 - 2019 Kim Walisch, - - Boost Software License - Version 1.0 - August 17th, 2003 - - Permission is hereby granted, free of charge, to any person or organization - obtaining a copy of the software and accompanying documentation covered by - this license (the "Software") to use, reproduce, display, distribute, - execute, and transmit the Software, and to prepare derivative works of the - Software, and to permit third-parties to whom the Software is furnished to - do so, all subject to the following: - - The copyright notices in the Software and this entire statement, including - the above license grant, this restriction and the following disclaimer, - must be included in all copies of the Software, in whole or in part, and - all derivative works of the Software, unless such copies or derivative - works are solely in the form of machine-executable object code generated by - a source language processor. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT - SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE - FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, - ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. diff --git a/src/wjr/x86/math/gen_addrsblsh_n.hpp b/src/wjr/x86/math/gen_addrsblsh_n.hpp index 8287083c..8c139df8 100644 --- a/src/wjr/x86/math/gen_addrsblsh_n.hpp +++ b/src/wjr/x86/math/gen_addrsblsh_n.hpp @@ -24,8 +24,6 @@ uint64_t WJR_PP_CONCAT(__wjr_asm_, const uint64_t *src0, const uint64_t *src1, size_t n, uint64_t cl) noexcept { - WJR_ASSERT(cl != 0); - size_t rcx = n / 8; uint64_t tcl = 64 - cl; uint64_t r8, r9 = n, r10, r11; @@ -256,8 +254,6 @@ uint64_t WJR_PP_CONCAT(__wjr_asm_, : [cl] "r"(cl), [tcl] "r"(tcl) : "cc", "memory"); - WJR_ASSERT_ASSUME(rcx == 0); - return r11; }