diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 6b9ad557..677eb77c 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -688,8 +688,228 @@ inline typename std::enable_if::type count return h != 0 ? h + (num_words / 2) : l; } + +namespace internal +{ +template +struct normalized_div_args +{ + uint divisor; + uint numerator; + typename uint::word_type numerator_ex; + int num_divisor_words; + int num_numerator_words; + unsigned shift; +}; + +template +[[gnu::always_inline]] inline normalized_div_args normalize( + const IntT& numerator, const IntT& denominator) noexcept +{ + // FIXME: Make the implementation type independent + static constexpr auto num_words = IntT::num_words; + + auto* u = as_words(numerator); + auto* v = as_words(denominator); + + normalized_div_args na; + auto* un = as_words(na.numerator); + auto* vn = as_words(na.divisor); + + auto& m = na.num_numerator_words; + for (m = num_words; m > 0 && u[m - 1] == 0; --m) + ; + + auto& n = na.num_divisor_words; + for (n = num_words; n > 0 && v[n - 1] == 0; --n) + ; + + na.shift = clz(v[n - 1]); + if (na.shift) + { + for (int i = num_words - 1; i > 0; --i) + vn[i] = (v[i] << na.shift) | (v[i - 1] >> (64 - na.shift)); + vn[0] = v[0] << na.shift; + + un[num_words] = u[num_words - 1] >> (64 - na.shift); + for (int i = num_words - 1; i > 0; --i) + un[i] = (u[i] << na.shift) | (u[i - 1] >> (64 - na.shift)); + un[0] = u[0] << na.shift; + } + else + { + na.numerator_ex = 0; + na.numerator = numerator; + na.divisor = denominator; + } + + // Skip the highest word of numerator if not significant. + if (un[m] != 0 || un[m - 1] >= vn[n - 1]) + ++m; + + return na; +} + +/// Divides arbitrary long unsigned integer by 64-bit unsigned integer (1 word). +/// @param u The array of a normalized numerator words. It will contain +/// the quotient after execution. +/// @param len The number of numerator words. +/// @param d The normalized divisor. +/// @return The remainder. +inline uint64_t udivrem_by1(uint64_t u[], int len, uint64_t d) noexcept +{ + INTX_REQUIRE(len >= 2); + + const auto reciprocal = reciprocal_2by1(d); + + auto rem = u[len - 1]; // Set the top word as remainder. + u[len - 1] = 0; // Reset the word being a part of the result quotient. + + auto it = &u[len - 2]; + do + { + std::tie(*it, rem) = udivrem_2by1({rem, *it}, d, reciprocal); + } while (it-- != &u[0]); + + return rem; +} + +/// Divides arbitrary long unsigned integer by 128-bit unsigned integer (2 words). +/// @param u The array of a normalized numerator words. It will contain the +/// quotient after execution. +/// @param len The number of numerator words. +/// @param d The normalized divisor. +/// @return The remainder. +inline uint128 udivrem_by2(uint64_t u[], int len, uint128 d) noexcept +{ + INTX_REQUIRE(len >= 3); + + const auto reciprocal = reciprocal_3by2(d); + + auto rem = uint128{u[len - 1], u[len - 2]}; // Set the 2 top words as remainder. + u[len - 1] = u[len - 2] = 0; // Reset these words being a part of the result quotient. + + auto it = &u[len - 3]; + do + { + std::tie(*it, rem) = udivrem_3by2(rem.hi, rem.lo, *it, d, reciprocal); + } while (it-- != &u[0]); + + return rem; +} + +/// s = x + y. +inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) noexcept +{ + // OPT: Add MinLen template parameter and unroll first loop iterations. + INTX_REQUIRE(len >= 2); + + bool carry = false; + for (int i = 0; i < len; ++i) + std::tie(s[i], carry) = add_with_carry(x[i], y[i], carry); + return carry; +} + +/// r = x - multiplier * y. +inline uint64_t submul( + uint64_t r[], const uint64_t x[], const uint64_t y[], int len, uint64_t multiplier) noexcept +{ + // OPT: Add MinLen template parameter and unroll first loop iterations. + INTX_REQUIRE(len >= 1); + + uint64_t borrow = 0; + for (int i = 0; i < len; ++i) + { + const auto s = sub_with_carry(x[i], borrow); + const auto p = umul(y[i], multiplier); + const auto t = sub_with_carry(s.value, p.lo); + r[i] = t.value; + borrow = p.hi + s.carry + t.carry; + } + return borrow; +} + +inline void udivrem_knuth( + uint64_t q[], uint64_t u[], int ulen, const uint64_t d[], int dlen) noexcept +{ + INTX_REQUIRE(dlen >= 3); + INTX_REQUIRE(ulen >= dlen); + + const auto divisor = uint128{d[dlen - 1], d[dlen - 2]}; + const auto reciprocal = reciprocal_3by2(divisor); + for (int j = ulen - dlen - 1; j >= 0; --j) + { + const auto u2 = u[j + dlen]; + const auto u1 = u[j + dlen - 1]; + const auto u0 = u[j + dlen - 2]; + + uint64_t qhat; + if (INTX_UNLIKELY(uint128(u2, u1) == divisor)) // Division overflows. + { + qhat = ~uint64_t{0}; + + u[j + dlen] = u2 - submul(&u[j], &u[j], d, dlen, qhat); + } + else + { + uint128 rhat; + std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal); + + bool carry; + const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat); + std::tie(u[j + dlen - 2], carry) = sub_with_carry(rhat.lo, overflow); + std::tie(u[j + dlen - 1], carry) = sub_with_carry(rhat.hi, carry); + + if (INTX_UNLIKELY(carry)) + { + --qhat; + u[j + dlen - 1] += divisor.hi + add(&u[j], &u[j], d, dlen - 1); + } + } + + q[j] = qhat; // Store quotient digit. + } +} + +} // namespace internal + template -div_result> udivrem(const uint& u, const uint& v) noexcept; +div_result> udivrem(const uint& u, const uint& v) noexcept +{ + auto na = internal::normalize(u, v); + + if (na.num_numerator_words <= na.num_divisor_words) + return {0, u}; + + if (na.num_divisor_words == 1) + { + const auto r = internal::udivrem_by1( + as_words(na.numerator), na.num_numerator_words, as_words(na.divisor)[0]); + return {na.numerator, r >> na.shift}; + } + + if (na.num_divisor_words == 2) + { + const auto d = as_words(na.divisor); + const auto r = + internal::udivrem_by2(as_words(na.numerator), na.num_numerator_words, {d[1], d[0]}); + return {na.numerator, r >> na.shift}; + } + + auto un = as_words(na.numerator); // Will be modified. + + uint q; + internal::udivrem_knuth( + as_words(q), &un[0], na.num_numerator_words, as_words(na.divisor), na.num_divisor_words); + + uint r; + auto rw = as_words(r); + for (int i = 0; i < na.num_divisor_words - 1; ++i) + rw[i] = na.shift ? (un[i] >> na.shift) | (un[i + 1] << (64 - na.shift)) : un[i]; + rw[na.num_divisor_words - 1] = un[na.num_divisor_words - 1] >> na.shift; + + return {q, r}; +} template constexpr div_result> sdivrem(const uint& u, const uint& v) noexcept diff --git a/lib/intx/CMakeLists.txt b/lib/intx/CMakeLists.txt index 72f6ae9b..c75749d7 100644 --- a/lib/intx/CMakeLists.txt +++ b/lib/intx/CMakeLists.txt @@ -1,5 +1,5 @@ # intx: extended precision integer library. -# Copyright 2019 Pawel Bylica. +# Copyright 2019-2020 Pawel Bylica. # Licensed under the Apache License, Version 2.0. set(include_dir ${PROJECT_SOURCE_DIR}/include) @@ -13,8 +13,7 @@ target_include_directories(int128 INTERFACE $$ - -namespace intx -{ -namespace internal -{ -/// Divides arbitrary long unsigned integer by 64-bit unsigned integer (1 word). -/// @param u The array of a normalized numerator words. It will contain -/// the quotient after execution. -/// @param len The number of numerator words. -/// @param d The normalized divisor. -/// @return The remainder. -inline uint64_t udivrem_by1(uint64_t u[], int len, uint64_t d) noexcept -{ - INTX_REQUIRE(len >= 2); - - const auto reciprocal = reciprocal_2by1(d); - - auto rem = u[len - 1]; // Set the top word as remainder. - u[len - 1] = 0; // Reset the word being a part of the result quotient. - - auto it = &u[len - 2]; - do - { - std::tie(*it, rem) = udivrem_2by1({rem, *it}, d, reciprocal); - } while (it-- != &u[0]); - - return rem; -} - -/// Divides arbitrary long unsigned integer by 128-bit unsigned integer (2 words). -/// @param u The array of a normalized numerator words. It will contain the -/// quotient after execution. -/// @param len The number of numerator words. -/// @param d The normalized divisor. -/// @return The remainder. -inline uint128 udivrem_by2(uint64_t u[], int len, uint128 d) noexcept -{ - INTX_REQUIRE(len >= 3); - - const auto reciprocal = reciprocal_3by2(d); - - auto rem = uint128{u[len - 1], u[len - 2]}; // Set the 2 top words as remainder. - u[len - 1] = u[len - 2] = 0; // Reset these words being a part of the result quotient. - - auto it = &u[len - 3]; - do - { - std::tie(*it, rem) = udivrem_3by2(rem.hi, rem.lo, *it, d, reciprocal); - } while (it-- != &u[0]); - - return rem; -} - -/// s = x + y. -inline bool add(uint64_t s[], const uint64_t x[], const uint64_t y[], int len) noexcept -{ - // OPT: Add MinLen template parameter and unroll first loop iterations. - INTX_REQUIRE(len >= 2); - - bool carry = false; - for (int i = 0; i < len; ++i) - std::tie(s[i], carry) = add_with_carry(x[i], y[i], carry); - return carry; -} - -/// r = x - multiplier * y. -inline uint64_t submul( - uint64_t r[], const uint64_t x[], const uint64_t y[], int len, uint64_t multiplier) noexcept -{ - // OPT: Add MinLen template parameter and unroll first loop iterations. - INTX_REQUIRE(len >= 1); - - uint64_t borrow = 0; - for (int i = 0; i < len; ++i) - { - const auto s = sub_with_carry(x[i], borrow); - const auto p = umul(y[i], multiplier); - const auto t = sub_with_carry(s.value, p.lo); - r[i] = t.value; - borrow = p.hi + s.carry + t.carry; - } - return borrow; -} - -inline void udivrem_knuth( - uint64_t q[], uint64_t u[], int ulen, const uint64_t d[], int dlen) noexcept -{ - INTX_REQUIRE(dlen >= 3); - INTX_REQUIRE(ulen >= dlen); - - const auto divisor = uint128{d[dlen - 1], d[dlen - 2]}; - const auto reciprocal = reciprocal_3by2(divisor); - for (int j = ulen - dlen - 1; j >= 0; --j) - { - const auto u2 = u[j + dlen]; - const auto u1 = u[j + dlen - 1]; - const auto u0 = u[j + dlen - 2]; - - uint64_t qhat; - if (INTX_UNLIKELY(uint128(u2, u1) == divisor)) // Division overflows. - { - qhat = ~uint64_t{0}; - - u[j + dlen] = u2 - submul(&u[j], &u[j], d, dlen, qhat); - } - else - { - uint128 rhat; - std::tie(qhat, rhat) = udivrem_3by2(u2, u1, u0, divisor, reciprocal); - - bool carry; - const auto overflow = submul(&u[j], &u[j], d, dlen - 2, qhat); - std::tie(u[j + dlen - 2], carry) = sub_with_carry(rhat.lo, overflow); - std::tie(u[j + dlen - 1], carry) = sub_with_carry(rhat.hi, carry); - - if (INTX_UNLIKELY(carry)) - { - --qhat; - u[j + dlen - 1] += divisor.hi + add(&u[j], &u[j], d, dlen - 1); - } - } - - q[j] = qhat; // Store quotient digit. - } -} - -} // namespace internal - -template -div_result> udivrem(const uint& u, const uint& v) noexcept -{ - auto na = internal::normalize(u, v); - - if (na.num_numerator_words <= na.num_divisor_words) - return {0, u}; - - if (na.num_divisor_words == 1) - { - const auto r = internal::udivrem_by1( - as_words(na.numerator), na.num_numerator_words, as_words(na.divisor)[0]); - return {na.numerator, r >> na.shift}; - } - - if (na.num_divisor_words == 2) - { - const auto d = as_words(na.divisor); - const auto r = - internal::udivrem_by2(as_words(na.numerator), na.num_numerator_words, {d[1], d[0]}); - return {na.numerator, r >> na.shift}; - } - - auto un = as_words(na.numerator); // Will be modified. - - uint q; - internal::udivrem_knuth( - as_words(q), &un[0], na.num_numerator_words, as_words(na.divisor), na.num_divisor_words); - - uint r; - auto rw = as_words(r); - for (int i = 0; i < na.num_divisor_words - 1; ++i) - rw[i] = na.shift ? (un[i] >> na.shift) | (un[i + 1] << (64 - na.shift)) : un[i]; - rw[na.num_divisor_words - 1] = un[na.num_divisor_words - 1] >> na.shift; - - return {q, r}; -} - -template div_result> udivrem(const uint<256>& u, const uint<256>& v) noexcept; -template div_result> udivrem(const uint<512>& u, const uint<512>& v) noexcept; -template div_result> udivrem(const uint<1024>& u, const uint<1024>& v) noexcept; -template div_result> udivrem(const uint<2048>& u, const uint<2048>& v) noexcept; -template div_result> udivrem(const uint<4096>& u, const uint<4096>& v) noexcept; - -} // namespace intx diff --git a/lib/intx/div.hpp b/lib/intx/div.hpp deleted file mode 100644 index b82e19e6..00000000 --- a/lib/intx/div.hpp +++ /dev/null @@ -1,73 +0,0 @@ -// intx: extended precision integer library. -// Copyright 2019-2020 Pawel Bylica. -// Licensed under the Apache License, Version 2.0. - -#pragma once - -#include - -namespace intx -{ -namespace internal -{ -template -struct normalized_div_args -{ - uint divisor; - uint numerator; - typename uint::word_type numerator_ex; - int num_divisor_words; - int num_numerator_words; - unsigned shift; -}; - -template -[[gnu::always_inline]] inline normalized_div_args normalize( - const IntT& numerator, const IntT& denominator) noexcept -{ - // FIXME: Make the implementation type independent - static constexpr auto num_words = IntT::num_words; - - auto* u = as_words(numerator); - auto* v = as_words(denominator); - - normalized_div_args na; - auto* un = as_words(na.numerator); - auto* vn = as_words(na.divisor); - - auto& m = na.num_numerator_words; - for (m = num_words; m > 0 && u[m - 1] == 0; --m) - ; - - auto& n = na.num_divisor_words; - for (n = num_words; n > 0 && v[n - 1] == 0; --n) - ; - - na.shift = clz(v[n - 1]); - if (na.shift) - { - for (int i = num_words - 1; i > 0; --i) - vn[i] = (v[i] << na.shift) | (v[i - 1] >> (64 - na.shift)); - vn[0] = v[0] << na.shift; - - un[num_words] = u[num_words - 1] >> (64 - na.shift); - for (int i = num_words - 1; i > 0; --i) - un[i] = (u[i] << na.shift) | (u[i - 1] >> (64 - na.shift)); - un[0] = u[0] << na.shift; - } - else - { - na.numerator_ex = 0; - na.numerator = numerator; - na.divisor = denominator; - } - - // Skip the highest word of numerator if not significant. - if (un[m] != 0 || un[m - 1] >= vn[n - 1]) - ++m; - - return na; -} - -} // namespace internal -} // namespace intx diff --git a/lib/intx/impl.cpp b/lib/intx/impl.cpp new file mode 100644 index 00000000..81e13b8c --- /dev/null +++ b/lib/intx/impl.cpp @@ -0,0 +1 @@ +#include diff --git a/test/benchmarks/bench_div.cpp b/test/benchmarks/bench_div.cpp index e8715906..93079238 100644 --- a/test/benchmarks/bench_div.cpp +++ b/test/benchmarks/bench_div.cpp @@ -3,7 +3,7 @@ // Licensed under the Apache License, Version 2.0. #include -#include +#include #include uint64_t udiv_native(uint64_t x, uint64_t y) noexcept; diff --git a/test/unittests/test_div.cpp b/test/unittests/test_div.cpp index c3d1337e..54800dd2 100644 --- a/test/unittests/test_div.cpp +++ b/test/unittests/test_div.cpp @@ -2,7 +2,6 @@ // Copyright 2019-2020 Pawel Bylica. // Licensed under the Apache License, Version 2.0. -#include #include #include