From f4cff75f42a7b0c2bb192b1566e993883c6c710c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Bylica?= Date: Tue, 15 Mar 2022 12:15:48 +0100 Subject: [PATCH] Optimize less-than The new implementation is similar to what was there: we select which half to compare and do a 128-bit less-than. For small values this is equivalent to doing a subtraction + borrow check; for big values, though, we get a 25% boost. --- include/intx/intx.hpp | 13 ++++++++----- test/benchmarks/benchmarks.cpp | 6 ++++++ 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp index 16c702e0..236569fa 100644 --- a/include/intx/intx.hpp +++ b/include/intx/intx.hpp @@ -1087,11 +1087,14 @@ inline constexpr bool operator!=(const T& x, const uint& y) noexcept #if !defined(_MSC_VER) || _MSC_VER < 1916 // This kills MSVC 2017 compiler. inline constexpr bool operator<(const uint256& x, const uint256& y) noexcept { - const auto xhi = uint128{x[2], x[3]}; - const auto xlo = uint128{x[0], x[1]}; - const auto yhi = uint128{y[2], y[3]}; - const auto ylo = uint128{y[0], y[1]}; - return (unsigned(xhi < yhi) | (unsigned(xhi == yhi) & unsigned(xlo < ylo))) != 0; + auto xp = intx::uint128{x[2], x[3]}; + auto yp = intx::uint128{y[2], y[3]}; + if (xp == yp) + { + xp = intx::uint128{x[0], x[1]}; + yp = intx::uint128{y[0], y[1]}; + } + return xp < yp; } #endif diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp index 69a8a32d..72cbdd6e 100644 --- a/test/benchmarks/benchmarks.cpp +++ b/test/benchmarks/benchmarks.cpp @@ -351,6 +351,11 @@ BENCHMARK_TEMPLATE(shift, uint512, uint64_t, shl_public)->DenseRange(-1, 3); return x < y; } +[[gnu::noinline]] static bool lt_sub(const uint256& x, const uint256& y) noexcept +{ + return subc(x, y).carry; +} + [[gnu::noinline]] static bool lt_wordcmp(const uint256& x, const uint256& y) noexcept { for (size_t i = 3; i >= 1; --i) @@ -418,6 +423,7 @@ static void compare(benchmark::State& state) } } 
BENCHMARK_TEMPLATE(compare, lt_public)->DenseRange(64, 256, 64); +BENCHMARK_TEMPLATE(compare, lt_sub)->DenseRange(64, 256, 64); BENCHMARK_TEMPLATE(compare, lt_wordcmp)->DenseRange(64, 256, 64); BENCHMARK_TEMPLATE(compare, lt_halves)->DenseRange(64, 256, 64); #if INTX_HAS_EXTINT