Skip to content

Commit

Permalink
Add fast_mod_240()
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Jul 8, 2024
1 parent 2f51cf0 commit 84bdedc
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 14 deletions.
17 changes: 11 additions & 6 deletions include/PiTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#define PITABLE_HPP

#include <BitSieve240.hpp>
#include <fast_div.hpp>
#include <popcnt.hpp>
#include <macros.hpp>
#include <Vector.hpp>
Expand Down Expand Up @@ -49,9 +50,11 @@ class PiTable : public BitSieve240
if_unlikely(x < pi_tiny_.size())
return pi_tiny_[x];

uint64_t count = pi_[x / 240].count;
uint64_t bits = pi_[x / 240].bits;
uint64_t bitmask = unset_larger_[x % 240];
uint64_t x_div_240 = x / 240;
uint64_t x_mod_240 = fast_mod_240(x, x_div_240);
uint64_t count = pi_[x_div_240].count;
uint64_t bits = pi_[x_div_240].bits;
uint64_t bitmask = unset_larger_[x_mod_240];
return count + popcnt64(bits & bitmask);
}

Expand All @@ -61,9 +64,11 @@ class PiTable : public BitSieve240
if_unlikely(x < pi_tiny_.size())
return pi_tiny_[x];

uint64_t count = pi_cache_[x / 240].count;
uint64_t bits = pi_cache_[x / 240].bits;
uint64_t bitmask = unset_larger_[x % 240];
uint64_t x_div_240 = x / 240;
uint64_t x_mod_240 = fast_mod_240(x, x_div_240);
uint64_t count = pi_cache_[x_div_240].count;
uint64_t bits = pi_cache_[x_div_240].bits;
uint64_t bitmask = unset_larger_[x_mod_240];
return count + popcnt64(bits & bitmask);
}

Expand Down
11 changes: 7 additions & 4 deletions include/SegmentedPiTable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
/// the SegmentedPiTable are described in more detail in:
/// https://github.com/kimwalisch/primecount/blob/master/doc/Easy-Special-Leaves.md
///
/// Copyright (C) 2023 Kim Walisch, <kim.walisch@gmail.com>
/// Copyright (C) 2024 Kim Walisch, <kim.walisch@gmail.com>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand All @@ -28,6 +28,7 @@
#define SEGMENTEDPITABLE_HPP

#include <BitSieve240.hpp>
#include <fast_div.hpp>
#include <macros.hpp>
#include <Vector.hpp>
#include <popcnt.hpp>
Expand Down Expand Up @@ -78,9 +79,11 @@ class SegmentedPiTable : public BitSieve240
return pi_tiny_[x];

x -= low_;
uint64_t count = pi_[x / 240].count;
uint64_t bits = pi_[x / 240].bits;
uint64_t bitmask = unset_larger_[x % 240];
uint64_t x_div_240 = x / 240;
uint64_t x_mod_240 = fast_mod_240(x, x_div_240);
uint64_t count = pi_[x_div_240].count;
uint64_t bits = pi_[x_div_240].bits;
uint64_t bitmask = unset_larger_[x_mod_240];
return count + popcnt64(bits & bitmask);
}

Expand Down
13 changes: 9 additions & 4 deletions include/Sieve.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#ifndef SIEVE_HPP
#define SIEVE_HPP

#include <fast_div.hpp>
#include <macros.hpp>
#include <popcnt.hpp>
#include <Vector.hpp>
Expand Down Expand Up @@ -157,8 +158,10 @@ class Sieve
ASSERT(stop - start < segment_size());
uint64_t start_idx = start / 240;
uint64_t stop_idx = stop / 240;
uint64_t m1 = unset_smaller[start % 240];
uint64_t m2 = unset_larger[stop % 240];
uint64_t start_mod_240 = fast_mod_240(start, start_idx);
uint64_t stop_mod_240 = fast_mod_240(stop, stop_idx);
uint64_t m1 = unset_smaller[start_mod_240];
uint64_t m2 = unset_larger[stop_mod_240];
uint64_t* sieve64 = (uint64_t*) sieve_.data();

if (start_idx == stop_idx)
Expand Down Expand Up @@ -194,8 +197,10 @@ class Sieve
ASSERT(stop - start < segment_size());
uint64_t start_idx = start / 240;
uint64_t stop_idx = stop / 240;
uint64_t m1 = unset_smaller[start % 240];
uint64_t m2 = unset_larger[stop % 240];
uint64_t start_mod_240 = fast_mod_240(start, start_idx);
uint64_t stop_mod_240 = fast_mod_240(stop, stop_idx);
uint64_t m1 = unset_smaller[start_mod_240];
uint64_t m2 = unset_larger[stop_mod_240];
uint64_t* sieve64 = (uint64_t*) sieve_.data();

if (start_idx == stop_idx)
Expand Down
23 changes: 23 additions & 0 deletions include/fast_div.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,29 @@

namespace {

/// Calculates x % 240.
/// GCC's default algorithm for calculating integer modulo by a
/// constant is not optimal (for GCC <= 14) on the x64 CPU
/// architecture. GCC calculates q * 240 using multiple shift
/// instructions instead of a multiplication. Hence we use inline
/// assembly to force GCC to calculate q * 240 using a
/// multiplication instruction.
/// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=115749
///
ALWAYS_INLINE uint64_t fast_mod_240(uint64_t x, uint64_t x_div_240)
{
#if defined(__GNUC__) && \
!defined(__clang__) && \
defined(__x86_64__)
// res = x_div_240 * 240
uint64_t res;
__asm__("imul $240, %1, %0" : "=r" (res) : "r" (x_div_240));
return x - res;
#else
return x - x_div_240 * 240;
#endif
}

/// Used for (64-bit / 32-bit) = 64-bit.
template <typename X, typename Y>
ALWAYS_INLINE typename std::enable_if<(sizeof(X) == sizeof(uint64_t) &&
Expand Down

0 comments on commit 84bdedc

Please sign in to comment.