Skip to content

Commit

Permalink
Update to latest libprimesieve
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Feb 17, 2024
1 parent 695f92e commit 6078c4d
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 88 deletions.
8 changes: 4 additions & 4 deletions lib/primesieve/doc/primesieve.1
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@
.\" Title: primesieve
.\" Author: [see the "AUTHOR" section]
.\" Generator: DocBook XSL Stylesheets vsnapshot <http://docbook.sf.net/>
.\" Date: 02/14/2024
.\" Date: 02/16/2024
.\" Manual: \ \&
.\" Source: \ \&
.\" Language: English
.\"
.TH "PRIMESIEVE" "1" "02/14/2024" "\ \&" "\ \&"
.TH "PRIMESIEVE" "1" "02/16/2024" "\ \&" "\ \&"
.\" -----------------------------------------------------------------
.\" * Define some portability stuff
.\" -----------------------------------------------------------------
Expand Down Expand Up @@ -131,9 +131,9 @@ Run a stress test\&. The
\fIMODE\fR
can be either CPU (default) or RAM\&. The CPU
\fIMODE\fR
uses little memory (on average about 225 MiB per thread) and puts the highest load on the CPU\&. The RAM
uses little memory (< 5 MiB per thread) and puts the highest load on the CPU\&. The RAM
\fIMODE\fR
uses much more memory than the CPU
on the other hand uses much more memory than the CPU
\fIMODE\fR
(each thread uses about 1\&.16 GiB), but the CPU usually won\(cqt get as hot as in the CPU
\fIMODE\fR\&. Stress testing keeps on running until either a miscalculation occurs (due to a hardware issue) or the timeout expires\&. The default timeout is 24 hours, the timeout can be changed using the
Expand Down
4 changes: 2 additions & 2 deletions lib/primesieve/doc/primesieve.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ OPTIONS

*-S, --stress-test*[='MODE']::
Run a stress test. The 'MODE' can be either CPU (default) or RAM. The CPU
'MODE' uses little memory (on average about 225 MiB per thread) and puts the
highest load on the CPU. The RAM 'MODE' uses much more memory than the
'MODE' uses little memory (< 5 MiB per thread) and puts the highest load on
the CPU. The RAM 'MODE' on the other hand uses much more memory than the
CPU 'MODE' (each thread uses about 1.16 GiB), but the CPU usually won't get
as hot as in the CPU 'MODE'. Stress testing keeps on running until either a
miscalculation occurs (due to a hardware issue) or the timeout expires. The
Expand Down
2 changes: 2 additions & 0 deletions lib/primesieve/scripts/build_clang_multiarch_win_x64.bat
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
clang++ -I../include -O3 -DNDEBUG -DMULTIARCH_POPCNT_BMI -DMULTIARCH_AVX512 -c ../src/*.cpp ../src/app/*.cpp
clang-cl *.o /link "C:\Program Files\LLVM\lib\clang\17\lib\windows\clang_rt.builtins-x86_64.lib" /OUT:primesieve.exe
1 change: 0 additions & 1 deletion lib/primesieve/src/app/CmdOptions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
///

#include "CmdOptions.hpp"

#include <primesieve/PrimeSieve.hpp>
#include <primesieve/primesieve_error.hpp>

Expand Down
149 changes: 68 additions & 81 deletions lib/primesieve/src/app/stressTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
/// file in the top level directory.
///

#include <primesieve/iterator.hpp>
#include <primesieve.hpp>
#include <primesieve/macros.hpp>
#include <primesieve/pmath.hpp>
#include <primesieve/PrimeSieve.hpp>
Expand All @@ -37,8 +37,8 @@ using primesieve::Array;
namespace {

/// Lookup table of correct prime count results.
/// primeCounts_1e13[i] = PrimePi(i*1e11) - PrimePi((i-1)*1e11)
/// This test sieves up to 10^13 where most memory fits into
/// primeCounts_1e13[i] = PrimePi(1e13+i*1e11) - PrimePi(1e13+(i-1)*1e11)
/// This test sieves near 10^13 where most memory fits into
/// the CPU's cache. Each thread uses < 5 MiB of memory.
/// This test puts the highest load on the CPU, but not much
/// load on the RAM.
Expand All @@ -47,34 +47,34 @@ namespace {
///
/// for i in {0..98};
/// do
/// res=$(primesieve $i*1e11 -d1e11 -q);
/// res=$(primesieve 1e13+$i*1e11 -d1e11 -q);
/// printf "$((res))ull, ";
/// if [ $((($i+1) % 5)) -eq 0 ]; then printf "\n"; fi;
/// done
///
const Array<uint64_t, 100> primeCounts_1e13 =
{
/* Start number = */ 0,
4118054813ull, 3889050246ull, 3811334076ull, 3762566522ull, 3727130485ull,
3699365644ull, 3676572524ull, 3657309217ull, 3640604059ull, 3625924432ull,
3612791400ull, 3600947714ull, 3590161711ull, 3580266494ull, 3571130592ull,
3562622357ull, 3554715520ull, 3547310538ull, 3540307017ull, 3533730778ull,
3527508038ull, 3521536373ull, 3515949965ull, 3510579737ull, 3505445520ull,
3500531339ull, 3495833071ull, 3491305095ull, 3486960151ull, 3482753275ull,
3478749572ull, 3474839811ull, 3471053925ull, 3467407632ull, 3463895032ull,
3460433421ull, 3457117553ull, 3453892434ull, 3450773581ull, 3447746462ull,
3444702138ull, 3441863700ull, 3439036659ull, 3436325635ull, 3433611069ull,
3430944750ull, 3428459293ull, 3425915324ull, 3423506752ull, 3421088203ull,
3418770256ull, 3416469253ull, 3414218299ull, 3412006845ull, 3409864335ull,
3407752910ull, 3405685414ull, 3403637619ull, 3401667635ull, 3399692824ull,
3397740437ull, 3395890778ull, 3394044263ull, 3392171356ull, 3390424659ull,
3388640693ull, 3386884666ull, 3385183718ull, 3383444039ull, 3381837156ull,
3380227778ull, 3378598496ull, 3376990296ull, 3375420221ull, 3373915620ull,
3372400737ull, 3370910165ull, 3369407408ull, 3367985168ull, 3366526118ull,
3365100850ull, 3363709833ull, 3362327791ull, 3360990563ull, 3359614618ull,
3358291592ull, 3357002793ull, 3355683015ull, 3354424950ull, 3353137292ull,
3351906327ull, 3350687979ull, 3349462327ull, 3348236947ull, 3347061905ull,
3345852373ull, 3344702803ull, 3343552482ull, 3342407298ull
/* Start number = */ 10000000000000ull,
3340141707ull, 3339037770ull, 3337978139ull, 3336895789ull, 3335816088ull,
3334786528ull, 3333711223ull, 3332674785ull, 3331678168ull, 3330629301ull,
3329607166ull, 3328673627ull, 3327628347ull, 3326668678ull, 3325640524ull,
3324742444ull, 3323791292ull, 3322806916ull, 3321871448ull, 3320978003ull,
3320071119ull, 3319135499ull, 3318180524ull, 3317331622ull, 3316460192ull,
3315535967ull, 3314685498ull, 3313824325ull, 3312975770ull, 3312115313ull,
3311302346ull, 3310438260ull, 3309566639ull, 3308822830ull, 3307965666ull,
3307206437ull, 3306366382ull, 3305523133ull, 3304756621ull, 3303985935ull,
3303188494ull, 3302450534ull, 3301624455ull, 3300931434ull, 3300140636ull,
3299387997ull, 3298659572ull, 3297919672ull, 3297202595ull, 3296420883ull,
3295716204ull, 3294964942ull, 3294305835ull, 3293606447ull, 3292847935ull,
3292190654ull, 3291459406ull, 3290784567ull, 3290083004ull, 3289386555ull,
3288770253ull, 3288049408ull, 3287445692ull, 3286757785ull, 3286108293ull,
3285403869ull, 3284758824ull, 3284148268ull, 3283516237ull, 3282842708ull,
3282210028ull, 3281607239ull, 3280971749ull, 3280348811ull, 3279699440ull,
3279124815ull, 3278501300ull, 3277898840ull, 3277282614ull, 3276682694ull,
3276121352ull, 3275505636ull, 3274928897ull, 3274299689ull, 3273743021ull,
3273135693ull, 3272563375ull, 3272020535ull, 3271457321ull, 3270889981ull,
3270322147ull, 3269766399ull, 3269190820ull, 3268634444ull, 3268093100ull,
3267530619ull, 3267004191ull, 3266440817ull, 3265923128ull
};

/// Lookup table of correct prime count results.
Expand Down Expand Up @@ -145,19 +145,12 @@ void stressTestInfo(const CmdOptions& opts,

if (opts.stressTestMode == "CPU")
{
int threads_1e19 = threads / 5;
int threads_1e13 = threads - threads_1e19;
double avgMiB = (threads_1e13 * 3.0 + threads_1e19 * 1160.0) / threads;
double avgGiB = avgMiB / 1024.0;

if (threads * avgMiB < 1024)
std::cout << std::fixed << std::setprecision(2) << avgMiB << " MiB = "
<< std::fixed << std::setprecision(2) << threads * avgMiB << " MiB.\n";
else
std::cout << std::fixed << std::setprecision(2) << avgMiB << " MiB = "
<< std::fixed << std::setprecision(2) << threads * avgGiB << " GiB.\n";
double sieveSizeKiB = primesieve::get_sieve_size();
double avgMiB = 2.8 + (sieveSizeKiB / 1024.0);
std::cout << std::fixed << std::setprecision(2) << avgMiB << " MiB = "
<< std::fixed << std::setprecision(2) << threads * avgMiB << " MiB.\n";
}
else // stressTestMode == "RAM"
else // RAM stress test
std::cout << "1.16 GiB = " << std::fixed << std::setprecision(2) << threads * 1.16 << " GiB.\n";

std::cout << "The stress test keeps on running until either a miscalculation occurs\n";
Expand Down Expand Up @@ -225,33 +218,36 @@ void printResult(int threadId,
std::size_t maxIndex = primeCounts.size() - 1;
int iPadding = (int) std::to_string(maxIndex).size();
int threadIdPadding = (int) std::to_string(threads).size();
std::ostringstream oss;

if (count == primeCounts[i])
{
std::cout << getDateTime()
<< "Thread " << std::setw(threadIdPadding) << std::right << threadId
<< ", " << std::fixed << std::setprecision(2) << secsThread.count() << " secs"
<< ", PrimePi(" << startStr << std::setw(iPadding) << std::right << i-1 << "e11, "
<< startStr << std::setw(iPadding) << std::right << i << "e11) = " << count << " OK" << std::endl;
oss << getDateTime()
<< "Thread " << std::setw(threadIdPadding) << std::right << threadId << ", "
<< std::fixed << std::setprecision(2) << secsThread.count() << " secs, "
<< "PrimePi(" << startStr << std::setw(iPadding) << std::right << i-1 << "e11, "
<< startStr << std::setw(iPadding) << std::right << i << "e11) = " << count << " OK\n";

std::cout << oss.str() << std::flush;
}
else
{
std::cerr << getDateTime()
<< "Thread " << std::setw(threadIdPadding) << std::right << threadId
<< ", " << std::fixed << std::setprecision(2) << secsThread.count() << " secs"
<< ", PrimePi(" << startStr << std::setw(iPadding) << std::right << i-1 << "e11, "
<< startStr << std::setw(iPadding) << std::right << i << "e11) = " << count << " ERROR" << std::endl;

std::cerr << "\nMiscalculation detected after running for: "
<< getTimeElapsed((int64_t) secsThread.count()) << std::endl;
oss << getDateTime()
<< "Thread " << std::setw(threadIdPadding) << std::right << threadId << ", "
<< std::fixed << std::setprecision(2) << secsThread.count() << " secs, "
<< "PrimePi(" << startStr << std::setw(iPadding) << std::right << i-1 << "e11, "
<< startStr << std::setw(iPadding) << std::right << i << "e11) = " << count << " ERROR\n\n"
<< "Miscalculation detected after running for: " << getTimeElapsed((int64_t) secsThread.count()) << "\n";

std::cerr << oss.str();
}
}

/// Count primes using a PrimeSieve object. On x64 CPUs this
/// uses the POPCNT instruction for counting primes.
/// PrimeSieve objects use a single thread.
///
NOINLINE uint64_t countPrimesAlgo1(uint64_t start, uint64_t stop)
NOINLINE uint64_t countPrimes1(uint64_t start, uint64_t stop)
{
primesieve::PrimeSieve ps;
return ps.countPrimes(start, stop);
Expand All @@ -261,7 +257,7 @@ NOINLINE uint64_t countPrimesAlgo1(uint64_t start, uint64_t stop)
/// PrimeGenerator::fillNextPrimes() method which is
/// vectorized using AVX512 on x64 CPUs.
///
NOINLINE uint64_t countPrimesAlgo2(uint64_t start, uint64_t stop)
NOINLINE uint64_t countPrimes2(uint64_t start, uint64_t stop)
{
primesieve::iterator it(start, stop);
it.generate_next_primes();
Expand All @@ -275,6 +271,20 @@ NOINLINE uint64_t countPrimesAlgo2(uint64_t start, uint64_t stop)
return count;
}

/// Dispatch to one of the 2 prime counting algorithms in order
/// to use as many of the CPU's resources as possible. An odd
/// threadIndex selects countPrimes1(), an even threadIndex
/// selects countPrimes2(). Hence callers that pass consecutive
/// index values alternately execute algorithm 1 and 2.
///
uint64_t countPrimes(uint64_t threadIndex,
                     uint64_t start,
                     uint64_t stop)
{
  const bool isOddIndex = (threadIndex % 2) != 0;

  return isOddIndex ? countPrimes1(start, stop)
                    : countPrimes2(start, stop);
}

} // namespace

void stressTest(const CmdOptions& opts)
Expand Down Expand Up @@ -308,20 +318,12 @@ void stressTest(const CmdOptions& opts)
{
for (; i < primeCounts.size(); i++)
{
auto t1 = std::chrono::system_clock::now();
uint64_t ChunkSize = (uint64_t) 1e11;
uint64_t threadStart = start + ChunkSize * (i - 1);
uint64_t threadStop = threadStart + ChunkSize;
uint64_t count;

// We use 2 different algorithms for counting primes in order
// to use as many of the CPU's resources as possible.
// All threads alternately execute algorithm 1 and algorithm 2.
if (i % 2)
count = countPrimesAlgo1(threadStart, threadStop);
else
count = countPrimesAlgo2(threadStart, threadStop);

auto t1 = std::chrono::system_clock::now();
uint64_t count = countPrimes(i, threadStart, threadStop);
auto t2 = std::chrono::system_clock::now();
std::chrono::duration<double> secsThread = t2 - t1;

Expand Down Expand Up @@ -360,7 +362,7 @@ void stressTest(const CmdOptions& opts)
if (secsStatus.count() >= statusOutputDelay)
{
lastStatusOutput = t2;
statusOutputDelay += 5;
statusOutputDelay += 7;
statusOutputDelay = std::min(statusOutputDelay, 600);
printResult(threadId, threads, i, count, secsThread, primeCounts);
}
Expand All @@ -370,21 +372,13 @@ void stressTest(const CmdOptions& opts)
}
catch (const std::bad_alloc&)
{
std::ostringstream oss;
if (statusOutputDelay > 0)
oss << std::endl;

oss << "ERROR: failed to allocate memory!" << std::endl;
std::cerr << oss.str();
std::cerr << "ERROR: failed to allocate memory!\n";
std::exit(1);
}
catch (const std::exception& e)
{
std::ostringstream oss;
if (statusOutputDelay > 0)
oss << std::endl;

oss << "ERROR: " << e.what() << std::endl;
oss << "ERROR: " << e.what() << "\n";
std::cerr << oss.str();
std::exit(1);
}
Expand All @@ -396,16 +390,9 @@ void stressTest(const CmdOptions& opts)
Vector<std::thread> workerThreads;
workerThreads.reserve(threads);

// We create 1 thread per CPU core
for (int threadId = 1; threadId <= threads; threadId++)
{
// In CPU stress test mode, we also run 20% of the threads using
// the RAM stress test (threadId % 5 != 0). Since most PCs are
// memory bound e.g. Desktop PC CPUs frequently only have 2 memory
// channels we don't want to use too many RAM stress test threads
// otherwise the threads might become idle due to the limited
// memory bandwidth.
if (opts.stressTestMode == "CPU" && threadId % 5 != 0)
if (opts.stressTestMode == "CPU")
workerThreads.emplace_back(task, threadId, primeCounts_1e13);
else // RAM stress test
workerThreads.emplace_back(task, threadId, primeCounts_1e19);
Expand Down

0 comments on commit 6078c4d

Please sign in to comment.