diff --git a/PrimeCPP/solution_2/.gitignore b/PrimeCPP/solution_2/.gitignore
index 5b1fd1a1c..2cafdba9f 100644
--- a/PrimeCPP/solution_2/.gitignore
+++ b/PrimeCPP/solution_2/.gitignore
@@ -1 +1,2 @@
-.vscore/**
\ No newline at end of file
+.vscore/**
+*.exe
\ No newline at end of file
diff --git a/PrimeCPP/solution_2/Dockerfile b/PrimeCPP/solution_2/Dockerfile
index f6c0ceb2f..e3a2dd0b0 100644
--- a/PrimeCPP/solution_2/Dockerfile
+++ b/PrimeCPP/solution_2/Dockerfile
@@ -1,13 +1,19 @@
 FROM ubuntu:22.04 AS build
 
 RUN apt-get update -qq \
-    && apt-get install -y clang
+    && apt-get install -y bash clang
 
 WORKDIR /opt/app
 COPY *.cpp .
-RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par
+RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array \
+    && clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask
 
 FROM ubuntu:22.04
 
-COPY --from=build /opt/app/primes_par /usr/local/bin
-ENTRYPOINT [ "primes_par", "-l", "1000000" ]
\ No newline at end of file
+COPY --from=build /opt/app/primes_array /opt/app/primes_mask /opt/app/
+
+WORKDIR /opt/app
+COPY benchmark.sh .
+
+ENTRYPOINT [ "./benchmark.sh"]
+CMD ["both", "-l", "1000000" ]
\ No newline at end of file
diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_array.cpp
similarity index 98%
rename from PrimeCPP/solution_2/PrimeCPP_PAR.cpp
rename to PrimeCPP/solution_2/PrimeCPP_array.cpp
index 3345d6505..4edd24af0 100644
--- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp
+++ b/PrimeCPP/solution_2/PrimeCPP_array.cpp
@@ -206,7 +206,7 @@ class prime_sieve
 
         // Following 2 lines added by rbergen to conform to drag race output format
         cout << "\n";
-        cout << "davepl_par;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n";
+        cout << "davepl_array;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n";
     }
 
 };
diff --git a/PrimeCPP/solution_2/PrimeCPP_mask.cpp b/PrimeCPP/solution_2/PrimeCPP_mask.cpp
new file mode 100644
index 000000000..90a871c3d
--- /dev/null
+++ b/PrimeCPP/solution_2/PrimeCPP_mask.cpp
@@ -0,0 +1,430 @@
+// ---------------------------------------------------------------------------
+// PrimeCPP.cpp : Pol Marcet's Modified version of Dave's Garage Prime Sieve
+// Some great ideas taken from Rust's implementation from Michael Barber
+// @mike-barber https://www.github.com/mike-barber (bit-storage-rotate)
+// ---------------------------------------------------------------------------
+// NOTE(review): the header names below were lost in extraction and are rebuilt from usage - confirm.
+#include <algorithm>
+#include <chrono>
+#include <cmath>
+#include <cstdint>
+#include <cstdio>
+#include <cstring>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <stdexcept>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+
+using namespace std;
+using namespace std::chrono;
+
+const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU;
+
+// BitArray: fixed-size set of flags packed 32 per uint32_t word; every bit starts out set.
+class BitArray {
+    uint32_t *array;                                       // owned word storage, arraySize(logicalSize) elements
+    size_t logicalSize;                                    // number of addressable bits
+
+    inline static size_t arraySize(size_t size)            // words needed to hold `size` bits, rounded up
+    {
+        return (size >> 5) + ((size & 31) > 0);
+    }
+
+    inline static size_t index(size_t n)                   // position of the word that holds bit n
+    {
+        return (n >> 5);
+    }
+
+    inline static uint32_t getSubindex(size_t n, uint32_t d)   // bit n of word d, left in place (non-zero if set)
+    {
+        return d & (uint32_t(0x01) << (n % 32));
+    }
+
+public:
+    explicit BitArray(size_t size) : logicalSize(size)
+    {
+        array = new uint32_t[arraySize(size)];
+        std::memset(array, 0xFF, arraySize(size) * sizeof(uint32_t));
+    }
+
+    // The buffer is held through a raw pointer, so copying is disabled to rule out a double delete[].
+    BitArray(const BitArray&) = delete;
+    BitArray& operator=(const BitArray&) = delete;
+
+    ~BitArray() {delete [] array;}
+
+    bool get(size_t n) const                               // true while bit n is still set
+    {
+        return getSubindex(n, array[index(n)]);
+    }
+
+    static constexpr uint32_t rol(uint32_t x, uint32_t n)  // rotate x left by n bits; n is taken modulo 32
+    {
+        return (x << (n & 31)) | (x >> ((32 - n) & 31));
+    }
+
+    static constexpr uint32_t buildSkipMask(size_t skip, size_t offset)   // skip must be non-zero
+    {
+        uint32_t mask = 0;
+        for (size_t i = offset; i < 32; i += skip) {
+            mask |= (1u << i);
+        }
+        return ~mask;
+    }
+
+    void setFlagsFalse(size_t n, size_t skip)              // clear bits n, n+skip, n+2*skip, ... while below size()
+    {
+        if (skip <= 12) {
+            // Small skips touch each word more than once, so clear a whole word per step with one mask
+            size_t word_idx = index(n);
+            size_t bit_pos = n % 32;
+            size_t curr_n = n;
+
+            while (curr_n < size())
+            {
+                // Mask with every skip-th bit cleared, starting at bit_pos inside this word
+                uint32_t mask = buildSkipMask(skip, bit_pos);
+
+                // Apply mask to current word
+                array[word_idx] &= mask;
+
+                // Advance to the first multiple that lands beyond this word
+                size_t bits_remaining = 32 - bit_pos;
+                curr_n += ((bits_remaining + skip - 1) / skip) * skip;
+
+                if (curr_n >= size()) break;
+
+                word_idx = index(curr_n);
+                bit_pos = curr_n % 32;
+            }
+        }
+        else
+        {
+            // Larger skips: clear one bit per step, rotating a single-zero mask to follow the bit position
+            auto rolling_mask = ~(uint32_t(1) << (n % 32));
+            auto roll_bits = skip % 32;
+            while (n < size()) {
+                array[index(n)] &= rolling_mask;
+                n += skip;
+                rolling_mask = rol(rolling_mask, roll_bits);
+            }
+        }
+    }
+
+    inline size_t size() const                             // number of bits requested at construction
+    {
+        return logicalSize;
+    }
+};
+
+
+// prime_sieve
+//
+// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested)
+// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve.
+
+class prime_sieve
+{
+  private:
+
+      BitArray Bits;                                        // Sieve data, where 1==prime, 0==not
+
+  public:
+
+      prime_sieve(uint64_t n) : Bits(n)                     // Initialize all to true (potential primes)
+      {
+      }
+
+      ~prime_sieve()
+      {
+      }
+
+      // runSieve
+      //
+      // Repeatedly pick the next odd number still flagged as prime (starting at 3, up to the square root of
+      // the limit) and clear its odd multiples, beginning at the square of that number.
+
+      void runSieve()
+      {
+          uint64_t factor = 3;
+          const uint64_t q = static_cast<uint64_t>(sqrt(Bits.size()));
+
+          while (factor <= q)
+          {
+              for (uint64_t num = factor; num < Bits.size(); num += 2)
+              {
+                  if (Bits.get(num))
+                  {
+                      factor = num;
+                      break;
+                  }
+              }
+              Bits.setFlagsFalse(factor * factor, factor + factor);
+
+              factor += 2;
+          }
+      }
+
+      // countPrimes
+      //
+      // Counts 2 (when the limit is at least 2) plus every odd index whose bit is still set; call after runSieve
+
+      size_t countPrimes() const
+      {
+          size_t count = (Bits.size() >= 2);                   // Count 2 as prime if within range
+          for (uint64_t i = 3; i < Bits.size(); i += 2)        // 64-bit index: limits above INT_MAX are supported
+              if (Bits.get(i))
+                  count++;
+          return count;
+      }
+
+      // isPrime
+      //
+      // After runSieve: true for 2 (same range rule as countPrimes) and for odd n inside the sieve whose bit is set.
+
+      bool isPrime(uint64_t n) const
+      {
+          if (n == 2)                                          // even bits are never sieved, so 2 is answered directly
+              return Bits.size() >= 2;
+          const bool sieved = (n & 1) && n >= 3 && n < Bits.size();   // only odd indices inside the array are meaningful
+          return sieved && Bits.get(n);
+      }
+
+      // validateResults
+      //
+      // Compares countPrimes() with the known number of primes for this limit. Limits that are not in the table
+      // are reported as invalid; the table is never consulted by the sieve itself.
+
+      bool validateResults() const
+      {
+          static const std::map<uint64_t, size_t> resultsDictionary =
+          {
+                {             10LLU, 4         },               // Historical data for validating our results - the number of primes
+                {            100LLU, 25        },               // to be found under some limit, such as 168 primes under 1000
+                {          1'000LLU, 168       },
+                {         10'000LLU, 1229      },
+                {        100'000LLU, 9592      },
+                {      1'000'000LLU, 78498     },
+                {     10'000'000LLU, 664579    },
+                {    100'000'000LLU, 5761455   },
+                {  1'000'000'000LLU, 50847534  },
+                { 10'000'000'000LLU, 455052511 },
+          };
+          const auto expected = resultsDictionary.find(Bits.size());      // one lookup instead of two
+          const bool known = (expected != resultsDictionary.end());       // no reference count for this limit => invalid
+          return known && expected->second == countPrimes();
+      }
+
+      // printResults
+      //
+      // Prints the summary line and the drag-race result line; with showResults it also lists each prime found
+
+      void printResults(bool showResults, double duration, size_t passes, size_t threads) const
+      {
+          if (showResults)
+              cout << "2, ";
+
+          size_t count = (Bits.size() >= 2);                   // Count 2 as prime if in range
+          for (uint64_t num = 3; num < Bits.size(); num+=2)    // stop before size(): that bit is outside the sieve
+          {
+              if (Bits.get(num))
+              {
+                  if (showResults)
+                      cout << num << ", ";
+                  count++;
+              }
+          }
+
+          if (showResults)
+              cout << "\n";
+
+          cout << "Passes: " << passes << ", "
+               << "Threads: " << threads << ", "
+               << "Time: " << duration << ", "
+               << "Average: " << duration/passes << ", "
+               << "Limit: " << Bits.size() << ", "
+               << "Counts: " << count << "/" << countPrimes() << ", "
+               << "Valid : " << (validateResults() ? "Pass" : "FAIL!")
+               << "\n";
+
+          // Following 2 lines added by rbergen to conform to drag race output format
+          cout << "\n";
+          cout << "davepl_mask;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n";
+      }
+
+};
+
+// custom_atoll
+//
+// Like atoll(), but accepts K, M, G, and T as magnitude suffixes.
+
+long long custom_atoll(const std::string& value_str) {
+    static const std::unordered_map<char, long long> suffixes = {
+        {'K', 1000LL},
+        {'M', 1000000LL},
+        {'G', 1000000000LL},
+        {'T', 1000000000000LL}
+    };
+
+    std::string input_str = value_str;
+    for (char& c : input_str) {
+        c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));   // toupper needs an unsigned char value
+    }
+
+    const auto suffix = suffixes.find(input_str.empty() ? '\0' : input_str.back());   // empty input has no suffix
+    if (suffix != suffixes.end()) {
+        const long long multiplier = suffix->second;
+        std::string numeric_part = input_str.substr(0, input_str.size() - 1);
+        std::istringstream iss(numeric_part);
+        double numeric_value;
+        if (!(iss >> numeric_value)) {
+            throw std::invalid_argument("Invalid numeric part: " + numeric_part);
+        }
+        return static_cast<long long>(numeric_value * multiplier);
+    }
+
+    std::istringstream iss(input_str);                        // no suffix: parse as a plain integer
+    long long result;
+    if (!(iss >> result)) {
+        throw std::invalid_argument("Invalid input format");
+    }
+    return result;
+}
+
+int main(int argc, char **argv)
+{
+    vector<string>        args(argv + 1, argv + argc);        // From first to last argument in the argv array
+    uint64_t              ullLimitRequested = 0;
+    auto                  cThreadsRequested = 0;
+    auto                  cSecondsRequested = 0;
+    auto                  bPrintPrimes      = false;
+    auto                  bOneshot          = false;
+    auto                  bQuiet            = false;
+
+    // Process command-line args; an option that is missing its value ends parsing and keeps the default
+
+    for (auto i = args.begin(); i != args.end(); ++i)
+    {
+        if (*i == "-h" || *i == "--help") {
+            cout << "Syntax: " << argv[0] << " [-t,--threads threads] [-s,--seconds seconds] [-l,--limit limit] [-1,--oneshot] [-q,--quiet] [-h] " << endl;
+            return 0;
+        }
+        else if (*i == "-t" || *i == "--threads")
+        {
+            if (++i == args.end()) break;                     // never let the loop's ++i step past end()
+            cThreadsRequested = max(1, atoi(i->c_str()));
+        }
+        else if (*i == "-s" || *i == "--seconds")
+        {
+            if (++i == args.end()) break;                     // never let the loop's ++i step past end()
+            cSecondsRequested = max(1, atoi(i->c_str()));
+        }
+        else if (*i == "-l" || *i == "--limit")
+        {
+            if (++i == args.end()) break;                     // never let the loop's ++i step past end()
+            ullLimitRequested = max((long long)1, custom_atoll(*i));
+        }
+        else if (*i == "-1" || *i == "--oneshot")
+        {
+            bOneshot = true;
+            cThreadsRequested = 1;
+        }
+        else if (*i == "-p" || *i == "--print")
+        {
+            bPrintPrimes = true;
+        }
+        else if (*i == "-q" || *i == "--quiet")
+        {
+            bQuiet = true;
+        }
+        else
+        {
+            fprintf(stderr, "Unknown argument: %s", i->c_str());
+            return 0;
+        }
+    }
+
+    if (!bQuiet)
+    {
+        cout << "Primes Benchmark (c) 2021 Dave's Garage - http://github.com/davepl/primes" << endl;
+        cout << "-------------------------------------------------------------------------" << endl;
+    }
+
+    if (bOneshot)
+        cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl;
+
+    if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1))
+    {
+        cout << "Oneshot option cannot be mixed with second count or thread count." << endl;
+        return 0;
+    }
+
+    uint64_t cPasses  = 0;
+    auto cSeconds     = (cSecondsRequested ? cSecondsRequested : 5);
+    const unsigned cThreads = (cThreadsRequested ? cThreadsRequested : max(1u, thread::hardware_concurrency()));   // hardware_concurrency() may report 0
+    auto llUpperLimit = (ullLimitRequested ? ullLimitRequested : DEFAULT_UPPER_LIMIT);
+
+    if (!bQuiet)
+    {
+        printf("Computing primes to %llu on %u thread%s for %d second%s.\n",
+            (unsigned long long)llUpperLimit,
+            cThreads,
+            cThreads == 1 ? "" : "s",
+            cSeconds,
+            cSeconds == 1 ? "" : "s"
+        );
+    }
+    double duration;
+
+    if (bOneshot)
+    {
+        auto tStart       = steady_clock::now();
+        prime_sieve(llUpperLimit).runSieve();
+        auto tEnd = steady_clock::now() - tStart;
+        duration = duration_cast<microseconds>(tEnd).count()/1000000.0;
+    }
+    else
+    {
+        auto tStart       = steady_clock::now();
+        std::vector<std::thread> threads(cThreads);           // standard containers instead of variable-length arrays
+        std::vector<uint64_t> l_passes(cThreads, 0);
+        for (unsigned int i = 0; i < cThreads; i++)
+            threads[i] = std::thread([i, cSeconds, &l_passes, &tStart](size_t llUpperLimit)
+            {
+                l_passes[i] = 0;
+                while (duration_cast<seconds>(steady_clock::now() - tStart).count() < cSeconds) {   // honour -s instead of a fixed 5
+                    prime_sieve(llUpperLimit).runSieve();
+                    ++l_passes[i];
+                }
+            }, llUpperLimit);
+        for (unsigned int i = 0; i < cThreads; i++) {
+            threads[i].join();
+            cPasses += l_passes[i];
+        }
+        auto tEnd = steady_clock::now() - tStart;
+        duration = duration_cast<microseconds>(tEnd).count()/1000000.0;
+    }
+
+
+    if (bOneshot)
+    {
+        cPasses = static_cast<uint64_t>(1.0 / duration * 5);  // scale the single pass up to a 5 second run
+        duration = 5.0;
+    }
+
+    prime_sieve checkSieve(llUpperLimit);
+    checkSieve.runSieve();
+    auto result = checkSieve.validateResults() ? checkSieve.countPrimes() : 0;
+
+    if (!bQuiet)
+        checkSieve.printResults(bPrintPrimes, duration , cPasses, cThreads);
+    else
+        cout << cPasses << ", " << duration / cPasses << endl;
+
+    // On success return the count of primes found; on failure, return 0
+
+    return (int) result;
+}
diff --git a/PrimeCPP/solution_2/README.md b/PrimeCPP/solution_2/README.md
index d4dd3405a..7c67d9449 100644
--- a/PrimeCPP/solution_2/README.md
+++ b/PrimeCPP/solution_2/README.md
@@ -8,13 +8,16 @@
 
 ## Run instructions
 
-(Linux): clang++ -march=native -mtune=native -Ofast -pthread -std=c++17 PrimeCPP_PAR.cpp -o Primes_clang++ && ./Primes_clang++
+(Linux): `./run.sh`
+(Windows): `.\run.cmd`
 
 ## Output
 
+```text
 Primes Benchmark (c) 2021 Dave's Garage - http://github.com/davepl/primes
 -------------------------------------------------------------------------
 Computing primes to 1000000 on 24 threads for 5 seconds.
Passes: 185267, Threads: 24, Time: 5.00074, Average: 2.69921e-05, Limit: 1000000, Counts: 78498/78498, Valid : Pass -davepl_par;185267;5.00074;24;algorithm=base,faithful=yes,bits=1 +davepl_array;185267;5.00074;24;algorithm=base,faithful=yes,bits=1 +``` diff --git a/PrimeCPP/solution_2/benchmark.sh b/PrimeCPP/solution_2/benchmark.sh new file mode 100755 index 000000000..a1cd0ea64 --- /dev/null +++ b/PrimeCPP/solution_2/benchmark.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [[ $1 == both || $1 == 1 || $1 == array ]]; then + ./primes_array ${@:2} +fi + +if [[ $1 == both || $1 == 2 || $1 == mask ]]; then + ./primes_mask ${@:2} +fi diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe deleted file mode 100755 index c815cbfcf..000000000 Binary files a/PrimeCPP/solution_2/primes_par.exe and /dev/null differ diff --git a/PrimeCPP/solution_2/run.cmd b/PrimeCPP/solution_2/run.cmd index 3390a0df2..ba425b7f4 100644 --- a/PrimeCPP/solution_2/run.cmd +++ b/PrimeCPP/solution_2/run.cmd @@ -1,2 +1,27 @@ -g++ -Ofast PrimeCPP_PAR.cpp -std=c++17 -lstdc++ -oPrimes_par_gcc.exe -.\Primes_par_gcc.exe +@ECHO OFF + +SET "_RUN_ARRAY=0" +IF [%1] == [] SET "_RUN_ARRAY=1" +IF [%1] == [1] SET "_RUN_ARRAY=1" +IF [%1] == [array] SET "_RUN_ARRAY=1" +IF %_RUN_ARRAY% == 1 ( + ECHO Building and running the array approach... + ECHO: + g++ -Ofast PrimeCPP_array.cpp -std=c++17 -lstdc++ -oPrimes_array.exe + .\Primes_array.exe + ECHO: +) +SET _RUN_ARRAY= + +SET "_RUN_MASK=0" +IF [%1] == [] SET "_RUN_MASK=1" +IF [%1] == [2] SET "_RUN_MASK=1" +IF [%1] == [mask] SET "_RUN_MASK=1" +IF %_RUN_MASK% == 1 ( + ECHO Building and running the mask approach... 
+ ECHO: + g++ -Ofast PrimeCPP_mask.cpp -std=c++17 -lstdc++ -oPrimes_mask.exe + .\Primes_mask.exe + ECHO: +) +SET _RUN_MASK= diff --git a/PrimeCPP/solution_2/run.sh b/PrimeCPP/solution_2/run.sh index ea150e106..3ecc9be9c 100755 --- a/PrimeCPP/solution_2/run.sh +++ b/PrimeCPP/solution_2/run.sh @@ -1,6 +1,19 @@ +#!/bin/bash + # g++ -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes.exe # gcc -Ofast -std=c++17 PrimeCPP.cpp -lc++ -oPrimes_gcc.exe # clang -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes_clang.exe -clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par.exe -./primes_par.exe +if [[ $# == 0 || $1 == 1 || $1 == array ]]; then + echo -e "Building and running the array approach...\n" + clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array.exe + ./primes_array.exe + echo +fi + +if [[ $# == 0 || $1 == 2 || $1 == mask ]]; then + echo -e "Building and running the mask approach...\n" + clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask.exe + ./primes_mask.exe + echo +fi