Skip to content

Commit

Permalink
Merge pull request #16 from itzmeanjan/switch-to-using-std-span
Browse files Browse the repository at this point in the history
Switch to using C++20's `std::span` -based Interface
  • Loading branch information
itzmeanjan committed Jan 13, 2024
2 parents 9b53454 + ce1acd2 commit 9e76ee0
Show file tree
Hide file tree
Showing 50 changed files with 1,923 additions and 1,380 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
[submodule "sha3"]
path = sha3
url = https://github.com/itzmeanjan/sha3.git
[submodule "gtest-parallel"]
path = gtest-parallel
url = https://github.com/google/gtest-parallel.git
10 changes: 7 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@ TEST_BINARY = $(BUILD_DIR)/test.out

BENCHMARK_DIR = benchmarks
BENCHMARK_SOURCES := $(wildcard $(BENCHMARK_DIR)/*.cpp)
BENCHMARK_HEADERS := $(wildcard $(BENCHMARK_DIR)/*.hpp)
BENCHMARK_OBJECTS := $(addprefix $(BUILD_DIR)/, $(notdir $(patsubst %.cpp,%.o,$(BENCHMARK_SOURCES))))
BENCHMARK_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpthread
BENCHMARK_BINARY = $(BUILD_DIR)/bench.out
PERF_LINK_FLAGS = -lbenchmark -lbenchmark_main -lpfm -lpthread
PERF_BINARY = $(BUILD_DIR)/perf.out
GTEST_PARALLEL = ./gtest-parallel/gtest-parallel

all: test

Expand All @@ -34,14 +36,16 @@ $(BUILD_DIR):
$(SHA3_INC_DIR):
git submodule update --init

$(GTEST_PARALLEL): $(SHA3_INC_DIR)

$(BUILD_DIR)/%.o: $(TEST_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR)
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@

$(TEST_BINARY): $(TEST_OBJECTS)
$(CXX) $(OPT_FLAGS) $(LINK_FLAGS) $^ $(TEST_LINK_FLAGS) -o $@

test: $(TEST_BINARY)
./$< --gtest_shuffle --gtest_random_seed=0
test: $(TEST_BINARY) $(GTEST_PARALLEL)
$(GTEST_PARALLEL) $< --print_test_times

$(BUILD_DIR)/%.o: $(BENCHMARK_DIR)/%.cpp $(BUILD_DIR) $(SHA3_INC_DIR)
$(CXX) $(CXX_FLAGS) $(WARN_FLAGS) $(OPT_FLAGS) $(I_FLAGS) $(DEP_IFLAGS) -c $< -o $@
Expand All @@ -65,5 +69,5 @@ perf: $(PERF_BINARY)
clean:
rm -rf $(BUILD_DIR)

format: $(SPHINCS+_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES)
format: $(SPHINCS+_SOURCES) $(TEST_SOURCES) $(BENCHMARK_SOURCES) $(BENCHMARK_HEADERS)
clang-format -i $^
721 changes: 496 additions & 225 deletions README.md

Large diffs are not rendered by default.

6 changes: 6 additions & 0 deletions benchmarks/bench_helper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#pragma once
#include <algorithm>
#include <limits>
#include <vector>

// Returns the smallest value stored in `v`.
//
// For an empty `v`, std::min_element returns the end iterator, which must not be dereferenced;
// in that case a quiet NaN is returned instead, signalling "no data" without undefined behaviour.
const auto compute_min = [](const std::vector<double>& v) -> double {
  if (v.empty()) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  return *std::min_element(v.begin(), v.end());
};
// Returns the largest value stored in `v`.
//
// For an empty `v`, std::max_element returns the end iterator, which must not be dereferenced;
// in that case a quiet NaN is returned instead, signalling "no data" without undefined behaviour.
const auto compute_max = [](const std::vector<double>& v) -> double {
  if (v.empty()) {
    return std::numeric_limits<double>::quiet_NaN();
  }
  return *std::max_element(v.begin(), v.end());
};
239 changes: 120 additions & 119 deletions benchmarks/bench_sphincs+.hpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#pragma once
#include "prng.hpp"
#include "sphincs+.hpp"
#include "x86_64_cpu_cycles.hpp"
#include "x86_64_cpu_ticks.hpp"
#include <benchmark/benchmark.h>
#include <cassert>

Expand All @@ -12,56 +13,56 @@ template<const size_t n, const uint32_t h, const uint32_t d, const size_t w, con
static inline void
keygen(benchmark::State& state)
{
namespace utils = sphincs_plus_utils;
constexpr size_t pklen = utils::get_sphincs_pkey_len<n>();
constexpr size_t sklen = utils::get_sphincs_skey_len<n>();

uint8_t* sk_seed = static_cast<uint8_t*>(std::malloc(n));
uint8_t* sk_prf = static_cast<uint8_t*>(std::malloc(n));
uint8_t* pk_seed = static_cast<uint8_t*>(std::malloc(n));
uint8_t* pkey = static_cast<uint8_t*>(std::malloc(pklen));
uint8_t* skey = static_cast<uint8_t*>(std::malloc(sklen));

sphincs_plus_utils::random_data<uint8_t>(sk_seed, n);
sphincs_plus_utils::random_data<uint8_t>(sk_prf, n);
sphincs_plus_utils::random_data<uint8_t>(pk_seed, n);
constexpr size_t pklen = sphincs_plus_utils::get_sphincs_pkey_len<n>();
constexpr size_t sklen = sphincs_plus_utils::get_sphincs_skey_len<n>();

std::vector<uint8_t> sk_seed(n, 0);
std::vector<uint8_t> sk_prf(n, 0);
std::vector<uint8_t> pk_seed(n, 0);
std::vector<uint8_t> pkey(pklen, 0);
std::vector<uint8_t> skey(sklen, 0);

auto _sk_seed = std::span<uint8_t, n>(sk_seed);
auto _sk_prf = std::span<uint8_t, n>(sk_prf);
auto _pk_seed = std::span<uint8_t, n>(pk_seed);
auto _pkey = std::span<uint8_t, pklen>(pkey);
auto _skey = std::span<uint8_t, sklen>(skey);

prng::prng_t prng;
prng.read(_sk_seed);
prng.read(_sk_prf);
prng.read(_pk_seed);

#ifdef __x86_64__
uint64_t total_cycles = 0ul;
uint64_t total_ticks = 0ul;
#endif

for (auto _ : state) {
#ifdef __x86_64__
const uint64_t start = cpu_cycles();
const uint64_t start = cpu_ticks();
#endif

sphincs_plus::keygen<n, h, d, w, v>(sk_seed, sk_prf, pk_seed, skey, pkey);
sphincs_plus::keygen<n, h, d, w, v>(_sk_seed, _sk_prf, _pk_seed, _skey, _pkey);

benchmark::DoNotOptimize(sk_seed);
benchmark::DoNotOptimize(sk_prf);
benchmark::DoNotOptimize(pk_seed);
benchmark::DoNotOptimize(skey);
benchmark::DoNotOptimize(pkey);
benchmark::DoNotOptimize(_sk_seed);
benchmark::DoNotOptimize(_sk_prf);
benchmark::DoNotOptimize(_pk_seed);
benchmark::DoNotOptimize(_skey);
benchmark::DoNotOptimize(_pkey);
benchmark::ClobberMemory();

#ifdef __x86_64__
const uint64_t end = cpu_cycles();
total_cycles += (end - start);
const uint64_t end = cpu_ticks();
total_ticks += (end - start);
#endif
}

state.SetItemsProcessed(state.iterations());

#ifdef __x86_64__
total_cycles /= static_cast<uint64_t>(state.iterations());
state.counters["average_cpu_cycles"] = static_cast<double>(total_cycles);
total_ticks /= static_cast<uint64_t>(state.iterations());
state.counters["rdtsc"] = static_cast<double>(total_ticks);
#endif

std::free(sk_seed);
std::free(sk_prf);
std::free(pk_seed);
std::free(pkey);
std::free(skey);
}

// Benchmark SPHINCS+ signing algorithm
Expand All @@ -76,74 +77,73 @@ template<const size_t n,
static inline void
sign(benchmark::State& state)
{
namespace utils = sphincs_plus_utils;
constexpr size_t pklen = utils::get_sphincs_pkey_len<n>();
constexpr size_t sklen = utils::get_sphincs_skey_len<n>();
constexpr size_t siglen = utils::get_sphincs_sig_len<n, h, d, a, k, w>();
constexpr size_t mlen = 32;

uint8_t* sk_seed = static_cast<uint8_t*>(std::malloc(n));
uint8_t* sk_prf = static_cast<uint8_t*>(std::malloc(n));
uint8_t* pk_seed = static_cast<uint8_t*>(std::malloc(n));
uint8_t* pkey = static_cast<uint8_t*>(std::malloc(pklen));
uint8_t* skey = static_cast<uint8_t*>(std::malloc(sklen));
uint8_t* msg = static_cast<uint8_t*>(std::malloc(mlen));
uint8_t* rand_bytes = static_cast<uint8_t*>(std::malloc(n));
uint8_t* sig = static_cast<uint8_t*>(std::malloc(siglen));

sphincs_plus_utils::random_data<uint8_t>(sk_seed, n);
sphincs_plus_utils::random_data<uint8_t>(sk_prf, n);
sphincs_plus_utils::random_data<uint8_t>(pk_seed, n);

sphincs_plus::keygen<n, h, d, w, v>(sk_seed, sk_prf, pk_seed, skey, pkey);

sphincs_plus_utils::random_data<uint8_t>(msg, mlen);
sphincs_plus_utils::random_data<uint8_t>(rand_bytes, n);
constexpr size_t pklen = sphincs_plus_utils::get_sphincs_pkey_len<n>();
constexpr size_t sklen = sphincs_plus_utils::get_sphincs_skey_len<n>();
constexpr size_t siglen = sphincs_plus_utils::get_sphincs_sig_len<n, h, d, a, k, w>();
const size_t mlen = state.range();

std::vector<uint8_t> sk_seed(n, 0);
std::vector<uint8_t> sk_prf(n, 0);
std::vector<uint8_t> pk_seed(n, 0);
std::vector<uint8_t> pkey(pklen, 0);
std::vector<uint8_t> skey(sklen, 0);
std::vector<uint8_t> msg(mlen, 0);
std::vector<uint8_t> rand_bytes(n, 0);
std::vector<uint8_t> sig(siglen, 0);

auto _sk_seed = std::span<uint8_t, n>(sk_seed);
auto _sk_prf = std::span<uint8_t, n>(sk_prf);
auto _pk_seed = std::span<uint8_t, n>(pk_seed);
auto _pkey = std::span<uint8_t, pklen>(pkey);
auto _skey = std::span<uint8_t, sklen>(skey);
auto _msg = std::span(msg);
auto _rand_bytes = std::span<uint8_t, n>(rand_bytes);
auto _sig = std::span<uint8_t, siglen>(sig);

prng::prng_t prng;
prng.read(_sk_seed);
prng.read(_sk_prf);
prng.read(_pk_seed);
prng.read(_msg);
prng.read(_rand_bytes);

sphincs_plus::keygen<n, h, d, w, v>(_sk_seed, _sk_prf, _pk_seed, _skey, _pkey);

#ifdef __x86_64__
uint64_t total_cycles = 0ul;
uint64_t total_ticks = 0ul;
#endif

for (auto _ : state) {
#ifdef __x86_64__
const uint64_t start = cpu_cycles();
const uint64_t start = cpu_ticks();
#endif

if constexpr (randomize) {
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(msg, mlen, skey, rand_bytes, sig);
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(_msg, _skey, _rand_bytes, _sig);
} else {
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(msg, mlen, skey, nullptr, sig);
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(_msg, _skey, {}, _sig);
}

benchmark::DoNotOptimize(msg);
benchmark::DoNotOptimize(skey);
benchmark::DoNotOptimize(_msg);
benchmark::DoNotOptimize(_skey);
if constexpr (randomize) {
benchmark::DoNotOptimize(rand_bytes);
benchmark::DoNotOptimize(_rand_bytes);
}
benchmark::DoNotOptimize(sig);
benchmark::DoNotOptimize(_sig);
benchmark::ClobberMemory();

#ifdef __x86_64__
const uint64_t end = cpu_cycles();
total_cycles += (end - start);
const uint64_t end = cpu_ticks();
total_ticks += (end - start);
#endif
}

state.SetItemsProcessed(state.iterations());

#ifdef __x86_64__
total_cycles /= static_cast<uint64_t>(state.iterations());
state.counters["average_cpu_cycles"] = static_cast<double>(total_cycles);
total_ticks /= static_cast<uint64_t>(state.iterations());
state.counters["rdtsc"] = static_cast<double>(total_ticks);
#endif

std::free(sk_seed);
std::free(sk_prf);
std::free(pk_seed);
std::free(pkey);
std::free(skey);
std::free(msg);
std::free(rand_bytes);
std::free(sig);
}

// Benchmark SPHINCS+ signature verification algorithm
Expand All @@ -158,74 +158,75 @@ template<const size_t n,
static inline void
verify(benchmark::State& state)
{
namespace utils = sphincs_plus_utils;
constexpr size_t pklen = utils::get_sphincs_pkey_len<n>();
constexpr size_t sklen = utils::get_sphincs_skey_len<n>();
constexpr size_t siglen = utils::get_sphincs_sig_len<n, h, d, a, k, w>();
constexpr size_t mlen = 32;

uint8_t* sk_seed = static_cast<uint8_t*>(std::malloc(n));
uint8_t* sk_prf = static_cast<uint8_t*>(std::malloc(n));
uint8_t* pk_seed = static_cast<uint8_t*>(std::malloc(n));
uint8_t* pkey = static_cast<uint8_t*>(std::malloc(pklen));
uint8_t* skey = static_cast<uint8_t*>(std::malloc(sklen));
uint8_t* msg = static_cast<uint8_t*>(std::malloc(mlen));
uint8_t* rand_bytes = static_cast<uint8_t*>(std::malloc(n));
uint8_t* sig = static_cast<uint8_t*>(std::malloc(siglen));

sphincs_plus_utils::random_data<uint8_t>(sk_seed, n);
sphincs_plus_utils::random_data<uint8_t>(sk_prf, n);
sphincs_plus_utils::random_data<uint8_t>(pk_seed, n);
sphincs_plus_utils::random_data<uint8_t>(msg, mlen);
sphincs_plus_utils::random_data<uint8_t>(rand_bytes, n);

sphincs_plus::keygen<n, h, d, w, v>(sk_seed, sk_prf, pk_seed, skey, pkey);
constexpr size_t pklen = sphincs_plus_utils::get_sphincs_pkey_len<n>();
constexpr size_t sklen = sphincs_plus_utils::get_sphincs_skey_len<n>();
constexpr size_t siglen = sphincs_plus_utils::get_sphincs_sig_len<n, h, d, a, k, w>();
const size_t mlen = state.range();

std::vector<uint8_t> sk_seed(n, 0);
std::vector<uint8_t> sk_prf(n, 0);
std::vector<uint8_t> pk_seed(n, 0);
std::vector<uint8_t> pkey(pklen, 0);
std::vector<uint8_t> skey(sklen, 0);
std::vector<uint8_t> msg(mlen, 0);
std::vector<uint8_t> rand_bytes(n, 0);
std::vector<uint8_t> sig(siglen, 0);

auto _sk_seed = std::span<uint8_t, n>(sk_seed);
auto _sk_prf = std::span<uint8_t, n>(sk_prf);
auto _pk_seed = std::span<uint8_t, n>(pk_seed);
auto _pkey = std::span<uint8_t, pklen>(pkey);
auto _skey = std::span<uint8_t, sklen>(skey);
auto _msg = std::span(msg);
auto _rand_bytes = std::span<uint8_t, n>(rand_bytes);
auto _sig = std::span<uint8_t, siglen>(sig);

prng::prng_t prng;
prng.read(_sk_seed);
prng.read(_sk_prf);
prng.read(_pk_seed);
prng.read(_msg);
prng.read(_rand_bytes);

sphincs_plus::keygen<n, h, d, w, v>(_sk_seed, _sk_prf, _pk_seed, _skey, _pkey);

if constexpr (randomize) {
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(msg, mlen, skey, rand_bytes, sig);
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(_msg, _skey, _rand_bytes, _sig);
} else {
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(msg, mlen, skey, nullptr, sig);
sphincs_plus::sign<n, h, d, a, k, w, v, randomize>(_msg, _skey, {}, _sig);
}

#ifdef __x86_64__
uint64_t total_cycles = 0ul;
uint64_t total_ticks = 0ul;
#endif

bool flag = true;
for (auto _ : state) {
#ifdef __x86_64__
const uint64_t start = cpu_cycles();
const uint64_t start = cpu_ticks();
#endif

flag &= sphincs_plus::verify<n, h, d, a, k, w, v>(msg, mlen, sig, pkey);
flag &= sphincs_plus::verify<n, h, d, a, k, w, v>(_msg, _sig, _pkey);

benchmark::DoNotOptimize(flag);
benchmark::DoNotOptimize(msg);
benchmark::DoNotOptimize(sig);
benchmark::DoNotOptimize(pkey);
benchmark::DoNotOptimize(_msg);
benchmark::DoNotOptimize(_sig);
benchmark::DoNotOptimize(_pkey);
benchmark::ClobberMemory();

#ifdef __x86_64__
const uint64_t end = cpu_cycles();
total_cycles += (end - start);
const uint64_t end = cpu_ticks();
total_ticks += (end - start);
#endif
}

assert(flag);
state.SetItemsProcessed(state.iterations());

#ifdef __x86_64__
total_cycles /= static_cast<uint64_t>(state.iterations());
state.counters["average_cpu_cycles"] = static_cast<double>(total_cycles);
total_ticks /= static_cast<uint64_t>(state.iterations());
state.counters["rdtsc"] = static_cast<double>(total_ticks);
#endif

std::free(sk_seed);
std::free(sk_prf);
std::free(pk_seed);
std::free(pkey);
std::free(skey);
std::free(msg);
std::free(rand_bytes);
std::free(sig);
}

}
Loading

0 comments on commit 9e76ee0

Please sign in to comment.