diff --git a/CMakeLists.txt b/CMakeLists.txt
index b57e6087..a63c4902 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -73,21 +73,18 @@
 option(KAMINPAR_COMPRESSION_EDGE_WEIGHTS "Whether to compress edge weights." ON)
 option(KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING "Use high-degree encoding for the compressed graph." ON)
 option(KAMINPAR_COMPRESSION_INTERVAL_ENCODING "Use interval encoding for the compressed graph." ON)
 option(KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING "Use run-length encoding for the compressed graph." OFF)
-option(KAMINPAR_COMPRESSION_STREAM_ENCODING "Use stream encoding for the compressed graph." OFF)
+option(KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING "Use StreamVByte encoding for the compressed graph." OFF)
 option(KAMINPAR_COMPRESSION_FAST_DECODING "Use fast decoding for the compressed graph." OFF)
-option(KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION "Whether all isolated nodes are the last nodes of the input graph" OFF)
 
 if (KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
-  message(FATAL_ERROR "Either run-length or stream encoding can be used for varints but not both.")
+  message(FATAL_ERROR "Either run-length or StreamVByte encoding can be used for varints but not both.")
 endif ()
 
 if (KAMINPAR_64BIT_NODE_IDS AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
-  message(FATAL_ERROR "Stream encoding cannot be used with 64-bit NodeIDs.")
+  message(FATAL_ERROR "StreamVByte encoding cannot be used with 64-bit NodeIDs.")
 endif ()
 
-if (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_STREAM_ENCODING)
-  message(FATAL_ERROR "Stream encoding cannot be used together with compressed edge weights.")
-elseif (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING)
+if (KAMINPAR_COMPRESSION_EDGE_WEIGHTS AND KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING)
   message(FATAL_ERROR "Run-length encoding cannot be used together with compressed edge weights.")
 endif ()
 
@@ -245,11 +242,11 @@
 else ()
   message(" Run-length encoding: disabled")
 endif ()
 
-if (KAMINPAR_COMPRESSION_STREAM_ENCODING)
-  list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_STREAM_ENCODING")
-  message(" Stream encoding: enabled")
+if (KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING)
+  list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING")
+  message(" StreamVByte encoding: enabled")
 else ()
-  message(" Stream encoding: disabled")
+  message(" StreamVByte encoding: disabled")
 endif ()
 
 if (KAMINPAR_COMPRESSION_FAST_DECODING)
@@ -260,13 +257,6 @@
 else ()
   message(" Fast decoding: disabled")
 endif ()
 
-if (KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION)
-  list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION")
-  message(" Isolated nodes separation: enabled")
-else ()
-  message(" Isolated nodes separation: disabled")
-endif ()
-
 if (KAMINPAR_64BIT_NODE_IDS OR KAMINPAR_64BIT_IDS)
   list(APPEND KAMINPAR_DEFINITIONS "-DKAMINPAR_64BIT_NODE_IDS")
   set(KAMINPAR_SHM_NODE_ID_STR "std::uint64_t")
diff --git a/apps/benchmarks/shm_variable_length_codec_benchmark.cc b/apps/benchmarks/shm_variable_length_codec_benchmark.cc
index 746adc97..3bbc858a 100644
--- a/apps/benchmarks/shm_variable_length_codec_benchmark.cc
+++ b/apps/benchmarks/shm_variable_length_codec_benchmark.cc
@@ -13,9 +13,9 @@
 #include "kaminpar-cli/CLI11.h"
 #include "kaminpar-common/console_io.h"
-#include "kaminpar-common/graph-compression/varint_codec.h"
-#include "kaminpar-common/graph-compression/varint_run_length_codec.h"
-#include
"kaminpar-common/graph-compression/varint_stream_codec.h" +#include "kaminpar-common/graph-compression/streamvbyte.h" +#include "kaminpar-common/graph-compression/varint.h" +#include "kaminpar-common/graph-compression/varint_rle.h" #include "kaminpar-common/logger.h" #include "kaminpar-common/timer.h" @@ -112,7 +112,7 @@ sv_encode_values(std::string_view name, const std::size_t count, Lambda &&l) { auto encoded_values = std::make_unique(count * sizeof(Int) + count); TIMED_SCOPE(name) { - VarIntStreamEncoder encoder(encoded_values.get(), count); + streamvbyte::StreamVByteEncoder encoder(count, encoded_values.get()); for (std::size_t i = 0; i < count; ++i) { const std::size_t bytes_written = encoder.add(l(i)); @@ -218,9 +218,7 @@ void benchmark( SCOPED_TIMER(name); for (std::size_t i = 0; i < count; ++i) { - const auto [value, bytes_decoded] = l(values_ptr); - values_ptr += bytes_decoded; - + const auto value = l(&values_ptr); do_not_optimize(value); } } @@ -229,7 +227,7 @@ template void benchmark_rle(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) { SCOPED_TIMER(name); - VarIntRunLengthDecoder decoder(values_ptr, count); + VarIntRunLengthDecoder decoder(count, values_ptr); decoder.decode([](const Int value) { do_not_optimize(value); }); } @@ -237,7 +235,7 @@ template void benchmark_sve(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) { SCOPED_TIMER(name); - VarIntStreamDecoder decoder(values_ptr, count); + streamvbyte::StreamVByteDecoder decoder(count, values_ptr); decoder.decode([](const Int value) { do_not_optimize(value); }); } @@ -299,7 +297,7 @@ template void run_benchmark(std::size_t count) { encoded_zero_values.get(), encoded_max_values.get(), encoded_random_values.get(), - [](const std::uint8_t *ptr) { return varint_decode_general(ptr); } + [](const std::uint8_t **ptr) { return varint_decode_loop(ptr); } ); benchmark( @@ -308,9 +306,10 @@ template void run_benchmark(std::size_t count) { encoded_zero_values.get(), encoded_max_values.get(), encoded_random_values.get(), - [](const std::uint8_t *ptr) { return varint_decode(ptr); } + [](const std::uint8_t **ptr) { return varint_decode_pext_unrolled(ptr); } ); + /* std::vector> random_signed_values = generate_random_values>(count); @@ -336,6 +335,7 @@ template void run_benchmark(std::size_t count) { encoded_random_signed_values.get(), [](const std::uint8_t *ptr) { return signed_varint_decode>(ptr); } ); + */ const auto [rl_encoded_zero_values, rl_encoded_max_values, rl_encoded_random_values] = rl_encode_values(count, random_values); diff --git a/apps/io/shm_compressed_graph_binary.cc b/apps/io/shm_compressed_graph_binary.cc index 7e5fcf9c..1a61caac 100644 --- a/apps/io/shm_compressed_graph_binary.cc +++ b/apps/io/shm_compressed_graph_binary.cc @@ -31,8 +31,7 @@ struct CompressedBinaryHeader { bool use_high_degree_encoding; bool use_interval_encoding; bool use_run_length_encoding; - bool use_stream_vbyte_encoding; - bool use_isolated_nodes_separation; + bool use_streamvbyte_encoding; std::uint64_t high_degree_threshold; std::uint64_t high_degree_part_length; @@ -66,8 +65,7 @@ CompressedBinaryHeader create_header(const CompressedGraph &graph) { CompressedGraph::kHighDegreeEncoding, CompressedGraph::kIntervalEncoding, CompressedGraph::kRunLengthEncoding, - CompressedGraph::kStreamEncoding, - CompressedGraph::kIsolatedNodesSeparation, + CompressedGraph::kStreamVByteEncoding, CompressedGraph::kHighDegreeThreshold, CompressedGraph::kHighDegreePartLength, @@ -91,12 +89,12 @@ template 
static void write_int(std::ofstream &out, const T id) { static void write_header(std::ofstream &out, const CompressedBinaryHeader header) { const std::uint16_t boolean_values = - (header.use_isolated_nodes_separation << 12) | (header.use_stream_vbyte_encoding << 11) | - (header.use_run_length_encoding << 10) | (header.use_interval_encoding << 9) | - (header.use_high_degree_encoding << 8) | (header.compress_edge_weights << 7) | - (header.use_degree_bucket_order << 6) | (header.has_64_bit_edge_weight << 5) | - (header.has_64_bit_node_weight << 4) | (header.has_64_bit_edge_id << 3) | - (header.has_64_bit_node_id << 2) | (header.has_edge_weights << 1) | (header.has_node_weights); + (header.use_streamvbyte_encoding << 11) | (header.use_run_length_encoding << 10) | + (header.use_interval_encoding << 9) | (header.use_high_degree_encoding << 8) | + (header.compress_edge_weights << 7) | (header.use_degree_bucket_order << 6) | + (header.has_64_bit_edge_weight << 5) | (header.has_64_bit_node_weight << 4) | + (header.has_64_bit_edge_id << 3) | (header.has_64_bit_node_id << 2) | + (header.has_edge_weights << 1) | (header.has_node_weights); write_int(out, boolean_values); write_int(out, header.high_degree_threshold); @@ -155,14 +153,14 @@ template static T read_int(std::ifstream &in) { CompressedBinaryHeader read_header(std::ifstream &in) { const auto boolean_values = read_int(in); return { - (boolean_values & 1) != 0, (boolean_values & 2) != 0, (boolean_values & 4) != 0, - (boolean_values & 8) != 0, (boolean_values & 16) != 0, (boolean_values & 32) != 0, - (boolean_values & 64) != 0, (boolean_values & 128) != 0, (boolean_values & 256) != 0, - (boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0, - (boolean_values & 4096) != 0, read_int(in), read_int(in), - read_int(in), read_int(in), read_int(in), - read_int(in), read_int(in), read_int(in), - read_int(in), read_int(in), + (boolean_values & 1) != 0, (boolean_values & 2) != 0, (boolean_values & 4) != 0, + (boolean_values & 8) != 0, (boolean_values & 16) != 0, (boolean_values & 32) != 0, + (boolean_values & 64) != 0, (boolean_values & 128) != 0, (boolean_values & 256) != 0, + (boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0, + read_int(in), read_int(in), read_int(in), + read_int(in), read_int(in), read_int(in), + read_int(in), read_int(in), read_int(in), + read_int(in), }; } @@ -263,8 +261,8 @@ void verify_header(const CompressedBinaryHeader header) { std::exit(1); } - if (header.use_stream_vbyte_encoding != CompressedGraph::kStreamEncoding) { - if (header.use_stream_vbyte_encoding) { + if (header.use_streamvbyte_encoding != CompressedGraph::kStreamVByteEncoding) { + if (header.use_streamvbyte_encoding) { LOG_ERROR << "The stored compressed graph uses stream encoding but this build does not."; } else { LOG_ERROR << "The stored compressed graph does not use stream encoding but this build does."; @@ -272,17 +270,6 @@ void verify_header(const CompressedBinaryHeader header) { std::exit(1); } - if (header.use_isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) { - if (header.use_isolated_nodes_separation) { - LOG_ERROR - << "The stored compressed graph uses isolated nodes separation but this build does not."; - } else { - LOG_ERROR << "The stored compressed graph does not use isolated nodes separation but this " - "build does."; - } - std::exit(1); - } - if (header.high_degree_threshold != CompressedGraph::kHighDegreeThreshold) { LOG_ERROR << "The stored compressed 
graph uses " << header.high_degree_threshold << " as the high degree threshold but this build uses " diff --git a/apps/io/shm_parhip_parser.cc b/apps/io/shm_parhip_parser.cc index 45d2c74a..1f6af24a 100644 --- a/apps/io/shm_parhip_parser.cc +++ b/apps/io/shm_parhip_parser.cc @@ -9,7 +9,6 @@ #include #include -#include #include #include @@ -105,7 +104,7 @@ class ParHIPHeader { (has_node_weights ? num_nodes * _node_weight_width : 0); } - [[nodiscard]] NodeID map_edge_offset(const EdgeID edge_offset) const { + [[nodiscard]] EdgeID map_edge_offset(const EdgeID edge_offset) const { return (edge_offset - _nodes_offset_base) / _node_id_width; } @@ -120,13 +119,13 @@ class ParHIPHeader { std::exit(1); } - if (has_64_bit_node_weight && sizeof(NodeWeight) == 4) { + if (has_node_weights && has_64_bit_node_weight && sizeof(NodeWeight) == 4) { LOG_ERROR << "The stored graph uses 64-Bit node weights but this build uses 32-Bit node weights."; std::exit(1); } - if (has_64_bit_edge_weight && sizeof(EdgeWeight) == 4) { + if (has_edge_weights && has_64_bit_edge_weight && sizeof(EdgeWeight) == 4) { LOG_ERROR << "The stored graph uses 64-Bit edge weights but this build uses 32-Bit edge weights."; std::exit(1); @@ -351,32 +350,29 @@ CompressedGraph compressed_read_parallel(const std::string &filename, const Node const bool sort_by_degree_bucket = ordering == NodeOrdering::DEGREE_BUCKETS; if (sort_by_degree_bucket) { - RECORD("degrees") StaticArray degrees(header.num_nodes, static_array::noinit); - TIMED_SCOPE("Read degrees") { - tbb::parallel_for(tbb::blocked_range(0, header.num_nodes), [&](const auto &r) { - for (NodeID u = r.begin(); u != r.end(); ++u) { - degrees[u] = header.map_edge_offset(node(u + 1)) - header.map_edge_offset(node(u)); - } - }); + const auto degree = [&](const NodeID u) { + return static_cast( + header.map_edge_offset(node(u + 1)) - header.map_edge_offset(node(u)) + ); }; - const auto [perm, inv_perm] = - graph::sort_by_degree_buckets(header.num_nodes, [&](const NodeID u) { - return degrees[u]; - }); - return parallel_compress( + auto [perm, inv_perm] = graph::sort_by_degree_buckets(header.num_nodes, degree); + CompressedGraph compressed_graph = parallel_compress( header.num_nodes, header.num_edges, header.has_node_weights, header.has_edge_weights, true, [&](const NodeID u) { return inv_perm[u]; }, - [&](const NodeID u) { return degrees[u]; }, + degree, [&](const NodeID u) { return header.map_edge_offset(node(u)); }, [&](const EdgeID e) { return perm[edge(e)]; }, [&](const NodeID u) { return node_weight(u); }, [&](const EdgeID e) { return edge_weight(e); } ); + + compressed_graph.set_permutation(std::move(perm)); + return compressed_graph; } else { return parallel_compress( header.num_nodes, diff --git a/apps/tools/shm_graph_attach_weights_tool.cc b/apps/tools/shm_graph_attach_weights_tool.cc index 8f9ea906..5dfcb2c0 100644 --- a/apps/tools/shm_graph_attach_weights_tool.cc +++ b/apps/tools/shm_graph_attach_weights_tool.cc @@ -35,16 +35,22 @@ namespace { enum class WeightDistribution { UNIFORM, - ALTERNATING + ALTERNATING, + EXPONENTIAL, }; [[nodiscard]] std::unordered_map get_weight_distributions() { return { {"uniform", WeightDistribution::UNIFORM}, {"alternating", WeightDistribution::ALTERNATING}, + {"exponential", WeightDistribution::EXPONENTIAL}, }; } +[[nodiscard]] int local_seed(const int cpu, const int seed) { + return seed + (cpu + 42) * 3; +} + struct EdgeHasher { using Edge = std::pair; @@ -112,8 +118,7 @@ generate_edge_weights(const CSRGraph &graph, Lambda 
&&edge_weight_generator_fact const CSRGraph &graph, const int seed, const EdgeWeight min, const EdgeWeight max ) { return generate_edge_weights(graph, [&](const int cpu, auto &&edge_weight_fetcher) { - const int local_seed = seed + cpu; - std::mt19937 gen(local_seed); + std::mt19937 gen(local_seed(seed, cpu)); std::uniform_int_distribution dist(min, max); edge_weight_fetcher([&](const EdgeID, const NodeID, const NodeID) { @@ -132,8 +137,7 @@ generate_edge_weights(const CSRGraph &graph, Lambda &&edge_weight_generator_fact const EdgeWeight max_large_weights ) { return generate_edge_weights(graph, [&](const int cpu, auto &&edge_weight_fetcher) { - const int local_seed = seed + cpu; - std::mt19937 gen(local_seed); + std::mt19937 gen(local_seed(seed, cpu)); std::uniform_int_distribution small_dist(min_small_weights, max_small_weights); std::uniform_int_distribution large_dist(min_large_weights, max_large_weights); @@ -151,6 +155,19 @@ generate_edge_weights(const CSRGraph &graph, Lambda &&edge_weight_generator_fact }); } +[[nodiscard]] StaticArray +generate_exponential_edge_weights(const CSRGraph &graph, const int seed, const double lambda) { + return generate_edge_weights(graph, [&](const int cpu, auto &&edge_weight_fetcher) { + std::mt19937 gen(local_seed(seed, cpu)); + std::exponential_distribution dist(lambda); + + edge_weight_fetcher([&](const EdgeID e, const NodeID, const NodeID) { + const EdgeWeight weight = static_cast(dist(gen)) + 1; + return weight; + }); + }); +} + }; // namespace int main(int argc, char *argv[]) { @@ -186,12 +203,13 @@ int main(int argc, char *argv[]) { ->transform(CLI::CheckedTransformer(get_weight_distributions()).description("")) ->description(R"(Distribution used for generating edge weights: - uniform - - alternating)") + - alternating + - exponential)") ->required() ->capture_default_str(); EdgeWeight uniform_min_weight = 1; - EdgeWeight uniform_max_weight = 32768; + EdgeWeight uniform_max_weight = 65536; auto *uniform_group = app.add_option_group("Uniform Distribution"); uniform_group->add_option("--u-min", uniform_min_weight, "Minimum weight value.") ->capture_default_str(); @@ -199,10 +217,10 @@ int main(int argc, char *argv[]) { ->capture_default_str(); EdgeWeight alt_min_small_weights = 1; - EdgeWeight alt_max_small_weights = 128; - EdgeWeight alt_min_large_weights = 32768; - EdgeWeight alt_max_large_weights = 8388608; - auto *alt_group = app.add_option_group("Uniform Distribution"); + EdgeWeight alt_max_small_weights = 1; + EdgeWeight alt_min_large_weights = 65536; + EdgeWeight alt_max_large_weights = 65536; + auto *alt_group = app.add_option_group("Alternating Distribution"); alt_group ->add_option("--a-min-small", alt_min_small_weights, "Minimum weight value of small weights.") ->capture_default_str(); @@ -216,6 +234,10 @@ int main(int argc, char *argv[]) { ->add_option("--a-max-large", alt_max_large_weights, "Maximum weight value of large weights.") ->capture_default_str(); + double lambda = 0.0001; + auto *exp_group = app.add_option_group("Exponential Distribution"); + exp_group->add_option("--e-lambda", lambda, "Rate parameter.")->capture_default_str(); + CLI11_PARSE(app, argc, argv); tbb::global_control gc(tbb::global_control::max_allowed_parallelism, num_threads); @@ -238,6 +260,8 @@ int main(int argc, char *argv[]) { alt_min_large_weights, alt_max_large_weights ); + case WeightDistribution::EXPONENTIAL: + return generate_exponential_edge_weights(csr_graph, seed, lambda); default: __builtin_unreachable(); } diff --git 
a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc index b653dfaf..de421cdf 100644 --- a/kaminpar-cli/kaminpar_arguments.cc +++ b/kaminpar-cli/kaminpar_arguments.cc @@ -205,6 +205,16 @@ CLI::Option_group *create_lp_coarsening_options(CLI::App *app, Context &ctx) { ) ->capture_default_str(); + lp->add_option("--c-lp-tie-breaking-strategy", ctx.coarsening.clustering.lp.tie_breaking_strategy) + ->transform(CLI::CheckedTransformer(get_tie_breaking_strategies()).description("")) + ->description( + R"(Determines the tie breaking strategy. +Options are: + - geometric: Prefer nodes with same rating located at the end of a neighborhood + - uniform: Select nodes with same rating uniformly at random + )" + ) + ->capture_default_str(); lp->add_option( "--c-lp-cluster-weights-struct", ctx.coarsening.clustering.lp.cluster_weights_structure ) @@ -322,6 +332,13 @@ Options are: "The fraction of the total edges with which to fill the edge buffer" ) ->capture_default_str(); + contraction + ->add_option( + "--c-con-use-growing-hash-tables", + ctx.coarsening.contraction.use_growing_hash_tables, + "Whether to use growing hash tables to collect coarse edges (only for unbuffered mode)" + ) + ->capture_default_str(); return contraction; } @@ -396,6 +413,16 @@ Options are: ) ->capture_default_str(); + lp->add_option("--r-lp-tie-breaking-strategy", ctx.refinement.lp.tie_breaking_strategy) + ->transform(CLI::CheckedTransformer(get_tie_breaking_strategies()).description("")) + ->description( + R"(Determines the tie breaking strategy. +Options are: + - geometric: Prefer nodes with same rating located at the end of a neighborhood + - uniform: Select nodes with same rating uniformly at random + )" + ) + ->capture_default_str(); lp->add_option( "--r-lp-second-phase-selection-strategy", ctx.refinement.lp.second_phase_selection_strategy ) diff --git a/kaminpar-common/datastructures/bitvector_rank.h b/kaminpar-common/datastructures/bitvector_rank.h index b3403909..6378e42f 100644 --- a/kaminpar-common/datastructures/bitvector_rank.h +++ b/kaminpar-common/datastructures/bitvector_rank.h @@ -64,6 +64,16 @@ class RankCombinedBitVector { } public: + /*! + * Constructs an empty bit vector. + */ + explicit RankCombinedBitVector() + : _length(0), + _num_blocks(0), + _data(0), + _num_superblocks(0), + _superblock_data(0) {} + /*! * Constructs an uninitialized bit vector. * diff --git a/kaminpar-common/datastructures/compact_static_array.h b/kaminpar-common/datastructures/compact_static_array.h index bd7136eb..898775e6 100644 --- a/kaminpar-common/datastructures/compact_static_array.h +++ b/kaminpar-common/datastructures/compact_static_array.h @@ -37,7 +37,7 @@ template class CompactStaticArray { using difference_type = std::ptrdiff_t; CompactStaticArrayIterator( - const std::uint8_t byte_width, const Int read_mask, const std::uint8_t *data + const std::size_t byte_width, const Int read_mask, const std::uint8_t *data ) : _byte_width(byte_width), _mask(read_mask), @@ -125,7 +125,7 @@ template class CompactStaticArray { } private: - const std::uint8_t _byte_width; + const std::size_t _byte_width; const Int _mask; const std::uint8_t *_data; }; @@ -141,7 +141,12 @@ template class CompactStaticArray { /*! * Constructs an unitialized CompactStaticArray. 
*/ - CompactStaticArray() : _byte_width(0), _size(0), _unrestricted_size(0), _num_values(0) { + CompactStaticArray() + : _byte_width(0), + _size(0), + _num_values(0), + _unrestricted_size(0), + _unrestricted_num_values(0) { RECORD_DATA_STRUCT(0, _struct); } @@ -151,7 +156,7 @@ template class CompactStaticArray { * @param byte_width The number of bytes needed to store the largest integer in the array. * @param size num_values number of values to store. */ - CompactStaticArray(const std::uint8_t byte_width, const std::size_t num_values) { + CompactStaticArray(const std::size_t byte_width, const std::size_t num_values) { RECORD_DATA_STRUCT(0, _struct); resize(byte_width, num_values); } @@ -164,17 +169,18 @@ template class CompactStaticArray { * @param data The pointer to the memory location where the data is compactly stored. */ CompactStaticArray( - const std::uint8_t byte_width, + const std::size_t byte_width, const std::size_t actual_size, std::unique_ptr data ) : _byte_width(byte_width), _size(actual_size), - _unrestricted_size(actual_size), _num_values((_size - (sizeof(Int) - _byte_width)) / _byte_width), _values(std::move(data)), - _read_mask(std::numeric_limits::max() << (byte_width * 8)), - _write_mask(std::numeric_limits::max() << (byte_width * 8)) { + _read_mask(std::numeric_limits::max() >> ((sizeof(Int) - byte_width) * 8)), + _write_mask(std::numeric_limits::max() << (byte_width * 8)), + _unrestricted_size(_size), + _unrestricted_num_values(_num_values) { RECORD_DATA_STRUCT(0, _struct); KASSERT(actual_size >= sizeof(Int) - _byte_width); KASSERT(byte_width >= 1); @@ -193,13 +199,12 @@ template class CompactStaticArray { * @param byte_width The number of bytes needed to store the largest integer in the array. * @param num_values The number of values to store. */ - void resize(const std::uint8_t byte_width, const std::size_t num_values) { + void resize(const std::size_t byte_width, const std::size_t num_values) { KASSERT(byte_width >= 1); KASSERT(byte_width <= 8); _byte_width = byte_width; _size = num_values * byte_width + sizeof(Int) - byte_width; - _unrestricted_size = _size; _num_values = num_values; _values = std::make_unique(_size); @@ -207,6 +212,9 @@ template class CompactStaticArray { _read_mask = std::numeric_limits::max() >> ((sizeof(Int) - byte_width) * 8); _write_mask = std::numeric_limits::max() << (byte_width * 8); + _unrestricted_size = _size; + _unrestricted_num_values = num_values; + IF_HEAP_PROFILING(_struct->size = std::max(_struct->size, _size)); } @@ -218,10 +226,11 @@ template class CompactStaticArray { void restrict(const std::size_t new_num_values) { KASSERT(new_num_values <= _num_values); - _num_values = new_num_values; - _unrestricted_size = _size; _size = new_num_values * _byte_width + sizeof(Int) - _byte_width; + + _unrestricted_num_values = _num_values; + _num_values = new_num_values; } /*! @@ -230,6 +239,7 @@ template class CompactStaticArray { */ void unrestrict() { _size = _unrestricted_size; + _num_values = _unrestricted_num_values; } /*! @@ -238,12 +248,15 @@ template class CompactStaticArray { * @param pos The position in the array at which the integer is to be stored. * @param value The value to store. 
*/ - void write(const std::size_t pos, const Int value) { + void write(const std::size_t pos, Int value) { KASSERT(pos < _num_values); - KASSERT(math::byte_width(value) <= _byte_width); + KASSERT(math::byte_width(value) <= _byte_width); - Int *data = reinterpret_cast(_values.get() + pos * _byte_width); - *data = value | (*data & _write_mask); + std::uint8_t *data = _values.get() + pos * _byte_width; + for (std::size_t i = 0; i < _byte_width; ++i) { + *data++ = value & 0b11111111; + value >>= 8; + } } /*! @@ -322,9 +335,8 @@ template class CompactStaticArray { } private: - std::uint8_t _byte_width; + std::size_t _byte_width; std::size_t _size; - std::size_t _unrestricted_size; std::size_t _num_values; std::unique_ptr _values; @@ -332,6 +344,9 @@ template class CompactStaticArray { Int _read_mask; Int _write_mask; + std::size_t _unrestricted_size; + std::size_t _unrestricted_num_values; + IF_HEAP_PROFILING(heap_profiler::DataStructure *_struct); }; diff --git a/kaminpar-common/datastructures/concurrent_fast_reset_array.h b/kaminpar-common/datastructures/concurrent_fast_reset_array.h index c0d97292..c56cd832 100644 --- a/kaminpar-common/datastructures/concurrent_fast_reset_array.h +++ b/kaminpar-common/datastructures/concurrent_fast_reset_array.h @@ -18,6 +18,7 @@ #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/heap_profiler.h" #include "kaminpar-common/parallel/aligned_element.h" +#include "kaminpar-common/ranges.h" namespace kaminpar { @@ -102,13 +103,25 @@ template class ConcurrentFastReset */ template void iterate_and_reset(Lambda &&l) { tbb::parallel_for(0, _used_entries_tls.size(), [&](const auto i) { - l(i, _used_entries_tls[i]); + auto &local_used_entries = _used_entries_tls[i].vec; + if (local_used_entries.empty()) { + return; + } - for (const size_type pos : _used_entries_tls[i]) { + auto local_entries = TransformedIotaRange( + static_cast(0), + local_used_entries.size(), + [this, &local_used_entries](const std::size_t j) { + const std::size_t pos = local_used_entries[j]; + return std::make_pair(pos, _data[pos]); + } + ); + l(i, local_entries); + + for (const size_type pos : local_used_entries) { _data[pos] = Value(); } - - _used_entries_tls[i].clear(); + local_used_entries.clear(); }); } diff --git a/kaminpar-common/datastructures/dynamic_map.h b/kaminpar-common/datastructures/dynamic_map.h index 9425d702..bd0e94ae 100644 --- a/kaminpar-common/datastructures/dynamic_map.h +++ b/kaminpar-common/datastructures/dynamic_map.h @@ -5,11 +5,14 @@ #include #include #include -#include -#include + +#include "kaminpar-common/datastructures/scalable_vector.h" +#include "kaminpar-common/parallel/tbb_malloc.h" namespace kaminpar { template class DynamicMapBase { + static constexpr std::size_t kTHPThreshold = 1024 * 1024 * 16; + public: DynamicMapBase(const DynamicMapBase &) = delete; DynamicMapBase &operator=(const DynamicMapBase &other) = delete; @@ -82,8 +85,8 @@ template class DynamicMapBase { _size = 0; _capacity = align_to_next_power_of_two(capacity); - const size_t alloc_size = static_cast(this)->size_in_bytes_impl(); - _data = std::make_unique(alloc_size); + const std::size_t alloc_size = static_cast(this)->size_in_bytes_impl(); + _data = parallel::make_unique(alloc_size, alloc_size >= kTHPThreshold); std::memset(_data.get(), 0, alloc_size); static_cast(this)->initialize_impl(); @@ -98,7 +101,7 @@ template class DynamicMapBase { const std::size_t old_size = _size; const std::size_t old_capacity = _capacity; const std::size_t new_capacity = 2UL * 
_capacity; - const std::unique_ptr old_data = std::move(_data); + const parallel::tbb_unique_ptr old_data = std::move(_data); const std::uint8_t *old_data_begin = old_data.get(); initialize(new_capacity); @@ -118,7 +121,7 @@ template class DynamicMapBase { std::size_t _capacity = 0; std::size_t _size = 0; - std::unique_ptr _data = nullptr; + parallel::tbb_unique_ptr _data = nullptr; }; template @@ -215,18 +218,18 @@ class DynamicFlatMap final : public DynamicMapBase +template class DynamicRememberingFlatMap final - : public DynamicMapBase> { - using Base = DynamicMapBase>; + : public DynamicMapBase> { + using Base = DynamicMapBase>; using Base::INVALID_POS_MASK; friend Base; struct MapElement { + Timestamp timestamp; Key key; Value value; - std::size_t timestamp; }; public: @@ -243,15 +246,17 @@ class DynamicRememberingFlatMap final ~DynamicRememberingFlatMap() = default; template void for_each(Lambda &&lambda) const { - for (const std::size_t pos : _positions) { - lambda(_elements[pos].key, _elements[pos].value); + for (const std::size_t pos : _used_elements) { + const MapElement element = _elements[pos]; + lambda(element.key, element.value); } } [[nodiscard]] auto entries() const { return TransformedIotaRange(static_cast(0), _size, [this](const std::size_t i) { - const std::size_t pos = _positions[i]; - return std::make_pair(_elements[pos].key, _elements[pos].value); + const std::size_t pos = _used_elements[i]; + const MapElement element = _elements[pos]; + return std::make_pair(element.key, element.value); }); } @@ -262,8 +267,10 @@ class DynamicRememberingFlatMap final std::size_t find_impl(const Key key) const { std::size_t hash = key & (_capacity - 1); - while (_elements[hash].timestamp == _timestamp) { - if (_elements[hash].key == key) { + + MapElement element; + while ((element = _elements[hash]).timestamp == _timestamp) { + if (element.key == key) { return hash; } hash = (hash + 1) & (_capacity - 1); @@ -277,15 +284,14 @@ class DynamicRememberingFlatMap final Value &add_element_impl(Key key, Value value, const std::size_t pos) { _size++; - _positions.push_back(pos); + _used_elements.push_back(pos); - _elements[pos] = MapElement{key, value, _timestamp}; + _elements[pos] = MapElement{_timestamp, key, value}; return _elements[pos].value; } void initialize_impl() { _elements = reinterpret_cast(_data.get()); - _old_timestamp = _timestamp; _timestamp = 1; } @@ -296,29 +302,29 @@ class DynamicRememberingFlatMap final const auto *elements = reinterpret_cast(old_data_begin); for (std::size_t i = 0; i < old_size; ++i) { - const std::size_t pos = _positions[i]; + const std::size_t pos = _used_elements[i]; + const MapElement element = elements[pos]; const Key key = elements[pos].key; const std::size_t new_pos = find_impl(key) & ~INVALID_POS_MASK; - _positions[i] = new_pos; - _elements[new_pos] = MapElement{key, elements[pos].value, _timestamp}; + _used_elements[i] = new_pos; + _elements[new_pos] = MapElement{_timestamp, key, element.value}; } } void clear_impl() { ++_timestamp; - _positions.clear(); + _used_elements.clear(); } using Base::_capacity; using Base::_data; using Base::_size; - std::size_t _old_timestamp = 0; - std::size_t _timestamp = 1; + Timestamp _timestamp = 1; MapElement *_elements = nullptr; - std::vector _positions; + ScalableVector _used_elements; }; } // namespace kaminpar diff --git a/kaminpar-common/graph-compression/compressed_edges_builder.h b/kaminpar-common/graph-compression/compressed_edges_builder.h index 2b499270..370f45b3 100644 --- 
a/kaminpar-common/graph-compression/compressed_edges_builder.h +++ b/kaminpar-common/graph-compression/compressed_edges_builder.h @@ -13,26 +13,44 @@ #include "kaminpar-common/graph-compression/compressed_neighborhoods.h" #include "kaminpar-common/heap_profiler.h" -#include "kaminpar-common/logger.h" namespace kaminpar { -SET_DEBUG(false); +/*! + * A builder to construct compressed edges. + * + * @tparam NodeID The type of integer to use to identify a node. + * @tparam EdgeID The type of integer to use to identify an edge. + * @tparam EdgeWeight The type of integer to use for edge weights. + */ template class CompressedEdgesBuilder { using CompressedNeighborhoods = kaminpar::CompressedNeighborhoods; - using SignedID = CompressedNeighborhoods::SignedID; static constexpr bool kCompressEdgeWeights = CompressedNeighborhoods::kCompressEdgeWeights; + static constexpr bool kHighDegreeEncoding = CompressedNeighborhoods::kHighDegreeEncoding; static constexpr NodeID kHighDegreeThreshold = CompressedNeighborhoods::kHighDegreeThreshold; static constexpr NodeID kHighDegreePartLength = CompressedNeighborhoods::kHighDegreePartLength; + static constexpr NodeID kIntervalEncoding = CompressedNeighborhoods::kIntervalEncoding; static constexpr NodeID kIntervalLengthTreshold = CompressedNeighborhoods::kIntervalLengthTreshold; + static constexpr bool kRunLengthEncoding = CompressedNeighborhoods::kRunLengthEncoding; - static constexpr bool kStreamEncoding = CompressedNeighborhoods::kStreamEncoding; - static constexpr bool kIsolatedNodesSeparation = - CompressedNeighborhoods::kIsolatedNodesSeparation; + + static constexpr bool kStreamVByteEncoding = CompressedNeighborhoods::kStreamVByteEncoding; + static constexpr NodeID kStreamVByteThreshold = CompressedNeighborhoods::kStreamVByteThreshold; + + static constexpr NodeID kInvalidNodeID = std::numeric_limits::max(); + + using SignedNodeID = std::int64_t; + using SignedEdgeWeight = std::make_signed_t; + + using StreamVByteGapEncoder = + streamvbyte::StreamVByteEncoder; + + using StreamVByteGapAndWeightEncoder = + streamvbyte::StreamVByteEncoder; public: /*! 
@@ -48,6 +66,7 @@ template class Compresse [[nodiscard]] static std::size_t compressed_edge_array_max_size( const NodeID num_nodes, const EdgeID num_edges, const bool has_edge_weights ) { + std::size_t node_id_width = signed_varint_length(num_nodes); std::size_t edge_id_width; if constexpr (kActualNumEdges) { if constexpr (kIntervalEncoding) { @@ -59,19 +78,14 @@ template class Compresse edge_id_width = varint_max_length(); } - std::size_t max_size = num_nodes * edge_id_width + num_edges * varint_length(num_nodes); + std::size_t max_size = (num_nodes + 1) * edge_id_width + num_edges * node_id_width; if constexpr (kHighDegreeEncoding) { - if constexpr (kIntervalEncoding) { - max_size += 2 * num_nodes * varint_max_length(); - } else { - max_size += num_nodes * varint_max_length(); - } - - max_size += (num_edges / kHighDegreePartLength) * varint_max_length(); + max_size += num_nodes * varint_max_length() + + (num_edges / kHighDegreePartLength) * varint_max_length(); } - if (has_edge_weights) { + if (kCompressEdgeWeights && has_edge_weights) { max_size += num_edges * varint_max_length(); } @@ -97,8 +111,8 @@ template class Compresse _edge_weights(edge_weights) { const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); - _compressed_data_start = heap_profiler::overcommit_memory(max_size); - _compressed_data = _compressed_data_start.get(); + _compressed_edges = heap_profiler::overcommit_memory(max_size); + _cur_compressed_edges = _compressed_edges.get(); _compressed_data_max_size = 0; } @@ -124,8 +138,8 @@ template class Compresse _edge_weights(edge_weights) { const std::size_t max_size = compressed_edge_array_max_size(num_nodes, max_degree, has_edge_weights); - _compressed_data_start = heap_profiler::overcommit_memory(max_size); - _compressed_data = _compressed_data_start.get(); + _compressed_edges = heap_profiler::overcommit_memory(max_size); + _cur_compressed_edges = _compressed_edges.get(); _compressed_data_max_size = 0; } @@ -135,14 +149,13 @@ template class Compresse */ ~CompressedEdgesBuilder() { if constexpr (kHeapProfiling) { - if (_compressed_data_start) { - const auto prev_compressed_data_size = - static_cast(_compressed_data - _compressed_data_start.get()); + if (_compressed_edges) { + const auto prev_compressed_data_size = size(); const std::size_t compressed_data_size = std::max(_compressed_data_max_size, prev_compressed_data_size); heap_profiler::HeapProfiler::global().record_alloc( - _compressed_data_start.get(), compressed_data_size + _compressed_edges.get(), compressed_data_size ); } } @@ -155,20 +168,19 @@ template class Compresse CompressedEdgesBuilder &operator=(CompressedEdgesBuilder &&) noexcept = delete; /*! - * Initializes/resets the builder. + * Initializes the builder. * * @param first_edge The first edge ID of the first node to be added. */ void init(const EdgeID first_edge) { - const auto prev_compressed_data_size = - static_cast(_compressed_data - _compressed_data_start.get()); + const auto prev_compressed_data_size = size(); _compressed_data_max_size = std::max(_compressed_data_max_size, prev_compressed_data_size); - _compressed_data = _compressed_data_start.get(); + _cur_compressed_edges = _compressed_edges.get(); - _edge = first_edge; + _cur_edge = first_edge; _max_degree = 0; _total_edge_weight = 0; - _cur_edge_weight = 0; + _cur_edge_weight = first_edge; _num_high_degree_nodes = 0; _num_high_degree_parts = 0; @@ -184,16 +196,88 @@ template class Compresse * @param neighbourhood The neighbourhood of the node to add. 
* @return The offset into the compressed edge array of the node. */ - template EdgeID add(const NodeID node, Container &neighbourhood) { - if constexpr (std::is_same_v>) { - std::sort(neighbourhood.begin(), neighbourhood.end(), [](const auto &a, const auto &b) { + template EdgeID add(const NodeID node, Container &neighborhood) { + using Neighbor = std::remove_reference_t::value_type; + constexpr bool kIsNeighbor = std::is_same_v; + constexpr bool kIsWeightedNeighbor = std::is_same_v>; + static_assert(kIsNeighbor || kIsWeightedNeighbor); + + const EdgeID offset = current_offset(); + NodeID degree = neighborhood.size(); + if (degree == 0) [[unlikely]] { + return offset; + } + + if constexpr (kIsWeightedNeighbor) { + std::sort(neighborhood.begin(), neighborhood.end(), [](const auto &a, const auto &b) { return a.first < b.first; }); } else { - std::sort(neighbourhood.begin(), neighbourhood.end()); + std::sort(neighborhood.begin(), neighborhood.end()); + } + + NodeID num_intervals; + if constexpr (kIntervalEncoding) { + bool has_intervals; + if (kHighDegreeEncoding && degree >= kHighDegreeThreshold) { + has_intervals = false; + } else { + num_intervals = count_intervals(neighborhood); + has_intervals = num_intervals > 0; + _num_interval_nodes += has_intervals ? 1 : 0; + } + + marked_varint_encode(_cur_edge, has_intervals, &_cur_compressed_edges); + } else { + varint_encode(_cur_edge, &_cur_compressed_edges); + } + + _cur_edge += degree; + + if constexpr (kHighDegreeEncoding) { + const bool split_neighbourhood = degree >= kHighDegreeThreshold; + + if (split_neighbourhood) { + const NodeID num_parts = math::div_ceil(degree, kHighDegreePartLength); + const NodeID last_part_length = math::mod_ceil(degree, kHighDegreePartLength); + + std::uint8_t *part_ptr = _cur_compressed_edges; + _cur_compressed_edges += sizeof(NodeID) * num_parts; + + bool has_intervals = false; + for (NodeID i = 0; i < num_parts; ++i) { + const bool last_part = (i + 1) == num_parts; + const NodeID part_length = last_part ? last_part_length : kHighDegreePartLength; + + auto part_begin = neighborhood.begin() + i * kHighDegreePartLength; + auto part_end = part_begin + part_length; + auto part_neighborhood = std::span(part_begin, part_end); + + NodeID *cur_part_ptr = reinterpret_cast(part_ptr) + i; + *cur_part_ptr = static_cast(_cur_compressed_edges - part_ptr); + + NodeID num_intervals; + if constexpr (kIntervalEncoding) { + num_intervals = count_intervals(part_neighborhood); + + if (num_intervals > 0) { + *cur_part_ptr |= math::kSetMSB; + has_intervals = true; + } + } + + add_edges(node, num_intervals, part_neighborhood); + } + + _num_high_degree_nodes += 1; + _num_high_degree_parts += num_parts; + _num_interval_nodes += has_intervals ? 1 : 0; + return offset; + } } - return add_node(node, neighbourhood); + add_edges(node, num_intervals, neighborhood); + return offset; } /*! @@ -202,7 +286,7 @@ template class Compresse * @return The number of bytes that the compressed data of the added neighborhoods take up. */ [[nodiscard]] std::size_t size() const { - return static_cast(_compressed_data - _compressed_data_start.get()); + return static_cast(current_offset()); } /*! @@ -211,7 +295,7 @@ template class Compresse * @return A pointer to the start of the compressed data. */ [[nodiscard]] const std::uint8_t *compressed_data() const { - return _compressed_data_start.get(); + return _compressed_edges.get(); } /*! @@ -220,7 +304,7 @@ template class Compresse * @return Ownership of the compressed data. 
*/ [[nodiscard]] heap_profiler::unique_ptr take_compressed_data() { - return std::move(_compressed_data_start); + return std::move(_compressed_edges); } /*! @@ -278,314 +362,300 @@ template class Compresse } private: - heap_profiler::unique_ptr _compressed_data_start; - std::uint8_t *_compressed_data; - std::size_t _compressed_data_max_size; - - bool _has_edge_weights; - EdgeWeight _total_edge_weight; - EdgeID _cur_edge_weight; - StaticArray &_edge_weights; - - EdgeID _edge; - NodeID _max_degree; - - // Graph compression statistics - std::size_t _num_high_degree_nodes; - std::size_t _num_high_degree_parts; - std::size_t _num_interval_nodes; - std::size_t _num_intervals; - - // Debug graph compression statistics - std::size_t _num_adjacent_node_bytes; - std::size_t _num_edge_weights_bytes; + [[nodiscard]] std::uint64_t current_offset() const { + return static_cast(_cur_compressed_edges - _compressed_edges.get()); + } -private: - template EdgeID add_node(const NodeID node, Container &neighbourhood) { - // The offset into the compressed edge array to the start of the neighbourhood. - const auto offset = static_cast(_compressed_data - _compressed_data_start.get()); + template + static void + set_adjacent_node(Container &neighborhood, const NodeID num_neighbor, const NodeID value) { + using Neighbor = std::remove_reference_t::value_type; + constexpr bool kIsWeightedNeighbor = std::is_same_v>; - const NodeID degree = neighbourhood.size(); - if (degree == 0) { - return offset; + if constexpr (kIsWeightedNeighbor) { + neighborhood[num_neighbor].first = value; + } else { + neighborhood[num_neighbor] = value; } + } - _max_degree = std::max(_max_degree, degree); - - // Store a pointer to the first byte of the first edge of this neighborhood. This byte encodes - // in one of its bits whether interval encoding is used for this node, i.e., whether the nodes - // has intervals in its neighbourhood. - std::uint8_t *marked_byte = _compressed_data; + template + [[nodiscard]] static NodeID + get_adjacent_node(const Container &neighborhood, const NodeID num_neighbor) { + using Neighbor = std::remove_reference_t::value_type; + constexpr bool kIsWeightedNeighbor = std::is_same_v>; - // Store only the first edge for the source node. The degree can be obtained by determining the - // difference between the first edge ids of a node and the next node. Additionally, store the - // first edge as a gap when the isolated nodes are continuously stored at the end of the nodes - // array. - const EdgeID first_edge = _edge; - if constexpr (kIntervalEncoding) { - _compressed_data += marked_varint_encode(first_edge, false, _compressed_data); + if constexpr (kIsWeightedNeighbor) { + return neighborhood[num_neighbor].first; } else { - _compressed_data += varint_encode(first_edge, _compressed_data); + return neighborhood[num_neighbor]; } + } - _edge += degree; + template + [[nodiscard]] static EdgeWeight + get_edge_weight(const Container &neighborhood, const NodeID num_neighbor) { + using Neighbor = std::remove_reference_t::value_type; + constexpr bool kIsWeightedNeighbor = std::is_same_v>; + static_assert(kIsWeightedNeighbor); - // If high-degree encoding is used then split the neighborhood if the degree crosses a - // threshold. The neighborhood is split into equally sized parts (except possible the last part) - // and each part is encoded independently. Furthermore, the offset at which the part is encoded - // is also stored. 
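// A minimal sketch of the splitting described above, assuming kHighDegreePartLength = 1000 and
// kHighDegreeThreshold = 10000 as defined in this file; the concrete degree is an invented
// example and this snippet is illustrative only, not part of the patch:
//
//   const NodeID degree = 12345;                                                   // >= kHighDegreeThreshold
//   const NodeID num_parts = math::div_ceil(degree, kHighDegreePartLength);        // == 13
//   const NodeID last_part_length = math::mod_ceil(degree, kHighDegreePartLength); // == 345
//   // Parts cover neighbors [0, 1000), [1000, 2000), ..., [12000, 12345). Before the encoded
//   // parts, one NodeID-sized offset per part is stored so each part can be decoded independently.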
- if constexpr (kHighDegreeEncoding) { - const bool split_neighbourhood = degree >= kHighDegreeThreshold; + return neighborhood[num_neighbor].second; + } - if (split_neighbourhood) { - const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); - const NodeID last_part_length = ((degree % kHighDegreePartLength) == 0) - ? kHighDegreePartLength - : (degree % kHighDegreePartLength); + void encode_edge_weight(const EdgeWeight edge_weight, EdgeWeight &prev_edge_weight) { + if (!_has_edge_weights) { + return; + } - uint8_t *part_ptr = _compressed_data; - _compressed_data += sizeof(NodeID) * part_count; + _total_edge_weight += edge_weight; - for (NodeID i = 0; i < part_count; ++i) { - const bool last_part = (i + 1) == part_count; - const NodeID part_length = last_part ? last_part_length : kHighDegreePartLength; + if constexpr (kCompressEdgeWeights) { + const SignedEdgeWeight edge_weight_gap = + edge_weight - static_cast(prev_edge_weight); - auto part_begin = neighbourhood.begin() + i * kHighDegreePartLength; - auto part_end = part_begin + part_length; - - std::uint8_t *cur_part_ptr = part_ptr + sizeof(NodeID) * i; - *((NodeID *)cur_part_ptr) = static_cast(_compressed_data - part_ptr); + signed_varint_encode(edge_weight_gap, &_cur_compressed_edges); + prev_edge_weight = edge_weight; + } else { + _edge_weights[_cur_edge_weight++] = edge_weight; + } + } - using Neighbour = typename Container::value_type; - add_edges(node, nullptr, std::span(part_begin, part_end)); - } + template + void add_edges(const NodeID node, const NodeID num_intervals, Container &neighborhood) { + NodeID degree = neighborhood.size(); + EdgeWeight prev_edge_weight = 0; - _num_high_degree_nodes += 1; - _num_high_degree_parts += part_count; - return offset; - } + if constexpr (kIntervalEncoding) { + const NodeID num_remaining_nodes = + encode_intervals(num_intervals, prev_edge_weight, neighborhood); + degree = num_remaining_nodes; } - add_edges(node, marked_byte, std::forward(neighbourhood)); - return offset; + encode_gaps(node, degree, prev_edge_weight, neighborhood); } - template - void add_edges(const NodeID node, std::uint8_t *marked_byte, Container &&neighbourhood) { - using Neighbour = std::remove_reference_t::value_type; - constexpr bool kHasEdgeWeights = std::is_same_v>; + template + void parse_intervals(const Container &neighborhood, Lambda &&l) const { + const NodeID degree = neighborhood.size(); + if (degree < kIntervalLengthTreshold) { + return; + } - const auto fetch_adjacent_node = [&](const NodeID i) { - if constexpr (kHasEdgeWeights) { - return neighbourhood[i].first; - } else { - return neighbourhood[i]; - } - }; + NodeID interval_len = 1; + NodeID prev_adjacent_node = get_adjacent_node(neighborhood, 0); + for (NodeID i = 1; i < degree; ++i) { + const NodeID adjacent_node = get_adjacent_node(neighborhood, i); - const auto set_adjacent_node = [&](const NodeID i, const NodeID value) { - if constexpr (kHasEdgeWeights) { - neighbourhood[i].first = value; - } else { - neighbourhood[i] = value; + const bool not_successive_increment = prev_adjacent_node + 1 != adjacent_node; + prev_adjacent_node = adjacent_node; + if (not_successive_increment) { + continue; } - }; - EdgeWeight prev_edge_weight = 0; - const auto add_edge_weight = [&](const NodeID i) { - if (!_has_edge_weights) { - return; + interval_len += 1; + if ((i + 1 < degree) && (adjacent_node + 1 == get_adjacent_node(neighborhood, i + 1))) { + continue; } - if constexpr (kHasEdgeWeights) { - const EdgeWeight edge_weight = neighbourhood[i].second; - 
_total_edge_weight += edge_weight; + if (interval_len >= kIntervalLengthTreshold) { + const NodeID right_extreme = adjacent_node; + const NodeID left_extreme = right_extreme - (interval_len - 1); + l(left_extreme, right_extreme, interval_len, i - (interval_len - 1)); + } - if constexpr (kCompressEdgeWeights) { - const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; + interval_len = 1; + } + } - const std::size_t edge_weight_gap_len = - signed_varint_encode(edge_weight_gap, _compressed_data); - _compressed_data += edge_weight_gap_len; - IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; + template + [[nodiscard]] NodeID count_intervals(const Container &neighborhood) const { + NodeID num_intervals = 0; - prev_edge_weight = edge_weight; - } else { - _edge_weights[_cur_edge_weight++] = edge_weight; - } - } else { - _edge_weights[_cur_edge_weight++] = 1; - _total_edge_weight += 1; - } - }; + parse_intervals(neighborhood, [&](const NodeID, const NodeID, const NodeID, const NodeID) { + num_intervals += 1; + }); - NodeID local_degree = neighbourhood.size(); + return num_intervals; + } - // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at - // least kIntervalLengthTreshold. Instead of storing all nodes, only encode the left extreme i - // and the length j - i + 1. Left extremes are stored using the differences between each left - // extreme and the previous right extreme minus 2 (because there must be at least one integer - // between the end of an interval and the beginning of the next one), except the first left - // extreme, which is stored directly. The lengths are decremented by kIntervalLengthTreshold, - // the minimum length of an interval. - if constexpr (kIntervalEncoding) { - NodeID interval_count = 0; - - // Save the pointer to the interval count and skip the amount of bytes needed to store the - // interval count as we can only determine the amount of intervals after finding all of - // them. - std::uint8_t *interval_count_ptr = _compressed_data; - _compressed_data += sizeof(NodeID); - - if (local_degree >= kIntervalLengthTreshold) { - NodeID interval_len = 1; - NodeID previous_right_extreme = 2; - NodeID prev_adjacent_node = fetch_adjacent_node(0); - - for (NodeID i = 1; i < neighbourhood.size(); ++i) { - const NodeID adjacent_node = fetch_adjacent_node(i); - - if (prev_adjacent_node + 1 == adjacent_node) { - ++interval_len; - - // The interval ends if there are no more nodes or the next node is not the increment of - // the current node. - if (i + 1 == neighbourhood.size() || fetch_adjacent_node(i + 1) != adjacent_node + 1) { - if (interval_len >= kIntervalLengthTreshold) { - const NodeID left_extreme = adjacent_node + 1 - interval_len; - const NodeID left_extreme_gap = left_extreme + 2 - previous_right_extreme; - const NodeID interval_length_gap = interval_len - kIntervalLengthTreshold; - - const std::size_t left_extreme_gap_len = - varint_encode(left_extreme_gap, _compressed_data); - _compressed_data += left_extreme_gap_len; - IF_DBG _num_adjacent_node_bytes += left_extreme_gap_len; - - const std::size_t interval_length_gap_len = - varint_encode(interval_length_gap, _compressed_data); - _compressed_data += interval_length_gap_len; - IF_DBG _num_adjacent_node_bytes += interval_length_gap_len; - - for (NodeID j = 0; j < interval_len; ++j) { - const NodeID k = i + 1 + j - interval_len; - - // Set the adjacent node to a special value, which indicates for the gap encoder - // that the node has been encoded through an interval. 
- set_adjacent_node(k, std::numeric_limits::max()); - add_edge_weight(k); - } - - previous_right_extreme = adjacent_node; - - local_degree -= interval_len; - interval_count += 1; + template + NodeID encode_intervals( + const NodeID num_intervals, EdgeWeight &prev_edge_weight, Container &neighborhood + ) { + using Neighbor = std::remove_reference_t::value_type; + constexpr bool kHasEdgeWeights = std::is_same_v>; + + NodeID num_remaining_nodes = neighborhood.size(); + if (num_intervals > 0) { + varint_encode(num_intervals - 1, &_cur_compressed_edges); + _num_intervals += num_intervals; + + NodeID prev_right_extreme = 0; + parse_intervals( + neighborhood, + [&](const NodeID left_extreme, + const NodeID right_extreme, + const NodeID interval_len, + const NodeID index) { + const NodeID left_extreme_gap = left_extreme - prev_right_extreme; + const NodeID interval_len_gap = interval_len - kIntervalLengthTreshold; + + varint_encode(left_extreme_gap, &_cur_compressed_edges); + varint_encode(interval_len_gap, &_cur_compressed_edges); + + prev_right_extreme = right_extreme + 2; + num_remaining_nodes -= interval_len; + for (NodeID i = 0; i < interval_len; ++i) { + const NodeID pos = index + i; + + // Set the adjacent node to a special value, which indicates to the gap encoder + // that the node has been encoded through an interval. + set_adjacent_node(neighborhood, pos, kInvalidNodeID); + + if constexpr (kHasEdgeWeights) { + const EdgeWeight edge_weight = get_edge_weight(neighborhood, pos); + encode_edge_weight(edge_weight, prev_edge_weight); } - - interval_len = 1; } } + ); + } - prev_adjacent_node = adjacent_node; - } - } - - // If intervals have been encoded store the interval count and set the bit in the marked byte - // indicating that interval encoding has been used for the neighbourhood if the marked byte is - // given. Otherwise, fix the amount of bytes stored as we don't store the interval count if no - // intervals have been encoded. - if (marked_byte == nullptr) { - *((NodeID *)interval_count_ptr) = interval_count; - _num_adjacent_node_bytes += sizeof(NodeID); - } else if (interval_count > 0) { - *((NodeID *)interval_count_ptr) = interval_count; - *marked_byte |= 0b01000000; - _num_adjacent_node_bytes += sizeof(NodeID); - } else { - _compressed_data -= sizeof(NodeID); - } + return num_remaining_nodes; + } - if (interval_count > 0) { - _num_interval_nodes += 1; - _num_intervals += interval_count; - } + template + void encode_gaps( + const NodeID node, const NodeID degree, EdgeWeight &prev_edge_weight, Container &neighborhood + ) { + using Neighbor = std::remove_reference_t::value_type; + constexpr bool kHasEdgeWeights = std::is_same_v>; - // If all incident edges have been compressed using intervals then gap encoding cannot be - // applied. - if (local_degree == 0) { - return; - } + if (degree == 0) { + return; } - // Store the remaining adjacent nodes using gap encoding. That is instead of directly storing - // the nodes v_1, v_2, ..., v_{k - 1}, v_k, store the gaps v_1 - u, v_2 - v_1 - 1, ..., v_k - - // v_{k - 1} - 1 between the nodes, where u is the source node. Note that all gaps except the - // first one have to be positive as we sorted the nodes in ascending order. Thus, only for the - // first gap the sign is additionally stored. 
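// A small worked example of the gap encoding described above (values invented for illustration;
// varint_encode and signed_varint_encode are the helpers used throughout this file):
//
//   // Source node u = 100 with sorted neighbors {97, 101, 104, 105}:
//   //   first gap (signed):   97 - 100      = -3   -> signed_varint_encode(-3, ...)
//   //   remaining gaps:       101 - 97 - 1  =  3   -> varint_encode(3, ...)
//   //                         104 - 101 - 1 =  2   -> varint_encode(2, ...)
//   //                         105 - 104 - 1 =  0   -> varint_encode(0, ...)
//   // Decoding adds each gap (plus one) onto the previously decoded neighbor to recover
//   // {97, 101, 104, 105}.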
NodeID i = 0; + while (get_adjacent_node(neighborhood, i) == kInvalidNodeID) { + i += 1; + } + + const NodeID first_adjacent_node = get_adjacent_node(neighborhood, i); + const SignedNodeID first_gap = first_adjacent_node - static_cast(node); + signed_varint_encode(first_gap, &_cur_compressed_edges); + if constexpr (kHasEdgeWeights) { + const EdgeWeight edge_weight = get_edge_weight(neighborhood, i); + encode_edge_weight(edge_weight, prev_edge_weight); + } + + i += 1; + + if constexpr (kRunLengthEncoding) { + VarIntRunLengthEncoder rl_encoder(_cur_compressed_edges); + + NodeID prev_adjacent_node = first_adjacent_node; + while (i < neighborhood.size()) { + const NodeID adjacent_node = get_adjacent_node(neighborhood, i); + if (adjacent_node == kInvalidNodeID) { + i += 1; + continue; + } + + const NodeID gap = adjacent_node - prev_adjacent_node - 1; + prev_adjacent_node = adjacent_node; + + _cur_compressed_edges += rl_encoder.add(gap); + if constexpr (kHasEdgeWeights) { + const EdgeWeight edge_weight = get_edge_weight(neighborhood, i); + encode_edge_weight(edge_weight, prev_edge_weight); + } - // Go to the first adjacent node that has not been encoded through an interval. - if constexpr (kIntervalEncoding) { - while (fetch_adjacent_node(i) == std::numeric_limits::max()) { i += 1; } - } - const NodeID first_adjacent_node = fetch_adjacent_node(i); - const SignedID first_gap = first_adjacent_node - static_cast(node); + rl_encoder.flush(); + return; + } else if constexpr (kStreamVByteEncoding) { + const NodeID num_remaining_gaps = degree - 1; + + if (num_remaining_gaps >= kStreamVByteThreshold) [[likely]] { + if constexpr (kHasEdgeWeights) { + if (_has_edge_weights) { + StreamVByteGapAndWeightEncoder encoder(num_remaining_gaps * 2, _cur_compressed_edges); + + while (i < neighborhood.size()) { + const NodeID adjacent_node = get_adjacent_node(neighborhood, i); + if (adjacent_node == kInvalidNodeID) { + i += 1; + continue; + } - const std::size_t first_gap_len = signed_varint_encode(first_gap, _compressed_data); - _compressed_data += first_gap_len; - IF_DBG _num_adjacent_node_bytes += first_gap_len; + const EdgeWeight weight = get_edge_weight(neighborhood, i); + _cur_compressed_edges += encoder.add(adjacent_node); + _cur_compressed_edges += encoder.add(weight); - add_edge_weight(i); - i += 1; + i += 1; + } - const auto encode_gaps = [&](const auto &&encode_gap) { - NodeID prev_adjacent_node = first_adjacent_node; - while (i < neighbourhood.size()) { - const NodeID adjacent_node = fetch_adjacent_node(i); + encoder.flush(); + return; + } + } - // Skip the adjacent node if it has been encoded through an interval. 
- if constexpr (kIntervalEncoding) { - if (adjacent_node == std::numeric_limits::max()) { - i += 1; + StreamVByteGapEncoder encoder(num_remaining_gaps, _cur_compressed_edges); + while (i < neighborhood.size()) { + const NodeID adjacent_node = get_adjacent_node(neighborhood, i++); + if (adjacent_node == kInvalidNodeID) { continue; } + + _cur_compressed_edges += encoder.add(adjacent_node); } - const NodeID gap = adjacent_node - prev_adjacent_node - 1; - encode_gap(gap); - add_edge_weight(i); + encoder.flush(); + return; + } + } - prev_adjacent_node = adjacent_node; + NodeID prev_adjacent_node = first_adjacent_node; + while (i < neighborhood.size()) { + const NodeID adjacent_node = get_adjacent_node(neighborhood, i); + if (adjacent_node == kInvalidNodeID) { i += 1; + continue; } - }; - if constexpr (kRunLengthEncoding) { - VarIntRunLengthEncoder rl_encoder(_compressed_data); - encode_gaps([&](const NodeID gap) { - const std::size_t gap_len = rl_encoder.add(gap); - _compressed_data += gap_len; - IF_DBG _num_adjacent_node_bytes += gap_len; - }); - rl_encoder.flush(); - } else if constexpr (kStreamEncoding) { - VarIntStreamEncoder sv_encoder(_compressed_data, local_degree - 1); - encode_gaps([&](const NodeID gap) { - const std::size_t gap_len = sv_encoder.add(gap); - _compressed_data += gap_len; - IF_DBG _num_adjacent_node_bytes += gap_len; - }); - sv_encoder.flush(); - } else { - encode_gaps([&](const NodeID gap) { - const std::size_t gap_len = varint_encode(gap, _compressed_data); - _compressed_data += gap_len; - IF_DBG _num_adjacent_node_bytes += gap_len; - }); + const NodeID gap = adjacent_node - prev_adjacent_node - 1; + prev_adjacent_node = adjacent_node; + + varint_encode(gap, &_cur_compressed_edges); + if constexpr (kHasEdgeWeights) { + const EdgeWeight edge_weight = get_edge_weight(neighborhood, i); + encode_edge_weight(edge_weight, prev_edge_weight); + } + + i += 1; } } + +private: + heap_profiler::unique_ptr _compressed_edges; + std::uint8_t *_cur_compressed_edges; + std::size_t _compressed_data_max_size; + + bool _has_edge_weights; + EdgeWeight _total_edge_weight; + EdgeID _cur_edge_weight; + StaticArray &_edge_weights; + + EdgeID _cur_edge; + NodeID _max_degree; + + // Graph compression statistics + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; }; } // namespace kaminpar diff --git a/kaminpar-common/graph-compression/compressed_neighborhoods.h b/kaminpar-common/graph-compression/compressed_neighborhoods.h index e6e78c5a..0d085969 100644 --- a/kaminpar-common/graph-compression/compressed_neighborhoods.h +++ b/kaminpar-common/graph-compression/compressed_neighborhoods.h @@ -10,29 +10,76 @@ #include "kaminpar-common/constexpr_utils.h" #include "kaminpar-common/datastructures/compact_static_array.h" #include "kaminpar-common/datastructures/static_array.h" -#include "kaminpar-common/graph-compression/varint_codec.h" -#include "kaminpar-common/graph-compression/varint_run_length_codec.h" -#include "kaminpar-common/graph-compression/varint_stream_codec.h" +#include "kaminpar-common/graph-compression/streamvbyte.h" +#include "kaminpar-common/graph-compression/varint.h" +#include "kaminpar-common/graph-compression/varint_rle.h" #include "kaminpar-common/math.h" #include "kaminpar-common/ranges.h" +#define INVOKE_CALLBACKU(edge, adjacent_node) \ + if constexpr (kNonStoppable) { \ + callback(edge, adjacent_node); \ + } else { \ + const bool stop = callback(edge, adjacent_node); \ + if (stop) 
[[unlikely]] { \ + return true; \ + } \ + } + +#define INVOKE_CALLBACKW(edge, adjacent_node) \ + EdgeWeight edge_weight; \ + if constexpr (kCompressEdgeWeights) { \ + const SignedEdgeWeight edge_weight_gap = signed_varint_decode(&node_data); \ + edge_weight = static_cast(edge_weight_gap + prev_edge_weight); \ + } else { \ + edge_weight = _edge_weights[edge]; \ + } \ + \ + if constexpr (kNonStoppable) { \ + callback(edge, adjacent_node, edge_weight); \ + } else { \ + const bool stop = callback(edge, adjacent_node, edge_weight); \ + if (stop) [[unlikely]] { \ + return true; \ + } \ + } \ + \ + prev_edge_weight = edge_weight; + +#define INVOKE_CALLBACK(edge, adjacent_node) \ + if constexpr (kHasEdgeWeights) { \ + INVOKE_CALLBACKW(edge, adjacent_node); \ + } else { \ + INVOKE_CALLBACKU(edge, adjacent_node); \ + } + namespace kaminpar { +/*! + * The neighborhoods of a graph, which are stored in compressed format through variable-length + * encoding, gap encoding, interval encoding and high-degree encoding. + * + * @tparam NodeID The type of integer to use to identify a node. + * @tparam EdgeID The type of integer to use to identify an edge. + * @tparam EdgeWeight The type of integer to use for edge weights. + */ template class CompressedNeighborhoods { static_assert(std::numeric_limits::is_integer); static_assert(std::numeric_limits::is_integer); static_assert(std::numeric_limits::is_integer); - struct NeighborhoodHeader { - EdgeID first_edge; - NodeID degree; - bool uses_intervals; - std::size_t length; - }; + using SignedNodeID = std::int64_t; + using SignedEdgeWeight = std::make_signed_t; -public: - using SignedID = std::int64_t; + using StreamVByteGapDecoder = + streamvbyte::StreamVByteDecoder; + + using StreamVByteGapAndWeightsDecoder = + streamvbyte::StreamVByteDecoder; + + static constexpr EdgeWeight kDefaultEdgeWeight = 1; +public: /*! * Whether edge weights are compressed. */ @@ -43,7 +90,7 @@ template class Compresse #endif /*! - * Whether high degree encoding is used. + * Whether high-degree encoding is used. */ #ifdef KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING static constexpr bool kHighDegreeEncoding = true; @@ -57,7 +104,7 @@ template class Compresse static constexpr NodeID kHighDegreeThreshold = 10000; /*! - * The length of a part when splitting the neighbourhood of a high degree + * The length of each part when splitting the neighbourhood of a high degree * node. */ static constexpr NodeID kHighDegreePartLength = 1000; @@ -86,40 +133,47 @@ template class Compresse #endif /*! - * Whether stream encoding is used. + * Whether StreamVByte encoding is used. */ -#ifdef KAMINPAR_COMPRESSION_STREAM_ENCODING - static constexpr bool kStreamEncoding = true; +#ifdef KAMINPAR_COMPRESSION_STREAMVBYTE_ENCODING + static constexpr bool kStreamVByteEncoding = true; #else - static constexpr bool kStreamEncoding = false; + static constexpr bool kStreamVByteEncoding = false; #endif + /*! + * The minimum number of adjacent nodes required to use StreamVByte encoding. + */ + static constexpr NodeID kStreamVByteThreshold = 3; + static_assert( - !kRunLengthEncoding || !kStreamEncoding, - "Either run-length or stream encoding can be used for varints " + !kRunLengthEncoding || !kStreamVByteEncoding, + "Either run-length or StreamVByte encoding can be used for varints " "but not both." ); - /*! - * Whether the isolated nodes of the compressed graph are continuously stored - * at the end of the nodes array. 
- */ -#ifdef KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION - static constexpr bool kIsolatedNodesSeparation = true; -#else - static constexpr bool kIsolatedNodesSeparation = false; -#endif + static_assert( + !kRunLengthEncoding || !kCompressEdgeWeights, + "Run-length cannot be used together with compressed edge weights." + ); - /** + static_assert( + !kStreamVByteEncoding || !kCompressEdgeWeights || sizeof(NodeID) == sizeof(EdgeWeight), + "StreamVByte together with compressed edge weights can only be used when the node IDs and " + "edge weights have the same width." + ); + + /*! * Constructs a new CompressedNeighborhoods. * - * @param nodes The nodes of the compressed neighborhoods. - * @param compressed_edges The edges and edge weights of the compressed neighborhoods. + * @param nodes The offsets for each node into the compressed edges where the corresponding + * adjacent nodes and edge weights are encoded. + * @param compressed_edges The edges and edge weights in compresed format. * @param edge_weights The edge weights of the graph, which is only used when the graph has edge - * weights and graph compression is disabled. + * weights and edg weight compression is disabled. * @param max_degree The maximum degree of the nodes. * @param num_edges The number of edges. - * @param has_edge_weights Whether edge weights are stored + * @param has_edge_weights Whether edge weights are stored. * @param total_edge_weight The total edge weight. * @param num_high_degree_nodes The number of nodes that have high degree. * @param num_high_degree_parts The total number of parts that result from splitting high degree @@ -155,6 +209,8 @@ template class Compresse KASSERT(kHighDegreeEncoding || _num_high_degree_parts == 0); KASSERT(kIntervalEncoding || _num_interval_nodes == 0); KASSERT(kIntervalEncoding || _num_intervals == 0); + KASSERT(!has_edge_weights || edge_weights.size() == num_edges); + KASSERT(has_edge_weights || edge_weights.empty()); } CompressedNeighborhoods(const CompressedNeighborhoods &) = delete; @@ -163,7 +219,43 @@ template class Compresse CompressedNeighborhoods(CompressedNeighborhoods &&) noexcept = default; CompressedNeighborhoods &operator=(CompressedNeighborhoods &&) noexcept = default; - /** + /*! + * Returns the number of nodes. + * + * @return The number of nodes. + */ + [[nodiscard]] EdgeID num_nodes() const { + return _nodes.size() - 1; + } + + /*! + * Returns the number of edges. + * + * @return The number of edges. + */ + [[nodiscard]] EdgeID num_edges() const { + return _num_edges; + } + + /*! + * Returns whether the edges are weighted. + * + * @return Whether the edges are weighted. + */ + [[nodiscard]] bool has_edge_weights() const { + return _has_edge_weights; + } + + /*! + * Returns the total edge weight. + * + * @return The total edge weight. + */ + [[nodiscard]] EdgeWeight total_edge_weight() const { + return _total_edge_weight; + } + + /*! * Returns the maximum degree of the nodes. * * @return The maximum degree of the nodes. @@ -172,169 +264,133 @@ template class Compresse return _max_degree; } - /** + /*! * Returns the degree of a node. * * @param node The node whose degree is to be returned. * @return The degree of the node. 
*/ [[nodiscard]] NodeID degree(const NodeID node) const { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + _nodes[node]; - const std::uint8_t *next_node_data = data + _nodes[node + 1]; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) [[unlikely]] { - return 0; - } - - const auto header = decode_header(node, node_data, next_node_data); - return header.degree; + return static_cast(first_invalid_edge(node) - first_edge(node)); } - /** + /*! * Returns incident edges of a nodes. * - * @param node The node whose incident edges is to be returned. + * @param node The node whose incident edges are to be returned. * @return The incident edges of the node. */ [[nodiscard]] IotaRange incident_edges(const NodeID node) const { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + _nodes[node]; - const std::uint8_t *next_node_data = data + _nodes[node + 1]; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) [[unlikely]] { - return {0, 0}; - } - - const auto header = decode_header(node, node_data, next_node_data); - return {header.first_edge, header.first_edge + header.degree}; + return {first_edge(node), first_invalid_edge(node)}; } - /** - * Decodes a neighborhood and invokes a caller with each adjacent node and corresponding edge - * weight. + /*! + * Decodes the adjacent nodes of a node. * - * @tparam kParallelDecoding Whether to decode the neighborhood in parallel. - * @tparam Lambda The type of the caller to invoke. - * @param u The node whose neighborhood is to be decoded. - * @param l The caller to invoke. + * @tparam The type of callback to invoke with the adjacent nodes. + * @param node The node whose adjacent nodes are to be decoded. + * @param callback The function to invoke with each adjacent node. */ - template - void decode(const NodeID u, Lambda &&l) const { - KASSERT(u < num_nodes()); - constexpr bool kInvokeDirectly = std::is_invocable_v; - - if (_has_edge_weights) [[unlikely]] { - decode_neighborhood(u, std::forward(l)); - } else { - if constexpr (kInvokeDirectly) { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { - return l(e, v, 1); - }); - } else { - decode_neighborhood(u, [&](auto &&l2) { - l([&](auto &&l3) { l2([&](const EdgeID e, const NodeID v) { return l3(e, v, 1); }); }); - }); - } - } + template void adjacent_nodes(const NodeID node, Callback &&callback) const { + decode_adjacent_nodes(node, std::forward(callback)); } - /** - * Decodes the leading edges of a neighborhood and invokes a caller with each adjacent node and - * corresponding edge weight. + /*! + * Decodes the neighbors of a node. * - * @tparam Lambda The type of the caller to invoke. - * @param u The node whose neighborhood is to be decoded. - * @param max_num_neighbors The number of neighbors to decode. - * @param l The caller to invoke. + * @tparam The type of callback to invoke with the neighbor. + * @param node The node whose neighbors are to be decoded. + * @param callback The function to invoke with each neighbor. */ - template - void decode(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { - KASSERT(u < num_nodes()); - KASSERT(max_num_neighbors > 0); + template void neighbors(const NodeID node, Callback &&callback) const { + decode_neighbors(node, std::forward(callback)); + } - static_assert(std::is_invocable_v); + /*! + * Decodes a part of the neighbors of a node. 
+ * + * @tparam The type of callback to invoke with the neighbor. + * @param node The node whose neighbors are to be decoded. + * @param callback The function to invoke with each neighbor. + */ + template + void neighbors(const NodeID node, const NodeID max_num_neighbors, Callback &&callback) const { + static_assert(std::is_invocable_v); constexpr bool kNonStoppable = - std::is_void_v>; + std::is_void_v>; NodeID num_neighbors_visited = 1; const auto invoke_and_check = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { bool abort = num_neighbors_visited++ >= max_num_neighbors; if constexpr (kNonStoppable) { - l(e, v, w); + callback(e, v, w); } else { - abort |= l(e, v, w); + abort |= callback(e, v, w); } return abort; }; if (_has_edge_weights) [[unlikely]] { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + decode(node, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { return invoke_and_check(e, v, w); }); } else { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { - return invoke_and_check(e, v, 1); + decode(node, [&](const EdgeID e, const NodeID v) { + return invoke_and_check(e, v, kDefaultEdgeWeight); }); } } - /** - * Restricts the node array to a specific number of nodes. + /*! + * Decodes the adjacent nodes of a node in parallel. * - * @param new_n The new number of nodes. + * @tparam The type of callback to invoke with the adjacent nodes. + * @param node The node whose adjacent nodes are to be decoded. + * @param callback The function to invoke with each adjacent node. */ - void restrict_nodes(const NodeID new_n) { - _nodes.restrict(new_n); + template + void parallel_adjacent_nodes(const NodeID node, Callback &&callback) const { + decode_adjacent_nodes(node, std::forward(callback)); } - /** - * Unrestricts the node array. - */ - void unrestrict_nodes() { - _nodes.unrestrict(); - } - - /** - * Returns the number of nodes. + /*! + * Decodes the neighbors of a node in parallel. * - * @return The number of nodes. + * @tparam The type of callback to invoke with the neighbor. + * @param node The node whose neighbors are to be decoded. + * @param callback The function to invoke with each neighbor. */ - [[nodiscard]] EdgeID num_nodes() const { - return _nodes.size() - 1; + template + void parallel_neighbors(const NodeID node, Callback &&callback) const { + decode_neighbors(node, std::forward(callback)); } - /** - * Returns the number of edges. + /*! + * Restricts the node array to a specific number of nodes. * - * @return The number of edges. + * @param new_num_nodes The new number of nodes. */ - [[nodiscard]] EdgeID num_edges() const { - return _num_edges; + void restrict_nodes(const NodeID new_num_nodes) { + _nodes.restrict(new_num_nodes); } - /** - * Returns whether the edges are weighted. - * - * @return Whether the edges are weighted. + /*! + * Unrestricts the node array. */ - [[nodiscard]] bool has_edge_weights() const { - return _has_edge_weights; + void unrestrict_nodes() { + _nodes.unrestrict(); } - /** - * Returns the total edge weight. + /*! + * Returns the used memory space in bytes. * - * @return The total edge weight. + * @return The used memory space in bytes. */ - [[nodiscard]] EdgeWeight total_edge_weight() const { - return _total_edge_weight; + [[nodiscard]] std::size_t memory_space() const { + return _nodes.memory_space() + _compressed_edges.size() + + _edge_weights.size() * sizeof(EdgeWeight); } /*! 
@@ -373,17 +429,7 @@ template class Compresse return _num_intervals; } - /** - * Returns the used memory space in bytes. - * - * @return The used memory space in bytes. - */ - [[nodiscard]] std::size_t memory_space() const { - return _nodes.memory_space() + _compressed_edges.size() + - _edge_weights.size() * sizeof(EdgeWeight); - } - - /** + /*! * Returns ownership of the raw node array. * * @return Ownership of the raw node array. @@ -392,7 +438,7 @@ template class Compresse return std::move(_nodes); } - /** + /*! * Returns a reference to the raw node array. * * @return A reference to the raw node array. @@ -401,7 +447,7 @@ template class Compresse return _nodes; } - /** + /*! * Returns a reference to the raw node array. * * @return A reference to the raw node array. @@ -410,7 +456,7 @@ template class Compresse return _nodes; } - /** + /*! * Returns a reference to the raw compressed edges. * * @return A reference to the raw compressed edges. @@ -419,7 +465,7 @@ template class Compresse return _compressed_edges; } - /** + /*! * Returns a reference to the raw edge weights. * * Note that the weights are only valid when edge weight compression is enabled and when the @@ -432,136 +478,169 @@ template class Compresse } private: - CompactStaticArray _nodes; - StaticArray _compressed_edges; - StaticArray _edge_weights; + [[nodiscard]] EdgeID first_edge(const NodeID node) const { + const std::uint8_t *node_data = _compressed_edges.data() + _nodes[node]; - EdgeID _num_edges; - NodeID _max_degree; + if constexpr (kIntervalEncoding) { + const auto [first_edge, _] = marked_varint_decode(node_data); + return first_edge; + } else { + return varint_decode(node_data); + } + } - bool _has_edge_weights; - EdgeWeight _total_edge_weight; + [[nodiscard]] EdgeID first_invalid_edge(const NodeID node) const { + return first_edge(node + 1); + } - std::size_t _num_high_degree_nodes; - std::size_t _num_high_degree_parts; - std::size_t _num_interval_nodes; - std::size_t _num_intervals; + template + void decode_adjacent_nodes(const NodeID node, Callback &&callback) const { + constexpr bool kInvokeDirectly = std::is_invocable_v; -private: - template - void decode_neighborhood(const NodeID node, Lambda &&l) const { - constexpr bool kInvokeDirectly = []() { - if constexpr (kHasEdgeWeights) { - return std::is_invocable_v; + if (_has_edge_weights) [[unlikely]] { + decode(node, [&](const EdgeID, const NodeID v, const EdgeWeight w) { + return callback(v, w); + }); + } else { + if constexpr (kInvokeDirectly) { + decode(node, [&](const EdgeID, const NodeID v) { + return callback(v, kDefaultEdgeWeight); + }); } else { - return std::is_invocable_v; + decode(node, [&](auto &&local_decode) { + callback([&](auto &&actual_callback) { + local_decode([&](const EdgeID, const NodeID v) { + return actual_callback(v, kDefaultEdgeWeight); + }); + }); + }); } - }(); + } + } - const std::uint8_t *data = _compressed_edges.data(); + template + void decode_neighbors(const NodeID node, Callback &&callback) const { + constexpr bool kInvokeDirectly = std::is_invocable_v; + + if (_has_edge_weights) [[unlikely]] { + decode(node, std::forward(callback)); + } else { + if constexpr (kInvokeDirectly) { + decode(node, [&](const EdgeID e, const NodeID v) { + return callback(e, v, kDefaultEdgeWeight); + }); + } else { + decode(node, [&](auto &&local_decode) { + callback([&](auto &&actual_callback) { + local_decode([&](const EdgeID e, const NodeID v) { + return actual_callback(e, v, kDefaultEdgeWeight); + }); + }); + }); + } + } + } + + template + void 
decode(const NodeID node, Callback &&callback) const { + constexpr bool kInvokeDirectly = std::conditional_t< + kHasEdgeWeights, + std::is_invocable, + std::is_invocable>::value; + const std::uint8_t *data = _compressed_edges.data(); const std::uint8_t *node_data = data + _nodes[node]; const std::uint8_t *next_node_data = data + _nodes[node + 1]; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) [[unlikely]] { + if (node_data == next_node_data) [[unlikely]] { return; } - const auto header = decode_header(node, node_data, next_node_data); - node_data += header.length; + EdgeID edge; + EdgeID last_edge; + bool has_intervals; + if constexpr (kIntervalEncoding) { + const auto header = marked_varint_decode(&node_data); + edge = header.first; + has_intervals = header.second; + last_edge = marked_varint_decode(next_node_data).first; + } else { + edge = varint_decode(&node_data); + last_edge = varint_decode(next_node_data); + } if constexpr (kHighDegreeEncoding) { - if (header.degree >= kHighDegreeThreshold) { - decode_parts( - node_data, node, header.degree, header.first_edge, std::forward(l) + const NodeID degree = static_cast(last_edge - edge); + const bool split_neighbourhood = degree >= kHighDegreeThreshold; + + if (split_neighbourhood) [[unlikely]] { + decode_parts( + node_data, node, degree, edge, last_edge, std::forward(callback) ); return; } } - invoke_indirect(std::forward(l), [&](auto &&l2) { + invoke_indirect(std::forward(callback), [&](auto &&actual_callback) { decode_edges( node_data, node, - header.degree, - header.first_edge, - header.uses_intervals, - std::forward(l2) + edge, + last_edge, + has_intervals, + std::forward(actual_callback) ); }); } - [[nodiscard]] NeighborhoodHeader decode_header( - const NodeID node, - const std::uint8_t *const node_data, - const std::uint8_t *const next_node_data - ) const { - const auto [first_edge, next_first_edge, uses_intervals, len] = [&] { - if constexpr (kIntervalEncoding) { - const auto [first_edge, uses_intervals, len] = marked_varint_decode(node_data); - const auto [next_first_edge, _, __] = marked_varint_decode(next_node_data); - - return std::make_tuple(first_edge, next_first_edge, uses_intervals, len); - } else { - const auto [first_edge, len] = varint_decode(node_data); - const auto [next_first_edge, _] = varint_decode(next_node_data); - - return std::make_tuple(first_edge, next_first_edge, false, len); - } - }(); - - if constexpr (kIsolatedNodesSeparation) { - const EdgeID ungapped_first_edge = first_edge + node; - const NodeID degree = static_cast(1 + next_first_edge - first_edge); - return {ungapped_first_edge, degree, uses_intervals, len}; - } else { - const NodeID degree = static_cast(next_first_edge - first_edge); - return {first_edge, degree, uses_intervals, len}; - } - } - - template + template void decode_parts( - const std::uint8_t *data, + const std::uint8_t *node_data, const NodeID node, const NodeID degree, const EdgeID edge, - Lambda &&l + const EdgeID last_edge, + Callback &&callback ) const { - constexpr bool kInvokeDirectly = []() { - if constexpr (kHasEdgeWeights) { - return std::is_invocable_v; - } else { - return std::is_invocable_v; - } - }(); - - const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); + constexpr bool kInvokeDirectly = std::conditional_t< + kHasEdgeWeights, + std::is_invocable, + std::is_invocable>::value; - const auto iterate_part = [&](const NodeID part) { - const NodeID part_offset = *((NodeID *)(data + sizeof(NodeID) * part)); - const 
std::uint8_t *part_data = data + part_offset; + const NodeID num_parts = math::div_ceil(degree, kHighDegreePartLength); + const auto decode_part = [&](const NodeID part) { + NodeID part_offset = *(reinterpret_cast(node_data) + part); - const NodeID part_count_m1 = part_count - 1; - const bool last_part = part == part_count_m1; + bool has_intervals; + if constexpr (kIntervalEncoding) { + has_intervals = math::is_msb_set(part_offset); + part_offset &= ~math::kSetMSB; + } const EdgeID part_edge = edge + kHighDegreePartLength * part; - const NodeID part_degree = - last_part ? (degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; - - return invoke_indirect2(std::forward(l), [&](auto &&l2) { - return decode_edges( - part_data, node, part_degree, part_edge, true, std::forward(l2) - ); - }); + const EdgeID part_last_edge = + ((part + 1) == num_parts) ? last_edge : part_edge + kHighDegreePartLength; + + const std::uint8_t *part_data = node_data + part_offset; + return invoke_indirect2( + std::forward(callback), + [&](auto &&actual_callback) { + return decode_edges( + part_data, + node, + part_edge, + part_last_edge, + has_intervals, + std::forward(actual_callback) + ); + } + ); }; - if constexpr (kParallelDecoding) { - tbb::parallel_for(0, part_count, iterate_part); + if constexpr (kParallel) { + tbb::parallel_for(0, num_parts, decode_part); } else { - for (NodeID part = 0; part < part_count; ++part) { - const bool stop = iterate_part(part); + for (NodeID part = 0; part < num_parts; ++part) { + const bool stop = decode_part(part); if (stop) [[unlikely]] { return; } @@ -569,189 +648,152 @@ template class Compresse } } - template + template bool decode_edges( - const std::uint8_t *data, + const std::uint8_t *node_data, const NodeID node, - const NodeID degree, EdgeID edge, - bool uses_intervals, - Lambda &&l + const EdgeID last_edge, + const bool has_intervals, + Callback &&callback ) const { - const EdgeID max_edge = edge + degree; - EdgeWeight prev_edge_weight = 0; + using CallbackReturnType = std::conditional_t< + kHasEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + EdgeWeight prev_edge_weight = 0; if constexpr (kIntervalEncoding) { - if (uses_intervals) { - const bool stop = decode_intervals( - data, edge, prev_edge_weight, std::forward(l) - ); - if (stop) [[unlikely]] { - return true; - } + if (has_intervals) { + NodeID num_intervals = varint_decode(&node_data) + 1; + NodeID prev_right_extreme = 0; - if (edge == max_edge) [[unlikely]] { - return false; - } - } - } + do { + const NodeID left_extreme_gap = varint_decode(&node_data); + const NodeID length_gap = varint_decode(&node_data); - return decode_gaps( - data, node, edge, prev_edge_weight, max_edge, std::forward(l) - ); - } + const NodeID left_extreme = left_extreme_gap + prev_right_extreme; + const NodeID length = length_gap + kIntervalLengthTreshold; + prev_right_extreme = left_extreme + (length - 1) + 2; - template - bool decode_intervals( - const std::uint8_t *&data, EdgeID &edge, EdgeWeight &prev_edge_weight, Lambda &&l - ) const { - using LambdaReturnType = std::conditional_t< - kHasEdgeWeights, - std::invoke_result, - std::invoke_result>::type; - constexpr bool kNonStoppable = std::is_void_v; + static_assert(kIntervalLengthTreshold == 3, "Optimized for length threshold = 3."); + INVOKE_CALLBACK(edge, left_extreme); + INVOKE_CALLBACK(edge + 1, left_extreme + 1); + INVOKE_CALLBACK(edge + 2, left_extreme + 2); + edge += kIntervalLengthTreshold; + + for 
(NodeID j = kIntervalLengthTreshold; j < length; ++j) { + const NodeID adjacent_node = left_extreme + j; - const auto invoke_caller = [&](const NodeID adjacent_node) { - if constexpr (kHasEdgeWeights) { - if constexpr (kCompressEdgeWeights) { - const auto [edge_weight_gap, length] = signed_varint_decode(data); - data += length; + INVOKE_CALLBACK(edge, adjacent_node); + edge += 1; + } - const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; - prev_edge_weight = edge_weight; + num_intervals -= 1; + } while (num_intervals > 0); - return l(edge, adjacent_node, edge_weight); - } else { - return l(edge, adjacent_node, _edge_weights[edge]); + if (edge == last_edge) [[unlikely]] { + return false; } - } else { - return l(edge, adjacent_node); } - }; + } - const NodeID interval_count = *((NodeID *)data); - data += sizeof(NodeID); + const SignedNodeID first_gap = signed_varint_decode(&node_data); + const NodeID first_adjacent_node = static_cast(first_gap + node); + INVOKE_CALLBACK(edge, first_adjacent_node); + edge += 1; - NodeID previous_right_extreme = 2; - for (NodeID i = 0; i < interval_count; ++i) { - const auto [left_extreme_gap, left_extreme_gap_len] = varint_decode(data); - data += left_extreme_gap_len; + if constexpr (kRunLengthEncoding) { + const NodeID num_remaining_gaps = static_cast(last_edge - edge); + VarIntRunLengthDecoder rl_decoder(num_remaining_gaps, node_data); - const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); - data += interval_length_gap_len; + bool stop = false; + NodeID prev_adjacent_node = first_adjacent_node; + rl_decoder.decode([&](const NodeID gap) { + const NodeID adjacent_node = gap + prev_adjacent_node + 1; + prev_adjacent_node = adjacent_node; - const NodeID cur_left_extreme = left_extreme_gap + previous_right_extreme - 2; - const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; - previous_right_extreme = cur_left_extreme + cur_interval_len - 1; + if constexpr (kHasEdgeWeights) { + EdgeWeight edge_weight = _edge_weights[edge]; - for (NodeID j = 0; j < cur_interval_len; ++j) { - if constexpr (kNonStoppable) { - invoke_caller(cur_left_extreme + j); + if constexpr (kNonStoppable) { + callback(edge++, adjacent_node, edge_weight); + } else { + stop = callback(edge++, adjacent_node, edge_weight); + return stop; + } } else { - const bool stop = invoke_caller(cur_left_extreme + j); - if (stop) [[unlikely]] { - return true; + if constexpr (kNonStoppable) { + callback(edge++, adjacent_node); + } else { + stop = callback(edge++, adjacent_node); + return stop; } } + }); - edge += 1; - } - } - - return false; - } - - template - bool decode_gaps( - const std::uint8_t *data, - NodeID node, - EdgeID &edge, - EdgeWeight &prev_edge_weight, - const EdgeID max_edge, - Lambda &&l - ) const { - using LambdaReturnType = std::conditional_t< - kHasEdgeWeights, - std::invoke_result, - std::invoke_result>::type; - constexpr bool kNonStoppable = std::is_void_v; - - const auto invoke_caller = [&](const NodeID adjacent_node) { - if constexpr (kHasEdgeWeights) { - if constexpr (kCompressEdgeWeights) { - const auto [edge_weight_gap, length] = signed_varint_decode(data); - data += length; - - const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; - prev_edge_weight = edge_weight; - return l(edge, adjacent_node, edge_weight); + return stop; + } else if constexpr (kStreamVByteEncoding) { + const NodeID num_remaining_gaps = static_cast(last_edge - edge); + + if (num_remaining_gaps >= kStreamVByteThreshold) { + bool stop = 
false; + + if constexpr (kHasEdgeWeights) { + StreamVByteGapAndWeightsDecoder decoder(num_remaining_gaps * 2, node_data); + decoder.decode([&](const NodeID adjacent_node, const EdgeWeight edge_weight) { + if constexpr (kNonStoppable) { + callback(edge++, adjacent_node, edge_weight); + } else { + stop = callback(edge++, adjacent_node, edge_weight); + return stop; + } + }); } else { - return l(edge, adjacent_node, _edge_weights[edge]); + StreamVByteGapDecoder decoder(num_remaining_gaps, node_data); + decoder.decode([&](const NodeID adjacent_node) { + if constexpr (kNonStoppable) { + callback(edge++, adjacent_node); + } else { + stop = callback(edge++, adjacent_node); + return stop; + } + }); } - } else { - return l(edge, adjacent_node); - } - }; - - const auto [first_gap, first_gap_len] = signed_varint_decode(data); - data += first_gap_len; - const NodeID first_adjacent_node = static_cast(first_gap + node); - NodeID prev_adjacent_node = first_adjacent_node; - - if constexpr (kNonStoppable) { - invoke_caller(first_adjacent_node); - } else { - const bool stop = invoke_caller(first_adjacent_node); - if (stop) [[unlikely]] { - return true; + return stop; } } - edge += 1; - const auto handle_gap = [&](const NodeID gap) { + NodeID prev_adjacent_node = first_adjacent_node; + while (edge < last_edge) { + const NodeID gap = varint_decode(&node_data); const NodeID adjacent_node = gap + prev_adjacent_node + 1; - prev_adjacent_node = adjacent_node; - if constexpr (kNonStoppable) { - invoke_caller(adjacent_node); - edge += 1; - } else { - const bool stop = invoke_caller(adjacent_node); - edge += 1; - return stop; - } - }; + INVOKE_CALLBACK(edge, adjacent_node); + prev_adjacent_node = adjacent_node; + edge += 1; + } - if constexpr (kRunLengthEncoding) { - VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); - rl_decoder.decode(handle_gap); - } else if constexpr (kStreamEncoding) { - VarIntStreamDecoder sv_encoder(data, max_edge - edge); - sv_encoder.decode(handle_gap); - } else { - while (edge != max_edge) { - const auto [gap, gap_len] = varint_decode(data); - data += gap_len; + return false; + } - const NodeID adjacent_node = gap + prev_adjacent_node + 1; - prev_adjacent_node = adjacent_node; +private: + CompactStaticArray _nodes; + StaticArray _compressed_edges; + StaticArray _edge_weights; - if constexpr (kNonStoppable) { - invoke_caller(adjacent_node); - } else { - const bool stop = invoke_caller(adjacent_node); - if (stop) [[unlikely]] { - return true; - } - } + EdgeID _num_edges; + NodeID _max_degree; - edge += 1; - } - } + bool _has_edge_weights; + EdgeWeight _total_edge_weight; - return false; - } + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; }; } // namespace kaminpar diff --git a/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h b/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h index 229f10e8..a9e584e9 100644 --- a/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h +++ b/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h @@ -89,14 +89,14 @@ class CompressedNeighborhoodsBuilder { const EdgeID last_edge = _num_edges; std::uint8_t *compressed_edges_end = compressed_edges.get() + compressed_edges_size; if constexpr (CompressedNeighborhoods::kIntervalEncoding) { - compressed_edges_size += marked_varint_encode(last_edge, false, compressed_edges_end); + marked_varint_encode(last_edge, false, &compressed_edges_end); } else { - 
compressed_edges_size += varint_encode(last_edge, compressed_edges_end); + varint_encode(last_edge, &compressed_edges_end); } // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. - if constexpr (CompressedNeighborhoods::kStreamEncoding) { + if constexpr (CompressedNeighborhoods::kStreamVByteEncoding) { compressed_edges_size += 15; } @@ -257,14 +257,14 @@ class ParallelCompressedNeighborhoodsBuilder { std::uint8_t *_compressed_edges_end = _compressed_edges.get() + _compressed_edges_size; const EdgeID last_edge = _num_edges; if constexpr (CompressedNeighborhoods::kIntervalEncoding) { - _compressed_edges_size += marked_varint_encode(last_edge, false, _compressed_edges_end); + marked_varint_encode(last_edge, false, &_compressed_edges_end); } else { - _compressed_edges_size += varint_encode(last_edge, _compressed_edges_end); + varint_encode(last_edge, &_compressed_edges_end); } // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. - if constexpr (CompressedNeighborhoods::kStreamEncoding) { + if constexpr (CompressedNeighborhoods::kStreamVByteEncoding) { _compressed_edges_size += 15; } @@ -281,7 +281,7 @@ class ParallelCompressedNeighborhoodsBuilder { _max_degree, _num_edges, _has_edge_weights, - _total_edge_weight, + _has_edge_weights ? _total_edge_weight : _num_edges, _num_high_degree_nodes, _num_high_degree_parts, _num_interval_nodes, diff --git a/kaminpar-common/graph-compression/streamvbyte.h b/kaminpar-common/graph-compression/streamvbyte.h new file mode 100644 index 00000000..3d0d3f9c --- /dev/null +++ b/kaminpar-common/graph-compression/streamvbyte.h @@ -0,0 +1,899 @@ +/******************************************************************************* + * Endoder and decoder for StreamVByte. + * + * @file: streamvbyte.h + * @author: Daniel Salwasser + * @date: 29.12.2023 + ******************************************************************************/ +#pragma once + +#include +#include +#include +#include +#include + +#if defined(__x86_64__) +#include +#elif defined(__aarch64__) +#include +#endif + +#include "kaminpar-common/constexpr_utils.h" +#include "kaminpar-common/math.h" + +namespace kaminpar::streamvbyte { + +enum class DifferentialCodingKind { + NONE, + D1, + D2, + DM, + D4, +}; + +template +class StreamVByteEncoder { + static constexpr std::size_t kIntByteWidth = sizeof(Int); + static_assert( + kIntByteWidth == 4 || kIntByteWidth == 8, + "StreamVByte only supports 32-bit or 64-bit integers." 
+ ); + + [[nodiscard]] static std::size_t required_byte_width(const Int value) { + if constexpr (kIntByteWidth == 4) { + return math::byte_width(value); + } else if constexpr (kIntByteWidth == 8) { + switch (math::byte_width(value)) { + case 1: + return 1; + case 2: + return 2; + case 3: + [[fallthrough]]; + case 4: + return 4; + case 5: + [[fallthrough]]; + case 6: + [[fallthrough]]; + case 7: + [[fallthrough]]; + case 8: + return 8; + default: + __builtin_unreachable(); + } + } else { + static_assert("Unexpected integer width."); + } + } + + [[nodiscard]] static std::uint8_t encoded_byte_width(const Int value) { + if constexpr (kIntByteWidth == 4) { + return required_byte_width(value) - 1; + } else if constexpr (kIntByteWidth == 8) { + switch (required_byte_width(value)) { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + default: + __builtin_unreachable(); + } + } else { + static_assert("Unexpected integer width."); + } + } + +public: + explicit StreamVByteEncoder(const std::size_t num_values, std::uint8_t *ptr) + : _num_values(num_values), + _control_bytes_ptr(ptr), + _data_ptr(ptr + math::div_ceil(num_values, 4)), + _num_buffered(0), + _prev_value(0), + _prev2_value(0), + _prev3_value(0), + _prev4_value(0), + _prev_max_value(0), + _next_max_value(0) { + std::fill(std::begin(_buffer), std::end(_buffer), 0); + } + + std::size_t add(Int value) { + if constexpr (GapKind == DifferentialCodingKind::D1) { + const Int next_prev_value = value; + value = value - _prev_value; + + _prev_value = next_prev_value; + } else if constexpr (GapKind == DifferentialCodingKind::D2) { + const Int next_prev_value = value; + value = value - _prev2_value; + + _prev2_value = _prev_value; + _prev_value = next_prev_value; + } else if constexpr (GapKind == DifferentialCodingKind::DM) { + _next_max_value = std::max(_prev_max_value, value); + value = value - _prev_max_value; + } else if constexpr (GapKind == DifferentialCodingKind::D4) { + const Int next_prev_value = value; + value = value - _prev4_value; + + _prev4_value = _prev3_value; + _prev3_value = _prev2_value; + _prev2_value = _prev_value; + _prev_value = next_prev_value; + } + + _buffer[_num_buffered] = value; + + if (_num_buffered == 3) { + if constexpr (GapKind == DifferentialCodingKind::DM) { + _prev_max_value = _next_max_value; + _next_max_value = 0; + } + + unchecked_flush(); + return required_byte_width(value); + } + + const bool first_element = _num_buffered++ == 0; + return required_byte_width(value) + (first_element ? 
1 : 0); + } + + std::uint8_t *flush() { + if (_num_buffered > 0) [[likely]] { + unchecked_flush(_num_buffered); + } + + return _data_ptr; + } + +private: + std::size_t _num_values; + std::uint8_t *_control_bytes_ptr; + std::uint8_t *_data_ptr; + + std::size_t _num_buffered; + std::array _buffer; + + Int _prev_value; + Int _prev2_value; + Int _prev3_value; + Int _prev4_value; + + Int _prev_max_value; + Int _next_max_value; + +private: + void unchecked_flush(const std::size_t num_values = 4) { + const std::uint8_t control_byte = + (encoded_byte_width(_buffer[3]) << 6) | (encoded_byte_width(_buffer[2]) << 4) | + (encoded_byte_width(_buffer[1]) << 2) | encoded_byte_width(_buffer[0]); + *_control_bytes_ptr++ = control_byte; + + for (std::size_t i = 0; i < num_values; ++i) { + Int value = _buffer[i]; + + do { + *_data_ptr++ = static_cast(value); + value >>= 8; + } while (value > 0); + + if constexpr (kIntByteWidth == 8) { + std::size_t num_padding_bytes = required_byte_width(value) - math::byte_width(value); + while (num_padding_bytes > 0) { + *_data_ptr++ = static_cast(0); + num_padding_bytes -= 1; + } + } + } + + _num_buffered = 0; + std::fill(std::begin(_buffer), std::end(_buffer), 0); + } +}; + +template < + std::integral Int, + bool PassPairs = false, + DifferentialCodingKind GapKind = DifferentialCodingKind::NONE> +class StreamVByteDecoder { + static constexpr std::size_t kIntByteWidth = sizeof(Int); + static_assert( + kIntByteWidth == 4 || kIntByteWidth == 8, + "StreamVByte only supports 32-bit or 64-bit integers." + ); + + static constexpr bool k32BitInts = kIntByteWidth == 4; + using LengthTable = + std::conditional_t, std::array>; + using ShuffleTable = std::conditional_t< + k32BitInts, + std::array, 256>, + std::array, 16>>; + + [[nodiscard]] static consteval LengthTable create_length_table() { + LengthTable length_table{}; + + if constexpr (k32BitInts) { + constexpr_for<256>([&](const std::uint8_t control_byte) { + length_table[control_byte] = 0; + + constexpr_for<4>([&](const std::uint8_t i) { + const std::uint8_t header = (control_byte >> (2 * i)) & 0b11; + const std::uint8_t length = header + 1; + length_table[control_byte] += length; + }); + }); + } else { + const auto actual_length = [&](const std::uint8_t header) { + switch (header) { + case 0: + return 1; + case 1: + return 2; + case 2: + return 4; + case 3: + return 8; + default: + __builtin_unreachable(); + } + }; + + constexpr_for<16>([&](const std::uint8_t control_byte) { + length_table[control_byte] = 0; + + constexpr_for<2>([&](const std::uint8_t i) { + const std::uint8_t header = (control_byte >> (2 * i)) & 0b11; + const std::uint8_t length = actual_length(header); + length_table[control_byte] += length; + }); + }); + } + + return length_table; + } + + [[nodiscard]] static consteval ShuffleTable create_shuffle_table() { + ShuffleTable shuffle_table{}; + + if constexpr (k32BitInts) { + constexpr_for<256>([&](const std::uint8_t control_byte) { + std::uint8_t byte = 0; + std::uint8_t pos = 0; + + constexpr_for<4>([&](const std::uint8_t i) { + const std::uint8_t header = (control_byte >> (2 * i)) & 0b11; + const std::uint8_t length = header + 1; + + std::uint8_t j = 0; + while (j < length) { + shuffle_table[control_byte][pos++] = byte++; + j += 1; + } + + while (j < 4) { + shuffle_table[control_byte][pos++] = 0b11111111; + j += 1; + } + }); + }); + } else { + const auto actual_length = [&](const std::uint8_t value) { + switch (value) { + case 0: + return 1; + case 1: + return 2; + case 2: + return 4; + case 3: + return 8; + 
default: + __builtin_unreachable(); + } + }; + + constexpr_for<16>([&](const std::uint8_t control_byte) { + std::uint8_t byte = 0; + std::uint8_t pos = 0; + + constexpr_for<2>([&](const std::uint8_t i) { + const std::uint8_t header = (control_byte >> (2 * i)) & 0b11; + const std::uint8_t length = actual_length(header); + + std::uint8_t j = 0; + while (j < length) { + shuffle_table[control_byte][pos++] = byte++; + j += 1; + } + + while (j < 8) { + shuffle_table[control_byte][pos++] = 0b11111111; + j += 1; + } + }); + }); + } + + return shuffle_table; + } + + static constexpr const LengthTable kLengthTable = create_length_table(); + static constexpr const ShuffleTable kShuffleTable = create_shuffle_table(); + +public: + explicit StreamVByteDecoder(const std::size_t num_values, const std::uint8_t *ptr) + : _num_control_bytes(num_values / 4), + _control_bytes_ptr(ptr), + _num_values(num_values), + _data_ptr(ptr + _num_control_bytes + ((num_values % 4) != 0)) {} + + template void decode(Lambda &&l) { + if constexpr (k32BitInts) { + decode32(std::forward(l)); + } else { + decode64(std::forward(l)); + } + } + + [[nodiscard]] const std::uint8_t *get() { + return _data_ptr; + } + +private: +#if defined(__x86_64__) + template void decode32(Lambda &&l) { + static_assert(std::is_invocable_v || PassPairs && std::is_invocable_v); + + using LambdaReturnType = std::conditional_t< + PassPairs, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + __m128i prev = _mm_setzero_si128(); + const auto decode_gaps = [&](__m128i data) { + if constexpr (GapKind == DifferentialCodingKind::NONE) { + prev = data; + return; + } + + if constexpr (GapKind == DifferentialCodingKind::D1) { + const __m128i temp = _mm_add_epi32(_mm_slli_si128(data, 8), data); + prev = _mm_add_epi32( + _mm_add_epi32(_mm_slli_si128(temp, 4), temp), _mm_shuffle_epi32(prev, 0xff) + ); + } else if constexpr (GapKind == DifferentialCodingKind::D2) { + prev = _mm_add_epi32( + _mm_add_epi32(_mm_slli_si128(data, 8), data), + _mm_shuffle_epi32(prev, _MM_SHUFFLE(3, 2, 3, 2)) + ); + } else if constexpr (GapKind == DifferentialCodingKind::DM) { + prev = _mm_add_epi32(data, _mm_shuffle_epi32(prev, 0xff)); + } else if constexpr (GapKind == DifferentialCodingKind::D4) { + prev = _mm_add_epi32(data, prev); + } else { + static_assert("Unexpected differential coding kind."); + } + }; + + for (std::size_t i = 0; i < _num_control_bytes; ++i) { + const std::uint8_t control_byte = _control_bytes_ptr[i]; + const std::uint8_t length = kLengthTable[control_byte]; + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + _data_ptr += length; + + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + const __m128i mask = _mm_loadu_si128((const __m128i *)shuffle_mask); + + data = _mm_shuffle_epi8(data, mask); + decode_gaps(data); + + if constexpr (kNonStoppable) { + if constexpr (PassPairs) { + l(_mm_extract_epi32(prev, 0), _mm_extract_epi32(prev, 1)); + l(_mm_extract_epi32(prev, 2), _mm_extract_epi32(prev, 3)); + } else { + l(_mm_extract_epi32(prev, 0)); + l(_mm_extract_epi32(prev, 1)); + l(_mm_extract_epi32(prev, 2)); + l(_mm_extract_epi32(prev, 3)); + } + } else { + if constexpr (PassPairs) { + if (l(_mm_extract_epi32(prev, 0), _mm_extract_epi32(prev, 1))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 2), _mm_extract_epi32(prev, 3))) [[unlikely]] { + return; + } + } else { + if (l(_mm_extract_epi32(prev, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 1))) 
[[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 2))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 3))) [[unlikely]] { + return; + } + } + } + } + + if constexpr (PassPairs) { + if (_num_values % 4 == 2) { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t length = kLengthTable[control_byte]; + + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + const __m128i mask = _mm_loadu_si128((const __m128i *)shuffle_mask); + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + _data_ptr += length - 2; + + data = _mm_shuffle_epi8(data, mask); + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi32(prev, 0), _mm_extract_epi32(prev, 1)); + } else { + if (l(_mm_extract_epi32(prev, 0), _mm_extract_epi32(prev, 1))) [[unlikely]] { + return; + } + } + } + } else { + switch (_num_values % 4) { + case 1: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + const __m128i mask = _mm_loadu_si128((const __m128i *)shuffle_mask); + + data = _mm_shuffle_epi8(data, mask); + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi32(prev, 0)); + } else { + if (l(_mm_extract_epi32(prev, 0))) [[unlikely]] { + return; + } + } + break; + } + case 2: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + const __m128i mask = _mm_loadu_si128((const __m128i *)shuffle_mask); + + data = _mm_shuffle_epi8(data, mask); + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi32(prev, 0)); + l(_mm_extract_epi32(prev, 1)); + } else { + if (l(_mm_extract_epi32(prev, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 1))) [[unlikely]] { + return; + } + } + break; + } + case 3: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + const __m128i mask = _mm_loadu_si128((const __m128i *)shuffle_mask); + + data = _mm_shuffle_epi8(data, mask); + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi32(prev, 0)); + l(_mm_extract_epi32(prev, 1)); + l(_mm_extract_epi32(prev, 2)); + } else { + if (l(_mm_extract_epi32(prev, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 1))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi32(prev, 2))) [[unlikely]] { + return; + } + } + break; + } + } + } + } +#elif defined(__aarch64__) + template void decode32(Lambda &&l) { + static_assert(std::is_invocable_v || PassPairs && std::is_invocable_v); + + using LambdaReturnType = std::conditional_t< + PassPairs, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + uint32x4_t prev = vmovq_n_u32(0); + const auto decode_gaps = [&](__m128i data) { + if constexpr (GapKind == DifferentialCodingKind::NONE) { + prev = data; + return; + } else { + static_assert("Unsupported differential coding kind (ARM)."); + } + }; + + for (std::size_t i = 0; i < _num_control_bytes; ++i) { + const std::uint8_t control_byte = _control_bytes_ptr[i]; + const std::uint8_t length = kLengthTable[control_byte]; + + uint32x4_t data = 
vld1q_u32(reinterpret_cast(_data_ptr)); + _data_ptr += length; + + const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); + data = vqtbl1q_u8(data, shuffle_mask); + + decode_gaps(data); + + if constexpr (kNonStoppable) { + if constexpr (PassPairs) { + l(vgetq_lane_u32(prev, 0), vgetq_lane_u32(prev, 1)); + l(vgetq_lane_u32(prev, 2), vgetq_lane_u32(prev, 3)); + } else { + l(vgetq_lane_u32(prev, 0)); + l(vgetq_lane_u32(prev, 1)); + l(vgetq_lane_u32(prev, 2)); + l(vgetq_lane_u32(prev, 3)); + } + } else { + if constexpr (PassPairs) { + if (l(vgetq_lane_u32(prev, 0), vgetq_lane_u32(prev, 1))) [[unlikely]] { + return; + } + + if (l(vgetq_lane_u32(prev, 2), vgetq_lane_u32(prev, 3))) [[unlikely]] { + return; + } + } else { + if (l(vgetq_lane_u32(prev, 0))) [[unlikely]] { + return; + } + + if (l(vgetq_lane_u32(prev, 1))) [[unlikely]] { + return; + } + + if (l(vgetq_lane_u32(prev, 2))) [[unlikely]] { + return; + } + + if (l(vgetq_lane_u32(prev, 3))) [[unlikely]] { + return; + } + } + } + } + + if constexpr (PassPairs) { + if (_num_values % 4 == 2) { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t length = kLengthTable[control_byte]; + + uint32x4_t data = vld1q_u32(reinterpret_cast(_data_ptr)); + _data_ptr += length; + + const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); + data = vqtbl1q_u8(data, shuffle_mask); + + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(vgetq_lane_u32(prev, 0), vgetq_lane_u32(prev, 1)); + } else { + if (l(vgetq_lane_u32(prev, 0), vgetq_lane_u32(prev, 1))) [[unlikely]] { + return; + } + } + } + } else { + switch (_num_values % 4) { + case 1: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + + uint32x4_t data = vld1q_u32(reinterpret_cast(_data_ptr)); + _data_ptr += length; + + const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); + data = vqtbl1q_u8(data, shuffle_mask); + + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(vgetq_lane_u32(prev, 0)); + } else { + if (l(vgetq_lane_u32(prev, 0))) [[unlikely]] { + return; + } + } + break; + } + case 2: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + + uint32x4_t data = vld1q_u32(reinterpret_cast(_data_ptr)); + _data_ptr += length; + + const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); + data = vqtbl1q_u8(data, shuffle_mask); + + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(vgetq_lane_u32(prev, 0)); + l(vgetq_lane_u32(prev, 1)); + } else { + if (l(vgetq_lane_u32(prev, 0))) [[unlikely]] { + return; + } + + if (l(vgetq_lane_u32(prev, 1))) [[unlikely]] { + return; + } + } + break; + } + case 3: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); + + uint32x4_t data = vld1q_u32(reinterpret_cast(_data_ptr)); + _data_ptr += length; + + const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); + data = vqtbl1q_u8(data, shuffle_mask); + + decode_gaps(data); + + if constexpr (kNonStoppable) { + l(vgetq_lane_u32(prev, 0)); + l(vgetq_lane_u32(prev, 1)); + l(vgetq_lane_u32(prev, 2)); + } else { + if (l(vgetq_lane_u32(prev, 0))) [[unlikely]] { + return; + } + + if (l(vgetq_lane_u32(prev, 1))) [[unlikely]] { + return; + } + + if 
(l(vgetq_lane_u32(prev, 2))) [[unlikely]] { + return; + } + } + break; + } + } + } + } +#else +#error "Only x64 and ARM are supported" +#endif + +#if defined(__x86_64__) + template void decode64(Lambda &&l) { + static_assert(std::is_invocable_v); + constexpr bool kNonStoppable = std::is_void_v>; + + __m128i prev = _mm_setzero_si128(); + const auto decode_gaps = [&](__m128i data) { + if constexpr (GapKind == DifferentialCodingKind::NONE) { + prev = data; + return; + } + + if constexpr (GapKind == DifferentialCodingKind::D1) { + const __m128i temp = _mm_add_epi64(_mm_slli_si128(data, 8), data); + prev = _mm_add_epi32( + _mm_add_epi32(_mm_slli_si128(temp, 4), temp), _mm_shuffle_epi32(prev, 0xff) + ); + } else if constexpr (GapKind == DifferentialCodingKind::D2) { + prev = _mm_add_epi32( + _mm_add_epi32(_mm_slli_si128(data, 8), data), + _mm_shuffle_epi32(prev, _MM_SHUFFLE(3, 2, 3, 2)) + ); + } else if constexpr (GapKind == DifferentialCodingKind::DM) { + prev = _mm_add_epi32(data, _mm_shuffle_epi32(prev, 0xff)); + } else if constexpr (GapKind == DifferentialCodingKind::D4) { + prev = _mm_add_epi32(data, prev); + } else { + static_assert("Unexpected differential coding kind."); + } + }; + + for (std::size_t i = 0; i < _num_control_bytes; ++i) { + const std::uint8_t control_byte = _control_bytes_ptr[i]; + const std::uint8_t control_byte_lh = control_byte & 0b1111; + const std::uint8_t control_byte_uh = control_byte >> 4; + + const std::uint8_t length1 = kLengthTable[control_byte_lh]; + const std::uint8_t length2 = kLengthTable[control_byte_uh]; + + __m128i data1 = _mm_loadu_si128((const __m128i *)_data_ptr); + _data_ptr += length1; + + __m128i data2 = _mm_loadu_si128((const __m128i *)_data_ptr); + _data_ptr += length2; + + const std::uint8_t *shuffle_mask1 = kShuffleTable[control_byte_lh].data(); + const __m128i mask1 = _mm_loadu_si128((const __m128i *)shuffle_mask1); + + const std::uint8_t *shuffle_mask2 = kShuffleTable[control_byte_uh].data(); + const __m128i mask2 = _mm_loadu_si128((const __m128i *)shuffle_mask2); + + data1 = _mm_shuffle_epi8(data1, mask1); + data2 = _mm_shuffle_epi8(data2, mask2); + + if constexpr (GapKind == DifferentialCodingKind::NONE) { + if constexpr (kNonStoppable) { + l(_mm_extract_epi64(data1, 0)); + l(_mm_extract_epi64(data1, 1)); + l(_mm_extract_epi64(data2, 0)); + l(_mm_extract_epi64(data2, 1)); + } else { + if (l(_mm_extract_epi64(data1, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi64(data1, 1))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi64(data2, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi64(data2, 1))) [[unlikely]] { + return; + } + } + } else { + decode_gaps(data1); + } + } + + switch (_num_values % 4) { + case 1: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t control_byte_lh = control_byte & 0b1111; + + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte_lh].data(); + const __m128i mask = _mm_loadu_si128((const __m128i *)shuffle_mask); + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + data = _mm_shuffle_epi8(data, mask); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi64(data, 0)); + } else { + if (l(_mm_extract_epi64(data, 0))) [[unlikely]] { + return; + } + } + break; + } + case 2: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t control_byte_lh = control_byte & 0b1111; + + const std::uint8_t *shuffle_mask = kShuffleTable[control_byte_lh].data(); + const __m128i mask = 
_mm_loadu_si128((const __m128i *)shuffle_mask); + + __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); + data = _mm_shuffle_epi8(data, mask); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi64(data, 0)); + l(_mm_extract_epi64(data, 1)); + } else { + if (l(_mm_extract_epi64(data, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi64(data, 1))) [[unlikely]] { + return; + } + } + break; + } + case 3: { + const std::uint8_t control_byte = _control_bytes_ptr[_num_control_bytes]; + const std::uint8_t control_byte_lh = control_byte & 0b1111; + const std::uint8_t control_byte_uh = control_byte >> 4; + + const std::uint8_t length1 = kLengthTable[control_byte_lh]; + __m128i data1 = _mm_loadu_si128((const __m128i *)_data_ptr); + + _data_ptr += length1; + __m128i data2 = _mm_loadu_si128((const __m128i *)_data_ptr); + + const std::uint8_t *shuffle_mask1 = kShuffleTable[control_byte_lh].data(); + const __m128i mask1 = _mm_loadu_si128((const __m128i *)shuffle_mask1); + + const std::uint8_t *shuffle_mask2 = kShuffleTable[control_byte_uh].data(); + const __m128i mask2 = _mm_loadu_si128((const __m128i *)shuffle_mask2); + + data1 = _mm_shuffle_epi8(data1, mask1); + data2 = _mm_shuffle_epi8(data2, mask2); + + if constexpr (kNonStoppable) { + l(_mm_extract_epi64(data1, 0)); + l(_mm_extract_epi64(data1, 1)); + l(_mm_extract_epi64(data2, 0)); + } else { + if (l(_mm_extract_epi64(data1, 0))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi64(data1, 1))) [[unlikely]] { + return; + } + + if (l(_mm_extract_epi64(data2, 0))) [[unlikely]] { + return; + } + } + break; + } + } + } +#elif defined(__aarch64__) + template void decode64(Lambda &&l) { + static_assert("Unsupported streamvbyte configuration (ARM)."); + } +#endif + +private: + const std::size_t _num_control_bytes; + const std::uint8_t *_control_bytes_ptr; + + const std::size_t _num_values; + const std::uint8_t *_data_ptr; +}; + +} // namespace kaminpar::streamvbyte diff --git a/kaminpar-common/graph-compression/varint.h b/kaminpar-common/graph-compression/varint.h new file mode 100644 index 00000000..0a45a158 --- /dev/null +++ b/kaminpar-common/graph-compression/varint.h @@ -0,0 +1,511 @@ +/******************************************************************************* + * Encoding and decoding methods for VarInts. + * + * @file: varint.h + * @author: Daniel Salwasser + * @date: 11.11.2023 + ******************************************************************************/ +#pragma once + +#include +#include +#include +#include + +#ifdef KAMINPAR_COMPRESSION_FAST_DECODING +#include +#endif + +namespace kaminpar { + +/*! + * Returns the maximum number of bytes that a VarInt needs to be stored. + * + * @tparam Int The type of integer whose encoded maximum length is returned. + */ +template [[nodiscard]] constexpr std::size_t varint_max_length() { + return (sizeof(Int) * 8) / 7 + 1; +} + +/*! + * Returns the number of bytes a VarInt needs to be stored. + * + * @tparam Int The type of integer whose encoded length is returned. + * @param Int The integer to store. + * @return The number of bytes the integer needs to be stored. + */ +template [[nodiscard]] std::size_t varint_length(Int i) { + std::size_t len = 1; + + while (i > 0b01111111) { + i >>= 7; + len++; + } + + return len; +} + +/*! + * Writes an integer to a memory location as a VarInt. + * + * @tparam Int The type of integer to encode. + * @param Int The integer to store. + * @param ptr The pointer to the memory location to write the integer to. 
+ * @return The number of bytes that the integer occupies at the memory location. + */ +template std::size_t varint_encode(Int i, std::uint8_t *ptr) { + std::size_t len = 1; + + while (i > 0b01111111) { + const std::uint8_t octet = (i & 0b01111111) | 0b10000000; + *ptr = octet; + + i >>= 7; + ptr += 1; + len += 1; + } + + const std::uint8_t last_octet = i & 0b01111111; + *ptr = last_octet; + + return len; +} + +/*! + * Writes an integer to a memory location as a VarInt. + * + * @tparam Int The type of integer to encode. + * @param Int The integer to store. + * @param ptr A pointer to the pointer to the memory location to write the integer to, which is + * incremented accordingly. + */ +template void varint_encode(Int i, std::uint8_t **ptr) { + while (i > 0b01111111) { + const std::uint8_t octet = (i & 0b01111111) | 0b10000000; + **ptr = octet; + + i >>= 7; + *ptr += 1; + } + + const std::uint8_t last_octet = i & 0b01111111; + **ptr = last_octet; + *ptr += 1; +} + +/*! + * Reads an integer encoded as a VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the memory location to read the integer from. + * @return The decoded integer. + */ +template [[nodiscard]] Int varint_decode(const std::uint8_t *data) { + Int value = 0; + + Int shift = 0; + while (true) { + const std::uint8_t byte = *data; + + if ((byte & 0b10000000) == 0) { + value |= static_cast(byte) << shift; + break; + } else { + value |= static_cast(byte & 0b01111111) << shift; + } + + shift += 7; + data += 1; + } + + return value; +} + +/*! + * Reads an integer encoded as a VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the pointer to the memory location to read the integer from, which is + * incremented accordingly. + * @return The decoded integer. + */ +template [[nodiscard]] Int varint_decode_loop(const std::uint8_t **data) { + Int value = 0; + + Int shift = 0; + while (true) { + const std::uint8_t octet = **data; + *data += 1; + + if ((octet & 0b10000000) == 0) { + value |= static_cast(octet) << shift; + break; + } else { + value |= static_cast(octet & 0b01111111) << shift; + } + + shift += 7; + } + + return value; +} + +/*! + * Reads an integer encoded as a VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the pointer to the memory location to read the integer from, which is + * incremented accordingly. + * @return The decoded integer. 
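As a standalone illustration of the 7-bit-per-byte scheme that varint_encode and varint_decode_loop implement (a minimal sketch, not part of the patch; toy_encode and toy_decode are invented names), each emitted byte carries seven payload bits and uses the high bit as a continuation flag:

#include <cstdint>
#include <cstdio>
#include <vector>

// Minimal LEB128-style VarInt round trip mirroring the scheme above.
static std::vector<std::uint8_t> toy_encode(std::uint32_t value) {
  std::vector<std::uint8_t> out;
  while (value > 0x7F) {
    out.push_back(static_cast<std::uint8_t>((value & 0x7F) | 0x80)); // continuation bit set
    value >>= 7;
  }
  out.push_back(static_cast<std::uint8_t>(value)); // last byte: continuation bit clear
  return out;
}

static std::uint32_t toy_decode(const std::uint8_t *ptr) {
  std::uint32_t value = 0;
  std::uint32_t shift = 0;
  while (true) {
    const std::uint8_t byte = *ptr++;
    value |= static_cast<std::uint32_t>(byte & 0x7F) << shift;
    if ((byte & 0x80) == 0) {
      return value;
    }
    shift += 7;
  }
}

int main() {
  const auto bytes = toy_encode(300); // encodes as 0xAC 0x02
  std::printf("length=%zu decoded=%u\n", bytes.size(), toy_decode(bytes.data()));
}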
+ */ +template [[nodiscard]] Int varint_decode_pext_unrolled(const std::uint8_t **data) { +#ifdef KAMINPAR_COMPRESSION_FAST_DECODING + if constexpr (sizeof(Int) == 4) { + const std::uint8_t *data_ptr = *data; + if ((data_ptr[0] & 0b10000000) == 0) { + const std::uint32_t result = *data_ptr & 0b01111111; + *data += 1; + return result; + } + + if ((data_ptr[1] & 0b10000000) == 0) { + const std::uint32_t result = + _pext_u32(*reinterpret_cast(data_ptr), 0x7F7F); + *data += 2; + return result; + } + + if ((data_ptr[2] & 0b10000000) == 0) { + const std::uint32_t result = + _pext_u32(*reinterpret_cast(data_ptr), 0x7F7F7F); + *data += 3; + return result; + } + + if ((data_ptr[3] & 0b10000000) == 0) { + const std::uint32_t result = + _pext_u32(*reinterpret_cast(data_ptr), 0x7F7F7F7F); + *data += 4; + return result; + } + + const std::uint32_t result = static_cast( + _pext_u64(*reinterpret_cast(data_ptr), 0x7F7F7F7F7F) + ); + *data += 5; + return result; + } else if constexpr (sizeof(Int) == 8) { + if ((ptr[0] & 0b10000000) == 0) { + const std::uint64_t result = *ptr & 0b01111111; + *data += 1; + return result; + } + + if ((ptr[1] & 0b10000000) == 0) { + const std::uint64_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F); + *data += 2; + return result; + } + + if ((ptr[2] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F); + *data += 3; + return result; + } + + if ((ptr[3] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F7F); + *data += 4; + return result; + } + + if ((ptr[4] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F); + *data += 5; + return result; + } + + if ((ptr[5] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F); + *data += 6; + return result; + } + + if ((ptr[6] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F); + *data += 7; + return result; + } + + if ((ptr[7] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F7F); + *data += 8; + return result; + } + + if ((ptr[8] & 0b10000000) == 0) { + const std::uint64_t result = + _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F7F) | + (static_cast(ptr[8] & 0b01111111) << 56); + *data += 9; + return result; + } + + const std::uint64_t result = + _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F7F) | + (static_cast(ptr[8] & 0b01111111) << 56) | + (static_cast(ptr[9]) << 63); + *data += 10; + return result; + } +#else + return varint_decode_loop(data); +#endif +} + +/*! + * Reads an integer encoded as a VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the pointer to the memory location to read the integer from, which is + * incremented accordingly. + * @return The decoded integer. 
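The unrolled fast path above relies on BMI2's parallel bit extract: once the encoded bytes are loaded as one little-endian word, a single PEXT drops the continuation bits and concatenates the 7-bit payloads. A standalone sketch of the two-byte case (assuming a BMI2-capable x86-64 CPU; the scalar branch is an equivalent fallback and the sample word is illustrative):

#include <cstdint>
#include <cstdio>
#ifdef __BMI2__
#include <immintrin.h>
#endif

// Why _pext_u32 with mask 0x7F7F decodes a two-byte VarInt: the two encoded
// bytes of 300 are 0xAC 0x02; read little-endian they form the word 0x02AC.
// PEXT keeps exactly the bits selected by the mask (the two 7-bit payloads)
// and packs them contiguously, yielding the decoded value in one instruction.
int main() {
  const std::uint32_t word = 0x02AC;
#ifdef __BMI2__
  const std::uint32_t decoded = _pext_u32(word, 0x7F7F);
#else
  // Scalar equivalent of the extraction for machines without BMI2.
  const std::uint32_t decoded = (word & 0x7F) | (((word >> 8) & 0x7F) << 7);
#endif
  std::printf("decoded=%u\n", decoded); // prints 300
}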
+ */ +template [[nodiscard]] Int varint_decode_pext_branchless(const std::uint8_t **data) { +#ifdef KAMINPAR_COMPRESSION_FAST_DECODING + if constexpr (sizeof(Int) == 4) { + const std::uint8_t *data_ptr = *data; + + const std::uint64_t word = *reinterpret_cast(data_ptr); + const std::uint64_t continuation_bits = ~word & 0x8080808080; + const std::uint64_t mask = continuation_bits ^ (continuation_bits - 1); + const std::uint64_t length = (std::countr_zero(continuation_bits) + 1) / 8; + + const Int result = _pext_u64(word & mask, 0x7F7F7F7F7F); + *data += length; + return result; + } +#else + return varint_decode_loop(data); +#endif +} + +/*! + * Reads an integer encoded as a VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the pointer to the memory location to read the integer from, which is + * incremented accordingly. + * @return The decoded integer. + */ +template [[nodiscard]] Int varint_decode(const std::uint8_t **data) { + return varint_decode_pext_unrolled(data); +} + +/*! + * Returns the number of bytes a marked VarInt needs to be stored. + * + * @tparam Int The type of integer whose encoded length is returned. + * @param Int The integer to store. + * @return The number of bytes the integer needs to be stored. + */ +template [[nodiscard]] std::size_t marked_varint_length(Int i) { + std::size_t len = 1; + i >>= 6; + + if (i > 0) { + len += varint_length(i); + } + + return len; +} + +/*! + * Writes an integer to a memory location as a marked VarInt. + * + * @tparam Int The type of integer to encode. + * @param Int The integer to store. + * @param marker_set Whether the integer is marked. + * @param ptr The pointer to the memory location to write the integer to. + */ +template void marked_varint_encode(Int i, const bool marked, std::uint8_t **ptr) { + std::uint8_t first_octet = i & 0b00111111; + if (marked) { + first_octet |= 0b01000000; + } + + i >>= 6; + + if (i == 0) { + **ptr = first_octet; + *ptr += 1; + return; + } + + first_octet |= 0b10000000; + **ptr = first_octet; + *ptr += 1; + + varint_encode(i, ptr); +} + +/*! + * Reads an integer encoded as a marked VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the memory location to read the integer from. + * @return A pair consisting of the decoded integer and whether the marker is set. + */ +template +[[nodiscard]] std::pair marked_varint_decode(const std::uint8_t *ptr) { + const std::uint8_t first_octet = *ptr; + ptr += 1; + + const bool is_continuation_bit_set = (first_octet & 0b10000000) != 0; + const bool is_marked = (first_octet & 0b01000000) != 0; + + Int result = first_octet & 0b00111111; + if (is_continuation_bit_set) { + Int shift = 6; + + while (true) { + const std::uint8_t octet = *ptr; + ptr += 1; + + if ((octet & 0b10000000) == 0) { + result |= static_cast(octet) << shift; + break; + } else { + result |= static_cast(octet & 0b01111111) << shift; + } + + shift += 7; + } + } + + return std::make_pair(result, is_marked); +} + +/*! + * Reads an integer encoded as a marked VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the pointer to the memory location to read the integer from, which is + * incremented accordingly. + * @return A pair consisting of the decoded integer and whether the markes is set. 
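A small sketch of the first octet of a marked VarInt as documented above (illustrative only; the constants mirror the masks used by the marked_varint_* functions): bit 7 is the continuation flag, bit 6 the marker, and bits 0-5 hold the low six value bits, so values that fit into six bits need a single octet.

#include <cstdint>
#include <cstdio>

int main() {
  const std::uint32_t value = 45; // fits into 6 bits, so no continuation byte follows
  const bool marked = true;

  const std::uint8_t first_octet =
      static_cast<std::uint8_t>(value & 0x3F) | (marked ? 0x40 : 0x00);

  const bool decoded_marker = (first_octet & 0x40) != 0;
  const std::uint32_t decoded_value = first_octet & 0x3F;
  std::printf(
      "octet=0x%02X value=%u marker=%d\n",
      static_cast<unsigned>(first_octet),
      decoded_value,
      decoded_marker
  );
}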
+ */ +template +[[nodiscard]] std::pair marked_varint_decode(const std::uint8_t **ptr) { + const std::uint8_t first_octet = **ptr; + *ptr += 1; + + const bool is_continuation_bit_set = (first_octet & 0b10000000) != 0; + const bool is_marked = (first_octet & 0b01000000) != 0; + + Int result = first_octet & 0b00111111; + if (is_continuation_bit_set) { + Int shift = 6; + + while (true) { + const std::uint8_t octet = **ptr; + *ptr += 1; + + if ((octet & 0b10000000) == 0) { + result |= static_cast(octet) << shift; + break; + } else { + result |= static_cast(octet & 0b01111111) << shift; + } + + shift += 7; + } + } + + return std::make_pair(result, is_marked); +} + +/*! + * Encodes a signed integer using zigzag encoding. + * + * @param i The signed integer to encode. + * @return The encoded integer. + */ +template [[nodiscard]] std::make_unsigned_t zigzag_encode(const Int i) { + return (i >> (sizeof(Int) * 8 - 1)) ^ (i << 1); +} + +/*! + * Decodes a zigzag encoded integer. + * + * @param i The zigzag encoded integer to decode. + * @return The decoded integer. + */ +template [[nodiscard]] std::make_signed_t zigzag_decode(const Int i) { + return (i >> 1) ^ -(i & 1); +} + +/*! + * Returns the number of bytes a signed VarInt needs to be stored. + * + * @tparam Int The type of integer whose encoded length is returned. + * @param Int The integer to store. + * @return The number of bytes the integer needs to be stored. + */ +template [[nodiscard]] std::size_t signed_varint_length(const Int i) { + return varint_length(zigzag_encode(i)); +} + +/*! + * Writes an integer to a memory location as a signed VarInt. + * + * @tparam Int The type of integer to encode. + * @param Int The integer to store. + * @param ptr The pointer to the memory location to write the integer to. + * @return The number of bytes that the integer occupies at the memory location. + */ +template std::size_t signed_varint_encode(const Int i, std::uint8_t *ptr) { + return varint_encode(zigzag_encode(i), ptr); +} + +/*! + * Writes an integer to a memory location as a signed VarInt. + * + * @tparam Int The type of integer to encode. + * @param Int The integer to store. + * @param ptr A pointer to the pointer to the memory location to write the integer to, which is + * incremented accordingly. + */ +template void signed_varint_encode(const Int i, std::uint8_t **ptr) { + varint_encode(zigzag_encode(i), ptr); +} + +/*! + * Reads an integer encoded as a signed VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the memory location to read the integer from. + * @return The decoded integer. + */ +template [[nodiscard]] Int signed_varint_decode(const std::uint8_t *data) { + return zigzag_decode(varint_decode>(data)); +} + +/*! + * Reads an integer encoded as a signed VarInt from a memory location. + * + * @tparam Int The type of integer to decode. + * @param ptr A pointer to the pointer to the memory location to read the integer from, which is + * incremented accordingly. + * @return The decoded integer. 
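A quick standalone check of the zigzag mapping used by the signed helpers (zz_encode and zz_decode are invented names mirroring zigzag_encode and zigzag_decode above): 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, so values of small magnitude stay cheap to encode as unsigned VarInts.

#include <cstdint>
#include <cstdio>

static std::uint32_t zz_encode(std::int32_t i) {
  return static_cast<std::uint32_t>((i >> 31) ^ (i << 1));
}

static std::int32_t zz_decode(std::uint32_t u) {
  return static_cast<std::int32_t>((u >> 1) ^ -static_cast<std::int32_t>(u & 1));
}

int main() {
  for (const std::int32_t i : {0, -1, 1, -2, 2, -64, 1000}) {
    std::printf("%d -> %u -> %d\n", i, zz_encode(i), zz_decode(zz_encode(i)));
  }
}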
+ */ +template [[nodiscard]] Int signed_varint_decode(const std::uint8_t **data) { + return zigzag_decode(varint_decode>(data)); +} + +} // namespace kaminpar diff --git a/kaminpar-common/graph-compression/varint_codec.cc b/kaminpar-common/graph-compression/varint_codec.cc deleted file mode 100644 index 0905c592..00000000 --- a/kaminpar-common/graph-compression/varint_codec.cc +++ /dev/null @@ -1,32 +0,0 @@ -/******************************************************************************* - * Encoding and decoding methods for VarInts. - * - * @file: varint_codec.cc - * @author: Daniel Salwasser - * @date: 26.12.2023 - ******************************************************************************/ -#include "kaminpar-common/graph-compression/varint_codec.h" - -namespace kaminpar { - -namespace debug { - -static VarIntStats stats = {0, 0, 0, 0, 0, 0}; - -void varint_stats_reset() { - stats.varint_count = 0; - stats.signed_varint_count = 0; - stats.marked_varint_count = 0; - - stats.varint_bytes = 0; - stats.signed_varint_bytes = 0; - stats.marked_varint_bytes = 0; -} - -VarIntStats &varint_stats_global() { - return stats; -} - -} // namespace debug - -} // namespace kaminpar diff --git a/kaminpar-common/graph-compression/varint_codec.h b/kaminpar-common/graph-compression/varint_codec.h deleted file mode 100644 index 98d279e1..00000000 --- a/kaminpar-common/graph-compression/varint_codec.h +++ /dev/null @@ -1,558 +0,0 @@ -/******************************************************************************* - * Encoding and decoding methods for VarInts. - * - * @file: varint_codec.h - * @author: Daniel Salwasser - * @date: 11.11.2023 - ******************************************************************************/ -#pragma once - -#include -#include -#include - -#ifdef KAMINPAR_COMPRESSION_FAST_DECODING -#include -#endif // KAMINPAR_COMPRESSION_FAST_DECODING - -namespace kaminpar { - -namespace debug { - -/*! - * Whether to track statistics on encoded VarInts. - */ -static constexpr bool kTrackVarintStats = false; - -/*! - * Statistics about encoded VarInts. - */ -struct VarIntStats { - std::size_t varint_count; - std::size_t signed_varint_count; - std::size_t marked_varint_count; - - std::size_t varint_bytes; - std::size_t signed_varint_bytes; - std::size_t marked_varint_bytes; -}; - -/*! - * Reset the global statistics on encoded VarInts. - */ -void varint_stats_reset(); - -/*! - * Returns a reference to the global statistics on encoded VarInts. - * - * @return A reference to the global statistics on encoded VarInts. - */ -VarIntStats &varint_stats_global(); - -} // namespace debug - -/*! - * Encodes a signed integer using zigzag encoding. - * - * @param i The signed integer to encode. - * @return The encoded integer. - */ -template [[nodiscard]] std::make_unsigned_t zigzag_encode(Int i) { - return (i >> (sizeof(Int) * 8 - 1)) ^ (i << 1); -} - -/*! - * Decodes a zigzag encoded integer. - * - * @param i The zigzag encoded integer to decode. - * @return The decoded integer. - */ -template [[nodiscard]] std::make_signed_t zigzag_decode(Int i) { - return (i >> 1) ^ -(i & 1); -} - -/*! - * Returns the maximum number of bytes that a VarInt needs to be stored. - * - * @tparam Int The type of integer whose encoded maximum length is returned. - */ -template [[nodiscard]] constexpr std::size_t varint_max_length() { - return (sizeof(Int) * 8) / 7 + 1; -} - -/*! - * Returns the number of bytes a VarInt needs to be stored. - * - * @tparam Int The type of integer whose encoded length is returned. 
- * @param Int The integer to store. - * @return The number of bytes the integer needs to be stored. - */ -template [[nodiscard]] std::size_t varint_length(Int i) { - std::size_t len = 1; - - while (i > 0b01111111) { - i >>= 7; - len++; - } - - return len; -} - -/*! - * Returns the number of bytes a signed VarInt needs to be stored. - * - * @tparam Int The type of integer whose encoded length is returned. - * @param Int The integer to store. - * @return The number of bytes the integer needs to be stored. - */ -template [[nodiscard]] std::size_t signed_varint_length(Int i) { - return varint_length(zigzag_encode(i)); -} - -/*! - * Returns the number of bytes a marked VarInt needs to be stored. - * - * @tparam Int The type of integer whose encoded length is returned. - * @param Int The integer to store. - * @return The number of bytes the integer needs to be stored. - */ -template [[nodiscard]] std::size_t marked_varint_length(Int i) { - std::size_t len = 1; - - i >>= 6; - if (i > 0) { - len += varint_length(i); - } - - return len; -} - -/*! - * Writes an integer to a memory location as a VarInt. - * - * @tparam Int The type of integer to encode. - * @param Int The integer to store. - * @param ptr The pointer to the memory location to write the integer to. - * @return The number of bytes that the integer occupies at the memory location. - */ -template std::size_t varint_encode(Int i, std::uint8_t *ptr) { - std::size_t len = 1; - - while (i > 0b01111111) { - std::uint8_t octet = (i & 0b01111111) | 0b10000000; - *ptr = octet; - - i >>= 7; - ptr++; - len++; - } - - std::uint8_t last_octet = i & 0b01111111; - *ptr = last_octet; - - if (debug::kTrackVarintStats) { - debug::varint_stats_global().varint_count++; - debug::varint_stats_global().varint_bytes += len; - } - - return len; -} - -/*! - * Writes an integer to a memory location as a signed VarInt. - * - * @tparam Int The type of integer to encode. - * @param Int The integer to store. - * @param ptr The pointer to the memory location to write the integer to. - * @return The number of bytes that the integer occupies at the memory location. - */ -template std::size_t signed_varint_encode(Int i, std::uint8_t *ptr) { - const std::size_t len = varint_encode(zigzag_encode(i), ptr); - - if (debug::kTrackVarintStats) { - debug::varint_stats_global().signed_varint_count++; - debug::varint_stats_global().signed_varint_bytes += len; - } - - return len; -} - -/*! - * Writes an integer to a memory location as a marked VarInt. - * - * @tparam Int The type of integer to encode. - * @param Int The integer to store. - * @param marker_set Whether the integer is marked. - * @param ptr The pointer to the memory location to write the integer to. - * @return The number of bytes that the integer occupies at the memory location. - */ -template -std::size_t marked_varint_encode(Int i, bool marker_set, std::uint8_t *ptr) { - std::uint8_t first_octet; - - if (marker_set) { - first_octet = (i & 0b00111111) | 0b01000000; - } else { - first_octet = (i & 0b00111111); - } - - i >>= 6; - - if (i > 0) { - first_octet |= 0b10000000; - *ptr = first_octet; - - std::size_t len = varint_encode(i, ptr + 1) + 1; - - if (debug::kTrackVarintStats) { - debug::varint_stats_global().marked_varint_count++; - debug::varint_stats_global().marked_varint_bytes += len; - } - - return len; - } - - if (debug::kTrackVarintStats) { - debug::varint_stats_global().marked_varint_count++; - debug::varint_stats_global().marked_varint_bytes++; - } - - *ptr = first_octet; - return 1; -} - -/*! 
- * Reads an integer encoded as a VarInt from a memory location. The decoding is implemented as a - * loop with non intrinsic operations. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A pair consisting of the decoded integer and the number of bytes that the encoded integer - * occupied at the memory location. - */ -template -[[nodiscard]] std::pair varint_decode_general(const std::uint8_t *ptr) { - Int result = 0; - std::size_t shift = 0; - std::size_t position = 0; - - while (true) { - const std::uint8_t byte = ptr[position++]; - - if ((byte & 0b10000000) == 0) { - result |= static_cast(byte) << shift; - break; - } else { - result |= static_cast(byte & 0b01111111) << shift; - } - - shift += 7; - } - - return std::make_pair(result, position); -} - -/*! - * Reads an integer encoded as a VarInt from a memory location. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A pair consisting of the decoded integer and the number of bytes that the encoded integer - * occupied at the memory location. - */ -template -[[nodiscard]] std::pair varint_decode(const std::uint8_t *ptr) { - return varint_decode_general(ptr); -} - -#ifdef KAMINPAR_COMPRESSION_FAST_DECODING -/*! - * Reads a 32-bit integer encoded as a VarInt from a memory location. The decoding is implemented - * as an unrolled loop with intrinsic operations. - * - * @param ptr The pointer to the memory location to read the integer from. - * @return A pair consisting of the decoded integer and the number of bytes that the encoded integer - * occupied at the memory location. - */ -template <> -inline std::pair varint_decode(const std::uint8_t *ptr) { - if ((ptr[0] & 0b10000000) == 0) { - const std::uint32_t result = *ptr & 0b01111111; - return std::make_pair(result, 1); - } - - if ((ptr[1] & 0b10000000) == 0) { - const std::uint32_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F); - return std::make_pair(result, 2); - } - - if ((ptr[2] & 0b10000000) == 0) { - const std::uint32_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F); - return std::make_pair(result, 3); - } - - if ((ptr[3] & 0b10000000) == 0) { - const std::uint32_t result = - _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F7F); - return std::make_pair(result, 4); - } - - const std::uint32_t result = static_cast( - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F) - ); - return std::make_pair(result, 5); -} - -/*! - * Reads a 64-bit integer encoded as a VarInt from a memory location. The decoding is implemented - * as an unrolled loop with intrinsic operations. - * - * @param ptr The pointer to the memory location to read the integer from. - * @return A pair consisting of the decoded integer and the number of bytes that the encoded integer - * occupied at the memory location. 
- */ -template <> -inline std::pair varint_decode(const std::uint8_t *ptr) { - if ((ptr[0] & 0b10000000) == 0) { - const std::uint64_t result = *ptr & 0b01111111; - return std::make_pair(result, 1); - } - - if ((ptr[1] & 0b10000000) == 0) { - const std::uint64_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F); - return std::make_pair(result, 2); - } - - if ((ptr[2] & 0b10000000) == 0) { - const std::uint64_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F); - return std::make_pair(result, 3); - } - - if ((ptr[3] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F7F); - return std::make_pair(result, 4); - } - - if ((ptr[4] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F); - return std::make_pair(result, 5); - } - - if ((ptr[5] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F); - return std::make_pair(result, 6); - } - - if ((ptr[6] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F); - return std::make_pair(result, 7); - } - - if ((ptr[7] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F7F); - return std::make_pair(result, 8); - } - - if ((ptr[8] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F7F) | - (static_cast(ptr[8] & 0b01111111) << 56); - return std::make_pair(result, 9); - } - - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F7F) | - (static_cast(ptr[8] & 0b01111111) << 56) | - (static_cast(ptr[9]) << 63); - return std::make_pair(result, 10); -} -#endif - -/*! - * Reads an integer encoded as a signed VarInt from a memory location. The decoding is implemented - * as a loop with non intrinsic operations. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A pair consisting of the decoded integer and the number of bytes that the encoded integer - * occupied at the memory location. - */ -template -[[nodiscard]] std::pair signed_varint_decode_general(const std::uint8_t *ptr) { - const auto [unsigned_value, len] = varint_decode_general>(ptr); - return std::make_pair(zigzag_decode(unsigned_value), len); -} - -/*! - * Reads an integer encoded as a signed VarInt from a memory location. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A pair consisting of the decoded integer and the number of bytes that the encoded integer - * occupied at the memory location. - */ -template -[[nodiscard]] std::pair signed_varint_decode(const std::uint8_t *ptr) { - const auto [unsigned_value, len] = varint_decode>(ptr); - return std::make_pair(zigzag_decode(unsigned_value), len); -} - -/*! - * Reads an integer encoded as a marked VarInt from a memory location. The decoding is implemented - * as a loop with non intrinsic operations. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A tuple consisting of the decoded integer, whether the markes is set and the number of - * bytes that the encoded integer occupied at the memory location. 
- */ -template -[[nodiscard]] std::tuple marked_varint_decode(const std::uint8_t *ptr) { - const std::uint8_t first_byte = *ptr; - const bool is_continuation_bit_set = (first_byte & 0b10000000) != 0; - const bool is_marker_set = (first_byte & 0b01000000) != 0; - - Int result = first_byte & 0b00111111; - std::size_t shift = 0; - std::size_t position = 1; - - if (is_continuation_bit_set) { - while (true) { - const std::uint8_t byte = ptr[position++]; - - if ((byte & 0b10000000) == 0) { - result |= static_cast(byte) << (shift + 6); - break; - } else { - result |= static_cast(byte & 0b01111111) << (shift + 6); - } - - shift += 7; - } - } - - return std::make_tuple(result, is_marker_set, position); -} - -#ifdef KAMINPAR_COMPRESSION_FAST_DECODING -/*! - * Reads a 32-bit integer encoded as a marked VarInt from a memory location. The decoding is - * implemented as an unrolled loop with intrinsic operations. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A tuple consisting of the decoded integer, whether the markes is set and the number of - * bytes that the encoded integer occupied at the memory location. - */ -template <> -inline std::tuple -marked_varint_decode(const std::uint8_t *ptr) { - const bool is_marker_set = (*ptr & 0b01000000) != 0; - - if ((ptr[0] & 0b10000000) == 0) { - const std::uint32_t result = *ptr & 0b00111111; - return std::make_tuple(result, is_marker_set, 1); - } - - if ((ptr[1] & 0b10000000) == 0) { - const std::uint32_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F3F); - return std::make_tuple(result, is_marker_set, 2); - } - - if ((ptr[2] & 0b10000000) == 0) { - const std::uint32_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F3F); - return std::make_tuple(result, is_marker_set, 3); - } - - if ((ptr[3] & 0b10000000) == 0) { - const std::uint32_t result = - _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F3F); - return std::make_tuple(result, is_marker_set, 4); - } - - const std::uint32_t result = static_cast( - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F3F) - ); - return std::make_tuple(result, is_marker_set, 5); -} - -/*! - * Reads a 64-bit integer encoded as a marked VarInt from a memory location. The decoding is - * implemented as an unrolled loop with intrinsic operations. - * - * @tparam Int The type of integer to decode. - * @param ptr The pointer to the memory location to read the integer from. - * @return A tuple consisting of the decoded integer, whether the markes is set and the number of - * bytes that the encoded integer occupied at the memory location. 
- */ -template <> -inline std::tuple -marked_varint_decode(const std::uint8_t *ptr) { - const bool is_marker_set = (*ptr & 0b01000000) != 0; - - if ((ptr[0] & 0b10000000) == 0) { - const std::uint64_t result = *ptr & 0b00111111; - return std::make_tuple(result, is_marker_set, 1); - } - - if ((ptr[1] & 0b10000000) == 0) { - const std::uint64_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F3F); - return std::make_tuple(result, is_marker_set, 2); - } - - if ((ptr[2] & 0b10000000) == 0) { - const std::uint64_t result = _pext_u32(*reinterpret_cast(ptr), 0x7F7F3F); - return std::make_tuple(result, is_marker_set, 3); - } - - if ((ptr[3] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u32(*reinterpret_cast(ptr), 0x7F7F7F3F); - return std::make_tuple(result, is_marker_set, 4); - } - - if ((ptr[4] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F3F); - return std::make_tuple(result, is_marker_set, 5); - } - - if ((ptr[5] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F3F); - return std::make_tuple(result, is_marker_set, 6); - } - - if ((ptr[6] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F3F); - return std::make_tuple(result, is_marker_set, 7); - } - - if ((ptr[7] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F3F); - return std::make_tuple(result, is_marker_set, 8); - } - - if ((ptr[8] & 0b10000000) == 0) { - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F3F) | - (static_cast(ptr[8] & 0b01111111) << 55); - return std::make_tuple(result, is_marker_set, 9); - } - - const std::uint64_t result = - _pext_u64(*reinterpret_cast(ptr), 0x7F7F7F7F7F7F7F3F) | - (static_cast(ptr[8] & 0b01111111) << 55) | - (static_cast(ptr[9]) << 62); - return std::make_tuple(result, is_marker_set, 10); -} -#endif - -} // namespace kaminpar diff --git a/kaminpar-common/graph-compression/varint_run_length_codec.h b/kaminpar-common/graph-compression/varint_rle.h similarity index 75% rename from kaminpar-common/graph-compression/varint_run_length_codec.h rename to kaminpar-common/graph-compression/varint_rle.h index 17c7b84b..ffbb41ce 100644 --- a/kaminpar-common/graph-compression/varint_run_length_codec.h +++ b/kaminpar-common/graph-compression/varint_rle.h @@ -1,7 +1,7 @@ /******************************************************************************* * Encoding and decoding methods for run-length VarInts. * - * @file: varint_run_length_codec.h + * @file: varint_rle.h * @author: Daniel Salwasser * @date: 29.12.2023 ******************************************************************************/ @@ -12,6 +12,8 @@ #include #include +#include "kaminpar-common/math.h" + namespace kaminpar { /*! @@ -40,13 +42,13 @@ template class VarIntRunLengthEncoder { * includes the control byte if it is the first integer of a block. 
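For the 32-bit case, the run header written by this encoder packs the run length and the per-value byte width into one byte. A minimal sketch of that layout (assumed from the shifts in the code below; the sample values are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Header layout for 32-bit runs: upper six bits store the run length minus one,
// lower two bits store the per-value byte width minus one.
int main() {
  const std::size_t run_length = 17; // 17 values in this run
  const std::size_t byte_width = 3;  // each value stored in 3 little-endian bytes

  const std::uint8_t header =
      static_cast<std::uint8_t>(((run_length - 1) << 2) | ((byte_width - 1) & 0b11));

  const std::size_t decoded_length = (header >> 2) + 1;
  const std::size_t decoded_width = (header & 0b11) + 1;
  std::printf(
      "header=0x%02X length=%zu width=%zu\n",
      static_cast<unsigned>(header),
      decoded_length,
      decoded_width
  );
}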
*/ std::size_t add(Int i) { - std::uint8_t size = needed_bytes(i); + std::uint8_t size = math::byte_width(i); if (_buffer.empty()) { - _buffered_size = size++; - } else if (_buffer.size() == kBufferSize || _buffered_size != size) { + _num_buffered = size++; + } else if (_buffer.size() == kBufferSize || _num_buffered != size) { flush(); - _buffered_size = size++; + _num_buffered = size++; } _buffer.push_back(i); @@ -64,17 +66,17 @@ template class VarIntRunLengthEncoder { const std::uint8_t *begin = _ptr; if constexpr (sizeof(Int) == 4) { - const std::uint8_t header = (static_cast(_buffer.size() - 1) << 2) | - ((_buffered_size - 1) & 0b00000011); + const std::uint8_t header = + (static_cast(_buffer.size() - 1) << 2) | ((_num_buffered - 1) & 0b00000011); *_ptr++ = header; } else if constexpr (sizeof(Int) == 8) { - const std::uint8_t header = (static_cast(_buffer.size() - 1) << 3) | - ((_buffered_size - 1) & 0b00000111); + const std::uint8_t header = + (static_cast(_buffer.size() - 1) << 3) | ((_num_buffered - 1) & 0b00000111); *_ptr++ = header; } for (Int value : _buffer) { - for (std::uint8_t i = 0; i < _buffered_size; ++i) { + for (std::uint8_t i = 0; i < _num_buffered; ++i) { *_ptr++ = static_cast(value); value >>= 8; } @@ -86,19 +88,8 @@ template class VarIntRunLengthEncoder { private: std::uint8_t *_ptr; - std::uint8_t _buffered_size; + std::uint8_t _num_buffered; std::vector _buffer; - - std::uint8_t needed_bytes(Int i) const { - std::size_t len = 1; - - while (i > 0b11111111) { - i >>= 8; - len++; - } - - return len; - } }; /*! @@ -113,12 +104,12 @@ template class VarIntRunLengthDecoder { /*! * Constructs a new VarIntRunLengthDecoder. * + * @param num_values The number of integers that are encoded. * @param ptr The pointer to the memory location where the encoded integers are stored. - * @param count The number of integers that are encoded. */ - VarIntRunLengthDecoder(const std::uint8_t *ptr, const std::size_t count) - : _ptr(ptr), - _count(count) {} + VarIntRunLengthDecoder(const std::size_t num_values, const std::uint8_t *ptr) + : _num_values(num_values), + _ptr(ptr) {} /*! * Decodes the encoded integers. @@ -127,19 +118,19 @@ template class VarIntRunLengthDecoder { * parameter of type Int. 
*/ template void decode(Lambda &&l) { - constexpr bool non_stoppable = std::is_void_v>; + constexpr bool kNonStoppable = std::is_void_v>; - std::size_t decoded = 0; - while (decoded < _count) { + std::size_t num_decoded = 0; + while (num_decoded < _num_values) { const std::uint8_t run_header = *_ptr++; if constexpr (sizeof(Int) == 4) { - const std::uint8_t run_length = (run_header >> 2) + 1; - const std::uint8_t run_size = (run_header & 0b00000011) + 1; + const std::size_t run_length = (run_header >> 2) + 1; + const std::size_t run_size = (run_header & 0b00000011) + 1; - decoded += run_length; + num_decoded += run_length; - if constexpr (non_stoppable) { + if constexpr (kNonStoppable) { decode32(run_length, run_size, std::forward(l)); } else { const bool stop = decode32(run_length, run_size, std::forward(l)); @@ -148,12 +139,12 @@ template class VarIntRunLengthDecoder { } } } else if constexpr (sizeof(Int) == 8) { - const std::uint8_t run_length = (run_header >> 3) + 1; - const std::uint8_t run_size = (run_header & 0b00000111) + 1; + const std::size_t run_length = (run_header >> 3) + 1; + const std::size_t run_size = (run_header & 0b00000111) + 1; - decoded += run_length; + num_decoded += run_length; - if constexpr (non_stoppable) { + if constexpr (kNonStoppable) { decode64(run_length, run_size, std::forward(l)); } else { const bool stop = decode64(run_length, run_size, std::forward(l)); @@ -166,16 +157,13 @@ template class VarIntRunLengthDecoder { } private: - const std::uint8_t *_ptr; - const std::size_t _count; - template - bool decode32(const std::uint8_t run_length, const std::uint8_t run_size, Lambda &&l) { + bool decode32(const std::size_t run_length, const std::size_t run_size, Lambda &&l) { constexpr bool kNonStoppable = std::is_void_v>; switch (run_size) { case 1: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint32_t value = static_cast(*_ptr); _ptr += 1; @@ -190,7 +178,7 @@ template class VarIntRunLengthDecoder { } break; case 2: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint32_t value = *((std::uint16_t *)_ptr); _ptr += 2; @@ -205,7 +193,7 @@ template class VarIntRunLengthDecoder { } break; case 3: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint32_t value = *((std::uint32_t *)_ptr) & 0xFFFFFF; _ptr += 3; @@ -220,7 +208,7 @@ template class VarIntRunLengthDecoder { } break; case 4: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint32_t value = *((std::uint32_t *)_ptr); _ptr += 4; @@ -242,12 +230,12 @@ template class VarIntRunLengthDecoder { } template - bool decode64(const std::uint8_t run_length, const std::uint8_t run_size, Lambda &&l) { + bool decode64(const std::size_t run_length, const std::size_t run_size, Lambda &&l) { constexpr bool kNonStoppable = std::is_void_v>; switch (run_size) { case 1: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = static_cast(*_ptr); _ptr += 1; @@ -262,7 +250,7 @@ template class VarIntRunLengthDecoder { } break; case 2: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint16_t *)_ptr); _ptr += 2; @@ -277,7 +265,7 @@ template class VarIntRunLengthDecoder { } break; case 3: - for (std::uint8_t i = 0; i < run_length; ++i) { + 
for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint32_t *)_ptr) & 0xFFFFFF; _ptr += 3; @@ -292,7 +280,7 @@ template class VarIntRunLengthDecoder { } break; case 4: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint32_t *)_ptr); _ptr += 4; @@ -307,7 +295,7 @@ template class VarIntRunLengthDecoder { } break; case 5: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint64_t *)_ptr) & 0xFFFFFFFFFF; _ptr += 5; @@ -322,7 +310,7 @@ template class VarIntRunLengthDecoder { } break; case 6: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint64_t *)_ptr) & 0xFFFFFFFFFFFF; _ptr += 6; @@ -337,7 +325,7 @@ template class VarIntRunLengthDecoder { } break; case 7: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint64_t *)_ptr) & 0xFFFFFFFFFFFFFF; _ptr += 7; @@ -352,7 +340,7 @@ template class VarIntRunLengthDecoder { } break; case 8: - for (std::uint8_t i = 0; i < run_length; ++i) { + for (std::size_t i = 0; i < run_length; ++i) { const std::uint64_t value = *((std::uint64_t *)_ptr); _ptr += 8; @@ -372,6 +360,10 @@ template class VarIntRunLengthDecoder { return false; } + +private: + const std::size_t _num_values; + const std::uint8_t *_ptr; }; }; // namespace kaminpar diff --git a/kaminpar-common/graph-compression/varint_stream_codec.h b/kaminpar-common/graph-compression/varint_stream_codec.h deleted file mode 100644 index b38639d4..00000000 --- a/kaminpar-common/graph-compression/varint_stream_codec.h +++ /dev/null @@ -1,440 +0,0 @@ -/******************************************************************************* - * Encoding and decoding methods for the StreamVByte codec. - * - * @file: varint_stream_codec.h - * @author: Daniel Salwasser - * @date: 29.12.2023 - ******************************************************************************/ -#pragma once - -#include -#include -#include - -#if defined(__x86_64__) -#include -#elif defined(__aarch64__) -#include -#endif - -#include "kaminpar-common/constexpr_utils.h" - -namespace kaminpar { - -/*! - * An encoder for writing variable length integers with the StreamVByte codec. - * - * @tparam Int The type of integer to encode. - */ -template class VarIntStreamEncoder { - static_assert(sizeof(Int) == 4); - -public: - /*! - * Constructs a new VarIntStreamEncoder. - * - * @param ptr The pointer to the memory location where the encoded integers are written. - * @param count The amount of integers to encode. - */ - VarIntStreamEncoder(std::uint8_t *ptr, std::size_t count) - : _control_bytes_ptr(ptr), - _data_ptr(ptr + count / 4 + ((count % 4) != 0)), - _count(count), - _buffered(0) {} - - /*! - * Encodes an integer. - * - * @param i The integer to encode. - * @return The number of bytes that the integer requires to be stored in encoded format. It - * includes the control byte if it is the last integer of a block. - */ - std::size_t add(Int i) { - if (_buffered == 3) { - _buffer[3] = i; - write_stream(); - - _buffered = 0; - return needed_bytes(i); - } - - _buffer[_buffered] = i; - return needed_bytes(i) + (_buffered++ == 0); - } - - /*! - * Writes the remaining integers added to the encoder which do not form a complete block to - * memory. 
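Both the VarIntStreamEncoder being removed here and its StreamVByte replacement describe a block of four 32-bit values with one control byte: each value contributes a 2-bit field holding its byte length minus one, and the data stream stores only that many little-endian bytes per value. A standalone sketch (bytes_needed and the sample block are illustrative):

#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>

static std::uint8_t bytes_needed(std::uint32_t value) {
  std::uint8_t length = 1;
  while (value > 0xFF) {
    value >>= 8;
    ++length;
  }
  return length;
}

int main() {
  const std::array<std::uint32_t, 4> block = {3, 1024, 70000, 16777216};

  std::uint8_t control_byte = 0;
  std::size_t data_bytes = 0;
  for (std::size_t i = 0; i < block.size(); ++i) {
    const std::uint8_t length = bytes_needed(block[i]);
    control_byte |= static_cast<std::uint8_t>((length - 1) << (2 * i));
    data_bytes += length;
  }

  // Lengths 1, 2, 3, 4 -> control byte 0xE4, ten data bytes for the whole block.
  std::printf(
      "control=0x%02X data bytes=%zu\n", static_cast<unsigned>(control_byte), data_bytes
  );
}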
- */ - void flush() { - if (_buffered == 0) { - return; - } - - const std::uint8_t control_byte = - ((needed_bytes(_buffer[3]) - 1) << 6) | (((needed_bytes(_buffer[2]) - 1) & 0b11) << 4) | - (((needed_bytes(_buffer[1]) - 1) & 0b11) << 2) | ((needed_bytes(_buffer[0]) - 1) & 0b11); - *_control_bytes_ptr++ = control_byte; - - for (std::size_t i = 0; i < _buffered; ++i) { - Int value = _buffer[i]; - do { - *_data_ptr++ = static_cast(value); - value >>= 8; - } while (value > 0); - } - } - -private: - std::uint8_t *_control_bytes_ptr; - std::uint8_t *_data_ptr; - const std::size_t _count; - - std::size_t _buffered; - std::array _buffer; - - void write_stream() { - const std::uint8_t control_byte = - ((needed_bytes(_buffer[3]) - 1) << 6) | (((needed_bytes(_buffer[2]) - 1) & 0b11) << 4) | - (((needed_bytes(_buffer[1]) - 1) & 0b11) << 2) | ((needed_bytes(_buffer[0]) - 1) & 0b11); - *_control_bytes_ptr++ = control_byte; - - for (Int value : _buffer) { - do { - *_data_ptr++ = static_cast(value); - value >>= 8; - } while (value > 0); - } - } - - std::uint8_t needed_bytes(Int i) const { - std::size_t len = 1; - - while (i > 0b11111111) { - i >>= 8; - len++; - } - - return len; - } -}; - -/*! - * A decoder for reading variable length integers stored with the StreamVByte codec. - * - * @tparam Int The type of integer to decode. - */ -template class VarIntStreamDecoder { - static_assert(sizeof(Int) == 4); - - static constexpr std::array create_length_table() { - std::array length_table{}; - - constexpr_for<256>([&](const std::uint8_t control_byte) { - length_table[control_byte] = 0; - - constexpr_for<4>([&](const std::uint8_t i) { - const std::uint8_t length = ((control_byte >> (2 * i)) & 0b11) + 1; - length_table[control_byte] += length; - }); - }); - - return length_table; - } - - static constexpr std::array, 256> create_shuffle_table() { - std::array, 256> shuffle_table{}; - - constexpr_for<256>([&](const std::uint8_t control_byte) { - std::uint8_t byte = 0; - std::uint8_t pos = 0; - - constexpr_for<4>([&](const std::uint8_t i) { - std::uint8_t c = (control_byte >> (2 * i)) & 0b11; - - std::uint8_t j = 0; - while (j <= c) { - shuffle_table[control_byte][pos++] = byte++; - j += 1; - } - - while (j < 4) { - shuffle_table[control_byte][pos++] = 0b11111111; - j += 1; - } - }); - }); - - return shuffle_table; - } - - static constexpr const std::array kLengthTable = create_length_table(); - - static constexpr const std::array, 256> kShuffleTable = - create_shuffle_table(); - -public: - /*! - * Constructs a new VarIntStreamDecoder. - * - * @param ptr The pointer to the memory location where the encoded integers are stored. - * @param count The amount of integers that are stored at the memory location. - */ - VarIntStreamDecoder(const std::uint8_t *ptr, const std::size_t count) - : _control_bytes_ptr(ptr), - _control_bytes(count / 4), - _data_ptr(ptr + _control_bytes + ((count % 4) != 0)), - _count(count) {} - - /*! - * Decodes the encoded integers. - * - * @param l The function to be called with the decoded integers, i.e. the function has one - * parameter of type Int. 
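Equivalently, a scalar decoder can reconstruct one block directly from the control byte; the SSE and NEON paths in this patch obtain the same result by loading 16 data bytes at once and applying a precomputed per-control-byte shuffle mask and length. A scalar sketch (the sample bytes encode 3, 1024, 70000 and 16777216):

#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const std::uint8_t control_byte = 0xE4;               // 2-bit length fields: 1, 2, 3, 4 bytes
  const std::uint8_t data[] = {0x03, 0x00, 0x04, 0x70,
                               0x11, 0x01, 0x00, 0x00,
                               0x00, 0x01};              // packed little-endian payloads

  const std::uint8_t *ptr = data;
  for (std::size_t i = 0; i < 4; ++i) {
    const std::size_t length = ((control_byte >> (2 * i)) & 0b11) + 1;

    std::uint32_t value = 0;
    for (std::size_t byte = 0; byte < length; ++byte) {
      value |= static_cast<std::uint32_t>(ptr[byte]) << (8 * byte);
    }
    ptr += length;

    std::printf("value[%zu] = %u\n", i, value);
  }
}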
- */ -#if defined(__x86_64__) - template void decode(Lambda &&l) { - constexpr bool kNonStoppable = std::is_void_v>; - - for (std::size_t i = 0; i < _control_bytes; ++i) { - const std::uint8_t control_byte = _control_bytes_ptr[i]; - const std::uint8_t length = kLengthTable[control_byte]; - - __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - _data_ptr += length; - - const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - - if constexpr (kNonStoppable) { - l(_mm_extract_epi32(data, 0)); - l(_mm_extract_epi32(data, 1)); - l(_mm_extract_epi32(data, 2)); - l(_mm_extract_epi32(data, 3)); - } else { - if (l(_mm_extract_epi32(data, 0))) [[unlikely]] { - return; - } - - if (l(_mm_extract_epi32(data, 1))) [[unlikely]] { - return; - } - - if (l(_mm_extract_epi32(data, 2))) [[unlikely]] { - return; - } - - if (l(_mm_extract_epi32(data, 3))) [[unlikely]] { - return; - } - } - } - - switch (_count % 4) { - case 1: { - const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; - const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - - __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - - if constexpr (kNonStoppable) { - l(_mm_extract_epi32(data, 0)); - } else { - if (l(_mm_extract_epi32(data, 0))) [[unlikely]] { - return; - } - } - break; - } - case 2: { - const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; - const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - - __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - - if constexpr (kNonStoppable) { - l(_mm_extract_epi32(data, 0)); - l(_mm_extract_epi32(data, 1)); - } else { - if (l(_mm_extract_epi32(data, 0))) [[unlikely]] { - return; - } - - if (l(_mm_extract_epi32(data, 1))) [[unlikely]] { - return; - } - } - break; - } - case 3: { - const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; - const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - - __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - - if constexpr (kNonStoppable) { - l(_mm_extract_epi32(data, 0)); - l(_mm_extract_epi32(data, 1)); - l(_mm_extract_epi32(data, 2)); - } else { - if (l(_mm_extract_epi32(data, 0))) [[unlikely]] { - return; - } - - if (l(_mm_extract_epi32(data, 1))) [[unlikely]] { - return; - } - - if (l(_mm_extract_epi32(data, 2))) [[unlikely]] { - return; - } - } - break; - } - } - } -#elif defined(__aarch64__) - template void decode(Lambda &&l) { - constexpr bool kNonStoppable = std::is_void_v>; - - for (std::size_t i = 0; i < _control_bytes; ++i) { - const std::uint8_t control_byte = _control_bytes_ptr[i]; - const std::uint8_t length = kLengthTable[control_byte]; - - //__m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - uint8x16_t data = vld1q_u8(_data_ptr); - _data_ptr += length; - - // const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - // data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); - data = vqtbl1q_u8(data, shuffle_mask); - - std::array out; - vst1q_u8(reinterpret_cast(out.data()), data); - - if constexpr (kNonStoppable) { - l(out[0]); - l(out[1]); - l(out[2]); - l(out[3]); - } else { - if (l(out[0])) [[unlikely]] { - return; - } - - 
if (l(out[1])) [[unlikely]] { - return; - } - - if (l(out[2])) [[unlikely]] { - return; - } - - if (l(out[3])) [[unlikely]] { - return; - } - } - } - - switch (_count % 4) { - case 1: { - const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; - // const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); - - // __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - // data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - uint8x16_t data = vld1q_u8(_data_ptr); - data = vqtbl1q_u8(data, shuffle_mask); - - std::array out; - vst1q_u8(reinterpret_cast(out.data()), data); - - if constexpr (kNonStoppable) { - l(out[0]); - } else { - if (l(out[0])) [[unlikely]] { - return; - } - } - break; - } - case 2: { - const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; - // const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); - - // __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - // data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - uint8x16_t data = vld1q_u8(_data_ptr); - data = vqtbl1q_u8(data, shuffle_mask); - - std::array out; - vst1q_u8(reinterpret_cast(out.data()), data); - - if constexpr (kNonStoppable) { - l(out[0]); - l(out[1]); - } else { - if (l(out[0])) [[unlikely]] { - return; - } - - if (l(out[1])) [[unlikely]] { - return; - } - } - break; - } - case 3: { - const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; - // const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); - const uint8x16_t shuffle_mask = vld1q_u8(kShuffleTable[control_byte].data()); - - // __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); - // data = _mm_shuffle_epi8(data, *(const __m128i *)shuffle_mask); - uint8x16_t data = vld1q_u8(_data_ptr); - data = vqtbl1q_u8(data, shuffle_mask); - - std::array out; - vst1q_u8(reinterpret_cast(out.data()), data); - - if constexpr (kNonStoppable) { - l(out[0]); - l(out[1]); - l(out[2]); - } else { - if (l(out[0])) [[unlikely]] { - return; - } - - if (l(out[1])) [[unlikely]] { - return; - } - - if (l(out[2])) [[unlikely]] { - return; - } - } - break; - } - } - } -#else - template void decode(Lambda &&l) { - throw std::runtime_error("not implemented"); - } -#endif - -private: - const std::uint8_t *_control_bytes_ptr; - const std::size_t _control_bytes; - const std::uint8_t *_data_ptr; - const std::size_t _count; -}; - -} // namespace kaminpar diff --git a/kaminpar-common/math.h b/kaminpar-common/math.h index 00171ffd..7b73e8a5 100644 --- a/kaminpar-common/math.h +++ b/kaminpar-common/math.h @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include #include @@ -18,6 +19,12 @@ namespace kaminpar::math { +template constexpr Int kSetMSB = static_cast(1) << (sizeof(Int) * 8 - 1); + +template [[nodiscard]] constexpr bool is_msb_set(const Int x) { + return (x & kSetMSB) != 0; +} + /*! * Divides two integers with ceil rounding. * @@ -55,6 +62,12 @@ template constexpr Int1 div_ceil(const Int1 x, co return x / y + (x % y != 0); } +template +[[nodiscard]] constexpr Int1 mod_ceil(const Int1 x, const Int2 y) { + const Int1 mod = x % y; + return mod == 0 ? 
y : mod; +} + template bool is_square(const Int value) { const Int sqrt = std::sqrt(value); return sqrt * sqrt == value; @@ -101,7 +114,7 @@ template constexpr Int byte_width(const Int i) { return 1; } - const Int bit_width = 1 + floor_log2(i); + const Int bit_width = std::bit_width(i); return div_ceil(bit_width, 8); } diff --git a/kaminpar-common/parallel/aligned_element.h b/kaminpar-common/parallel/aligned_element.h index 92c871e2..9da9fd0c 100644 --- a/kaminpar-common/parallel/aligned_element.h +++ b/kaminpar-common/parallel/aligned_element.h @@ -7,8 +7,7 @@ #pragma once #include - -#include "kaminpar-common/ranges.h" +#include namespace kaminpar::parallel { @@ -38,16 +37,19 @@ template struct alignas(64) Aligned { }; template struct alignas(64) AlignedVec { + using value_type = typename Vector::value_type; + using size_type = typename Vector::size_type; + Vector vec; AlignedVec() : vec() {} AlignedVec(Vector vec) : vec(std::move(vec)) {} - decltype(auto) operator[](std::size_t pos) { + decltype(auto) operator[](size_type pos) { return vec[pos]; } - decltype(auto) operator[](std::size_t pos) const { + decltype(auto) operator[](size_type pos) const { return vec[pos]; } @@ -67,20 +69,20 @@ template struct alignas(64) AlignedVec { return vec.end(); } + decltype(auto) size() const { + return vec.size(); + } + void clear() noexcept { vec.clear(); } - void resize(std::size_t count) { + void resize(size_type count) { vec.resize(count); } - [[nodiscard]] decltype(auto) entries() const { - return TransformedIotaRange( - static_cast(0), - vec.size(), - [this](const std::size_t pos) { return std::make_pair(pos, vec[pos]); } - ); + void resize(size_type count, const value_type &value) { + vec.resize(count, value); } }; diff --git a/kaminpar-common/ranges.h b/kaminpar-common/ranges.h index 50e3a41d..7ab848b2 100644 --- a/kaminpar-common/ranges.h +++ b/kaminpar-common/ranges.h @@ -66,6 +66,8 @@ template class IotaRange { }; template class TransformedIotaRange { + using Self = TransformedIotaRange; + public: class iterator { public: @@ -123,6 +125,10 @@ template class TransformedIotaRange { return _end; } + const Self &entries() const { + return *this; + } + private: iterator _begin; iterator _end; diff --git a/kaminpar-dist/context_io.cc b/kaminpar-dist/context_io.cc index 8c82f9ff..374d2946 100644 --- a/kaminpar-dist/context_io.cc +++ b/kaminpar-dist/context_io.cc @@ -353,7 +353,7 @@ void print(const ChunksContext &ctx, const ParallelContext ¶llel, std::ostre << (ctx.scale_chunks_with_threads ? std::string(" / ") + std::to_string(parallel.num_threads) : "") - << "]\n"; + << ")]\n"; } else { out << " Number of chunks: " << ctx.fixed_num_chunks << "\n"; } @@ -380,8 +380,8 @@ void print( out << "Enabled: " << (ctx.enabled ? 
"yes" : "no") << "\n"; if (ctx.enabled) { out << "Compression Scheme: Gap Encoding + "; - if constexpr (Compression::kStreamEncoding) { - out << "VarInt Stream Encoding\n"; + if constexpr (Compression::kStreamVByteEncoding) { + out << "StreamVByte Encoding\n"; } else if constexpr (Compression::kRunLengthEncoding) { out << "VarInt Run-Length Encoding\n"; } else { @@ -399,9 +399,6 @@ void print( out << " Length Threshold: " << Compression::kIntervalLengthTreshold << "\n"; } - out << " Isolated Nodes Separation: " << yeyornay(Compression::kIsolatedNodesSeparation) - << "\n"; - out << "Compression ratio: [Min=" << round(ctx.min_compression_ratio) << " | Mean=" << round(ctx.avg_compression_ratio) << " | Max=" << round(ctx.max_compression_ratio) << "]" diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index 854c1052..9c7370df 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -283,7 +283,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - _compressed_neighborhoods.decode(u, [&](const EdgeID, const NodeID v, const EdgeWeight w) { + _compressed_neighborhoods.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(v, w); } else { @@ -297,7 +297,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - _compressed_neighborhoods.decode(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + _compressed_neighborhoods.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(e, v, w); } else { @@ -313,7 +313,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); _compressed_neighborhoods - .decode(u, max_num_neighbors, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + .neighbors(u, max_num_neighbors, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(e, v, w); } else { diff --git a/kaminpar-dist/datastructures/ghost_node_mapper.h b/kaminpar-dist/datastructures/ghost_node_mapper.h index 31e07bb2..a4e0177b 100644 --- a/kaminpar-dist/datastructures/ghost_node_mapper.h +++ b/kaminpar-dist/datastructures/ghost_node_mapper.h @@ -34,18 +34,45 @@ class CompactGhostNodeMapping { ) : _num_nodes(num_nodes), _num_ghost_nodes(num_ghost_nodes), + _use_dense_global_to_ghost(true), _global_to_ghost_bitmap(std::move(global_to_ghost_bitmap)), _dense_global_to_ghost(std::move(dense_global_to_ghost)), _ghost_to_global(std::move(ghost_to_global)), _ghost_owner(std::move(ghost_owner)) {} + explicit CompactGhostNodeMapping( + const NodeID num_nodes, + const NodeID num_ghost_nodes, + growt::StaticGhostNodeMapping sparse_global_to_ghost, + CompactStaticArray ghost_to_global, + CompactStaticArray ghost_owner + ) + : _num_nodes(num_nodes), + _num_ghost_nodes(num_ghost_nodes), + _use_dense_global_to_ghost(false), + _sparse_global_to_ghost(std::move(sparse_global_to_ghost)), + _ghost_to_global(std::move(ghost_to_global)), + _ghost_owner(std::move(ghost_owner)) {} + + [[nodiscard]] NodeID num_ghost_nodes() const { + return 
_num_ghost_nodes; + } + [[nodiscard]] bool contains_global_as_ghost(const GlobalNodeID global_node) const { - return _global_to_ghost_bitmap.is_set(global_node); + if (_use_dense_global_to_ghost) [[likely]] { + return _global_to_ghost_bitmap.is_set(global_node); + } else { + return _sparse_global_to_ghost.find(global_node + 1) != _sparse_global_to_ghost.end(); + } } [[nodiscard]] NodeID global_to_ghost(const GlobalNodeID global_node) const { - const NodeID dense_index = _global_to_ghost_bitmap.rank(global_node); - return _dense_global_to_ghost[dense_index] + _num_nodes; + if (_use_dense_global_to_ghost) [[likely]] { + const NodeID dense_index = _global_to_ghost_bitmap.rank(global_node); + return _dense_global_to_ghost[dense_index] + _num_nodes; + } else { + return (*_sparse_global_to_ghost.find(global_node + 1)).second; + } } [[nodiscard]] GlobalNodeID ghost_to_global(const NodeID ghost_node) const { @@ -56,15 +83,16 @@ class CompactGhostNodeMapping { return _ghost_owner[ghost_node]; } - [[nodiscard]] NodeID num_ghost_nodes() const { - return _num_ghost_nodes; - } - private: NodeID _num_nodes; NodeID _num_ghost_nodes; + + bool _use_dense_global_to_ghost; + growt::StaticGhostNodeMapping _sparse_global_to_ghost; + RankCombinedBitVector<> _global_to_ghost_bitmap; CompactStaticArray _dense_global_to_ghost; + CompactStaticArray _ghost_to_global; CompactStaticArray _ghost_owner; }; @@ -81,15 +109,13 @@ class CompactGhostNodeMappingBuilder { ) : _num_nodes(static_cast(node_distribution[rank + 1] - node_distribution[rank])), _node_distribution(node_distribution.begin(), node_distribution.end()), - _next_ghost_node(_num_nodes), - _global_to_ghost_bitmap(node_distribution.back()) {} + _next_ghost_node(_num_nodes) {} NodeID new_ghost_node(const GlobalNodeID global_node) { GhostNodeMap::accessor entry; if (_global_to_ghost.insert(entry, global_node)) { const NodeID ghost_node = _next_ghost_node++; entry->second = ghost_node; - _global_to_ghost_bitmap.set(global_node); } else { [[maybe_unused]] const bool found = _global_to_ghost.find(entry, global_node); KASSERT(found); @@ -108,11 +134,6 @@ class CompactGhostNodeMappingBuilder { const GlobalNodeID num_global_nodes = _node_distribution.back(); const std::size_t num_processes = _node_distribution.size() - 1; - RECORD("dense_global_to_ghost") - CompactStaticArray dense_global_to_ghost( - math::byte_width(num_ghost_nodes - 1), num_ghost_nodes - ); - RECORD("ghost_to_global") CompactStaticArray ghost_to_global( math::byte_width(num_global_nodes - 1), num_ghost_nodes @@ -121,33 +142,76 @@ class CompactGhostNodeMappingBuilder { RECORD("ghost_owner") CompactStaticArray ghost_owner(math::byte_width(num_processes - 1), num_ghost_nodes); - _global_to_ghost_bitmap.update(); - for (const auto [global_node, local_node] : _global_to_ghost) { - const NodeID local_ghost = local_node - _num_nodes; + const auto foreach_global_to_ghost = [&](auto &&l) { + for (const auto [global_node, local_node] : _global_to_ghost) { + const NodeID local_ghost = local_node - _num_nodes; - const auto owner_it = - std::upper_bound(_node_distribution.begin() + 1, _node_distribution.end(), global_node); - const auto owner = static_cast(std::distance(_node_distribution.begin(), owner_it) - 1); + const auto owner_it = + std::upper_bound(_node_distribution.begin() + 1, _node_distribution.end(), global_node); + const auto owner = + static_cast(std::distance(_node_distribution.begin(), owner_it) - 1); - KASSERT(local_ghost < dense_global_to_ghost.size()); - KASSERT(local_ghost < 
ghost_to_global.size()); - KASSERT(local_ghost < ghost_owner.size()); + l(global_node, local_node, local_ghost, owner); + } + }; - const std::size_t dense_index = _global_to_ghost_bitmap.rank(global_node); - dense_global_to_ghost.write(dense_index, local_ghost); + const std::size_t sparse_size = + num_ghost_nodes * sizeof(growt::StaticGhostNodeMapping::atomic_slot_type); + const std::size_t dense_size = + num_global_nodes / 8 + num_ghost_nodes * math::byte_width(num_ghost_nodes - 1); + + if (sparse_size >= dense_size) { + RankCombinedBitVector global_to_ghost_bitmap(_node_distribution.back()); + foreach_global_to_ghost([&](const GlobalNodeID global_node, + const NodeID local_node, + const NodeID local_ghost, + const PEID owner) { global_to_ghost_bitmap.set(global_node); }); + global_to_ghost_bitmap.update(); + + RECORD("dense_global_to_ghost") + CompactStaticArray dense_global_to_ghost( + math::byte_width(num_ghost_nodes - 1), num_ghost_nodes + ); + foreach_global_to_ghost([&](const GlobalNodeID global_node, + const NodeID local_node, + const NodeID local_ghost, + const PEID owner) { + const std::size_t dense_index = global_to_ghost_bitmap.rank(global_node); + dense_global_to_ghost.write(dense_index, local_ghost); + + ghost_to_global.write(local_ghost, global_node); + ghost_owner.write(local_ghost, owner); + }); + + return CompactGhostNodeMapping( + _num_nodes, + num_ghost_nodes, + std::move(global_to_ghost_bitmap), + std::move(dense_global_to_ghost), + std::move(ghost_to_global), + std::move(ghost_owner) + ); + } else { + growt::StaticGhostNodeMapping global_to_ghost(num_ghost_nodes); + foreach_global_to_ghost([&](const GlobalNodeID global_node, + const NodeID local_node, + const NodeID local_ghost, + const PEID owner) { + DBG << "Map global node " << global_node << " to local ghost node " << local_node; + global_to_ghost.insert(global_node + 1, local_node); - ghost_to_global.write(local_ghost, global_node); - ghost_owner.write(local_ghost, owner); + ghost_to_global.write(local_ghost, global_node); + ghost_owner.write(local_ghost, owner); + }); + + return CompactGhostNodeMapping( + _num_nodes, + num_ghost_nodes, + std::move(global_to_ghost), + std::move(ghost_to_global), + std::move(ghost_owner) + ); } - - return CompactGhostNodeMapping( - _num_nodes, - num_ghost_nodes, - std::move(_global_to_ghost_bitmap), - std::move(dense_global_to_ghost), - std::move(ghost_to_global), - std::move(ghost_owner) - ); } private: @@ -156,7 +220,6 @@ class CompactGhostNodeMappingBuilder { NodeID _next_ghost_node; GhostNodeMap _global_to_ghost; - RankCombinedBitVector<> _global_to_ghost_bitmap; }; namespace graph { diff --git a/kaminpar-shm/coarsening/cluster_coarsener.cc b/kaminpar-shm/coarsening/cluster_coarsener.cc index 42f534fe..6caed5e1 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.cc +++ b/kaminpar-shm/coarsening/cluster_coarsener.cc @@ -95,6 +95,18 @@ PartitionedGraph ClusteringCoarsener::uncoarsen(PartitionedGraph &&p_graph) { return {current(), p_graph_k, std::move(partition)}; } +void ClusteringCoarsener::release_allocated_memory() { + SCOPED_HEAP_PROFILER("Deallocation"); + SCOPED_TIMER("Deallocation"); + + _clustering_algorithm.reset(); + + _contraction_m_ctx.buckets.free(); + _contraction_m_ctx.buckets_index.free(); + _contraction_m_ctx.leader_mapping.free(); + _contraction_m_ctx.all_buffered_nodes.free(); +} + std::unique_ptr ClusteringCoarsener::pop_hierarchy(PartitionedGraph &&p_graph) { KASSERT(!empty(), "cannot pop from an empty graph hierarchy", assert::light); diff --git 
a/kaminpar-shm/coarsening/cluster_coarsener.h b/kaminpar-shm/coarsening/cluster_coarsener.h index 6f443a02..84355429 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.h +++ b/kaminpar-shm/coarsening/cluster_coarsener.h @@ -38,6 +38,8 @@ class ClusteringCoarsener : public Coarsener { return _hierarchy.size(); } + void release_allocated_memory() final; + private: std::unique_ptr pop_hierarchy(PartitionedGraph &&p_graph); diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc index 3e88ae72..aff20e7a 100644 --- a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc +++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc @@ -49,6 +49,7 @@ class LPClusteringImpl final Base::set_max_degree(_lp_ctx.large_degree_threshold); Base::set_max_num_neighbors(_lp_ctx.max_num_neighbors); Base::set_implementation(_lp_ctx.impl); + Base::set_tie_breaking_strategy(_lp_ctx.tie_breaking_strategy); Base::set_second_phase_selection_strategy(_lp_ctx.second_phase_selection_strategy); Base::set_second_phase_aggregation_strategy(_lp_ctx.second_phase_aggregation_strategy); Base::set_relabel_before_second_phase(_lp_ctx.relabel_before_second_phase); @@ -246,55 +247,100 @@ class LPClusteringImpl final template [[nodiscard]] ClusterID select_best_cluster( const bool store_favored_cluster, + const EdgeWeight gain_delta, Base::ClusterSelectionState &state, RatingMap &map, - ScalableVector &tie_breaking_clusters + ScalableVector &tie_breaking_clusters, + ScalableVector &tie_breaking_favored_clusters ) { + const bool use_uniform_tie_breaking = _tie_breaking_strategy == TieBreakingStrategy::UNIFORM; + ClusterID favored_cluster = state.initial_cluster; + if (use_uniform_tie_breaking) { + const auto accept_cluster = [&] { + return state.current_cluster_weight + state.u_weight <= + max_cluster_weight(state.current_cluster) || + state.current_cluster == state.initial_cluster; + }; + + for (const auto [cluster, rating] : map.entries()) { + state.current_cluster = cluster; + state.current_gain = rating - gain_delta; + state.current_cluster_weight = cluster_weight(cluster); + + if (state.current_gain > state.best_gain) { + if (store_favored_cluster) { + tie_breaking_favored_clusters.clear(); + tie_breaking_favored_clusters.push_back(state.current_cluster); + + favored_cluster = state.current_cluster; + } - const EdgeWeight gain_delta = (Config::kUseActualGain) ? 
map[state.initial_cluster] : 0; - for (const auto [cluster, rating] : map.entries()) { - state.current_cluster = cluster; - state.current_gain = rating - gain_delta; - state.current_cluster_weight = cluster_weight(cluster); + if (accept_cluster()) { + tie_breaking_clusters.clear(); + tie_breaking_clusters.push_back(state.current_cluster); - if (state.current_gain > state.best_gain) { - if (store_favored_cluster) { - favored_cluster = state.current_cluster; + state.best_cluster = state.current_cluster; + state.best_gain = state.current_gain; + } + } else if (state.current_gain == state.best_gain) { + if (store_favored_cluster) { + tie_breaking_favored_clusters.push_back(state.current_cluster); + } + + if (accept_cluster()) { + tie_breaking_clusters.push_back(state.current_cluster); + } } + } - if (accept_cluster(state)) { - tie_breaking_clusters.clear(); - tie_breaking_clusters.push_back(state.current_cluster); + if (tie_breaking_clusters.size() > 1) { + const ClusterID i = state.local_rand.random_index(0, tie_breaking_clusters.size()); + const ClusterID best_cluster = tie_breaking_clusters[i]; + state.best_cluster = best_cluster; + } + tie_breaking_clusters.clear(); + if (tie_breaking_favored_clusters.size() > 1) { + const ClusterID i = state.local_rand.random_index(0, tie_breaking_favored_clusters.size()); + const ClusterID best_favored_cluster = tie_breaking_favored_clusters[i]; + favored_cluster = best_favored_cluster; + } + tie_breaking_favored_clusters.clear(); + + return favored_cluster; + } else { + const auto accept_cluster = [&] { + return (state.current_gain > state.best_gain || + (state.current_gain == state.best_gain && state.local_rand.random_bool())) && + (state.current_cluster_weight + state.u_weight <= + max_cluster_weight(state.current_cluster) || + state.current_cluster == state.initial_cluster); + }; + + for (const auto [cluster, rating] : map.entries()) { + state.current_cluster = cluster; + state.current_gain = rating - gain_delta; + state.current_cluster_weight = cluster_weight(cluster); + + if (store_favored_cluster && state.current_gain > state.best_gain) { + favored_cluster = state.current_cluster; + } + + if (accept_cluster()) { state.best_cluster = state.current_cluster; + state.best_cluster_weight = state.current_cluster_weight; state.best_gain = state.current_gain; } - } else if (state.current_gain == state.best_gain && accept_cluster(state)) { - tie_breaking_clusters.push_back(state.current_cluster); } - } - if (tie_breaking_clusters.size() > 1) { - const ClusterID index = state.local_rand.random_index(0, tie_breaking_clusters.size()); - const ClusterID best_cluster = tie_breaking_clusters[index]; - state.best_cluster = best_cluster; + return favored_cluster; } - - tie_breaking_clusters.clear(); - return favored_cluster; - } - - [[nodiscard]] bool accept_cluster(const Base::ClusterSelectionState &state) { - return (state.current_gain > state.best_gain || - (state.current_gain == state.best_gain && state.local_rand.random_bool())) && - (state.current_cluster_weight + state.u_weight <= - max_cluster_weight(state.current_cluster) || - state.current_cluster == state.initial_cluster); } using Base::_current_num_clusters; using Base::_graph; + using Base::_tie_breaking_strategy; const LabelPropagationCoarseningContext &_lp_ctx; NodeWeight _max_cluster_weight = kInvalidBlockWeight; diff --git a/kaminpar-shm/coarsening/coarsener.h b/kaminpar-shm/coarsening/coarsener.h index e3e608e1..929e0a14 100644 --- a/kaminpar-shm/coarsening/coarsener.h +++ 
b/kaminpar-shm/coarsening/coarsener.h @@ -67,5 +67,11 @@ class Coarsener { * @return partition of the *new* coarsest graph. */ virtual PartitionedGraph uncoarsen(PartitionedGraph &&p_graph) = 0; + + /** + * Releases the memory reserved for coarsening; afterwards, coarsen() can no longer be + * called. + */ + virtual void release_allocated_memory() = 0; }; } // namespace kaminpar::shm diff --git a/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc index 665966d5..d9423e40 100644 --- a/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc @@ -12,6 +12,7 @@ #include "kaminpar-shm/kaminpar.h" #include "kaminpar-common/datastructures/compact_static_array.h" +#include "kaminpar-common/datastructures/dynamic_map.h" #include "kaminpar-common/datastructures/rating_map.h" #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/heap_profiler.h" @@ -19,7 +20,7 @@ namespace kaminpar::shm::contraction { namespace { -class ConstantSizeNeighborhoodsBuffer { +class NeighborhoodsBuffer { static constexpr NodeID kSize = 30000; // Chosen such that its about 1 MB in size public: @@ -27,7 +28,7 @@ return degree >= kSize; } - ConstantSizeNeighborhoodsBuffer( + NeighborhoodsBuffer( EdgeID *nodes, NodeID *edges, NodeWeight *node_weights, @@ -123,29 +124,18 @@ std::unique_ptr contract_clustering_unbuffered( // Overcomit memory for the edge and edge weight array as we only know the amount of edges of // the coarse graph afterwards. - const EdgeID edge_count = graph.m(); - auto c_edges = heap_profiler::overcommit_memory(edge_count); - auto c_edge_weights = heap_profiler::overcommit_memory(edge_count); + const EdgeID num_fine_edges = graph.m(); + auto c_edges = heap_profiler::overcommit_memory(num_fine_edges); + auto c_edge_weights = heap_profiler::overcommit_memory(num_fine_edges); START_HEAP_PROFILER("Construct coarse graph"); START_TIMER("Construct coarse graph"); - CompactStaticArray remapping(static_cast(math::byte_width(c_n)), c_n); - - tbb::enumerable_thread_specific> collector{[&] { return RatingMap(c_n); }}; - - tbb::enumerable_thread_specific neighborhoods_buffer_ets{[&] { return ConstantSizeNeighborhoodsBuffer( c_nodes.data(), c_edges.get(), c_node_weights.data(), c_edge_weights.get(), remapping ); }}; - + CompactStaticArray remapping(math::byte_width(c_n), c_n); const auto write_neighbourhood = [&](const NodeID c_u, + const NodeWeight c_u_weight, const NodeID new_c_u, EdgeID edge, - const NodeWeight c_u_weight, auto &map) { remapping.write(c_u, new_c_u); @@ -160,86 +150,117 @@ std::unique_ptr contract_clustering_unbuffered( }; __uint128_t next_coarse_node_info = 0; - const auto &atomic_fetch_next_coarse_node_info = [&](std::uint64_t nodes, std::uint64_t degree) { - std::uint64_t old_c_v; - std::uint64_t old_edge; + const auto atomic_fetch_next_coarse_node_info = [&](const std::uint64_t nodes, + const std::uint64_t degree) { + std::uint64_t c_v; + std::uint64_t edge; bool success; do { - __uint128_t expected = next_coarse_node_info; - old_c_v = (expected >> 64) & 0xFFFFFFFFFFFFFFFF; - old_edge = expected & 0xFFFFFFFFFFFFFFFF; + const __uint128_t expected = next_coarse_node_info; + c_v = (expected >> 64) & 0xFFFFFFFFFFFFFFFF; + edge = expected & 0xFFFFFFFFFFFFFFFF; - __uint128_t desired = (static_cast<__uint128_t>(old_c_v + nodes) << 64) |
- static_cast<__uint128_t>(old_edge + degree); + const __uint128_t desired = + (static_cast<__uint128_t>(c_v + nodes) << 64) | static_cast<__uint128_t>(edge + degree); success = __sync_bool_compare_and_swap(&next_coarse_node_info, expected, desired); } while (!success); - return std::make_pair(old_c_v, old_edge); + return std::make_pair(c_v, edge); }; - tbb::parallel_for(tbb::blocked_range(0, c_n), [&](const auto &r) { - auto &local_collector = collector.local(); - auto &local_buffer = neighborhoods_buffer_ets.local(); - - for (NodeID c_u = r.begin(); c_u != r.end(); ++c_u) { - const NodeID first = buckets_index[c_u]; - const NodeID last = buckets_index[c_u + 1]; + const auto aggregate_edges = [&](const NodeID c_u, + const NodeID first, + const NodeID last, + auto &edge_collector, + auto &neighborhood_buffer) { + NodeWeight c_u_weight = 0; + for (NodeID i = first; i < last; ++i) { + const NodeID u = buckets[i]; + KASSERT(mapping[u] == c_u); + + c_u_weight += graph.node_weight(u); + + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + const NodeID c_v = mapping[v]; + if (c_u != c_v) { + edge_collector[c_v] += w; + } + }); + } - // Build coarse graph - const auto collect_edges = [&](auto &map) { - NodeWeight c_u_weight = 0; - for (NodeID i = first; i < last; ++i) { - const NodeID u = buckets[i]; - KASSERT(mapping[u] == c_u); + const std::size_t degree = edge_collector.size(); + if (NeighborhoodsBuffer::exceeds_capacity(degree)) { + auto [new_c_u, edge] = atomic_fetch_next_coarse_node_info(1, degree); + write_neighbourhood(c_u, c_u_weight, new_c_u, edge, edge_collector); + } else if (neighborhood_buffer.overfills(degree)) { + const NodeID num_buffered_nodes = neighborhood_buffer.num_buffered_nodes(); + const EdgeID num_buffered_edges = neighborhood_buffer.num_buffered_edges(); + const auto [new_c_u, edge] = + atomic_fetch_next_coarse_node_info(num_buffered_nodes + 1, num_buffered_edges + degree); + neighborhood_buffer.flush(new_c_u, edge); + write_neighbourhood( + c_u, c_u_weight, new_c_u + num_buffered_nodes, edge + num_buffered_edges, edge_collector + ); + } else { + neighborhood_buffer.add(c_u, degree, c_u_weight, [&](auto &&l) { + for (const auto [c_v, weight] : edge_collector.entries()) { + l(c_v, weight); + } + }); + } - c_u_weight += graph.node_weight(u); + edge_collector.clear(); + }; - graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { - const NodeID c_v = mapping[v]; - if (c_u != c_v) { - map[c_v] += w; - } - }); - } + tbb::enumerable_thread_specific neighborhoods_buffer_ets{[&] { + return NeighborhoodsBuffer( + c_nodes.data(), c_edges.get(), c_node_weights.data(), c_edge_weights.get(), remapping + ); + }}; - const std::size_t degree = map.size(); - if (ConstantSizeNeighborhoodsBuffer::exceeds_capacity(degree)) { - auto [new_c_u, edge] = atomic_fetch_next_coarse_node_info(1, degree); - write_neighbourhood(c_u, new_c_u, edge, c_u_weight, map); - } else if (local_buffer.overfills(degree)) { - const NodeID num_buffered_nodes = local_buffer.num_buffered_nodes(); - const EdgeID num_buffered_edges = local_buffer.num_buffered_edges(); - const auto [new_c_u, edge] = atomic_fetch_next_coarse_node_info( - num_buffered_nodes + 1, num_buffered_edges + degree - ); - local_buffer.flush(new_c_u, edge); - write_neighbourhood( - c_u, new_c_u + num_buffered_nodes, edge + num_buffered_edges, c_u_weight, map - ); - } else { - local_buffer.add(c_u, degree, c_u_weight, [&](auto &&l) { - for (const auto [c_v, weight] : map.entries()) { - l(c_v, weight); - } - }); - } + if 
(con_ctx.use_growing_hash_tables) { + using EdgeCollector = DynamicRememberingFlatMap; + tbb::enumerable_thread_specific edge_collector_ets; - map.clear(); - }; + tbb::parallel_for(tbb::blocked_range(0, c_n), [&](const auto &r) { + auto &local_collector = edge_collector_ets.local(); + auto &local_buffer = neighborhoods_buffer_ets.local(); - // To select the right map, we need a upper bound on the coarse node degree. If we - // previously split the coarse nodes into chunks, we have already computed them and stored - // them in the c_nodes array. - NodeID upper_bound_degree = 0; - for (NodeID i = first; i < last; ++i) { - const NodeID u = buckets[i]; - upper_bound_degree += graph.degree(u); + for (NodeID c_u = r.begin(); c_u != r.end(); ++c_u) { + const NodeID first = buckets_index[c_u]; + const NodeID last = buckets_index[c_u + 1]; + aggregate_edges(c_u, first, last, local_collector, local_buffer); } + }); + } else { + using EdgeCollector = RatingMap; + tbb::enumerable_thread_specific edge_collector_ets{[&] { + return EdgeCollector(c_n); + }}; + + tbb::parallel_for(tbb::blocked_range(0, c_n), [&](const auto &r) { + auto &local_collector = edge_collector_ets.local(); + auto &local_buffer = neighborhoods_buffer_ets.local(); + + for (NodeID c_u = r.begin(); c_u != r.end(); ++c_u) { + const NodeID first = buckets_index[c_u]; + const NodeID last = buckets_index[c_u + 1]; + + // To select the right map, we compute an upper bound on the coarse node degree by summing + // the degrees of all fine nodes. + NodeID upper_bound_degree = 0; + for (NodeID i = first; i < last; ++i) { + const NodeID u = buckets[i]; + upper_bound_degree += graph.degree(u); } - local_collector.execute(upper_bound_degree, collect_edges); - } - }); + local_collector.execute(upper_bound_degree, [&](auto &edge_collector) { + aggregate_edges(c_u, first, last, edge_collector, local_buffer); + }); + } + }); + } tbb::parallel_for(neighborhoods_buffer_ets.range(), [&](auto &r) { for (auto &buffer : r) { diff --git a/kaminpar-shm/coarsening/noop_coarsener.h b/kaminpar-shm/coarsening/noop_coarsener.h index 243580c3..1b6f8bfa 100644 --- a/kaminpar-shm/coarsening/noop_coarsener.h +++ b/kaminpar-shm/coarsening/noop_coarsener.h @@ -35,6 +35,8 @@ class NoopCoarsener : public Coarsener { return std::move(p_graph); } + void release_allocated_memory() final {}; + private: const Graph *_graph = nullptr; }; diff --git a/kaminpar-shm/context.cc b/kaminpar-shm/context.cc index bbe090cd..8dd9c975 100644 --- a/kaminpar-shm/context.cc +++ b/kaminpar-shm/context.cc @@ -22,8 +22,7 @@ void GraphCompressionContext::setup(const Graph &graph) { interval_encoding = CompressedGraph::kIntervalEncoding; interval_length_treshold = CompressedGraph::kIntervalLengthTreshold; run_length_encoding = CompressedGraph::kRunLengthEncoding; - stream_encoding = CompressedGraph::kStreamEncoding; - isolated_nodes_separation = CompressedGraph::kIsolatedNodesSeparation; + streamvbyte_encoding = CompressedGraph::kStreamVByteEncoding; if (enabled) { if (const auto *compressed_graph = diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc index 63106ecc..4c967029 100644 --- a/kaminpar-shm/context_io.cc +++ b/kaminpar-shm/context_io.cc @@ -15,7 +15,6 @@ #include "kaminpar-common/asserting_cast.h" #include "kaminpar-common/console_io.h" -#include "kaminpar-common/graph-compression/varint_codec.h" #include "kaminpar-common/random.h" #include "kaminpar-common/strutils.h" @@ -288,6 +287,24 @@ std::ostream &operator<<(std::ostream &out, const GainCacheStrategy strategy) {
return out << ""; } +std::unordered_map get_tie_breaking_strategies() { + return { + {"geometric", TieBreakingStrategy::GEOMETRIC}, + {"uniform", TieBreakingStrategy::UNIFORM}, + }; +} + +std::ostream &operator<<(std::ostream &out, const TieBreakingStrategy strategy) { + switch (strategy) { + case TieBreakingStrategy::GEOMETRIC: + return out << "geometric"; + case TieBreakingStrategy::UNIFORM: + return out << "uniform"; + } + + return out << ""; +} + std::ostream &operator<<(std::ostream &out, const TwoHopStrategy strategy) { switch (strategy) { case TwoHopStrategy::DISABLE: @@ -393,8 +410,8 @@ void print(const GraphCompressionContext &c_ctx, std::ostream &out) { out << "Compression Scheme: Gap Encoding + "; if (c_ctx.run_length_encoding) { out << "VarInt Run-Length Encoding\n"; - } else if (c_ctx.stream_encoding) { - out << "VarInt Stream Encoding\n"; + } else if (c_ctx.streamvbyte_encoding) { + out << "StreamVByte Encoding\n"; } else { out << "VarInt Encoding\n"; } @@ -410,8 +427,6 @@ void print(const GraphCompressionContext &c_ctx, std::ostream &out) { if (c_ctx.interval_encoding) { out << " Length Threshold: " << c_ctx.interval_length_treshold << "\n"; } - out << " Isolated Nodes Separation: " << (c_ctx.isolated_nodes_separation ? "yes" : "no") - << "\n"; out << "Compresion Ratio: "; if (c_ctx.dismissed) { @@ -424,29 +439,6 @@ void print(const GraphCompressionContext &c_ctx, std::ostream &out) { out << " High Degree Part Count: " << c_ctx.num_high_degree_parts << "\n"; out << " Interval Node Count: " << c_ctx.num_interval_nodes << "\n"; out << " Interval Count: " << c_ctx.num_intervals << "\n"; - - if (debug::kTrackVarintStats) { - const auto &stats = debug::varint_stats_global(); - - const float avg_varint_len = - (stats.varint_count == 0) ? 0 : (stats.varint_bytes / (float)stats.varint_count); - out << "Average Varint Length: " << avg_varint_len - << " [count: " << stats.varint_count << "]\n"; - - const float avg_signed_varint_len = - (stats.signed_varint_count == 0) - ? 0 - : (stats.signed_varint_bytes / (float)stats.signed_varint_count); - out << "Average Signed Varint Length: " << avg_signed_varint_len - << " [count: " << stats.signed_varint_count << "]\n"; - - const float avg_marked_varint_len = - (stats.marked_varint_count == 0) - ? 0 - : (stats.marked_varint_bytes / (float)stats.marked_varint_count); - out << "Average Marked Varint Length: " << avg_marked_varint_len - << " [count: " << stats.marked_varint_count << "]\n"; - } } } } @@ -494,6 +486,9 @@ void print(const CoarseningContext &c_ctx, std::ostream &out) { out << "Contraction mode: " << c_ctx.contraction.mode << '\n'; if (c_ctx.contraction.mode == ContractionMode::BUFFERED) { out << " Edge buffer fill fraction: " << c_ctx.contraction.edge_buffer_fill_fraction << "\n"; + } else if (c_ctx.contraction.mode == ContractionMode::UNBUFFERED) { + out << " Use growing hash tables: " + << (c_ctx.contraction.use_growing_hash_tables ? 
"yes" : "no") << "\n"; } } @@ -501,6 +496,7 @@ void print(const LabelPropagationCoarseningContext &lp_ctx, std::ostream &out) { out << " Number of iterations: " << lp_ctx.num_iterations << "\n"; out << " High degree threshold: " << lp_ctx.large_degree_threshold << "\n"; out << " Max degree: " << lp_ctx.max_num_neighbors << "\n"; + out << " Tie breaking strategy: " << lp_ctx.tie_breaking_strategy << "\n"; out << " Cluster weights struct: " << lp_ctx.cluster_weights_structure << "\n"; out << " Implementation: " << lp_ctx.impl << "\n"; if (lp_ctx.impl == LabelPropagationImplementation::TWO_PHASE) { @@ -524,6 +520,7 @@ void print(const RefinementContext &r_ctx, std::ostream &out) { if (r_ctx.includes_algorithm(RefinementAlgorithm::LABEL_PROPAGATION)) { out << "Label propagation:\n"; out << " Number of iterations: " << r_ctx.lp.num_iterations << "\n"; + out << " Tie breaking strategy: " << r_ctx.lp.tie_breaking_strategy << "\n"; out << " Implementation: " << r_ctx.lp.impl << "\n"; if (r_ctx.lp.impl == LabelPropagationImplementation::TWO_PHASE) { out << " Selection strategy: " << r_ctx.lp.second_phase_selection_strategy << '\n'; diff --git a/kaminpar-shm/context_io.h b/kaminpar-shm/context_io.h index 25f21b1f..467ec963 100644 --- a/kaminpar-shm/context_io.h +++ b/kaminpar-shm/context_io.h @@ -59,6 +59,10 @@ std::unordered_map get_initial_partitionin std::ostream &operator<<(std::ostream &out, GainCacheStrategy strategy); +std::unordered_map get_tie_breaking_strategies(); + +std::ostream &operator<<(std::ostream &out, TieBreakingStrategy strategy); + std::ostream &operator<<(std::ostream &out, SecondPhaseSelectionStrategy strategy); std::unordered_map diff --git a/kaminpar-shm/datastructures/compressed_graph.h b/kaminpar-shm/datastructures/compressed_graph.h index 883c5705..ba89f8e3 100644 --- a/kaminpar-shm/datastructures/compressed_graph.h +++ b/kaminpar-shm/datastructures/compressed_graph.h @@ -74,16 +74,9 @@ class CompressedGraph : public AbstractGraph { static constexpr bool kRunLengthEncoding = CompressedNeighborhoods::kRunLengthEncoding; /*! - * Whether stream encoding is used. + * Whether StreamVByte encoding is used. */ - static constexpr bool kStreamEncoding = CompressedNeighborhoods::kStreamEncoding; - - /*! - * Whether the isolated nodes of the compressed graph are continuously stored - * at the end of the nodes array. - */ - static constexpr bool kIsolatedNodesSeparation = - CompressedNeighborhoods::kIsolatedNodesSeparation; + static constexpr bool kStreamVByteEncoding = CompressedNeighborhoods::kStreamVByteEncoding; /*! * Constructs a new compressed graph. 
@@ -144,7 +137,7 @@ class CompressedGraph : public AbstractGraph { } [[nodiscard]] inline EdgeWeight total_edge_weight() const final { - return _total_edge_weight; + return _compressed_neighborhoods.total_edge_weight(); } // @@ -184,7 +177,7 @@ class CompressedGraph : public AbstractGraph { constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - _compressed_neighborhoods.decode(u, [&](const EdgeID, const NodeID v, const EdgeWeight w) { + _compressed_neighborhoods.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(v, w); } else { @@ -198,7 +191,7 @@ class CompressedGraph : public AbstractGraph { constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - _compressed_neighborhoods.decode(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + _compressed_neighborhoods.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(e, v, w); } else { @@ -214,7 +207,7 @@ class CompressedGraph : public AbstractGraph { static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); _compressed_neighborhoods - .decode(u, max_num_neighbors, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + .neighbors(u, max_num_neighbors, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(e, v, w); } else { @@ -239,8 +232,7 @@ class CompressedGraph : public AbstractGraph { inline void pfor_neighbors( const NodeID u, const NodeID max_num_neighbors, const NodeID grainsize, Lambda &&l ) const { - constexpr bool kParallelDecoding = true; - _compressed_neighborhoods.decode(u, std::forward(l)); + _compressed_neighborhoods.parallel_neighbors(u, std::forward(l)); } // @@ -428,7 +420,6 @@ class CompressedGraph : public AbstractGraph { NodeWeight _max_node_weight = kInvalidNodeWeight; NodeWeight _total_node_weight = kInvalidNodeWeight; - EdgeWeight _total_edge_weight = kInvalidEdgeWeight; StaticArray _permutation; bool _sorted; diff --git a/kaminpar-shm/graphutils/parallel_compressed_graph_builder.h b/kaminpar-shm/graphutils/parallel_compressed_graph_builder.h index 12f798e4..32a5eb6f 100644 --- a/kaminpar-shm/graphutils/parallel_compressed_graph_builder.h +++ b/kaminpar-shm/graphutils/parallel_compressed_graph_builder.h @@ -210,7 +210,7 @@ template < }); using CompressedEdgesBuilder = kaminpar::CompressedEdgesBuilder; - tbb::enumerable_thread_specific neighbourhood_builder_ets([&] { + tbb::enumerable_thread_specific edges_builder_ets([&] { return CompressedEdgesBuilder( num_nodes, num_edges, max_degree, kHasEdgeWeights, builder.edge_weights() ); @@ -227,13 +227,13 @@ template < auto &offsets = offsets_ets.local(); auto &neighbourhood = neighbourhood_ets.local(); - auto &neighbourhood_builder = neighbourhood_builder_ets.local(); + auto &edges_builder = edges_builder_ets.local(); const NodeID chunk = buffer.next(); const auto [start, end, first_edge] = chunks[chunk]; NodeWeight local_node_weight = 0; - neighbourhood_builder.init(first_edge); + edges_builder.init(first_edge); // Compress the neighborhoods of the nodes in the fetched chunk. 
debug::scoped_time(dbg.compression_time, [&] { @@ -256,7 +256,7 @@ template < edge += 1; } - const EdgeID local_offset = neighbourhood_builder.add(i, neighbourhood); + const EdgeID local_offset = edges_builder.add(i, neighbourhood); offsets.push_back(local_offset); neighbourhood.clear(); @@ -265,7 +265,7 @@ template < // Wait for the parallel tasks that process the previous chunks to finish. const EdgeID offset = debug::scoped_time(dbg.sync_time, [&] { - const EdgeID compressed_neighborhoods_size = neighbourhood_builder.size(); + const EdgeID compressed_neighborhoods_size = edges_builder.size(); return buffer.fetch_and_update(chunk, compressed_neighborhoods_size); }); @@ -287,23 +287,20 @@ template < } offsets.clear(); - builder.add_compressed_edges( - offset, neighbourhood_builder.size(), neighbourhood_builder.compressed_data() - ); - + builder.add_compressed_edges(offset, edges_builder.size(), edges_builder.compressed_data()); builder.record_local_statistics( - neighbourhood_builder.max_degree(), - neighbourhood_builder.total_edge_weight(), - neighbourhood_builder.num_high_degree_nodes(), - neighbourhood_builder.num_high_degree_parts(), - neighbourhood_builder.num_interval_nodes(), - neighbourhood_builder.num_intervals() + edges_builder.max_degree(), + edges_builder.total_edge_weight(), + edges_builder.num_high_degree_nodes(), + edges_builder.num_high_degree_parts(), + edges_builder.num_interval_nodes(), + edges_builder.num_intervals() ); }); }); IF_DBG debug::print_graph_compression_stats(dbg_ets); - IF_DBG debug::print_compressed_graph_stats(neighbourhood_builder_ets); + IF_DBG debug::print_compressed_graph_stats(edges_builder_ets); return CompressedGraph(builder.build(), std::move(node_weights_array), sorted); } diff --git a/kaminpar-shm/graphutils/permutator.h b/kaminpar-shm/graphutils/permutator.h index 83ec7f7c..460254cc 100644 --- a/kaminpar-shm/graphutils/permutator.h +++ b/kaminpar-shm/graphutils/permutator.h @@ -39,7 +39,7 @@ template