diff --git a/apps/benchmarks/shm_compressed_graph_benchmark.cc b/apps/benchmarks/shm_compressed_graph_benchmark.cc index 74490f9c..c6f40de3 100644 --- a/apps/benchmarks/shm_compressed_graph_benchmark.cc +++ b/apps/benchmarks/shm_compressed_graph_benchmark.cc @@ -40,7 +40,6 @@ template static inline void do_not_optimize(T value) { } template static void benchmark_degree(const Graph &graph) { - SCOPED_HEAP_PROFILER("Degree"); SCOPED_TIMER("Degree"); for (const auto node : graph.nodes()) { @@ -49,7 +48,6 @@ template static void benchmark_degree(const Graph &graph) { } template static void benchmark_incident_edges(const Graph &graph) { - SCOPED_HEAP_PROFILER("Incident Edges"); SCOPED_TIMER("Incident Edges"); for (const auto node : graph.nodes()) { @@ -60,7 +58,6 @@ template static void benchmark_incident_edges(const Graph &grap } template static void benchmark_adjacent_nodes(const Graph &graph) { - SCOPED_HEAP_PROFILER("Adjacent Nodes"); SCOPED_TIMER("Adjacent Nodes"); for (const auto node : graph.nodes()) { @@ -69,7 +66,6 @@ template static void benchmark_adjacent_nodes(const Graph &grap } template static void benchmark_neighbors(const Graph &graph) { - SCOPED_HEAP_PROFILER("Neighbors"); SCOPED_TIMER("Neighbors"); for (const auto node : graph.nodes()) { @@ -81,7 +77,6 @@ template static void benchmark_neighbors(const Graph &graph) { } template static void benchmark_pfor_neighbors(const Graph &graph) { - SCOPED_HEAP_PROFILER("Parallel For Neighbors"); SCOPED_TIMER("Parallel For Neighbors"); for (const auto node : graph.nodes()) { @@ -97,326 +92,9 @@ template static void benchmark_pfor_neighbors(const Graph &grap } } -static void expect_equal_size(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - if (graph.n() != compressed_graph.n()) { - LOG_ERROR << "The uncompressed graph has " << graph.n() - << " nodes and the compressed graph has " << compressed_graph.n() << " nodes!"; - return; - } - - if (graph.m() != compressed_graph.m()) { - LOG_ERROR << "The uncompressed graph has " << graph.m() - << " edges and the compressed graph has " << compressed_graph.m() << " edges!"; - return; - } -} - -static void expect_equal_nodes(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - if (graph.nodes() != compressed_graph.nodes()) { - LOG_ERROR << "The nodes of the compressed and uncompressed graph do not match!"; - return; - } -} - -static void expect_equal_edges(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - if (graph.edges() != compressed_graph.edges()) { - LOG_ERROR << "The edges of the compressed and uncompressed graph do not match!"; - return; - } -} - -static void expect_equal_degree(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - for (const auto node : graph.nodes()) { - if (graph.degree(node) != compressed_graph.degree(node)) { - LOG_ERROR << "The node " << node << " has degree " << compressed_graph.degree(node) - << " in the compressed graph and degree " << graph.degree(node) - << " in the uncompressed graph!"; - return; - } - } -} - -static void -expect_equal_incident_edges(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - for (const auto node : graph.nodes()) { - if (graph.incident_edges(node) != compressed_graph.incident_edges(node)) { - LOG_ERROR << "The incident edges of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - } -} - -static void -expect_equal_adjacent_nodes(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - std::vector 
graph_neighbours; - std::vector compressed_graph_neighbours; - - for (const NodeID node : graph.nodes()) { - graph.adjacent_nodes(node, [&](const NodeID adjacent_node) { - graph_neighbours.push_back(adjacent_node); - }); - - compressed_graph.adjacent_nodes(node, [&](const NodeID adjacent_node) { - compressed_graph_neighbours.push_back(adjacent_node); - }); - - if (graph_neighbours.size() != compressed_graph_neighbours.size()) { - LOG_ERROR << "Node " << node << " has " << graph_neighbours.size() - << " neighbours in the uncompressed graph but " - << compressed_graph_neighbours.size() << " neighbours in the compressed graph!"; - return; - } - - std::sort(graph_neighbours.begin(), graph_neighbours.end()); - std::sort(compressed_graph_neighbours.begin(), compressed_graph_neighbours.end()); - if (graph_neighbours != compressed_graph_neighbours) { - LOG_ERROR << "The neighbourhood of node " << node - << " in the compressed and uncompressed graph does not match!"; - return; - } - - graph_neighbours.clear(); - compressed_graph_neighbours.clear(); - } -} - -static void -expect_equal_neighbours(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - std::vector graph_incident_edges; - std::vector graph_adjacent_node; - std::vector compressed_graph_incident_edges; - std::vector compressed_graph_adjacent_node; - - for (const NodeID node : graph.nodes()) { - graph.neighbors(node, [&](const auto incident_edge, const auto adjacent_node) { - graph_incident_edges.push_back(incident_edge); - graph_adjacent_node.push_back(adjacent_node); - }); - - compressed_graph.neighbors(node, [&](const auto incident_edge, const auto adjacent_node) { - compressed_graph_incident_edges.push_back(incident_edge); - compressed_graph_adjacent_node.push_back(adjacent_node); - }); - - if (graph_incident_edges.size() != compressed_graph_incident_edges.size()) { - LOG_ERROR << "Node " << node << " has " << graph_incident_edges.size() - << " neighbours in the uncompressed graph but " - << compressed_graph_incident_edges.size() << " neighbours in the compressed graph!"; - return; - } - - std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); - std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); - std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); - std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); - - if (graph_incident_edges != compressed_graph_incident_edges) { - LOG_ERROR << "The incident edges of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - if (graph_adjacent_node != compressed_graph_adjacent_node) { - LOG_ERROR << "The adjacent nodes of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - graph_incident_edges.clear(); - graph_adjacent_node.clear(); - compressed_graph_incident_edges.clear(); - compressed_graph_adjacent_node.clear(); - } -} - -static void expect_equal_neighbours_max(CSRGraph &graph, const CompressedGraph &compressed_graph) { - std::vector graph_incident_edges; - std::vector graph_adjacent_node; - std::vector compressed_graph_incident_edges; - std::vector compressed_graph_adjacent_node; - - graph::reorder_edges_by_compression(graph); - - for (const NodeID node : graph.nodes()) { - const NodeID max_neighbor_count = graph.degree(node) / 2; - - graph.neighbors( - node, - max_neighbor_count, - [&](const auto incident_edge, const auto adjacent_node) { - graph_incident_edges.push_back(incident_edge); - 
graph_adjacent_node.push_back(adjacent_node); - } - ); - - compressed_graph.neighbors( - node, - max_neighbor_count, - [&](const auto incident_edge, const auto adjacent_node) { - compressed_graph_incident_edges.push_back(incident_edge); - compressed_graph_adjacent_node.push_back(adjacent_node); - } - ); - - if (graph_incident_edges.size() != compressed_graph_incident_edges.size()) { - LOG_ERROR << "Node " << node << " has " << graph_incident_edges.size() - << " neighbours in the uncompressed graph but " - << compressed_graph_incident_edges.size() << " neighbours in the compressed graph!"; - return; - } - - std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); - std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); - std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); - std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); - - if (graph_incident_edges != compressed_graph_incident_edges) { - LOG_ERROR << "The incident edges of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - if (graph_adjacent_node != compressed_graph_adjacent_node) { - LOG_ERROR << "The adjacent nodes of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - graph_incident_edges.clear(); - graph_adjacent_node.clear(); - compressed_graph_incident_edges.clear(); - compressed_graph_adjacent_node.clear(); - } -} - -static void -expect_equal_pfor_neighbors(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - tbb::concurrent_vector graph_adjacent_node; - tbb::concurrent_vector compressed_graph_adjacent_node; - - for (const NodeID node : graph.nodes()) { - graph.pfor_neighbors( - node, - std::numeric_limits::max(), - std::numeric_limits::max(), - [&](const EdgeID e, const NodeID v) { graph_adjacent_node.push_back(v); } - ); - - compressed_graph.pfor_neighbors( - node, - std::numeric_limits::max(), - std::numeric_limits::max(), - [&](const EdgeID e, const NodeID v) { compressed_graph_adjacent_node.push_back(v); } - ); - - if (graph_adjacent_node.size() != compressed_graph_adjacent_node.size()) { - LOG_ERROR << "Node " << node << " has " << graph_adjacent_node.size() - << " adjacent nodes in the uncompressed graph but " - << compressed_graph_adjacent_node.size() - << " adjacent node in the compressed graph!"; - return; - } - - std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); - std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); - - if (graph_adjacent_node != compressed_graph_adjacent_node) { - LOG_ERROR << "The adjacent nodes of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - graph_adjacent_node.clear(); - compressed_graph_adjacent_node.clear(); - } -} - -static void expect_equal_compressed_graph_edge_weights( - const CSRGraph &graph, const CompressedGraph &compressed_graph -) { - std::vector> csr_graph_edge_weights; - std::vector> compressed_graph_edge_weights; - - for (const NodeID node : graph.nodes()) { - graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - csr_graph_edge_weights.emplace_back(adjacent_node, graph.edge_weight(incident_edge)); - }); - - compressed_graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - compressed_graph_edge_weights.emplace_back( - adjacent_node, compressed_graph.edge_weight(incident_edge) - ); - }); - - if 
(csr_graph_edge_weights.size() != compressed_graph_edge_weights.size()) { - LOG_ERROR << "Node " << node << " has " << csr_graph_edge_weights.size() - << " adjacent nodes in the uncompressed graph but " - << compressed_graph_edge_weights.size() - << " adjacent node in the compressed graph!"; - return; - } - - std::sort( - csr_graph_edge_weights.begin(), - csr_graph_edge_weights.end(), - [](const auto &a, const auto &b) { return a.first < b.first; } - ); - - std::sort( - compressed_graph_edge_weights.begin(), - compressed_graph_edge_weights.end(), - [](const auto &a, const auto &b) { return a.first < b.first; } - ); - - if (csr_graph_edge_weights != compressed_graph_edge_weights) { - LOG_ERROR << "The edge weights of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - csr_graph_edge_weights.clear(); - compressed_graph_edge_weights.clear(); - } -} - -static void expect_equal_rearrange_compressed_edge_weights( - CSRGraph &graph, const CompressedGraph &compressed_graph -) { - graph::reorder_edges_by_compression(graph); - - for (const NodeID node : graph.nodes()) { - for (const auto [incident_edge, adjacent_node] : graph.neighbors(node)) { - if (graph.edge_weight(incident_edge) != compressed_graph.edge_weight(incident_edge)) { - LOG_ERROR << "Edge " << incident_edge << " has weight " << graph.edge_weight(incident_edge) - << " in the rearranged uncompressed graph but weight " - << compressed_graph.edge_weight(incident_edge) << " in the compressed graph!"; - return; - } - } - } -} - -static void run_checks(CSRGraph &graph, const CompressedGraph &compressed_graph) { - LOG << "Checking if the graph operations are valid..."; - - expect_equal_size(graph, compressed_graph); - expect_equal_nodes(graph, compressed_graph); - expect_equal_edges(graph, compressed_graph); - expect_equal_degree(graph, compressed_graph); - expect_equal_incident_edges(graph, compressed_graph); - expect_equal_adjacent_nodes(graph, compressed_graph); - expect_equal_neighbours(graph, compressed_graph); - expect_equal_neighbours_max(graph, compressed_graph); - expect_equal_pfor_neighbors(graph, compressed_graph); - expect_equal_compressed_graph_edge_weights(graph, compressed_graph); - expect_equal_rearrange_compressed_edge_weights(graph, compressed_graph); -} - static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { - LOG << "Running the benchmark..."; + LOG << "Running the benchmarks..."; - START_HEAP_PROFILER("Uncompressed graph operations"); TIMED_SCOPE("Uncompressed graph operations") { benchmark_degree(graph); benchmark_incident_edges(graph); @@ -424,9 +102,7 @@ static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { benchmark_neighbors(graph); benchmark_pfor_neighbors(graph); }; - STOP_HEAP_PROFILER(); - START_HEAP_PROFILER("Compressed graph operations"); TIMED_SCOPE("Compressed graph operations") { benchmark_degree(compressed_graph); benchmark_incident_edges(compressed_graph); @@ -434,29 +110,6 @@ static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { benchmark_neighbors(compressed_graph); benchmark_pfor_neighbors(compressed_graph); }; - STOP_HEAP_PROFILER(); - - Graph graph_csr(std::make_unique(std::move(graph))); - START_HEAP_PROFILER("Uncompressed underlying graph operations"); - TIMED_SCOPE("Uncompressed underlying graph operations") { - benchmark_degree(graph_csr); - benchmark_incident_edges(graph_csr); - benchmark_adjacent_nodes(graph_csr); - benchmark_neighbors(graph_csr); - 
benchmark_pfor_neighbors(graph_csr); - }; - STOP_HEAP_PROFILER(); - - Graph graph_compressed(std::make_unique(std::move(compressed_graph))); - START_HEAP_PROFILER("Compressed underlying graph operations"); - TIMED_SCOPE("Compressed underlying graph operations") { - benchmark_degree(graph_compressed); - benchmark_incident_edges(graph_compressed); - benchmark_adjacent_nodes(graph_compressed); - benchmark_neighbors(graph_compressed); - benchmark_pfor_neighbors(graph_compressed); - }; - STOP_HEAP_PROFILER(); } int main(int argc, char *argv[]) { @@ -471,11 +124,6 @@ int main(int argc, char *argv[]) { app.add_option("-t,--threads", num_threads, "Number of threads") ->check(CLI::NonNegativeNumber) ->default_val(num_threads); - app.add_option("-b,--benchmark", enable_benchmarks, "Enable graph operations benchmark") - ->default_val(enable_benchmarks); - app.add_option("-c,--checks", enable_checks, "Enable compressed graph operations check") - ->default_val(enable_checks); - CLI11_PARSE(app, argc, argv); tbb::global_control gc(tbb::global_control::max_allowed_parallelism, num_threads); @@ -498,22 +146,8 @@ int main(int argc, char *argv[]) { }; STOP_HEAP_PROFILER(); - // Capture graph statistics - std::size_t csr_size = graph.raw_nodes().size() * sizeof(Graph::EdgeID) + - graph.raw_edges().size() * sizeof(Graph::NodeID); - std::size_t compressed_size = compressed_graph.used_memory(); - std::size_t high_degree_count = compressed_graph.high_degree_count(); - std::size_t part_count = compressed_graph.part_count(); - std::size_t interval_count = compressed_graph.interval_count(); - - // Run checks and benchmarks - if (enable_checks) { - run_checks(graph, compressed_graph); - } - - if (enable_benchmarks) { - run_benchmark(std::move(graph), std::move(compressed_graph)); - } + // Run benchmarks + run_benchmark(std::move(graph), std::move(compressed_graph)); STOP_TIMER(); DISABLE_HEAP_PROFILER(); @@ -528,20 +162,6 @@ int main(int argc, char *argv[]) { << ", edge weights: " << (graph.edge_weighted() ? 
"yes" : "no"); LOG; - LOG << "The uncompressed graph uses " << to_megabytes(csr_size) << " mb (" << csr_size - << " bytes)."; - LOG << "The compressed graph uses " << to_megabytes(compressed_size) << " mb (" << compressed_size - << " bytes)."; - float compression_factor = csr_size / (float)compressed_size; - LOG << "Thats a compression ratio of " << compression_factor << '.'; - LOG; - - LOG << high_degree_count << " (" << (high_degree_count / (float)graph.n()) - << "%) vertices have high degree."; - LOG << part_count << " parts result from splitting the neighborhood of high degree nodes."; - LOG << interval_count << " vertices/parts use interval encoding."; - LOG; - Timer::global().print_human_readable(std::cout); LOG; PRINT_HEAP_PROFILE(std::cout); diff --git a/apps/benchmarks/shm_label_propagation_benchmark.cc b/apps/benchmarks/shm_label_propagation_benchmark.cc index ac8d2481..32bb6cdd 100644 --- a/apps/benchmarks/shm_label_propagation_benchmark.cc +++ b/apps/benchmarks/shm_label_propagation_benchmark.cc @@ -11,10 +11,10 @@ #include -#include "kaminpar-shm/coarsening/lp_clustering.h" +#include "kaminpar-shm/coarsening/clustering/lp_clusterer.h" +#include "kaminpar-shm/coarsening/max_cluster_weights.h" #include "kaminpar-shm/context_io.h" #include "kaminpar-shm/graphutils/permutator.h" -#include "kaminpar-shm/partition_utils.h" #include "kaminpar-common/console_io.h" #include "kaminpar-common/logger.h" @@ -74,19 +74,21 @@ int main(int argc, char *argv[]) { graph::remove_isolated_nodes(graph, ctx.partition); } - const NodeWeight max_cluster_weight = - compute_max_cluster_weight(ctx.coarsening, graph, ctx.partition); - - LPClustering lp_clustering(graph.n(), ctx.coarsening); - lp_clustering.set_max_cluster_weight(max_cluster_weight); + LPClustering lp_clustering(ctx.coarsening); + lp_clustering.set_max_cluster_weight(compute_max_cluster_weight( + ctx.coarsening, ctx.partition, graph.n(), graph.total_node_weight() + )); lp_clustering.set_desired_cluster_count(0); GLOBAL_TIMER.reset(); ENABLE_HEAP_PROFILER(); + START_HEAP_PROFILER("Allocation"); + StaticArray clustering(graph.n()); + STOP_HEAP_PROFILER(); START_HEAP_PROFILER("Label Propagation"); TIMED_SCOPE("Label Propagation") { - lp_clustering.compute_clustering(graph, false); + lp_clustering.compute_clustering(clustering, graph, false); }; STOP_HEAP_PROFILER(); DISABLE_HEAP_PROFILER(); diff --git a/apps/benchmarks/shm_variable_length_codec_benchmark.cc b/apps/benchmarks/shm_variable_length_codec_benchmark.cc index 9e9db459..fc5bc1d0 100644 --- a/apps/benchmarks/shm_variable_length_codec_benchmark.cc +++ b/apps/benchmarks/shm_variable_length_codec_benchmark.cc @@ -229,8 +229,8 @@ template void benchmark_rle(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) { SCOPED_TIMER(name); - VarIntRunLengthDecoder decoder(values_ptr); - decoder.decode(count, [](const Int value) { do_not_optimize(value); }); + VarIntRunLengthDecoder decoder(values_ptr, count); + decoder.decode([](const Int value) { do_not_optimize(value); }); } template @@ -238,7 +238,7 @@ void benchmark_sve(std::string_view name, const std::size_t count, const std::ui SCOPED_TIMER(name); VarIntStreamDecoder decoder(values_ptr, count); - decoder.decode(count, [](const Int value) { do_not_optimize(value); }); + decoder.decode([](const Int value) { do_not_optimize(value); }); } template diff --git a/apps/io/parhip_parser.cc b/apps/io/parhip_parser.cc index 06ad71e9..5201971d 100644 --- a/apps/io/parhip_parser.cc +++ b/apps/io/parhip_parser.cc @@ -15,6 +15,7 
@@
 #include
 #include
 #include
+#include

 #include "kaminpar-common/logger.h"
diff --git a/apps/io/shm_compressed_graph_binary.cc b/apps/io/shm_compressed_graph_binary.cc
index 73f75fcb..18458747 100644
--- a/apps/io/shm_compressed_graph_binary.cc
+++ b/apps/io/shm_compressed_graph_binary.cc
@@ -14,10 +14,100 @@
 namespace kaminpar::shm::io::compressed_binary {

+struct CompressedBinaryHeader {
+  bool has_node_weights;
+  bool has_edge_weights;
+
+  bool has_64_bit_node_id;
+  bool has_64_bit_edge_id;
+
+  bool has_64_bit_node_weight;
+  bool has_64_bit_edge_weight;
+
+  bool use_degree_bucket_order;
+
+  bool use_high_degree_encoding;
+  bool use_interval_encoding;
+  bool use_run_length_encoding;
+  bool use_stream_vbyte_encoding;
+  bool use_isolated_nodes_separation;
+
+  std::uint64_t high_degree_threshold;
+  std::uint64_t high_degree_part_length;
+  std::uint64_t interval_length_threshold;
+
+  std::uint64_t num_nodes;
+  std::uint64_t num_edges;
+  std::uint64_t max_degree;
+
+  std::uint64_t num_high_degree_nodes;
+  std::uint64_t num_high_degree_parts;
+  std::uint64_t num_interval_nodes;
+  std::uint64_t num_intervals;
+};
+
+CompressedBinaryHeader create_header(const CompressedGraph &graph) {
+  return {
+      graph.node_weighted(),
+      graph.edge_weighted(),
+
+      sizeof(CompressedGraph::NodeID) == 8,
+      sizeof(CompressedGraph::EdgeID) == 8,
+
+      sizeof(CompressedGraph::NodeWeight) == 8,
+      sizeof(CompressedGraph::EdgeWeight) == 8,
+
+      graph.sorted(),
+
+      CompressedGraph::kHighDegreeEncoding,
+      CompressedGraph::kIntervalEncoding,
+      CompressedGraph::kRunLengthEncoding,
+      CompressedGraph::kStreamEncoding,
+      CompressedGraph::kIsolatedNodesSeparation,
+
+      CompressedGraph::kHighDegreeThreshold,
+      CompressedGraph::kHighDegreePartLength,
+      CompressedGraph::kIntervalLengthTreshold,
+
+      graph.n(),
+      graph.m(),
+      graph.max_degree(),
+
+      graph.num_high_degree_nodes(),
+      graph.num_high_degree_parts(),
+      graph.num_interval_nodes(),
+      graph.num_intervals()
+  };
+}
+
 template <typename T> static void write_int(std::ofstream &out, const T id) {
   out.write(reinterpret_cast<const char *>(&id), sizeof(T));
 }

+static void write_header(std::ofstream &out, const CompressedBinaryHeader header) {
+  // Pack the twelve flags into the low bits of one 16-bit word; the bit
+  // positions mirror the masks used by read_header() below (bits 0..11).
+  const std::uint16_t boolean_values =
+      (header.use_isolated_nodes_separation << 11) | (header.use_stream_vbyte_encoding << 10) |
+      (header.use_run_length_encoding << 9) | (header.use_interval_encoding << 8) |
+      (header.use_high_degree_encoding << 7) | (header.use_degree_bucket_order << 6) |
+      (header.has_64_bit_edge_weight << 5) | (header.has_64_bit_node_weight << 4) |
+      (header.has_64_bit_edge_id << 3) | (header.has_64_bit_node_id << 2) |
+      (header.has_edge_weights << 1) | (header.has_node_weights);
+  write_int(out, boolean_values);
+
+  write_int(out, header.high_degree_threshold);
+  write_int(out, header.high_degree_part_length);
+  write_int(out, header.interval_length_threshold);
+
+  write_int(out, header.num_nodes);
+  write_int(out, header.num_edges);
+  write_int(out, header.max_degree);
+
+  write_int(out, header.num_high_degree_nodes);
+  write_int(out, header.num_high_degree_parts);
+  write_int(out, header.num_interval_nodes);
+  write_int(out, header.num_intervals);
+}
+
 template <typename T>
 static void write_compact_static_array(std::ofstream &out, const CompactStaticArray<T> &array) {
   write_int(out, array.byte_width());
@@ -27,42 +117,18 @@
 template <typename T>
 static void write_static_array(std::ofstream &out, const StaticArray<T> &static_array) {
+  write_int(out, static_array.size());
   out.write(reinterpret_cast<const char *>(static_array.data()), static_array.size() * sizeof(T));
 }
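For reference, the flag word packs the twelve booleans into the low bits of a std::uint16_t in exactly the order that read_header() later unpacks them with the masks 1, 2, 4, ..., 2048. A small standalone sketch of the layout (the chosen flag combination is hypothetical, not part of this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A node- and edge-weighted graph from a 32-bit build, sorted into degree
      // buckets, with interval encoding enabled: bits 0, 1, 6 and 8 are set.
      const std::uint16_t boolean_values = (1 << 8) | (1 << 6) | (1 << 1) | (1 << 0);
      assert((boolean_values & 1) != 0);    // has_node_weights
      assert((boolean_values & 2) != 0);    // has_edge_weights
      assert((boolean_values & 4) == 0);    // has_64_bit_node_id (32-bit build)
      assert((boolean_values & 64) != 0);   // use_degree_bucket_order
      assert((boolean_values & 256) != 0);  // use_interval_encoding
      assert((boolean_values & 1024) == 0); // use_stream_vbyte_encoding
      return 0;
    }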
 void write(const std::string &filename, const CompressedGraph &graph) {
   std::ofstream out(filename, std::ios::binary);
   write_int(out, kMagicNumber);

-  write_int(out, static_cast<std::uint8_t>(sizeof(CompressedGraph::NodeID)));
-  write_int(out, static_cast<std::uint8_t>(sizeof(CompressedGraph::EdgeID)));
-  write_int(out, static_cast<std::uint8_t>(sizeof(CompressedGraph::NodeWeight)));
-  write_int(out, static_cast<std::uint8_t>(sizeof(CompressedGraph::EdgeWeight)));
-
-  write_int(out, static_cast<std::uint8_t>(CompressedGraph::kHighDegreeEncoding));
-  write_int(out, CompressedGraph::kHighDegreeThreshold);
-  write_int(out, CompressedGraph::kHighDegreePartLength);
-  write_int(out, static_cast<std::uint8_t>(CompressedGraph::kIntervalEncoding));
-  write_int(out, CompressedGraph::kIntervalLengthTreshold);
-  write_int(out, static_cast<std::uint8_t>(CompressedGraph::kRunLengthEncoding));
-  write_int(out, static_cast<std::uint8_t>(CompressedGraph::kStreamEncoding));
-  write_int(out, static_cast<std::uint8_t>(CompressedGraph::kIsolatedNodesSeparation));
-
-  write_int(out, graph.n());
-  write_int(out, graph.m());
-  write_int(out, graph.max_degree());
-  write_int(out, static_cast<std::uint8_t>(graph.sorted()));
-  write_int(out, static_cast<std::uint8_t>(graph.node_weighted()));
-  write_int(out, static_cast<std::uint8_t>(graph.edge_weighted()));
-
-  write_int(out, graph.high_degree_count());
-  write_int(out, graph.part_count());
-  write_int(out, graph.interval_count());
+  CompressedBinaryHeader header = create_header(graph);
+  write_header(out, header);

   write_compact_static_array(out, graph.raw_nodes());
-
-  write_int(out, graph.raw_compressed_edges().size());
   write_static_array(out, graph.raw_compressed_edges());

   if (graph.node_weighted()) {
@@ -80,68 +146,78 @@ template <typename T> static T read_int(std::ifstream &in) {
   return t;
 }

-template <typename T> static CompactStaticArray<T> read_compact_static_array(std::ifstream &in) {
-  std::uint8_t byte_width = read_int<std::uint8_t>(in);
-  std::size_t allocated_size = read_int<std::size_t>(in);
-
-  auto data = std::make_unique<std::uint8_t[]>(allocated_size);
-  in.read(reinterpret_cast<char *>(data.get()), allocated_size);
-  return CompactStaticArray<T>(byte_width, allocated_size, std::move(data));
+CompressedBinaryHeader read_header(std::ifstream &in) {
+  const auto boolean_values = read_int<std::uint16_t>(in);
+  return {
+      (boolean_values & 1) != 0,   (boolean_values & 2) != 0,    (boolean_values & 4) != 0,
+      (boolean_values & 8) != 0,   (boolean_values & 16) != 0,   (boolean_values & 32) != 0,
+      (boolean_values & 64) != 0,  (boolean_values & 128) != 0,  (boolean_values & 256) != 0,
+      (boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0,
+      read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
+      read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
+      read_int<std::uint64_t>(in), read_int<std::uint64_t>(in),  read_int<std::uint64_t>(in),
+      read_int<std::uint64_t>(in),
+  };
 }

-template <typename T>
-static StaticArray<T> read_static_array(std::ifstream &in, const std::size_t size) {
-  T *ptr = static_cast<T *>(std::malloc(sizeof(T) * size));
-  in.read(reinterpret_cast<char *>(ptr), sizeof(T) * size);
-  return StaticArray<T>(size, ptr);
-}
-
-CompressedGraph read(const std::string &filename) {
+void verify_header(const CompressedBinaryHeader header) {
   using NodeID = CompressedGraph::NodeID;
   using EdgeID = CompressedGraph::EdgeID;
   using NodeWeight = CompressedGraph::NodeWeight;
   using EdgeWeight = CompressedGraph::EdgeWeight;

-  std::ifstream in(filename, std::ios::binary);
-
-  if (kMagicNumber != read_int<std::uint64_t>(in)) {
-    LOG_ERROR << "The magic number of the file is not correct!";
-    std::exit(1);
-  }
-
-  std::uint8_t stored_node_id_size = read_int<std::uint8_t>(in);
-  if (stored_node_id_size != sizeof(NodeID)) {
-    LOG_ERROR << "The stored
compressed graph uses " << (stored_node_id_size * 8) - << "-Bit NodeIDs but this build uses " << (sizeof(NodeID) * 8) << "-Bit NodeIDs."; + if (header.has_64_bit_node_id) { + if (sizeof(NodeID) != 8) { + LOG_ERROR << "The stored compressed graph uses 64-Bit node IDs but this build uses 32-Bit " + "node IDs."; + std::exit(1); + } + } else if (sizeof(NodeID) != 4) { + LOG_ERROR + << "The stored compressed graph uses 32-Bit node IDs but this build uses 64-Bit node IDs."; std::exit(1); } - std::uint8_t stored_edge_id_size = read_int(in); - if (stored_edge_id_size != sizeof(EdgeID)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_edge_id_size * 8) - << "-Bit EdgeIDs but this build uses " << (sizeof(EdgeID) * 8) << "-Bit EdgeIDs."; + if (header.has_64_bit_edge_id) { + if (sizeof(EdgeID) != 8) { + LOG_ERROR << "The stored compressed graph uses 64-Bit edge IDs but this build uses 32-Bit " + "edge IDs."; + std::exit(1); + } + } else if (sizeof(EdgeID) != 4) { + LOG_ERROR + << "The stored compressed graph uses 32-Bit edge IDs but this build uses 64-Bit edge IDs."; std::exit(1); } - std::uint8_t stored_node_weight_size = read_int(in); - if (stored_node_weight_size != sizeof(NodeWeight)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_node_weight_size * 8) - << "-Bit NodeWeights but this build uses " << (sizeof(NodeWeight) * 8) - << "-Bit NodeWeights."; + if (header.has_64_bit_node_weight) { + if (sizeof(NodeWeight) != 8) { + LOG_ERROR + << "The stored compressed graph uses 64-Bit node weights but this build uses 32-Bit " + "node weights."; + std::exit(1); + } + } else if (sizeof(NodeWeight) != 4) { + LOG_ERROR << "The stored compressed graph uses 32-Bit node weights but this build uses 64-Bit " + "node weights."; std::exit(1); } - std::uint8_t stored_edge_weight_size = read_int(in); - if (stored_edge_weight_size != sizeof(EdgeWeight)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_edge_weight_size * 8) - << "-Bit EdgeWeights but this build uses " << (sizeof(EdgeWeight) * 8) - << "-Bit EdgeWeights."; + if (header.has_64_bit_edge_weight) { + if (sizeof(EdgeWeight) != 8) { + LOG_ERROR + << "The stored compressed graph uses 64-Bit edge weights but this build uses 32-Bit " + "edge weights."; + std::exit(1); + } + } else if (sizeof(EdgeWeight) != 4) { + LOG_ERROR << "The stored compressed graph uses 32-Bit edge weights but this build uses 64-Bit " + "edge weights."; std::exit(1); } - bool high_degree_encoding = static_cast(read_int(in)); - if (high_degree_encoding != CompressedGraph::kHighDegreeEncoding) { - if (high_degree_encoding) { + if (header.use_high_degree_encoding != CompressedGraph::kHighDegreeEncoding) { + if (header.use_high_degree_encoding) { LOG_ERROR << "The stored compressed graph uses high degree encoding but this build does not."; } else { LOG_ERROR @@ -150,25 +226,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - NodeID high_degree_threshold = read_int(in); - if (high_degree_threshold != CompressedGraph::kHighDegreeThreshold) { - LOG_ERROR << "The stored compressed graph uses " << high_degree_threshold - << " as the high degree threshold but this build uses " - << (CompressedGraph::kHighDegreeThreshold) << " as the high degree threshold."; - std::exit(1); - } - - NodeID high_degree_part_length = read_int(in); - if (high_degree_part_length != CompressedGraph::kHighDegreePartLength) { - LOG_ERROR << "The stored compressed graph uses " << high_degree_part_length - << " as the high degree part length but this build 
uses " - << (CompressedGraph::kHighDegreePartLength) << " as the high degree part length."; - std::exit(1); - } - - bool interval_encoding = static_cast(read_int(in)); - if (interval_encoding != CompressedGraph::kIntervalEncoding) { - if (interval_encoding) { + if (header.use_interval_encoding != CompressedGraph::kIntervalEncoding) { + if (header.use_interval_encoding) { LOG_ERROR << "The stored compressed graph uses interval encoding but this build does not."; } else { LOG_ERROR @@ -177,17 +236,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - NodeID interval_length_threshold = read_int(in); - if (interval_length_threshold != CompressedGraph::kIntervalLengthTreshold) { - LOG_ERROR << "The stored compressed graph uses " << interval_length_threshold - << " as the interval length threshold but this build uses " - << (CompressedGraph::kIntervalLengthTreshold) << " as the interval length threshold."; - std::exit(1); - } - - bool run_length_encoding = static_cast(read_int(in)); - if (run_length_encoding != CompressedGraph::kRunLengthEncoding) { - if (run_length_encoding) { + if (header.use_run_length_encoding != CompressedGraph::kRunLengthEncoding) { + if (header.use_run_length_encoding) { LOG_ERROR << "The stored compressed graph uses run-length encoding but this build does not."; } else { LOG_ERROR @@ -196,9 +246,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - bool stream_encoding = static_cast(read_int(in)); - if (stream_encoding != CompressedGraph::kStreamEncoding) { - if (stream_encoding) { + if (header.use_stream_vbyte_encoding != CompressedGraph::kStreamEncoding) { + if (header.use_stream_vbyte_encoding) { LOG_ERROR << "The stored compressed graph uses stream encoding but this build does not."; } else { LOG_ERROR << "The stored compressed graph does not use stream encoding but this build does."; @@ -206,9 +255,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - bool isolated_nodes_separation = static_cast(read_int(in)); - if (isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) { - if (isolated_nodes_separation) { + if (header.use_isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) { + if (header.use_isolated_nodes_separation) { LOG_ERROR << "The stored compressed graph uses isolated nodes separation but this build does not."; } else { @@ -218,40 +266,74 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - NodeID n = read_int(in); - EdgeID m = read_int(in); - NodeID max_degree = read_int(in); - bool sorted = static_cast(read_int(in)); - bool is_node_weighted = static_cast(read_int(in)); - bool is_edge_weighted = static_cast(read_int(in)); + if (header.high_degree_threshold != CompressedGraph::kHighDegreeThreshold) { + LOG_ERROR << "The stored compressed graph uses " << header.high_degree_threshold + << " as the high degree threshold but this build uses " + << (CompressedGraph::kHighDegreeThreshold) << " as the high degree threshold."; + std::exit(1); + } - std::size_t high_degree_count = read_int(in); - std::size_t part_count = read_int(in); - std::size_t interval_count = read_int(in); + if (header.high_degree_part_length != CompressedGraph::kHighDegreePartLength) { + LOG_ERROR << "The stored compressed graph uses " << header.high_degree_part_length + << " as the high degree part length but this build uses " + << (CompressedGraph::kHighDegreePartLength) << " as the high degree part length."; + std::exit(1); + } - CompactStaticArray nodes = 
read_compact_static_array(in);
+  if (header.interval_length_threshold != CompressedGraph::kIntervalLengthTreshold) {
+    LOG_ERROR << "The stored compressed graph uses " << header.interval_length_threshold
+              << " as the interval length threshold but this build uses "
+              << (CompressedGraph::kIntervalLengthTreshold) << " as the interval length threshold.";
+    std::exit(1);
+  }
+}

-  std::size_t compressed_edges_size = read_int<std::size_t>(in);
-  StaticArray<std::uint8_t> compressed_edges =
-      read_static_array<std::uint8_t>(in, compressed_edges_size);
-
-  StaticArray<NodeWeight> node_weights =
-      is_node_weighted ? read_static_array<NodeWeight>(in, n) : StaticArray<NodeWeight>();
+template <typename T> static CompactStaticArray<T> read_compact_static_array(std::ifstream &in) {
+  const auto byte_width = read_int<std::uint8_t>(in);
+  const auto allocated_size = read_int<std::size_t>(in);
+
+  auto data = std::make_unique<std::uint8_t[]>(allocated_size);
+  in.read(reinterpret_cast<char *>(data.get()), allocated_size);
+  return CompactStaticArray<T>(byte_width, allocated_size, std::move(data));
+}
+
+template <typename T> static StaticArray<T> read_static_array(std::ifstream &in) {
+  const auto size = read_int<std::size_t>(in);
+  T *ptr = static_cast<T *>(std::malloc(sizeof(T) * size));
+  in.read(reinterpret_cast<char *>(ptr), sizeof(T) * size);
+  return StaticArray<T>(size, ptr);
+}
+
+CompressedGraph read(const std::string &filename) {
+  std::ifstream in(filename, std::ios::binary);
+  if (kMagicNumber != read_int<std::uint64_t>(in)) {
+    LOG_ERROR << "The magic number of the file is not correct!";
+    std::exit(1);
+  }
+
+  CompressedBinaryHeader header = read_header(in);
+  verify_header(header);
+
+  CompactStaticArray<EdgeID> nodes = read_compact_static_array<EdgeID>(in);
+  StaticArray<std::uint8_t> compressed_edges = read_static_array<std::uint8_t>(in);
+
+  StaticArray<NodeWeight> node_weights =
+      header.has_node_weights ? read_static_array<NodeWeight>(in) : StaticArray<NodeWeight>();
   StaticArray<EdgeWeight> edge_weights =
-      is_edge_weighted ? read_static_array<EdgeWeight>(in, m) : StaticArray<EdgeWeight>();
+      header.has_edge_weights ? read_static_array<EdgeWeight>(in) : StaticArray<EdgeWeight>();

   return CompressedGraph(
       std::move(nodes),
       std::move(compressed_edges),
       std::move(node_weights),
       std::move(edge_weights),
-      m,
-      max_degree,
-      sorted,
-      high_degree_count,
-      part_count,
-      interval_count
+      header.num_edges,
+      header.max_degree,
+      header.use_degree_bucket_order,
+      header.num_high_degree_nodes,
+      header.num_high_degree_parts,
+      header.num_interval_nodes,
+      header.num_intervals
   );
 }
diff --git a/apps/io/shm_compressed_graph_binary.h b/apps/io/shm_compressed_graph_binary.h
index 0362e3d2..ef6567a3 100644
--- a/apps/io/shm_compressed_graph_binary.h
+++ b/apps/io/shm_compressed_graph_binary.h
@@ -17,7 +17,7 @@ namespace kaminpar::shm::io::compressed_binary {
 constexpr std::uint64_t kMagicNumber = 0x434F4D5052455353;

 /*!
- * Writes a graph to a file as a compressed graph binary.
+ * Writes a compressed graph to a file in binary format.
  *
  * @param filename The name of the file to write to.
  * @param graph The compressed graph to write.
@@ -25,7 +25,7 @@ constexpr std::uint64_t kMagicNumber = 0x434F4D5052455353;
 void write(const std::string &filename, const CompressedGraph &graph);

 /*!
- * Reads the graph from a compressed graph binary file. If the paramters of the compressed graph
- * stored in the file do not match with this build, exit is called.
+ * Reads a compressed graph from a file in binary format. If the parameters of the compressed
+ * graph stored in the file do not match this build, exit is called.
  *
  * @param filename The name of the file to read from.
@@ -34,7 +34,7 @@ void write(const std::string &filename, const CompressedGraph &graph);
 CompressedGraph read(const std::string &filename);

 /*!
- * Checks whether a graph is stored in compressed format.
+ * Checks whether a graph is stored in compressed binary format.
  *
  * @param filename The name of the file to check.
  * @return Whether the graph is stored in compressed format.
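Taken together, writer and reader round-trip through this format as follows (a minimal usage sketch; the file name is made up and `graph` is assumed to be a CompressedGraph produced elsewhere, e.g. by the compression pipeline):

    using namespace kaminpar::shm;
    // Persist the compressed graph and load it back. read() exits with an
    // error message if the stored header parameters do not match this build.
    io::compressed_binary::write("example.compressed", graph);
    CompressedGraph copy = io::compressed_binary::read("example.compressed");
    KASSERT(copy.n() == graph.n() && copy.m() == graph.m());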
diff --git a/apps/tools/shm_graph_compression_tool.cc b/apps/tools/shm_graph_compression_tool.cc
index 7db47575..4163611d 100644
--- a/apps/tools/shm_graph_compression_tool.cc
+++ b/apps/tools/shm_graph_compression_tool.cc
@@ -23,12 +23,18 @@ int main(int argc, char *argv[]) {
   // Parse CLI arguments
   std::string graph_filename;
   std::string compressed_graph_filename;
+  io::GraphFileFormat graph_file_format = io::GraphFileFormat::METIS;
   int num_threads = 1;

   CLI::App app("Shared-memory graph compression tool");
   app.add_option("-G,--graph", graph_filename, "Input graph in METIS format")->required();
   app.add_option("--out", compressed_graph_filename, "Output file for saving the compressed graph")
       ->required();
+  app.add_option("-f,--graph-file-format", graph_file_format)
+      ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description(""))
+      ->description(R"(Graph file formats:
+  - metis
+  - parhip)");
   app.add_option("-t,--threads", num_threads, "Number of threads");

   CLI11_PARSE(app, argc, argv);
diff --git a/apps/tools/shm_graph_properties_tool.cc b/apps/tools/shm_graph_properties_tool.cc
index 35c2e82a..0872808a 100644
--- a/apps/tools/shm_graph_properties_tool.cc
+++ b/apps/tools/shm_graph_properties_tool.cc
@@ -85,8 +85,7 @@ int main(int argc, char *argv[]) {
       ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description(""))
       ->description(R"(Graph file formats:
   - metis
-  - parhip)")
-      ->capture_default_str();
+  - parhip)");
   create_graph_compression_options(&app, ctx);

   CLI11_PARSE(app, argc, argv);
diff --git a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc
index 1f67d8e4..e6627aab 100644
--- a/kaminpar-cli/kaminpar_arguments.cc
+++ b/kaminpar-cli/kaminpar_arguments.cc
@@ -206,10 +206,17 @@ CLI::Option_group *create_lp_coarsening_options(CLI::App *app, Context &ctx) {
       ->capture_default_str();

   lp->add_option(
-      "--c-lp-use-two-level-cluster-weight-vector",
-      ctx.coarsening.clustering.lp.use_two_level_cluster_weight_vector,
-      "Whether to use the two level cluster weight vector"
+      "--c-lp-cluster-weights-struct", ctx.coarsening.clustering.lp.cluster_weights_structure
   )
+      ->transform(CLI::CheckedTransformer(get_cluster_weight_structures()).description(""))
+      ->description(
+          R"(Determines the data structure for storing the cluster weights.
Options are:
  - vec:                 Uses a fixed-width vector
  - two-level-vec:       Uses a two-level vector
  - initially-small-vec: Uses a small fixed-width vector initially and switches to a larger fixed-width vector after relabeling (requires two-phase LP with relabeling)
)"
+      )
       ->capture_default_str();

   lp->add_option(
@@ -220,24 +227,26 @@ CLI::Option_group *create_lp_coarsening_options(CLI::App *app, Context &ctx) {
   )
       ->capture_default_str();
   lp->add_option(
-      "--c-lp-second-phase-select-mode", ctx.coarsening.clustering.lp.second_phase_select_mode
+      "--c-lp-second-phase-selection-strategy",
+      ctx.coarsening.clustering.lp.second_phase_selection_strategy
   )
-      ->transform(CLI::CheckedTransformer(get_second_phase_select_modes()).description(""))
+      ->transform(CLI::CheckedTransformer(get_second_phase_selection_strategies()).description(""))
       ->description(
-          R"(Determines the mode for selecting nodes for the second phase of label propagation.
+          R"(Determines the strategy for selecting nodes for the second phase of label propagation.
Options are:
  - high-degree:     Select nodes with high degree
-  - full-rating-map: Select nodes which have a full rating map in the first phase
+  - full-rating-map: Select nodes that have a full rating map in the first phase
)"
       )
       ->capture_default_str();
   lp->add_option(
-      "--c-lp-second-phase-aggregation-mode",
-      ctx.coarsening.clustering.lp.second_phase_aggregation_mode
+      "--c-lp-second-phase-aggregation-strategy",
+      ctx.coarsening.clustering.lp.second_phase_aggregation_strategy
   )
-      ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_modes()).description(""))
+      ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_strategies()).description("")
+      )
       ->description(
-          R"(Determines the mode for aggregating ratings in the second phase of label propagation.
+          R"(Determines the strategy for aggregating ratings in the second phase of label propagation.
 Options are:
   - none:   Skip the second phase
   - direct: Write the ratings directly into the global vector (shared between threads)
@@ -295,9 +304,10 @@ CLI::Option_group *create_contraction_coarsening_options(CLI::App *app, Context
       ->transform(CLI::CheckedTransformer(get_contraction_modes()).description(""))
       ->description(R"(The mode used for contraction.
Options are:
-  - edge-buffer:          Use an edge buffer to store edges temporarily
-  - no-edge-buffer-naive: Use no edge buffer by computing the neighborhood of each coarse node twice
-  - no-edge-buffer-remap: Use no edge buffer by remapping the coarse nodes afterwards
+  - buffered:          Use an edge buffer that is partially filled
+  - buffered-legacy:   Use an edge buffer (legacy implementation)
+  - unbuffered:        Use no edge buffer by remapping the coarse nodes afterwards
+  - unbuffered-naive:  Use no edge buffer by computing the neighborhood of each coarse node twice
)")
       ->capture_default_str();
   contraction
@@ -379,22 +389,26 @@ CLI::Option_group *create_lp_refinement_options(CLI::App *app, Context &ctx) {
       "treated separately"
   )
       ->capture_default_str();
-  lp->add_option("--r-lp-second-phase-select-mode", ctx.refinement.lp.second_phase_select_mode)
-      ->transform(CLI::CheckedTransformer(get_second_phase_select_modes()).description(""))
+  lp->add_option(
+      "--r-lp-second-phase-selection-strategy", ctx.refinement.lp.second_phase_selection_strategy
+  )
+      ->transform(CLI::CheckedTransformer(get_second_phase_selection_strategies()).description(""))
       ->description(
-          R"(Determines the mode for selecting nodes for the second phase of label propagation.
+          R"(Determines the strategy for selecting nodes for the second phase of label propagation.
Options are:
  - high-degree:     Select nodes with high degree
-  - full-rating-map: Select nodes which have a full rating map in the first phase
+  - full-rating-map: Select nodes that have a full rating map in the first phase
)"
       )
       ->capture_default_str();
   lp->add_option(
-      "--r-lp-second-phase-aggregation-mode", ctx.refinement.lp.second_phase_aggregation_mode
+      "--r-lp-second-phase-aggregation-strategy",
+      ctx.refinement.lp.second_phase_aggregation_strategy
   )
-      ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_modes()).description(""))
+      ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_strategies()).description("")
+      )
       ->description(
-          R"(Determines the mode for aggregating ratings in the second phase of label propagation.
+          R"(Determines the strategy for aggregating ratings in the second phase of label propagation.
Options are: - none: Skip the second phase - direct: Write the ratings directly into the global vector (shared between threads) diff --git a/kaminpar-common/constexpr_utils.h b/kaminpar-common/constexpr_utils.h index e0c58fc3..9a43211a 100644 --- a/kaminpar-common/constexpr_utils.h +++ b/kaminpar-common/constexpr_utils.h @@ -13,7 +13,7 @@ namespace kaminpar { /*! - * Invokes a function either directly or indirectly depending on a lambda. + * Invokes a function either directly or indirectly. * * @tparam direct Whether to call the function directly. * @tparam Lambda The type of the lambda to pass to the function. @@ -22,14 +22,35 @@ namespace kaminpar { * @param fun The function to invoke. */ template -constexpr void invoke_maybe_indirect(Lambda &&l, Function &&fun) { +constexpr void invoke_indirect(Lambda &&l, Function &&fun) { if constexpr (direct) { - fun(std::forward(l)); + return fun(std::forward(l)); } else { l([&](auto &&l2) { fun(std::forward(l2)); }); } } +/*! + * Invokes a function either directly or indirectly and returns its return value. + * + * @tparam direct Whether to call the function directly. + * @tparam Value The type of the return value of the function. + * @tparam Lambda The type of the lambda to pass to the function. + * @tparam Function The type of the function to invoke. + * @param l The lambda to pass to the function. + * @param fun The function to invoke. + */ +template +constexpr Value invoke_indirect2(Lambda &&l, Function &&fun) { + if constexpr (direct) { + return fun(std::forward(l)); + } else { + Value val; + l([&](auto &&l2) { val = fun(std::forward(l2)); }); + return val; + } +} + // Utility functions for constexpr loops based on https://stackoverflow.com/a/47563100 template struct Number { static const constexpr auto value = N; diff --git a/kaminpar-common/datastructures/concurrent_two_level_vector.h b/kaminpar-common/datastructures/concurrent_two_level_vector.h index 056f2c9f..63d12a50 100644 --- a/kaminpar-common/datastructures/concurrent_two_level_vector.h +++ b/kaminpar-common/datastructures/concurrent_two_level_vector.h @@ -114,7 +114,7 @@ class ConcurrentTwoLevelVector { tbb::parallel_for(tbb::blocked_range(0, _values.size()), [&](const auto &r) { for (Size pos = r.begin(); pos != r.end(); ++pos) { - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value == kMaxFirstValue) { Size new_pos = mapping[pos] - 1; @@ -143,7 +143,7 @@ class ConcurrentTwoLevelVector { [[nodiscard]] Value operator[](const Size pos) { KASSERT(pos < _values.size()); - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value < kMaxFirstValue) { return value; } @@ -167,7 +167,7 @@ class ConcurrentTwoLevelVector { KASSERT(pos < _values.size()); if (value < kMaxFirstValue) { - _values[pos] = value; + _values[pos] = static_cast(value); } else { _values[pos] = kMaxFirstValue; _table.get_handle().insert(pos, value); @@ -196,7 +196,12 @@ class ConcurrentTwoLevelVector { const Value new_value = static_cast(value) + delta; if (new_value < kMaxFirstValue) { success = __atomic_compare_exchange_n( - &_values[pos], &value, new_value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(new_value), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } else { success = __atomic_compare_exchange_n( @@ -234,7 +239,12 @@ class ConcurrentTwoLevelVector { } success = __atomic_compare_exchange_n( - &_values[pos], &value, value - delta, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + 
&value, + static_cast(value - delta), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } while (!success); } @@ -327,7 +337,7 @@ class ConcurrentTwoLevelVector { tbb::parallel_for(tbb::blocked_range(0, _values.size()), [&](const auto &r) { for (Size pos = r.begin(); pos != r.end(); ++pos) { - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value == kMaxFirstValue) { Size new_pos = mapping[pos] - 1; @@ -363,7 +373,7 @@ class ConcurrentTwoLevelVector { [[nodiscard]] Value operator[](const Size pos) { KASSERT(pos < _values.size()); - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value < kMaxFirstValue) { return value; } @@ -387,7 +397,7 @@ class ConcurrentTwoLevelVector { KASSERT(pos < _values.size()); if (value < kMaxFirstValue) { - _values[pos] = value; + _values[pos] = static_cast(value); } else { _values[pos] = kMaxFirstValue; @@ -423,7 +433,12 @@ class ConcurrentTwoLevelVector { const Value new_value = static_cast(value) + delta; if (new_value < kMaxFirstValue) { success = __atomic_compare_exchange_n( - &_values[pos], &value, new_value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(new_value), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } else { success = __atomic_compare_exchange_n( @@ -469,7 +484,12 @@ class ConcurrentTwoLevelVector { } success = __atomic_compare_exchange_n( - &_values[pos], &value, value - delta, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(value - delta), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } while (!success); } diff --git a/kaminpar-common/varint_run_length_codec.h b/kaminpar-common/varint_run_length_codec.h index 6120bfb8..8e545fe1 100644 --- a/kaminpar-common/varint_run_length_codec.h +++ b/kaminpar-common/varint_run_length_codec.h @@ -114,31 +114,30 @@ template class VarIntRunLengthDecoder { * Constructs a new VarIntRunLengthDecoder. * * @param ptr The pointer to the memory location where the encoded integers are stored. + * @param count The number of integers that are encoded. */ - VarIntRunLengthDecoder(const std::uint8_t *ptr) : _ptr(ptr) {} + VarIntRunLengthDecoder(const std::uint8_t *ptr, const std::size_t count) + : _ptr(ptr), + _count(count) {} /*! * Decodes the encoded integers. * - * @param max_decoded The amount of integers to decode. * @param l The function to be called with the decoded integers, i.e. the function has one * parameter of type Int. 
*/ - template void decode(const std::size_t max_decoded, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + template void decode(Lambda &&l) { + constexpr bool non_stoppable = std::is_void_v>; std::size_t decoded = 0; - while (decoded < max_decoded) { + while (decoded < _count) { const std::uint8_t run_header = *_ptr++; if constexpr (sizeof(Int) == 4) { - std::uint8_t run_length = (run_header >> 2) + 1; + const std::uint8_t run_length = (run_header >> 2) + 1; const std::uint8_t run_size = (run_header & 0b00000011) + 1; decoded += run_length; - if (decoded > max_decoded) { - run_length -= decoded - max_decoded; - } if constexpr (non_stoppable) { decode32(run_length, run_size, std::forward(l)); @@ -149,13 +148,10 @@ template class VarIntRunLengthDecoder { } } } else if constexpr (sizeof(Int) == 8) { - std::uint8_t run_length = (run_header >> 3) + 1; + const std::uint8_t run_length = (run_header >> 3) + 1; const std::uint8_t run_size = (run_header & 0b00000111) + 1; decoded += run_length; - if (decoded > max_decoded) { - run_length -= decoded - max_decoded; - } if constexpr (non_stoppable) { decode64(run_length, run_size, std::forward(l)); @@ -171,10 +167,11 @@ template class VarIntRunLengthDecoder { private: const std::uint8_t *_ptr; + const std::size_t _count; template bool decode32(const std::uint8_t run_length, const std::uint8_t run_size, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + constexpr bool non_stoppable = std::is_void_v>; switch (run_size) { case 1: @@ -246,7 +243,7 @@ template class VarIntRunLengthDecoder { template bool decode64(const std::uint8_t run_length, const std::uint8_t run_size, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + constexpr bool non_stoppable = std::is_void_v>; switch (run_size) { case 1: diff --git a/kaminpar-common/varint_stream_codec.h b/kaminpar-common/varint_stream_codec.h index f6db0742..23712e60 100644 --- a/kaminpar-common/varint_stream_codec.h +++ b/kaminpar-common/varint_stream_codec.h @@ -164,9 +164,9 @@ template class VarIntStreamDecoder { return shuffle_table; } - static const constexpr std::array kLengthTable = create_length_table(); + static constexpr const std::array kLengthTable = create_length_table(); - static const constexpr std::array, 256> kShuffleTable = + static constexpr const std::array, 256> kShuffleTable = create_shuffle_table(); public: @@ -185,18 +185,13 @@ template class VarIntStreamDecoder { /*! * Decodes the encoded integers. * - * @param max_count The amount of integers to decode, it has to be less then the amount of - * integers stored that are stored. * @param l The function to be called with the decoded integers, i.e. the function has one * parameter of type Int. 
*/ - template void decode(const std::size_t max_count, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + template void decode(Lambda &&l) { + constexpr bool non_stoppable = std::is_void_v>; - // max_count = std::min(max_count, _count); - - const std::size_t control_bytes = max_count / 4; - for (std::size_t i = 0; i < control_bytes; ++i) { + for (std::size_t i = 0; i < _control_bytes; ++i) { const std::uint8_t control_byte = _control_bytes_ptr[i]; const std::uint8_t length = kLengthTable[control_byte]; @@ -230,9 +225,9 @@ template class VarIntStreamDecoder { } } - switch (max_count % 4) { + switch (_count % 4) { case 1: { - const std::uint8_t control_byte = _control_bytes_ptr[control_bytes]; + const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); @@ -248,7 +243,7 @@ template class VarIntStreamDecoder { break; } case 2: { - const std::uint8_t control_byte = _control_bytes_ptr[control_bytes]; + const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); @@ -269,7 +264,7 @@ template class VarIntStreamDecoder { break; } case 3: { - const std::uint8_t control_byte = _control_bytes_ptr[control_bytes]; + const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); diff --git a/kaminpar-shm/coarsening/cluster_coarsener.cc b/kaminpar-shm/coarsening/cluster_coarsener.cc index d512568c..42f534fe 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.cc +++ b/kaminpar-shm/coarsening/cluster_coarsener.cc @@ -31,11 +31,9 @@ bool ClusteringCoarsener::coarsen() { SCOPED_HEAP_PROFILER("Level", std::to_string(_hierarchy.size())); SCOPED_TIMER("Level", std::to_string(_hierarchy.size())); - if (_clustering.size() < current().n()) { - SCOPED_HEAP_PROFILER("Allocation"); - SCOPED_TIMER("Allocation"); - _clustering.resize(current().n()); - } + START_HEAP_PROFILER("Allocation"); + RECORD("clustering") StaticArray clustering(current().n(), static_array::noinit); + STOP_HEAP_PROFILER(); const bool free_allocated_memory = !keep_allocated_memory(); const NodeWeight total_node_weight = current().total_node_weight(); @@ -47,13 +45,15 @@ bool ClusteringCoarsener::coarsen() { compute_max_cluster_weight(_c_ctx, _p_ctx, prev_n, total_node_weight) ); _clustering_algorithm->set_desired_cluster_count(0); - _clustering_algorithm->compute_clustering(_clustering, current(), free_allocated_memory); + _clustering_algorithm->compute_clustering(clustering, current(), free_allocated_memory); STOP_TIMER(); STOP_HEAP_PROFILER(); START_HEAP_PROFILER("Contract graph"); auto coarsened = TIMED_SCOPE("Contract graph") { - return contract_clustering(current(), _clustering, _c_ctx.contraction, _contraction_m_ctx); + return contract_clustering( + current(), std::move(clustering), _c_ctx.contraction, _contraction_m_ctx + ); }; _hierarchy.push_back(std::move(coarsened)); STOP_HEAP_PROFILER(); diff --git a/kaminpar-shm/coarsening/cluster_coarsener.h b/kaminpar-shm/coarsening/cluster_coarsener.h index 833cca35..6f443a02 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.h +++ b/kaminpar-shm/coarsening/cluster_coarsener.h @@ -49,7 +49,6 @@ class ClusteringCoarsener : public Coarsener { const Graph 
*_input_graph; std::vector> _hierarchy; - StaticArray _clustering{}; std::unique_ptr _clustering_algorithm; contraction::MemoryContext _contraction_m_ctx{}; diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc index bdbf8095..7f84b4a6 100644 --- a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc +++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc @@ -41,13 +41,13 @@ class LPClusteringImpl final LPClusteringImpl(const CoarseningContext &c_ctx, Permutations &permutations) : Base(permutations), - ClusterWeightBase(c_ctx.clustering.lp.use_two_level_cluster_weight_vector), + ClusterWeightBase(c_ctx.clustering.lp.cluster_weights_structure), _lp_ctx(c_ctx.clustering.lp) { Base::set_max_degree(_lp_ctx.large_degree_threshold); Base::set_max_num_neighbors(_lp_ctx.max_num_neighbors); Base::set_use_two_phases(_lp_ctx.use_two_phases); - Base::set_second_phase_select_mode(_lp_ctx.second_phase_select_mode); - Base::set_second_phase_aggregation_mode(_lp_ctx.second_phase_aggregation_mode); + Base::set_second_phase_selection_strategy(_lp_ctx.second_phase_selection_strategy); + Base::set_second_phase_aggregation_strategy(_lp_ctx.second_phase_aggregation_strategy); Base::set_relabel_before_second_phase(_lp_ctx.relabel_before_second_phase); } @@ -323,6 +323,11 @@ class LPClusteringImplWrapper { _csr_core->set_relabel_before_second_phase(false); _compact_csr_core->set_relabel_before_second_phase(false); _compressed_core->set_relabel_before_second_phase(false); + + // Only use the initially small cluster weight vector for the first lp implementation + _csr_core->set_use_small_vector_initially(false); + _compact_csr_core->set_use_small_vector_initially(false); + _compressed_core->set_use_small_vector_initially(false); } private: diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc index 41f6f5b3..f56f7528 100644 --- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc @@ -277,18 +277,18 @@ std::unique_ptr contract_clustering_buffered( std::unique_ptr contract_clustering_buffered( const Graph &graph, - StaticArray &clustering, + StaticArray clustering, const ContractionCoarseningContext &con_ctx, MemoryContext &m_ctx ) { if (con_ctx.use_compact_mapping) { - auto [c_n, mapping] = compute_mapping(graph, clustering, m_ctx); + auto [c_n, mapping] = compute_mapping(graph, std::move(clustering), m_ctx); fill_cluster_buckets(c_n, graph, mapping, m_ctx.buckets_index, m_ctx.buckets); return graph.reified([&](auto &graph) { return contract_clustering_buffered(graph, c_n, std::move(mapping), con_ctx, m_ctx); }); } else { - auto [c_n, mapping] = compute_mapping(graph, clustering, m_ctx); + auto [c_n, mapping] = compute_mapping(graph, std::move(clustering), m_ctx); fill_cluster_buckets(c_n, graph, mapping, m_ctx.buckets_index, m_ctx.buckets); return graph.reified([&](auto &graph) { return contract_clustering_buffered(graph, c_n, std::move(mapping), con_ctx, m_ctx); diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h index 78937eb3..a41da0ce 100644 --- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h +++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h @@ -18,7 +18,7 @@ namespace kaminpar::shm::contraction { std::unique_ptr 
diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h
index 78937eb3..a41da0ce 100644
--- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h
+++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h
@@ -18,7 +18,7 @@ namespace kaminpar::shm::contraction {
 
 std::unique_ptr<CoarseGraph> contract_clustering_buffered(
     const Graph &graph,
-    StaticArray<NodeID> &clustering,
+    StaticArray<NodeID> clustering,
     const ContractionCoarseningContext &con_ctx,
     MemoryContext &m_ctx
 );
diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/cluster_contraction.cc
index 5b68379d..50d6f050 100644
--- a/kaminpar-shm/coarsening/contraction/cluster_contraction.cc
+++ b/kaminpar-shm/coarsening/contraction/cluster_contraction.cc
@@ -25,27 +25,27 @@ namespace kaminpar::shm {
 using namespace contraction;
 
 std::unique_ptr<CoarseGraph> contract_clustering(
-    const Graph &graph, StaticArray<NodeID> &clustering, const ContractionCoarseningContext &con_ctx
+    const Graph &graph, StaticArray<NodeID> clustering, const ContractionCoarseningContext &con_ctx
 ) {
   MemoryContext m_ctx;
-  return contract_clustering(graph, clustering, con_ctx, m_ctx);
+  return contract_clustering(graph, std::move(clustering), con_ctx, m_ctx);
 }
 
 std::unique_ptr<CoarseGraph> contract_clustering(
     const Graph &graph,
-    StaticArray<NodeID> &clustering,
+    StaticArray<NodeID> clustering,
     const ContractionCoarseningContext &con_ctx,
     MemoryContext &m_ctx
 ) {
   switch (con_ctx.mode) {
   case ContractionMode::BUFFERED:
-    return contract_clustering_buffered(graph, clustering, con_ctx, m_ctx);
+    return contract_clustering_buffered(graph, std::move(clustering), con_ctx, m_ctx);
   case ContractionMode::BUFFERED_LEGACY:
-    return contract_clustering_buffered_legacy(graph, clustering, con_ctx, m_ctx);
+    return contract_clustering_buffered_legacy(graph, std::move(clustering), con_ctx, m_ctx);
   case ContractionMode::UNBUFFERED:
-    return contract_clustering_unbuffered(graph, clustering, con_ctx, m_ctx);
+    return contract_clustering_unbuffered(graph, std::move(clustering), con_ctx, m_ctx);
   case ContractionMode::UNBUFFERED_NAIVE:
-    return contract_clustering_unbuffered_naive(graph, clustering, con_ctx, m_ctx);
+    return contract_clustering_unbuffered_naive(graph, std::move(clustering), con_ctx, m_ctx);
   }
 
   __builtin_unreachable();
diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction.h b/kaminpar-shm/coarsening/contraction/cluster_contraction.h
index b119785c..65af9328 100644
--- a/kaminpar-shm/coarsening/contraction/cluster_contraction.h
+++ b/kaminpar-shm/coarsening/contraction/cluster_contraction.h
@@ -41,12 +41,12 @@ struct MemoryContext {
 } // namespace contraction
 
 std::unique_ptr<CoarseGraph> contract_clustering(
-    const Graph &graph, StaticArray<NodeID> &clustering, const ContractionCoarseningContext &con_ctx
+    const Graph &graph, StaticArray<NodeID> clustering, const ContractionCoarseningContext &con_ctx
 );
 
 std::unique_ptr<CoarseGraph> contract_clustering(
     const Graph &graph,
-    StaticArray<NodeID> &clustering,
+    StaticArray<NodeID> clustering,
     const ContractionCoarseningContext &con_ctx,
     contraction::MemoryContext &m_ctx
 );
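// ----------------------------------------------------------------------------
// Aside (not part of the patch): the preprocessing changes below keep the
// usual three-step compaction pipeline -- mark every non-empty cluster,
// prefix-sum the marks into dense coarse IDs, then relabel each node -- but
// now write the result back into the clustering array itself, saving the
// separate "mapping" allocation. A hedged sequential sketch of that idea with
// hypothetical names (the patched code runs these steps in parallel with
// relaxed atomics):
#include <cstdint>
#include <numeric>
#include <vector>

using NodeID = std::uint32_t;

inline std::vector<NodeID> compact_cluster_ids(std::vector<NodeID> clustering) {
  const std::size_t n = clustering.size();

  // Step 1: mark every cluster ID with at least one member (cluster IDs are
  // assumed to be < n, as in the patched code).
  std::vector<NodeID> leader_mapping(n, 0);
  for (std::size_t u = 0; u < n; ++u) {
    leader_mapping[clustering[u]] = 1;
  }

  // Step 2: inclusive prefix sum; leader_mapping[c] - 1 is now the dense ID of
  // cluster c, and leader_mapping[n - 1] is the number of coarse nodes.
  std::partial_sum(leader_mapping.begin(), leader_mapping.end(), leader_mapping.begin());

  // Step 3: relabel in place, reusing the clustering array as the mapping.
  for (std::size_t u = 0; u < n; ++u) {
    clustering[u] = leader_mapping[clustering[u]] - 1;
  }

  return clustering;
}
// ----------------------------------------------------------------------------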
diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc b/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc
index a5fbec31..b32ba773 100644
--- a/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc
+++ b/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc
@@ -21,50 +21,40 @@ namespace kaminpar::shm::contraction {
 void fill_leader_mapping(
     const Graph &graph, const StaticArray<NodeID> &clustering, StaticArray<NodeID> &leader_mapping
 ) {
-  START_TIMER("Allocation");
-  if (leader_mapping.size() < graph.n()) {
-    leader_mapping.resize(graph.n());
-  }
-  STOP_TIMER();
-
-  RECORD("leader_mapping");
-  RECORD_LOCAL_DATA_STRUCT("StaticArray<NodeID>", leader_mapping.size() * sizeof(NodeID));
+  TIMED_SCOPE("Allocation") {
+    if (leader_mapping.size() < graph.n()) {
+      RECORD("leader_mapping") leader_mapping.resize(graph.n(), static_array::noinit);
+      RECORD_LOCAL_DATA_STRUCT("StaticArray<NodeID>", leader_mapping.size() * sizeof(NodeID));
+    }
+  };
 
-  START_TIMER("Preprocessing");
-  graph.pfor_nodes([&](const NodeID u) { leader_mapping[u] = 0; });
-  graph.pfor_nodes([&](const NodeID u) {
-    __atomic_store_n(&leader_mapping[clustering[u]], 1, __ATOMIC_RELAXED);
-  });
-  parallel::prefix_sum(
-      leader_mapping.begin(), leader_mapping.begin() + graph.n(), leader_mapping.begin()
-  );
-  STOP_TIMER();
+  TIMED_SCOPE("Preprocessing") {
+    graph.pfor_nodes([&](const NodeID u) { leader_mapping[u] = 0; });
+    graph.pfor_nodes([&](const NodeID u) {
+      __atomic_store_n(&leader_mapping[clustering[u]], 1, __ATOMIC_RELAXED);
+    });
+    parallel::prefix_sum(
+        leader_mapping.begin(), leader_mapping.begin() + graph.n(), leader_mapping.begin()
+    );
+  };
 }
 
 template <> StaticArray<NodeID> compute_mapping(
-    const Graph &graph,
-    const StaticArray<NodeID> &clustering,
-    const StaticArray<NodeID> &leader_mapping
+    const Graph &graph, StaticArray<NodeID> clustering, const StaticArray<NodeID> &leader_mapping
 ) {
-  START_TIMER("Allocation");
-  RECORD("mapping") StaticArray<NodeID> mapping(graph.n());
-  STOP_TIMER();
-
   START_TIMER("Preprocessing");
   graph.pfor_nodes([&](const NodeID u) {
-    mapping[u] = __atomic_load_n(&leader_mapping[clustering[u]], __ATOMIC_RELAXED) - 1;
+    clustering[u] = __atomic_load_n(&leader_mapping[clustering[u]], __ATOMIC_RELAXED) - 1;
   });
   STOP_TIMER();
 
-  return mapping;
+  return std::move(clustering);
 }
 
 template <> CompactStaticArray<NodeID> compute_mapping(
-    const Graph &graph,
-    const StaticArray<NodeID> &clustering,
-    const StaticArray<NodeID> &leader_mapping
+    const Graph &graph, StaticArray<NodeID> clustering, const StaticArray<NodeID> &leader_mapping
 ) {
   const NodeID c_n = leader_mapping[graph.n() - 1];
 
@@ -83,25 +73,27 @@ CompactStaticArray<NodeID> compute_mapping(
 template