From c8ace57fe6d4ab3d0735d594cceeacbee09f0eae Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Tue, 11 Jun 2024 13:18:28 +0200 Subject: [PATCH 01/54] feat(kaminpar-dist): add graph compression --- apps/CMakeLists.txt | 4 +- apps/benchmarks/dist_contraction_benchmark.cc | 2 +- apps/dKaMinPar.cc | 22 +- apps/io/dist_parhip_parser.cc | 291 ++++++++ apps/io/dist_parhip_parser.h | 27 + kaminpar-cli/dkaminpar_arguments.cc | 11 + kaminpar-cli/dkaminpar_arguments.h | 2 + .../graph-compression/compressed_edges.h | 391 +++++++++++ .../compressed_edges_builder.h | 436 ++++++++++++ kaminpar-dist/algorithms/border_nodes.cc | 9 +- .../algorithms/greedy_node_coloring.cc | 27 +- .../algorithms/greedy_node_coloring.h | 188 +++++- kaminpar-dist/algorithms/independent_set.cc | 30 +- .../clustering/hem/hem_clusterer.cc | 16 +- .../clustering/lp/global_lp_clusterer.cc | 14 +- .../contraction/global_cluster_contraction.cc | 42 +- .../contraction/local_cluster_contraction.cc | 9 +- .../abstract_distributed_graph.h | 137 ++++ .../distributed_compressed_graph.cc | 154 +++++ .../distributed_compressed_graph.h | 555 +++++++++++++++ .../distributed_compressed_graph_builder.cc | 157 +++++ .../distributed_compressed_graph_builder.h | 70 ++ .../datastructures/distributed_csr_graph.cc | 163 +++++ .../datastructures/distributed_csr_graph.h | 632 ++++++++++++++++++ .../datastructures/distributed_graph.cc | 170 +---- .../datastructures/distributed_graph.h | 592 +++++++--------- .../distributed_partitioned_graph.h | 21 +- .../datastructures/ghost_node_mapper.h | 2 +- kaminpar-dist/debug.cc | 5 +- kaminpar-dist/distributed_label_propagation.h | 23 +- kaminpar-dist/dkaminpar.cc | 28 +- kaminpar-dist/dkaminpar.h | 7 + kaminpar-dist/graphutils/bfs_extractor.cc | 42 +- kaminpar-dist/graphutils/communication.h | 112 ++-- kaminpar-dist/graphutils/rearrangement.cc | 17 +- kaminpar-dist/graphutils/rearrangement.h | 12 +- kaminpar-dist/graphutils/replicator.cc | 39 +- .../graphutils/subgraph_extractor.cc 
| 12 +- kaminpar-dist/graphutils/synchronization.cc | 23 - kaminpar-dist/graphutils/synchronization.h | 20 +- .../mtkahypar_initial_partitioner.cc | 10 +- kaminpar-dist/metrics.cc | 4 +- .../refinement/adapters/mtkahypar_refiner.cc | 10 +- .../refinement/balancer/cluster_balancer.cc | 6 +- kaminpar-dist/refinement/balancer/clusters.cc | 32 +- kaminpar-dist/refinement/balancer/clusters.h | 8 +- .../refinement/balancer/node_balancer.cc | 17 +- kaminpar-dist/refinement/gain_calculator.h | 4 +- kaminpar-dist/refinement/jet/jet_refiner.cc | 4 +- kaminpar-dist/refinement/lp/clp_refiner.cc | 12 +- tests/CMakeLists.txt | 6 +- .../algorithms/greedy_node_coloring_test.cc | 8 +- tests/dist/algorithms/independent_set_test.cc | 4 +- .../distributed_compressed_graph_test.cc | 210 ++++++ tests/dist/distributed_graph_builder.h | 8 +- tests/dist/distributed_graph_factories.h | 127 +++- tests/dist/distributed_graph_helpers.h | 56 +- tests/dist/graphutils/block_extractor_test.cc | 22 +- tests/dist/graphutils/rearrangement_test.cc | 2 +- 59 files changed, 4206 insertions(+), 858 deletions(-) create mode 100644 apps/io/dist_parhip_parser.cc create mode 100644 apps/io/dist_parhip_parser.h create mode 100644 kaminpar-common/graph-compression/compressed_edges.h create mode 100644 kaminpar-common/graph-compression/compressed_edges_builder.h create mode 100644 kaminpar-dist/datastructures/abstract_distributed_graph.h create mode 100644 kaminpar-dist/datastructures/distributed_compressed_graph.cc create mode 100644 kaminpar-dist/datastructures/distributed_compressed_graph.h create mode 100644 kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc create mode 100644 kaminpar-dist/datastructures/distributed_compressed_graph_builder.h create mode 100644 kaminpar-dist/datastructures/distributed_csr_graph.cc create mode 100644 kaminpar-dist/datastructures/distributed_csr_graph.h create mode 100644 tests/dist/datastructures/distributed_compressed_graph_test.cc diff --git 
a/apps/CMakeLists.txt b/apps/CMakeLists.txt index bb829f39..b3be4645 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -39,7 +39,9 @@ add_shm_app(KaMinPar KaMinPar.cc) if (TARGET kaminpar_dist) add_dist_app(dKaMinPar dKaMinPar.cc) target_sources(dKaMinPar PRIVATE - ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_io.cc) + ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_io.cc + ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_parhip_parser.h + ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_parhip_parser.cc) target_link_libraries(dKaMinPar PRIVATE KaGen::KaGen) endif () diff --git a/apps/benchmarks/dist_contraction_benchmark.cc b/apps/benchmarks/dist_contraction_benchmark.cc index d3d129eb..fd0cb70b 100644 --- a/apps/benchmarks/dist_contraction_benchmark.cc +++ b/apps/benchmarks/dist_contraction_benchmark.cc @@ -14,7 +14,7 @@ #include #include -#include "kaminpar-dist/coarsening/contraction/cluster_contraction.h" +#include "kaminpar-dist/coarsening/contraction/global_cluster_contraction.h" #include "kaminpar-dist/context.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-dist/factories.h" diff --git a/apps/dKaMinPar.cc b/apps/dKaMinPar.cc index b68882a7..3e9c1735 100644 --- a/apps/dKaMinPar.cc +++ b/apps/dKaMinPar.cc @@ -17,6 +17,7 @@ #include "kaminpar-common/environment.h" #include "apps/io/dist_io.h" +#include "apps/io/dist_parhip_parser.h" using namespace kaminpar; using namespace kaminpar::dist; @@ -174,6 +175,17 @@ NodeID load_kagen_graph(const ApplicationContext &app, dKaMinPar &partitioner) { return graph.vertex_range.second - graph.vertex_range.first; } + +NodeID load_compressed_graph(const ApplicationContext &app, dKaMinPar &partitioner) { + DistributedGraph graph(std::make_unique( + io::parhip::compressed_read(app.graph_filename, false, MPI_COMM_WORLD) + )); + const NodeID n = graph.n(); + + partitioner.import_graph(std::move(graph)); + return n; +} + } // namespace int main(int argc, char *argv[]) { @@ -215,8 +227,14 @@ int main(int argc, char *argv[]) { 
partitioner.context().debug.graph_filename = app.graph_filename; partitioner.set_max_timer_depth(app.max_timer_depth); - // Load the graph via KaGen - const NodeID n = load_kagen_graph(app, partitioner); + // Load the graph via KaGen or via our graph compressor. + const NodeID n = [&] { + if (ctx.compression.enabled) { + return load_compressed_graph(app, partitioner); + } else { + return load_kagen_graph(app, partitioner); + } + }(); // Compute the partition std::vector partition(n); diff --git a/apps/io/dist_parhip_parser.cc b/apps/io/dist_parhip_parser.cc new file mode 100644 index 00000000..36bc3ab4 --- /dev/null +++ b/apps/io/dist_parhip_parser.cc @@ -0,0 +1,291 @@ +/******************************************************************************* + * Sequential and parallel ParHiP parser for distributed compressed graphs. + * + * @file: dist_parhip_parser.cc + * @author: Daniel Salwasser + * @date: 11.05.2024 + ******************************************************************************/ +#include "apps/io/dist_parhip_parser.h" + +#include +#include + +#include +#include +#include +#include + +#include "kaminpar-mpi/datatype.h" +#include "kaminpar-mpi/utils.h" + +#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" +#include "kaminpar-dist/datastructures/ghost_node_mapper.h" +#include "kaminpar-dist/dkaminpar.h" +#include "kaminpar-dist/graphutils/synchronization.h" + +#include "kaminpar-common/logger.h" + +namespace { + +class BinaryReaderException : public std::exception { +public: + BinaryReaderException(std::string msg) : _msg(std::move(msg)) {} + + [[nodiscard]] const char *what() const noexcept override { + return _msg.c_str(); + } + +private: + std::string _msg; +}; + +class BinaryReader { +public: + BinaryReader(const std::string &filename) { + _file = open(filename.c_str(), O_RDONLY); + if (_file == -1) { + throw BinaryReaderException("Cannot read the file that stores the graph"); + } + + struct stat file_info; + if 
(fstat(_file, &file_info) == -1) { + close(_file); + throw BinaryReaderException("Cannot determine the size of the file that stores the graph"); + } + + _length = static_cast(file_info.st_size); + _data = static_cast(mmap(nullptr, _length, PROT_READ, MAP_PRIVATE, _file, 0)); + if (_data == MAP_FAILED) { + close(_file); + throw BinaryReaderException("Cannot map the file that stores the graph"); + } + } + + ~BinaryReader() { + munmap(_data, _length); + close(_file); + } + + template [[nodiscard]] T read(std::size_t position) const { + return *reinterpret_cast(_data + position); + } + + template [[nodiscard]] T *fetch(std::size_t position) const { + return reinterpret_cast(_data + position); + } + +private: + int _file; + std::size_t _length; + std::uint8_t *_data; +}; + +struct ParhipHeader { + static constexpr std::uint64_t kSize = 3 * sizeof(std::uint64_t); + + bool has_edge_weights; + bool has_node_weights; + bool has_64_bit_edge_id; + bool has_64_bit_node_id; + bool has_64_bit_node_weight; + bool has_64_bit_edge_weight; + std::uint64_t num_nodes; + std::uint64_t num_edges; + + ParhipHeader(std::uint64_t version, std::uint64_t num_nodes, std::uint64_t num_edges) + : has_edge_weights((version & 1) == 0), + has_node_weights((version & 2) == 0), + has_64_bit_edge_id((version & 4) == 0), + has_64_bit_node_id((version & 8) == 0), + has_64_bit_node_weight((version & 16) == 0), + has_64_bit_edge_weight((version & 32) == 0), + num_nodes(num_nodes), + num_edges(num_edges) {} +}; + +} // namespace + +namespace kaminpar::dist::io::parhip { + +std::pair +compute_edge_range(const EdgeID num_edges, const mpi::PEID size, const mpi::PEID rank) { + const EdgeID chunk = num_edges / size; + const EdgeID rem = num_edges % size; + const EdgeID from = rank * chunk + std::min(rank, rem); + const EdgeID to = + std::min(from + ((static_cast(rank) < rem) ? 
chunk + 1 : chunk), num_edges); + return std::make_pair(from, to); +} + +template +NodeID find_node_by_edge( + const NodeID num_nodes, + const EdgeID num_edges, + const EdgeID edge, + Lambda &&fetch_adjacent_offset +) { + if (edge == 0) { + return 0; + } + + std::pair low = {0, 0}; + std::pair high = {num_nodes, num_edges - 1}; + while (high.first - low.first > 1) { + std::pair mid; + mid.first = (low.first + high.first) / 2; + mid.second = fetch_adjacent_offset(mid.first); + + if (mid.second < edge) { + low = mid; + } else { + high = mid; + } + } + + return high.first; +} + +DistributedCompressedGraph +compressed_read(const std::string &filename, const bool sorted, const MPI_Comm comm) { + BinaryReader reader(filename); + + const auto version = reader.read(0); + const auto num_nodes = reader.read(sizeof(std::uint64_t)); + const auto num_edges = reader.read(sizeof(std::uint64_t) * 2); + const ParhipHeader header(version, num_nodes, num_edges); + + std::size_t position = ParhipHeader::kSize; + + const EdgeID *raw_nodes = reader.fetch(position); + position += (header.num_nodes + 1) * sizeof(EdgeID); + + const NodeID *raw_edges = reader.fetch(position); + position += header.num_edges * sizeof(NodeID); + + const NodeWeight *raw_node_weights = reader.fetch(position); + position += header.num_nodes * sizeof(NodeWeight); + + const EdgeWeight *raw_edge_weights = reader.fetch(position); + + // Since the offsets stored in the (raw) node array of the binary are relative byte addresses + // into the binary itself, these offsets must be mapped to the actual edge IDs. 
+ const EdgeID nodes_offset_base = ParhipHeader::kSize + (header.num_nodes + 1) * sizeof(EdgeID); + const auto map_edge_offset = [&](const NodeID node) { + return (raw_nodes[node] - nodes_offset_base) / sizeof(NodeID); + }; + + const mpi::PEID size = mpi::get_comm_size(comm); + const mpi::PEID rank = mpi::get_comm_rank(comm); + + const auto [first_edge, last_edge] = compute_edge_range(num_edges, size, rank); + + const std::uint64_t first_node = + find_node_by_edge(num_nodes, num_edges, first_edge, map_edge_offset); + const std::uint64_t last_node = + find_node_by_edge(num_nodes, num_edges, last_edge, map_edge_offset); + + const NodeID num_local_nodes = last_node - first_node; + const EdgeID num_local_edges = map_edge_offset(last_node) - map_edge_offset(first_node); + + StaticArray node_distribution(size + 1); + node_distribution[rank + 1] = last_node; + MPI_Allgather( + MPI_IN_PLACE, + 0, + MPI_DATATYPE_NULL, + node_distribution.data() + 1, + 1, + mpi::type::get(), + comm + ); + + StaticArray edge_distribution(size + 1); + edge_distribution[rank] = num_local_edges; + MPI_Allgather( + MPI_IN_PLACE, + 1, + mpi::type::get(), + edge_distribution.data(), + 1, + mpi::type::get(), + comm + ); + std::exclusive_scan( + edge_distribution.begin(), + edge_distribution.end(), + edge_distribution.begin(), + static_cast(0) + ); + + graph::GhostNodeMapper mapper(rank, node_distribution); + DistributedCompressedGraphBuilder builder( + num_local_nodes, num_local_edges, header.has_node_weights, header.has_edge_weights, sorted + ); + + std::vector> neighbourhood; + for (NodeID u = first_node; u < last_node; ++u) { + const EdgeID offset = map_edge_offset(u); + const EdgeID next_offset = map_edge_offset(u + 1); + + const auto degree = static_cast(next_offset - offset); + for (NodeID i = 0; i < degree; ++i) { + const EdgeID e = offset + i; + + NodeID adjacent_node = raw_edges[e]; + if (adjacent_node >= first_node && adjacent_node < last_node) { + adjacent_node = adjacent_node - 
first_node; + } else { + adjacent_node = mapper.new_ghost_node(adjacent_node); + } + + EdgeWeight edge_weight; + if (header.has_edge_weights) [[unlikely]] { + edge_weight = raw_edge_weights[e]; + } else { + edge_weight = 1; + } + + neighbourhood.emplace_back(adjacent_node, edge_weight); + } + + builder.add_node(u - first_node, neighbourhood); + neighbourhood.clear(); + } + + StaticArray node_weights; + if (header.has_node_weights) { + node_weights.resize(num_local_nodes + mapper.next_ghost_node(), static_array::noinit); + + tbb::parallel_for(tbb::blocked_range(0, num_local_nodes), [&](const auto &r) { + for (NodeID u = r.begin(); u != r.end(); ++u) { + node_weights[u] = raw_node_weights[first_node + u]; + } + }); + } + + auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); + auto [nodes, edges, edge_weights] = builder.build(); + + DistributedCompressedGraph graph( + std::move(node_distribution), + std::move(edge_distribution), + std::move(nodes), + std::move(edges), + std::move(node_weights), + std::move(edge_weights), + std::move(ghost_owner), + std::move(ghost_to_global), + std::move(global_to_ghost), + sorted, + comm + ); + + // Fill in ghost node weights + if (header.has_node_weights) { + graph::synchronize_ghost_node_weights(graph); + } + + return graph; +} + +} // namespace kaminpar::dist::io::parhip diff --git a/apps/io/dist_parhip_parser.h b/apps/io/dist_parhip_parser.h new file mode 100644 index 00000000..7b1994db --- /dev/null +++ b/apps/io/dist_parhip_parser.h @@ -0,0 +1,27 @@ +/******************************************************************************* + * Sequential and parallel ParHiP parser for distributed compressed graphs. 
+ * + * @file: dist_parhip_parser.h + * @author: Daniel Salwasser + * @date: 11.05.2024 + ******************************************************************************/ +#pragma once + +#include + +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" + +namespace kaminpar::dist::io::parhip { + +/*! + * Reads and compresses a distributed graph that is stored in a file with ParHiP format. + * + * @param filename The name of the file to read. + * @param sorted Whether the nodes of the graph to read are stored in degree-buckets order. + * @param comm The group of processes that read and compress the distributed graph. + * @return The graph that is stored in the file. + */ +DistributedCompressedGraph +compressed_read(const std::string &filename, const bool sorted, const MPI_Comm comm); + +} // namespace kaminpar::dist::io::parhip diff --git a/kaminpar-cli/dkaminpar_arguments.cc b/kaminpar-cli/dkaminpar_arguments.cc index b635d243..30022f9a 100644 --- a/kaminpar-cli/dkaminpar_arguments.cc +++ b/kaminpar-cli/dkaminpar_arguments.cc @@ -45,6 +45,7 @@ void create_chunks_options(CLI::Option_group *cli, const std::string &prefix, Ch void create_all_options(CLI::App *app, Context &ctx) { create_partitioning_options(app, ctx); create_debug_options(app, ctx); + create_compression_options(app, ctx); create_coarsening_options(app, ctx); create_initial_partitioning_options(app, ctx); create_refinement_options(app, ctx); @@ -110,6 +111,16 @@ CLI::Option_group *create_debug_options(CLI::App *app, Context &ctx) { return debug; } +CLI::Option_group *create_compression_options(CLI::App *app, Context &ctx) { + auto *compression = app->add_option_group("Graph Compression"); + + compression->add_flag( + "-c,--compress", ctx.compression.enabled, "Whether to compress the input graph." 
+ ); + + return compression; +} + CLI::Option_group *create_initial_partitioning_options(CLI::App *app, Context &ctx) { auto *ip = app->add_option_group("Initial Partitioning"); diff --git a/kaminpar-cli/dkaminpar_arguments.h b/kaminpar-cli/dkaminpar_arguments.h index e43f19f8..27a84df5 100644 --- a/kaminpar-cli/dkaminpar_arguments.h +++ b/kaminpar-cli/dkaminpar_arguments.h @@ -20,6 +20,8 @@ CLI::Option_group *create_partitioning_options(CLI::App *app, Context &ctx); CLI::Option_group *create_debug_options(CLI::App *app, Context &ctx); +CLI::Option_group *create_compression_options(CLI::App *app, Context &ctx); + CLI::Option_group *create_initial_partitioning_options(CLI::App *app, Context &ctx); CLI::Option_group *create_refinement_options(CLI::App *app, Context &ctx); diff --git a/kaminpar-common/graph-compression/compressed_edges.h b/kaminpar-common/graph-compression/compressed_edges.h new file mode 100644 index 00000000..50ce0058 --- /dev/null +++ b/kaminpar-common/graph-compression/compressed_edges.h @@ -0,0 +1,391 @@ +#pragma once + +#include "kaminpar-common/constexpr_utils.h" +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/math.h" +#include "kaminpar-common/ranges.h" +#include "kaminpar-common/varint_codec.h" +#include "kaminpar-common/varint_run_length_codec.h" +#include "kaminpar-common/varint_stream_codec.h" + +namespace kaminpar { + +template class CompressedEdges { + static_assert(std::numeric_limits::is_integer); + static_assert(std::numeric_limits::is_integer); + +public: + using SignedID = std::int64_t; + +#ifdef KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING + /*! + * Whether high degree encoding is used. + */ + static constexpr bool kHighDegreeEncoding = true; +#else + /*! + * Whether high degree encoding is used. + */ + static constexpr bool kHighDegreeEncoding = false; +#endif + + /*! + * The minimum degree of a node to be considered high degree. + */ + static constexpr NodeID kHighDegreeThreshold = 10000; + + /*! 
+ * The length of a part when splitting the neighbourhood of a high degree + * node. + */ + static constexpr NodeID kHighDegreePartLength = 1000; + +#ifdef KAMINPAR_COMPRESSION_INTERVAL_ENCODING + /*! + * Whether interval encoding is used. + */ + static constexpr bool kIntervalEncoding = true; +#else + /*! + * Whether interval encoding is used. + */ + static constexpr bool kIntervalEncoding = false; +#endif + + /*! + * The minimum length of an interval to encode if interval encoding is used. + */ + static constexpr NodeID kIntervalLengthTreshold = 3; + +#ifdef KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING + /*! + * Whether run-length encoding is used. + */ + static constexpr bool kRunLengthEncoding = true; +#else + /*! + * Whether run-length encoding is used. + */ + static constexpr bool kRunLengthEncoding = false; +#endif + +#ifdef KAMINPAR_COMPRESSION_STREAM_ENCODING + /*! + * Whether stream encoding is used. + */ + static constexpr bool kStreamEncoding = true; +#else + /*! + * Whether stream encoding is used. + */ + static constexpr bool kStreamEncoding = false; +#endif + + static_assert( + !kRunLengthEncoding || !kStreamEncoding, + "Either run-length or stream encoding can be used for varints " + "but not both." + ); + +#ifdef KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION + /*! + * Whether the isolated nodes of the compressed graph are continuously stored + * at the end of the nodes array. + */ + static constexpr bool kIsolatedNodesSeparation = true; +#else + /*! + * Whether the isolated nodes of the compressed graph are continuously stored + * at the end of the nodes array. 
+ */ + static constexpr bool kIsolatedNodesSeparation = false; +#endif + + CompressedEdges(const EdgeID num_edges, StaticArray compressed_edges) + : _num_edges(num_edges), + _compressed_edges(std::move(compressed_edges)) {} + + CompressedEdges(const CompressedEdges &) = delete; + CompressedEdges &operator=(const CompressedEdges &) = delete; + + CompressedEdges(CompressedEdges &&) noexcept = default; + CompressedEdges &operator=(CompressedEdges &&) noexcept = default; + + [[nodiscard]] EdgeID num_edges() const { + return _num_edges; + } + + [[nodiscard]] NodeID + degree(const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset) const { + const std::uint8_t *data = _compressed_edges.data(); + + const std::uint8_t *node_data = data + edge_offset; + const std::uint8_t *next_node_data = data + next_edge_offset; + + const bool is_isolated_node = node_data == next_node_data; + if (is_isolated_node) { + return 0; + } + + const auto header = decode_header(node, node_data, next_node_data); + return std::get<1>(header); + } + + [[nodiscard]] IotaRange + incident_edges(const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset) const { + const std::uint8_t *data = _compressed_edges.data(); + + const std::uint8_t *node_data = data + edge_offset; + const std::uint8_t *next_node_data = data + next_edge_offset; + + const bool is_isolated_node = node_data == next_node_data; + if (is_isolated_node) { + return {0, 0}; + } + + const auto [first_edge, degree, _, __] = decode_header(node, node_data, next_node_data); + return {first_edge, first_edge + degree}; + } + + template + void decode_neighborhood( + const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset, Lambda &&l + ) const { + const std::uint8_t *data = _compressed_edges.data(); + + const std::uint8_t *node_data = data + edge_offset; + const std::uint8_t *next_node_data = data + next_edge_offset; + + const bool is_isolated_node = node_data == next_node_data; + if 
(is_isolated_node) { + return; + } + + const auto header = decode_header(node, node_data, next_node_data); + const auto &edge = std::get<0>(header); + const auto &degree = std::get<1>(header); + const auto &uses_intervals = std::get<2>(header); + const auto &len = std::get<3>(header); + + node_data += len; + + if constexpr (kHighDegreeEncoding) { + if (degree >= kHighDegreeThreshold) { + decode_parts(node_data, node, edge, degree, std::forward(l)); + return; + } + } + + invoke_indirect>( + std::forward(l), + [&](auto &&l2) { + decode_edges( + node_data, node, edge, degree, uses_intervals, std::forward(l2) + ); + } + ); + } + +private: + EdgeID _num_edges; + StaticArray _compressed_edges; + +private: + inline std::tuple decode_header( + const NodeID node, const std::uint8_t *node_data, const std::uint8_t *next_node_data + ) const { + const auto [first_edge, next_first_edge, uses_intervals, len] = [&] { + if constexpr (kIntervalEncoding) { + auto [first_edge, uses_intervals, len] = marked_varint_decode(node_data); + auto [next_first_edge, _, __] = marked_varint_decode(next_node_data); + + return std::make_tuple(first_edge, next_first_edge, uses_intervals, len); + } else { + auto [first_edge, len] = varint_decode(node_data); + auto [next_first_edge, _] = varint_decode(next_node_data); + + return std::make_tuple(first_edge, next_first_edge, false, len); + } + }(); + + if constexpr (kIsolatedNodesSeparation) { + const EdgeID ungapped_first_edge = first_edge + node; + const NodeID degree = static_cast(1 + next_first_edge - first_edge); + return std::make_tuple(ungapped_first_edge, degree, uses_intervals, len); + } else { + const NodeID degree = static_cast(next_first_edge - first_edge); + return std::make_tuple(first_edge, degree, uses_intervals, len); + } + } + + template + void decode_parts( + const std::uint8_t *data, + const NodeID node, + const EdgeID edge, + const NodeID degree, + Lambda &&l + ) const { + const NodeID part_count = math::div_ceil(degree, 
kHighDegreePartLength); + + const auto iterate_part = [&](const NodeID part) { + const NodeID part_offset = *((NodeID *)(data + sizeof(NodeID) * part)); + const std::uint8_t *part_data = data + part_offset; + + const NodeID part_count_m1 = part_count - 1; + const bool last_part = part == part_count_m1; + + const EdgeID part_edge = edge + kHighDegreePartLength * part; + const NodeID part_degree = + last_part ? (degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; + + return invoke_indirect2, bool>( + std::forward(l), + [&](auto &&l2) { + return decode_edges( + part_data, node, part_edge, part_degree, true, std::forward(l2) + ); + } + ); + }; + + if constexpr (parallel) { + tbb::parallel_for(0, part_count, std::forward(iterate_part)); + } else { + for (NodeID part = 0; part < part_count; ++part) { + const bool stop = iterate_part(part); + if (stop) { + return; + } + } + } + } + + template + bool decode_edges( + const std::uint8_t *data, + const NodeID node, + EdgeID edge, + const NodeID degree, + bool uses_intervals, + Lambda &&l + ) const { + const EdgeID max_edge = edge + degree; + + if constexpr (kIntervalEncoding) { + if (uses_intervals) { + const bool stop = decode_intervals(data, edge, std::forward(l)); + if (stop) { + return true; + } + + if (edge == max_edge) { + return false; + } + } + } + + return decode_gaps(data, node, edge, max_edge, std::forward(l)); + } + + template + bool decode_intervals(const std::uint8_t *&data, EdgeID &edge, Lambda &&l) const { + constexpr bool non_stoppable = std::is_void_v>; + + const NodeID interval_count = *((NodeID *)data); + data += sizeof(NodeID); + + NodeID previous_right_extreme = 2; + for (NodeID i = 0; i < interval_count; ++i) { + const auto [left_extreme_gap, left_extreme_gap_len] = varint_decode(data); + data += left_extreme_gap_len; + + const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); + data += interval_length_gap_len; + + const NodeID cur_left_extreme = 
left_extreme_gap + previous_right_extreme - 2; + const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; + previous_right_extreme = cur_left_extreme + cur_interval_len - 1; + + for (NodeID j = 0; j < cur_interval_len; ++j) { + if constexpr (non_stoppable) { + l(edge, cur_left_extreme + j); + } else { + const bool stop = l(edge, cur_left_extreme + j); + if (stop) { + return true; + } + } + + edge += 1; + } + } + + return false; + } + + template + bool decode_gaps( + const std::uint8_t *data, NodeID node, EdgeID &edge, const EdgeID max_edge, Lambda &&l + ) const { + constexpr bool non_stoppable = std::is_void_v>; + + const auto [first_gap, first_gap_len] = signed_varint_decode(data); + data += first_gap_len; + + const NodeID first_adjacent_node = static_cast(first_gap + node); + NodeID prev_adjacent_node = first_adjacent_node; + + if constexpr (non_stoppable) { + l(edge, first_adjacent_node); + } else { + const bool stop = l(edge, first_adjacent_node); + if (stop) { + return true; + } + } + edge += 1; + + const auto handle_gap = [&](const NodeID gap) { + const NodeID adjacent_node = gap + prev_adjacent_node + 1; + prev_adjacent_node = adjacent_node; + + if constexpr (non_stoppable) { + l(edge++, adjacent_node); + } else { + return l(edge++, adjacent_node); + } + }; + + if constexpr (kRunLengthEncoding) { + VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); + rl_decoder.decode(std::forward(handle_gap)); + } else if constexpr (kStreamEncoding) { + VarIntStreamDecoder sv_encoder(data, max_edge - edge); + sv_encoder.decode(std::forward(handle_gap)); + } else { + while (edge != max_edge) { + const auto [gap, gap_len] = varint_decode(data); + data += gap_len; + + const NodeID adjacent_node = gap + prev_adjacent_node + 1; + prev_adjacent_node = adjacent_node; + + if constexpr (non_stoppable) { + l(edge, adjacent_node); + } else { + const bool stop = l(edge, adjacent_node); + if (stop) { + return true; + } + } + + edge += 1; + } + } + + return 
false; + } +}; + +} // namespace kaminpar diff --git a/kaminpar-common/graph-compression/compressed_edges_builder.h b/kaminpar-common/graph-compression/compressed_edges_builder.h new file mode 100644 index 00000000..a31ac8ad --- /dev/null +++ b/kaminpar-common/graph-compression/compressed_edges_builder.h @@ -0,0 +1,436 @@ +#pragma once + +#include +#include +#include +#include + +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/graph-compression/compressed_edges.h" +#include "kaminpar-common/heap_profiler.h" + +namespace kaminpar { + +template class CompressedEdgesBuilder { + using CompressedEdges = kaminpar::CompressedEdges; + using SignedID = CompressedEdges::SignedID; + + static constexpr bool kHighDegreeEncoding = CompressedEdges::kHighDegreeEncoding; + static constexpr NodeID kHighDegreeThreshold = CompressedEdges::kHighDegreeThreshold; + static constexpr NodeID kHighDegreePartLength = CompressedEdges::kHighDegreePartLength; + static constexpr bool kIntervalEncoding = CompressedEdges::kIntervalEncoding; + static constexpr NodeID kIntervalLengthTreshold = CompressedEdges::kIntervalLengthTreshold; + static constexpr bool kRunLengthEncoding = CompressedEdges::kRunLengthEncoding; + static constexpr bool kStreamEncoding = CompressedEdges::kStreamEncoding; + static constexpr bool kIsolatedNodesSeparation = CompressedEdges::kIsolatedNodesSeparation; + + template + [[nodiscard]] static std::size_t + compressed_edge_array_max_size(const NodeID num_nodes, const EdgeID num_edges) { + std::size_t edge_id_width; + if constexpr (kActualNumEdges) { + if constexpr (kIntervalEncoding) { + edge_id_width = marked_varint_length(num_edges); + } else { + edge_id_width = varint_length(num_edges); + } + } else { + edge_id_width = varint_max_length(); + } + + std::size_t max_size = num_nodes * edge_id_width + num_edges * varint_length(num_nodes); + + if constexpr (kHighDegreeEncoding) { + if constexpr (kIntervalEncoding) { + max_size += 2 * 
num_nodes * varint_max_length(); + } else { + max_size += num_nodes * varint_max_length(); + } + + max_size += (num_edges / kHighDegreePartLength) * varint_max_length(); + } + + return max_size; + } + +public: + /*! + * Constructs a new CompressedEdgesBuilder. + * + * @param num_nodes The number of nodes of the graph to compress. + * @param num_edges The number of edges of the graph to compress. + * @param has_edge_weights Whether the graph to compress has edge weights. + * @param edge_weights A reference to the edge weights of the compressed graph. + */ + CompressedEdgesBuilder( + const NodeID num_nodes, + const EdgeID num_edges, + bool has_edge_weights, + StaticArray &edge_weights + ) + : _has_edge_weights(has_edge_weights), + _edge_weights(edge_weights) { + const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); + _compressed_data_start = heap_profiler::overcommit_memory(max_size); + } + + /*! + * Constructs a new CompressedEdgesBuilder where the maximum degree specifies the number of edges + * that are compressed at once. + * + * @param num_nodes The number of nodes of the graph to compress. + * @param num_edges The number of edges of the graph to compress. + * @param max_degree The maximum number of edges that are compressed at once. + * @param has_edge_weights Whether the graph to compress has edge weights. + * @param edge_weights A reference to the edge weights of the compressed graph. + * @param edge_weights A reference to the edge weights of the compressed graph. 
+ */ + CompressedEdgesBuilder( + const NodeID num_nodes, + const EdgeID num_edges, + const NodeID max_degree, + bool has_edge_weights, + StaticArray &edge_weights + ) + : _has_edge_weights(has_edge_weights), + _edge_weights(edge_weights) { + const std::size_t max_size = compressed_edge_array_max_size(num_nodes, max_degree); + _compressed_data_start = heap_profiler::overcommit_memory(max_size); + } + + CompressedEdgesBuilder(const CompressedEdgesBuilder &) = delete; + CompressedEdgesBuilder &operator=(const CompressedEdgesBuilder &) = delete; + + CompressedEdgesBuilder(CompressedEdgesBuilder &&) noexcept = default; + + /*! + * Initializes/resets the builder. + * + * @param first_edge The first edge ID of the first node to be added. + */ + void init(const EdgeID first_edge) { + _compressed_data = _compressed_data_start.get(); + + _edge = first_edge; + _max_degree = 0; + _total_edge_weight = 0; + + _num_high_degree_nodes = 0; + _num_high_degree_parts = 0; + _num_interval_nodes = 0; + _num_intervals = 0; + } + + /*! + * Adds the neighborhood of a node. Note that the neighbourhood vector is modified. + * + * @param node The node whose neighborhood to add. + * @param neighbourhood The neighbourhood of the node to add. + * @return The offset into the compressed edge array of the node. + */ + EdgeID add(const NodeID node, std::vector> &neighbourhood) { + // The offset into the compressed edge array of the start of the neighbourhood. + const auto offset = static_cast(_compressed_data - _compressed_data_start.get()); + + const NodeID degree = neighbourhood.size(); + if (degree == 0) { + return offset; + } + + _max_degree = std::max(_max_degree, degree); + + // Store a pointer to the first byte of the first edge of this neighborhood. This byte encodes + // in one of its bits whether interval encoding is used for this node, i.e., whether the nodes + // has intervals in its neighbourhood. 
+ std::uint8_t *marked_byte = _compressed_data; + + // Store only the first edge for the source node. The degree can be obtained by determining the + // difference between the first edge ids of a node and the next node. Additionally, store the + // first edge as a gap when the isolated nodes are continuously stored at the end of the nodes + // array. + const EdgeID first_edge = _edge; + if constexpr (kIntervalEncoding) { + _compressed_data += marked_varint_encode(first_edge, false, _compressed_data); + } else { + _compressed_data += varint_encode(first_edge, _compressed_data); + } + + // Only increment the edge if edge weights are not stored as otherwise the edge is + // incremented with each edge weight being added. + if (!_has_edge_weights) { + _edge += degree; + } + + // Sort the adjacent nodes in ascending order. + std::sort(neighbourhood.begin(), neighbourhood.end(), [](const auto &a, const auto &b) { + return a.first < b.first; + }); + + // If high-degree encoding is used then split the neighborhood if the degree crosses a + // threshold. The neighborhood is split into equally sized parts (except possibly the last part) + // and each part is encoded independently. Furthermore, the offset at which the part is encoded + // is also stored. + if constexpr (kHighDegreeEncoding) { + const bool split_neighbourhood = degree >= kHighDegreeThreshold; + + if (split_neighbourhood) { + const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); + const NodeID last_part_length = ((degree % kHighDegreePartLength) == 0) + ? kHighDegreePartLength + : (degree % kHighDegreePartLength); + + uint8_t *part_ptr = _compressed_data; + _compressed_data += sizeof(NodeID) * part_count; + + for (NodeID i = 0; i < part_count; ++i) { + const bool last_part = (i + 1) == part_count; + const NodeID part_length = last_part ? 
last_part_length : kHighDegreePartLength; + + auto part_begin = neighbourhood.begin() + i * kHighDegreePartLength; + auto part_end = part_begin + part_length; + + std::uint8_t *cur_part_ptr = part_ptr + sizeof(NodeID) * i; + *((NodeID *)cur_part_ptr) = static_cast(_compressed_data - part_ptr); + + std::span> part_neighbourhood(part_begin, part_end); + add_edges(node, nullptr, part_neighbourhood); + } + + _num_high_degree_nodes += 1; + _num_high_degree_parts += part_count; + return offset; + } + } + + add_edges(node, marked_byte, std::forward(neighbourhood)); + return offset; + } + + /*! + * Returns the number of bytes that the compressed data of the added neighborhoods take up. + * + * @return The number of bytes that the compressed data of the added neighborhoods take up. + */ + [[nodiscard]] std::size_t size() const { + return static_cast(_compressed_data - _compressed_data_start.get()); + } + + /*! + * Returns a pointer to the start of the compressed data. + * + * @return A pointer to the start of the compressed data. + */ + [[nodiscard]] const std::uint8_t *compressed_data() const { + return _compressed_data_start.get(); + } + + /*! + * Returns ownership of the compressed data + * + * @return Ownership of the compressed data. 
+ */ + [[nodiscard]] heap_profiler::unique_ptr take_compressed_data() { + return std::move(_compressed_data_start); + } + + [[nodiscard]] std::size_t max_degree() const { + return _max_degree; + } + + [[nodiscard]] std::int64_t total_edge_weight() const { + return _total_edge_weight; + } + + [[nodiscard]] std::size_t num_high_degree_nodes() const { + return _num_high_degree_nodes; + } + + [[nodiscard]] std::size_t num_high_degree_parts() const { + return _num_high_degree_parts; + } + + [[nodiscard]] std::size_t num_interval_nodes() const { + return _num_interval_nodes; + } + + [[nodiscard]] std::size_t num_intervals() const { + return _num_intervals; + } + +private: + heap_profiler::unique_ptr _compressed_data_start; + std::uint8_t *_compressed_data; + + bool _has_edge_weights; + StaticArray &_edge_weights; + + EdgeID _edge; + NodeID _max_degree; + EdgeWeight _total_edge_weight; + + // Graph compression statistics + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; + +private: + template + void add_edges(const NodeID node, std::uint8_t *marked_byte, Container &&neighbourhood) { + const auto store_edge_weight = [&](const EdgeWeight edge_weight) { + _edge_weights[_edge++] = edge_weight; + _total_edge_weight += edge_weight; + }; + + NodeID local_degree = neighbourhood.size(); + + // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at + // least kIntervalLengthTreshold. Instead of storing all nodes, only encode the left extreme i + // and the length j - i + 1. Left extremes are stored using + // the differences between each left extreme and the previous right + // extreme minus 2 (because there must be at least one integer between the end of an interval + // and the beginning of the next one), except the first left extreme, which is stored directly. 
+ // The lengths are decremented by kIntervalLengthTreshold, the minimum length of an interval. + if constexpr (kIntervalEncoding) { + NodeID interval_count = 0; + + // Save the pointer to the interval count and skip the amount of bytes needed to store the + // interval count as we can only determine the amount of intervals after finding all of + // them. + std::uint8_t *interval_count_ptr = _compressed_data; + _compressed_data += sizeof(NodeID); + + if (local_degree >= kIntervalLengthTreshold) { + NodeID interval_len = 1; + NodeID previous_right_extreme = 2; + NodeID prev_adjacent_node = (*neighbourhood.begin()).first; + + for (auto iter = neighbourhood.begin() + 1; iter != neighbourhood.end(); ++iter) { + const NodeID adjacent_node = (*iter).first; + + if (prev_adjacent_node + 1 == adjacent_node) { + interval_len++; + + // The interval ends if there are no more nodes or the next node is not the increment of + // the current node. + if (iter + 1 == neighbourhood.end() || (*(iter + 1)).first != adjacent_node + 1) { + if (interval_len >= kIntervalLengthTreshold) { + const NodeID left_extreme = adjacent_node + 1 - interval_len; + const NodeID left_extreme_gap = left_extreme + 2 - previous_right_extreme; + const NodeID interval_length_gap = interval_len - kIntervalLengthTreshold; + + _compressed_data += varint_encode(left_extreme_gap, _compressed_data); + _compressed_data += varint_encode(interval_length_gap, _compressed_data); + + for (NodeID i = 0; i < interval_len; ++i) { + std::pair &incident_edge = *(iter + 1 + i - interval_len); + + // Set the adjacent node to a special value, which indicates for the gap encoder + // that the node has been encoded through an interval. 
+ incident_edge.first = std::numeric_limits::max(); + + if (_has_edge_weights) { + store_edge_weight(incident_edge.second); + } + } + + previous_right_extreme = adjacent_node; + + local_degree -= interval_len; + interval_count += 1; + } + + interval_len = 1; + } + } + + prev_adjacent_node = adjacent_node; + } + } + + // If intervals have been encoded store the interval count and set the bit in the marked byte + // indicating that interval encoding has been used for the neighbourhood if the marked byte is + // given. Otherwise, fix the amount of bytes stored as we don't store the interval count if no + // intervals have been encoded. + if (marked_byte == nullptr) { + *((NodeID *)interval_count_ptr) = interval_count; + } else if (interval_count > 0) { + *((NodeID *)interval_count_ptr) = interval_count; + *marked_byte |= 0b01000000; + } else { + _compressed_data -= sizeof(NodeID); + } + + if (interval_count > 0) { + _num_interval_nodes += 1; + _num_intervals += interval_count; + } + + // If all incident edges have been compressed using + // intervals then gap encoding cannot be applied. + if (local_degree == 0) { + return; + } + } + + // Store the remaining adjacent nodes using gap + // encoding. That is, instead of directly storing the nodes v_1, v_2, ..., v_{k - 1}, v_k, store + // the gaps v_1 - u, v_2 - v_1 - 1, ..., v_k - v_{k - 1} - 1 between the nodes, where u is the + // source node. Note that all gaps except the first one have to be non-negative as we sorted the + // nodes in ascending order. Thus, only for the first gap the sign is additionally stored. + auto iter = neighbourhood.begin(); + + // Go to the first adjacent node that has not been encoded through an interval. 
+ if constexpr (kIntervalEncoding) { + while ((*iter).first == std::numeric_limits::max()) { + ++iter; + } + } + + const auto [first_adjacent_node, first_edge_weight] = *iter++; + const SignedID first_gap = first_adjacent_node - static_cast(node); + _compressed_data += signed_varint_encode(first_gap, _compressed_data); + + if (_has_edge_weights) { + store_edge_weight(first_edge_weight); + } + + VarIntRunLengthEncoder rl_encoder(_compressed_data); + VarIntStreamEncoder sv_encoder(_compressed_data, local_degree - 1); + + NodeID prev_adjacent_node = first_adjacent_node; + while (iter != neighbourhood.end()) { + const auto [adjacent_node, edge_weight] = *iter++; + + // Skip the adjacent node since it has been encoded through an interval. + if constexpr (kIntervalEncoding) { + if (adjacent_node == std::numeric_limits::max()) { + continue; + } + } + + const NodeID gap = adjacent_node - prev_adjacent_node - 1; + if constexpr (kRunLengthEncoding) { + _compressed_data += rl_encoder.add(gap); + } else if constexpr (kStreamEncoding) { + _compressed_data += sv_encoder.add(gap); + } else { + _compressed_data += varint_encode(gap, _compressed_data); + } + + if (_has_edge_weights) { + store_edge_weight(edge_weight); + } + + prev_adjacent_node = adjacent_node; + } + + if constexpr (kRunLengthEncoding) { + rl_encoder.flush(); + } else if constexpr (kStreamEncoding) { + sv_encoder.flush(); + } + } +}; + +} // namespace kaminpar diff --git a/kaminpar-dist/algorithms/border_nodes.cc b/kaminpar-dist/algorithms/border_nodes.cc index f9dfede7..f35387e5 100644 --- a/kaminpar-dist/algorithms/border_nodes.cc +++ b/kaminpar-dist/algorithms/border_nodes.cc @@ -18,12 +18,15 @@ std::vector find_border_nodes(const DistributedPartitionedGraph &p_graph for (const NodeID u : p_graph.nodes()) { const BlockID bu = p_graph.block(u); - for (const auto [e, v] : p_graph.neighbors(u)) { + + p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (p_graph.block(v) != bu) { border_nodes.push_back(u); 
- break; + return true; } - } + + return false; + }); } return border_nodes; diff --git a/kaminpar-dist/algorithms/greedy_node_coloring.cc b/kaminpar-dist/algorithms/greedy_node_coloring.cc index 0dd176c5..ebac098c 100644 --- a/kaminpar-dist/algorithms/greedy_node_coloring.cc +++ b/kaminpar-dist/algorithms/greedy_node_coloring.cc @@ -7,9 +7,10 @@ ******************************************************************************/ #include "kaminpar-dist/algorithms/greedy_node_coloring.h" +/* #include "kaminpar-mpi/wrapper.h" -#include "kaminpar-dist/datastructures/distributed_graph.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/graphutils/communication.h" #include "kaminpar-common/assert.h" @@ -26,9 +27,9 @@ namespace { SET_DEBUG(false); } -NoinitVector compute_node_coloring_sequentially( - const DistributedGraph &graph, const NodeID number_of_supersteps -) { +template +NoinitVector +compute_node_coloring_sequentially(const Graph &graph, const NodeID number_of_supersteps) { KASSERT(number_of_supersteps > 0u, "bad parameter", assert::light); SCOPED_TIMER("Compute greedy node coloring"); @@ -65,7 +66,7 @@ NoinitVector compute_node_coloring_sequentially( } bool is_interface_node = false; - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { is_interface_node = is_interface_node || graph.is_ghost_node(v); // @todo replace v < u with random numbers r(v) < r(u) @@ -74,7 +75,7 @@ NoinitVector compute_node_coloring_sequentially( graph.local_to_global_node(v)))) { incident_colors.set(coloring[v] - 1); } - } + }); if (coloring[u] == 0) { coloring[u] = incident_colors.first_unmarked_element() + 1; @@ -144,12 +145,20 @@ NoinitVector compute_node_coloring_sequentially( KASSERT( [&] { for (const NodeID u : graph.nodes()) { - for (const auto v : graph.adjacent_nodes(u)) { + bool fail = false; + + graph.adjacent_nodes(u, [&](const NodeID v) { if (coloring[u] == coloring[v]) { LOG_WARNING 
<< "bad color for node " << u << " with neighbor " << v << ": " << coloring[u]; - return false; + fail = true; } + + return fail; + }); + + if (fail) { + return false; } } return true; @@ -192,4 +201,6 @@ NoinitVector compute_node_coloring_sequentially( return coloring; } + } // namespace kaminpar::dist +*/ diff --git a/kaminpar-dist/algorithms/greedy_node_coloring.h b/kaminpar-dist/algorithms/greedy_node_coloring.h index f890ddbc..f3302d37 100644 --- a/kaminpar-dist/algorithms/greedy_node_coloring.h +++ b/kaminpar-dist/algorithms/greedy_node_coloring.h @@ -7,14 +7,198 @@ ******************************************************************************/ #pragma once -#include "kaminpar-dist/datastructures/distributed_graph.h" +#include "kaminpar-mpi/wrapper.h" + #include "kaminpar-dist/dkaminpar.h" +#include "kaminpar-dist/graphutils/communication.h" +#include "kaminpar-common/assert.h" +#include "kaminpar-common/datastructures/marker.h" #include "kaminpar-common/datastructures/noinit_vector.h" +#include "kaminpar-common/logger.h" +#include "kaminpar-common/math.h" +#include "kaminpar-common/parallel/algorithm.h" +#include "kaminpar-common/ranges.h" +#include "kaminpar-common/timer.h" namespace kaminpar::dist { +SET_DEBUG(false); + using ColorID = EdgeID; +template NoinitVector -compute_node_coloring_sequentially(const DistributedGraph &graph, NodeID number_of_supersteps); +compute_node_coloring_sequentially(const Graph &graph, const NodeID number_of_supersteps) { + KASSERT(number_of_supersteps > 0u, "bad parameter", assert::light); + SCOPED_TIMER("Compute greedy node coloring"); + + // Initialize coloring to 0 == no color picked yet + NoinitVector coloring(graph.total_n()); + graph.pfor_all_nodes([&](const NodeID u) { coloring[u] = 0; }); + + // Use max degree in the graph as an upper bound on the number of colors + // required + TransformedIotaRange degrees(static_cast(0), graph.n(), [&](const NodeID u) { + return graph.degree(u); + }); + const EdgeID max_degree = 
parallel::max_element(degrees.begin(), degrees.end()); + const ColorID max_colors = mpi::allreduce(max_degree, MPI_MAX, graph.communicator()) + 1; + + // Marker to keep track of the colors already incident to the current node + Marker<> incident_colors(max_colors); + + // Keep track of nodes that still need a color + NoinitVector active(graph.n()); + graph.pfor_nodes([&](const NodeID u) { active[u] = 1; }); + + bool converged; + do { + converged = true; + + for (NodeID superstep = 0; superstep < number_of_supersteps; ++superstep) { + const auto [from, to] = math::compute_local_range(graph.n(), number_of_supersteps, superstep); + + // Color all nodes in [from, to) + for (const NodeID u : graph.nodes(from, to)) { + if (!active[u]) { + continue; + } + + bool is_interface_node = false; + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + is_interface_node = is_interface_node || graph.is_ghost_node(v); + + // @todo replace v < u with random numbers r(v) < r(u) + if (coloring[v] != 0 && (coloring[u] == 0 || !(coloring[v] == coloring[u] && + graph.local_to_global_node(u) < + graph.local_to_global_node(v)))) { + incident_colors.set(coloring[v] - 1); + } + }); + + if (coloring[u] == 0) { + coloring[u] = incident_colors.first_unmarked_element() + 1; + DBGC(u == 156543 || u == 262712) << "setting " << u << " to " << coloring[u] << " A"; + if (!is_interface_node) { + active[u] = 0; + } + } else if (incident_colors.get(coloring[u] - 1)) { + coloring[u] = incident_colors.first_unmarked_element() + 1; + DBGC(u == 156543 || u == 262712 || graph.local_to_global_node(u) == 681015) + << "setting " << u << " to " << coloring[u] << " B, global " + << graph.local_to_global_node(u); + } else { + active[u] = 0; + } + + incident_colors.reset(); + } + + // Synchronize coloring of interface <-> ghost nodes + struct Message { + NodeID node; + ColorID color; + }; + + mpi::graph::sparse_alltoall_interface_to_pe( + graph, + from, + to, + [&](const NodeID u) { return active[u]; }, + 
[&](const NodeID u) -> Message { + DBGC(u == 156543) << "Sending " << u << " --> " << coloring[u]; + return {.node = u, .color = coloring[u]}; + }, + [&](const auto &recv_buffer, const PEID pe) { + converged &= recv_buffer.empty(); + tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { + const auto [local_node_on_pe, color] = recv_buffer[i]; + const GlobalNodeID global_node = + static_cast(graph.offset_n(pe) + local_node_on_pe); + const NodeID local_node = graph.global_to_local_node(global_node); + coloring[local_node] = color; + DBGC(local_node == 156543 || local_node == 262712) + << "setting " << local_node << " to " << coloring[local_node] << " C, global " + << graph.local_to_global_node(local_node); + }); + } + ); + } + } while (!mpi::allreduce(converged, MPI_LAND, graph.communicator())); + + // Check that all nodes have a color assigned (i.e., coloring[u] >= 1) + KASSERT( + [&] { + for (const NodeID u : graph.all_nodes()) { + if (coloring[u] == 0) { + return false; + } + } + return true; + }(), + "node coloring is incomplete", + assert::heavy + ); + + // Check that adjacent nodes have different colors + KASSERT( + [&] { + for (const NodeID u : graph.nodes()) { + bool fail = false; + + graph.adjacent_nodes(u, [&](const NodeID v) { + if (coloring[u] == coloring[v]) { + LOG_WARNING << "bad color for node " << u << " with neighbor " << v << ": " + << coloring[u]; + fail = true; + } + + return fail; + }); + + if (fail) { + return false; + } + } + return true; + }(), + "local node coloring is invalid", + assert::heavy + ); + + // Check that interface and ghost nodes have the same colors + KASSERT( + [&] { + struct Message { + GlobalNodeID node; + ColorID color; + }; + bool inconsistent = false; + mpi::graph::sparse_alltoall_interface_to_pe( + graph, + [&](const NodeID u) -> Message { + return {.node = graph.local_to_global_node(u), .color = coloring[u]}; + }, + [&](const auto &recv_buffer) { + tbb::parallel_for(0, recv_buffer.size(), [&](const 
std::size_t i) { + const auto [node, color] = recv_buffer[i]; + const NodeID local_node = graph.global_to_local_node(node); + if (coloring[local_node] != color) { + inconsistent = true; + } + }); + } + ); + return !inconsistent; + }(), + "global node coloring inconsistent", + assert::heavy + ); + + // Make colors start at 0 + tbb::parallel_for(0, graph.total_n(), [&](const NodeID u) { coloring[u] -= 1; }); + + return coloring; +} + } // namespace kaminpar::dist diff --git a/kaminpar-dist/algorithms/independent_set.cc b/kaminpar-dist/algorithms/independent_set.cc index 49cec483..82eccba8 100644 --- a/kaminpar-dist/algorithms/independent_set.cc +++ b/kaminpar-dist/algorithms/independent_set.cc @@ -61,20 +61,22 @@ find_independent_border_set(const DistributedPartitionedGraph &p_graph, const in return; // Not a border node } - const bool is_seed_node = std::all_of( - p_graph.adjacent_nodes(u).begin(), - p_graph.adjacent_nodes(u).end(), - [&](const NodeID v) { - // Compute score for ghost nodes lazy - if (score[v] < 0) { - const auto v_score = - compute_score(generator_ets.local(), p_graph.local_to_global_node(v), seed); - __atomic_store_n(&score[v], v_score, __ATOMIC_RELAXED); - } - - return score[u] < score[v]; - } - ); + bool is_seed_node = true; + p_graph.adjacent_nodes(u, [&](const NodeID v) { + // Compute score for ghost nodes lazy + if (score[v] < 0) { + const auto v_score = + compute_score(generator_ets.local(), p_graph.local_to_global_node(v), seed); + __atomic_store_n(&score[v], v_score, __ATOMIC_RELAXED); + } + + if (score[u] >= score[v]) { + is_seed_node = false; + return true; + } + + return false; + }); if (is_seed_node) { seed_nodes.push_back(u); diff --git a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc index 2f77d8a6..521e6502 100644 --- a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc @@ -243,28 +243,28 @@ void 
HEMClusterer::compute_local_matching( NodeID best_neighbor = 0; EdgeWeight best_weight = 0; - for (const auto [e, v] : _graph->neighbors(u)) { + _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { // v already matched? if (_matching[v] != kInvalidGlobalNodeID) { - continue; + return; } // v too heavy? const NodeWeight v_weight = _graph->node_weight(v); if (u_weight + v_weight > max_cluster_weight && !_ctx.ignore_weight_limit) { - continue; + return; } // Already found a better neighbor? const EdgeWeight e_weight = _graph->edge_weight(e); if (e_weight < best_weight) { - continue; + return; } // Match with v best_weight = e_weight; best_neighbor = v; - } + }); // If we found a good neighbor, try to match with it if (best_weight > 0) { @@ -401,9 +401,9 @@ void HEMClusterer::resolve_global_conflicts(const ColorID c) { auto add_node = [&](const NodeID u) { marked.reset(); - for (const auto &[e, v] : _graph->neighbors(u)) { + _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { if (!_graph->is_ghost_node(v)) { - continue; + return; } const PEID owner = _graph->ghost_owner(v); @@ -411,7 +411,7 @@ void HEMClusterer::resolve_global_conflicts(const ColorID c) { sync_msgs[owner].push_back({u, _matching[u]}); marked.set(owner); } - } + }); }; for (const NodeID seq_u : _graph->nodes(seq_from, seq_to)) { diff --git a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc index 91fcaab9..fdd01350 100644 --- a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc @@ -241,9 +241,9 @@ class GlobalLPClusteringImpl final bool interface_node = false; bool smallest = true; - for (const NodeID lv : _graph->adjacent_nodes(lu)) { + _graph->adjacent_nodes(lu, [&](const NodeID lv) { if (_graph->is_owned_node(lv)) { - continue; + return false; } interface_node = true; @@ -251,9 +251,11 @@ class GlobalLPClusteringImpl final const 
GlobalNodeID gv = _graph->local_to_global_node(lv); if (gv < gu) { smallest = false; - break; + return true; } - } + + return false; + }); if (interface_node && smallest) { _locked[lu] = 1; @@ -514,7 +516,9 @@ class GlobalLPClusteringImpl final from, to, [&](const NodeID lnode) { return _changed_label[lnode] != kInvalidGlobalNodeID; }, - [&](const NodeID lnode) -> ChangedLabelMessage { return {lnode, cluster(lnode)}; }, + [&](const NodeID lnode) -> ChangedLabelMessage { + return {lnode, cluster(lnode)}; + }, [&](const auto &buffer, const PEID owner) { tbb::parallel_for(tbb::blocked_range(0, buffer.size()), [&](const auto &r) { auto &weight_delta_handle = _weight_delta_handles_ets.local(); diff --git a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc index f2e6c9b9..c61a8b57 100644 --- a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc +++ b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc @@ -159,7 +159,9 @@ class GlobalCoarseGraphImpl : public CoarseGraph { mpi::graph::sparse_alltoall_interface_to_pe( _f_graph, - [&](const NodeID lnode) -> GhostNodeLabel { return {lnode, f_partition[lnode]}; }, + [&](const NodeID lnode) -> GhostNodeLabel { + return {lnode, f_partition[lnode]}; + }, [&](const auto buffer, const PEID pe) { tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { const auto &[sender_lnode, block] = buffer[i]; @@ -236,8 +238,7 @@ StaticArray find_nonlocal_nodes( const GlobalNodeID gcluster = lnode_to_gcluster[lnode]; if (!graph.is_owned_global_node(gcluster)) { nonlocal_nodes[node_position_buffer[lnode]] = { - .u = gcluster, .weight = graph.node_weight(lnode) - }; + .u = gcluster, .weight = graph.node_weight(lnode)}; } }); @@ -257,12 +258,12 @@ StaticArray find_nonlocal_edges( NodeID nonlocal_neighbors_count = 0; if (!graph.is_owned_global_node(gcluster_u)) { - for (const auto [e, lnode_v] : graph.neighbors(lnode_u)) { + 
graph.neighbors(lnode_u, [&](const EdgeID e, const NodeID lnode_v) { const GlobalNodeID gcluster_v = lnode_to_gcluster[lnode_v]; if (gcluster_u != gcluster_v) { ++nonlocal_neighbors_count; } - } + }); } edge_position_buffer[lnode_u + 1] = nonlocal_neighbors_count; @@ -278,7 +279,7 @@ StaticArray find_nonlocal_edges( if (!graph.is_owned_global_node(gcluster_u)) { NodeID pos = edge_position_buffer[lnode_u]; - for (const auto [e, lnode_v] : graph.neighbors(lnode_u)) { + graph.neighbors(lnode_u, [&](const EdgeID e, const NodeID lnode_v) { const GlobalNodeID gcluster_v = lnode_to_gcluster[lnode_v]; if (gcluster_u != gcluster_v) { nonlocal_edges[pos] = { @@ -288,7 +289,7 @@ StaticArray find_nonlocal_edges( }; ++pos; } - } + }); } }); @@ -358,7 +359,9 @@ void update_ghost_node_weights(DistributedGraph &graph) { mpi::graph::sparse_alltoall_interface_to_pe( graph, - [&](const NodeID u) -> Message { return {u, graph.node_weight(u)}; }, + [&](const NodeID u) -> Message { + return {u, graph.node_weight(u)}; + }, [&](const auto buffer, const PEID pe) { tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { const auto &[local_node_on_other_pe, weight] = buffer[i]; @@ -553,8 +556,7 @@ MigrationResult migrate_elements( .sendcounts = std::move(sendcounts), .sdispls = std::move(sdispls), .recvcounts = std::move(recvcounts), - .rdispls = std::move(rdispls) - }; + .rdispls = std::move(rdispls)}; } MigrationResult @@ -946,7 +948,9 @@ void rebalance_cluster_placement( }; mpi::graph::sparse_alltoall_interface_to_pe( graph, - [&](const NodeID lnode) -> Message { return {lnode, lnode_to_gcluster[lnode]}; }, + [&](const NodeID lnode) -> Message { + return {lnode, lnode_to_gcluster[lnode]}; + }, [&](const auto buffer, const PEID pe) { tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { const auto &[their_lnode, new_gcluster] = buffer[i]; @@ -992,8 +996,8 @@ bool validate_clustering( const NodeID lnode = graph.global_to_local_node(gnode); if (lnode_to_gcluster[lnode] != 
gcluster) { LOG_WARNING << "Inconsistent cluster for local node " << lnode - << " (ghost node, global node ID " << gnode - << "): " << "the node is owned by PE " << pe + << " (ghost node, global node ID " << gnode << "): " + << "the node is owned by PE " << pe << ", which assigned the node to cluster " << gcluster << ", but our ghost node is assigned to cluster " << lnode_to_gcluster[lnode] << "; aborting"; @@ -1157,12 +1161,12 @@ std::unique_ptr contract_clustering( return; } - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { const GlobalNodeID gcluster_v = lnode_to_gcluster[v]; if (!graph.is_owned_global_node(gcluster_v)) { request_nonlocal_mapping(gcluster_v); } - } + }); }); }, [&] { @@ -1387,9 +1391,9 @@ std::unique_ptr contract_clustering( if (u < graph.n()) { c_u_weight += graph.node_weight(u); - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { handle_edge_to_lnode(graph.edge_weight(e), v); - } + }); } else { // Fix node weight later for (std::size_t index = u - graph.n(); @@ -1472,7 +1476,7 @@ std::unique_ptr contract_clustering( } }); - DistributedGraph c_graph( + DistributedGraph c_graph(std::make_unique( std::move(c_node_distribution), std::move(c_edge_distribution), std::move(c_nodes), @@ -1484,7 +1488,7 @@ std::unique_ptr contract_clustering( std::move(c_global_to_ghost), false, graph.communicator() - ); + )); STOP_TIMER(); update_ghost_node_weights(c_graph); diff --git a/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc index e06396f0..aaefcf38 100644 --- a/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc +++ b/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc @@ -12,6 +12,7 @@ #include "kaminpar-mpi/wrapper.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include 
"kaminpar-dist/datastructures/ghost_node_mapper.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-dist/graphutils/communication.h" @@ -196,12 +197,12 @@ contract_local_clustering(const DistributedGraph &graph, const StaticArray( std::move(c_node_distribution), std::move(c_edge_distribution), std::move(c_nodes), @@ -281,7 +282,7 @@ contract_local_clustering(const DistributedGraph &graph, const StaticArray(graph, std::move(c_graph), std::move(mapping)); }
diff --git a/kaminpar-dist/datastructures/abstract_distributed_graph.h b/kaminpar-dist/datastructures/abstract_distributed_graph.h
new file mode 100644
index 00000000..aa8de8e9
--- /dev/null
+++ b/kaminpar-dist/datastructures/abstract_distributed_graph.h
@@ -0,0 +1,137 @@
+/*******************************************************************************
+ * Abstract interface for a graph data structure.
+ *
+ * @file: abstract_distributed_graph.h
+ * @author: Daniel Salwasser
+ * @date: 06.06.2024
+ ******************************************************************************/
+#pragma once
+
+#include "kaminpar-dist/dkaminpar.h"
+
+#include "kaminpar-common/datastructures/static_array.h"
+#include "kaminpar-common/ranges.h"
+
+namespace kaminpar::dist {
+
+// Pure-virtual interface shared by the distributed graph representations.
+// Every query below is declared `= 0`; this class stores no data itself.
+// Instances are move-only: copying is deleted, moving is defaulted.
+class AbstractDistributedGraph {
+public:
+  // Data types used for this graph (re-exported from namespace dist so that
+  // implementations and generic code can name them through the graph type).
+  using NodeID = dist::NodeID;
+  using EdgeID = dist::EdgeID;
+  using GlobalNodeID = dist::GlobalNodeID;
+  using GlobalEdgeID = dist::GlobalEdgeID;
+  using NodeWeight = dist::NodeWeight;
+  using EdgeWeight = dist::EdgeWeight;
+  using GlobalNodeWeight = dist::GlobalNodeWeight;
+  using GlobalEdgeWeight = dist::GlobalEdgeWeight;
+
+  AbstractDistributedGraph() = default;
+
+  AbstractDistributedGraph(const AbstractDistributedGraph &) = delete;
+  AbstractDistributedGraph &operator=(const AbstractDistributedGraph &) = delete;
+
+  AbstractDistributedGraph(AbstractDistributedGraph &&) noexcept = default;
+  AbstractDistributedGraph &operator=(AbstractDistributedGraph &&) noexcept = default;
+
+  // Virtual so that implementations can be destroyed through a base pointer.
+  virtual ~AbstractDistributedGraph() = default;
+
+  // Size of the graph.
+  // The overloads without arguments refer to the calling PE, the overloads
+  // taking a PEID refer to the given PE.
+  [[nodiscard]] virtual GlobalNodeID global_n() const = 0;
+  [[nodiscard]] virtual GlobalEdgeID global_m() const = 0;
+
+  [[nodiscard]] virtual NodeID n() const = 0;
+  [[nodiscard]] virtual NodeID n(const PEID pe) const = 0;
+  [[nodiscard]] virtual NodeID ghost_n() const = 0;
+  [[nodiscard]] virtual NodeID total_n() const = 0;
+
+  [[nodiscard]] virtual EdgeID m() const = 0;
+  [[nodiscard]] virtual EdgeID m(const PEID pe) const = 0;
+
+  [[nodiscard]] virtual GlobalNodeID offset_n() const = 0;
+  [[nodiscard]] virtual GlobalNodeID offset_n(const PEID pe) const = 0;
+
+  [[nodiscard]] virtual GlobalEdgeID offset_m() const = 0;
+  [[nodiscard]] virtual GlobalEdgeID offset_m(const PEID pe) const = 0;
+
+  // Node and edge weights.
+  // The "global_" variants return global weight types or global maxima; the
+  // others are per-PE values.
+  [[nodiscard]] virtual bool is_node_weighted() const = 0;
+  [[nodiscard]] virtual NodeWeight node_weight(const NodeID u) const = 0;
+  [[nodiscard]] virtual NodeWeight max_node_weight() const = 0;
+  [[nodiscard]] virtual NodeWeight global_max_node_weight() const = 0;
+  [[nodiscard]] virtual NodeWeight total_node_weight() const = 0;
+  [[nodiscard]] virtual GlobalNodeWeight global_total_node_weight() const = 0;
+
+  [[nodiscard]] virtual bool is_edge_weighted() const = 0;
+  [[nodiscard]] virtual EdgeWeight edge_weight(const EdgeID e) const = 0;
+  [[nodiscard]] virtual EdgeWeight total_edge_weight() const = 0;
+  [[nodiscard]] virtual GlobalEdgeWeight global_total_edge_weight() const = 0;
+
+  // Node ownership (queries by global or by local node ID).
+  [[nodiscard]] virtual bool is_owned_global_node(const GlobalNodeID global_u) const = 0;
+  [[nodiscard]] virtual bool contains_global_node(const GlobalNodeID global_u) const = 0;
+  [[nodiscard]] virtual bool contains_local_node(const NodeID local_u) const = 0;
+
+  // Node type (owned vs. ghost) and translation between local IDs, global IDs
+  // and the local IDs used by another PE.
+  [[nodiscard]] virtual bool is_ghost_node(const NodeID u) const = 0;
+  [[nodiscard]] virtual bool is_owned_node(const NodeID u) const = 0;
+  [[nodiscard]] virtual PEID ghost_owner(const NodeID u) const = 0;
+  [[nodiscard]] virtual NodeID
+  map_remote_node(const NodeID their_lnode, const PEID owner) const = 0;
+  [[nodiscard]] virtual GlobalNodeID local_to_global_node(const NodeID local_u) const = 0;
+  [[nodiscard]] virtual NodeID global_to_local_node(const GlobalNodeID global_u) const = 0;
+
+  // Iterators for nodes / edges
+  [[nodiscard]] virtual IotaRange nodes(const NodeID from, const NodeID to) const = 0;
+  [[nodiscard]] virtual IotaRange nodes() const = 0;
+  [[nodiscard]] virtual IotaRange ghost_nodes() const = 0;
+  [[nodiscard]] virtual IotaRange all_nodes() const = 0;
+
+  [[nodiscard]] virtual IotaRange edges() const = 0;
+  [[nodiscard]] virtual IotaRange incident_edges(const NodeID u) const = 0;
+
+  // Access methods
+  [[nodiscard]] virtual NodeID degree(const NodeID u) const = 0;
+
+  [[nodiscard]] virtual const StaticArray &node_weights() const = 0;
+  [[nodiscard]] virtual const StaticArray &edge_weights() const = 0;
+
+  // The only non-const weight mutator of the interface.
+  virtual void set_ghost_node_weight(const NodeID ghost_node, const NodeWeight weight) = 0;
+
+  [[nodiscard]] virtual const StaticArray &node_distribution() const = 0;
+  [[nodiscard]] virtual GlobalNodeID node_distribution(const PEID pe) const = 0;
+  [[nodiscard]] virtual PEID find_owner_of_global_node(const GlobalNodeID u) const = 0;
+
+  [[nodiscard]] virtual const StaticArray &edge_distribution() const = 0;
+  [[nodiscard]] virtual GlobalEdgeID edge_distribution(const PEID pe) const = 0;
+
+  // Cached inter-PE metrics
+  [[nodiscard]] virtual EdgeID edge_cut_to_pe(const PEID pe) const = 0;
+  [[nodiscard]] virtual EdgeID comm_vol_to_pe(const PEID pe) const = 0;
+  [[nodiscard]] virtual MPI_Comm communicator() const = 0;
+
+  // High degree classification.
+  // init_high_degree_info() is const: implementations are expected to cache
+  // the result in mutable state.
+  virtual void init_high_degree_info(const EdgeID high_degree_threshold) const = 0;
+  [[nodiscard]] virtual bool is_high_degree_node(const NodeID node) const = 0;
+
+  // Graph permutation
+  virtual void set_permutation(StaticArray permutation) = 0;
+  [[nodiscard]] virtual bool permuted() const = 0;
+  [[nodiscard]] virtual NodeID map_original_node(const NodeID u) const = 0;
+
+  // Degree buckets
+  [[nodiscard]] virtual bool sorted() const = 0;
+  [[nodiscard]] virtual std::size_t number_of_buckets() const = 0;
+  [[nodiscard]] virtual std::size_t bucket_size(const std::size_t bucket) const = 0;
+  [[nodiscard]] virtual NodeID first_node_in_bucket(const std::size_t bucket) const = 0;
+  [[nodiscard]] virtual NodeID first_invalid_node_in_bucket(const std::size_t bucket) const = 0;
+
+  // Graph permutation by coloring
+  virtual void set_color_sorted(StaticArray color_sizes) = 0;
+  [[nodiscard]] virtual bool color_sorted() const = 0;
+  [[nodiscard]] virtual std::size_t number_of_colors() const = 0;
+  [[nodiscard]] virtual NodeID color_size(const std::size_t c) const = 0;
+  [[nodiscard]] virtual const StaticArray &get_color_sizes() const = 0;
+};
+
+} // namespace kaminpar::dist
diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.cc b/kaminpar-dist/datastructures/distributed_compressed_graph.cc
new file mode 100644
index 00000000..1c542f52
--- /dev/null
+++ b/kaminpar-dist/datastructures/distributed_compressed_graph.cc
@@ -0,0 +1,154 @@
+/*******************************************************************************
+ * Static distributed compressed graph data structure.
+ *
+ * @file: distributed_compressed_graph.cc
+ * @author: Daniel Salwasser
+ * @date: 07.06.2024
+ ******************************************************************************/
+#include "kaminpar-dist/datastructures/distributed_compressed_graph.h"
+
+#include "kaminpar-dist/graphutils/communication.h"
+
+#include "kaminpar-common/parallel/vector_ets.h"
+
+namespace kaminpar::dist {
+
+// Caches, for each ghost node, whether its degree on the owning PE exceeds
+// `high_degree_threshold`. Owned nodes are not cached; is_high_degree_node()
+// evaluates them on demand.
+//
+// NOTE(review): the function returns immediately when the requested threshold
+// equals the cached one. The cached threshold is default-initialized to 0 in
+// the class, so a first call with threshold 0 appears to return before the
+// flag array is resized -- confirm this is intended.
+void DistributedCompressedGraph::init_high_degree_info(const EdgeID high_degree_threshold) const {
+  if (_high_degree_threshold == high_degree_threshold) {
+    return;
+  }
+
+  _high_degree_threshold = high_degree_threshold;
+  _high_degree_ghost_node.resize(ghost_n());
+
+  // One message per interface node: its local ID on the sending PE plus the
+  // high-degree flag computed there.
+  struct Message {
+    NodeID node;
+    std::uint8_t high_degree;
+  };
+
+  mpi::graph::sparse_alltoall_interface_to_pe(
+      *this,
+      [&](const NodeID u) -> Message {
+        return {.node = u, .high_degree = degree(u) > _high_degree_threshold};
+      },
+      [&](const auto &recv_buffer, const PEID pe) {
+        tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) {
+          const auto &[remote_node, high_degree] = recv_buffer[i];
+          // Translate the sender's local ID into our ghost ID; the flag array
+          // is indexed relative to the first ghost node, hence "- n()".
+          const NodeID local_node = map_remote_node(remote_node, pe);
+          _high_degree_ghost_node[local_node - n()] = high_degree;
+        });
+      }
+  );
+}
+
+namespace {
+// Maps a degree to its bucket: 0 for degree 0, floor(log2(degree)) + 1
+// otherwise.
+inline EdgeID degree_bucket(const EdgeID degree) {
+  return (degree == 0) ? 0 : math::floor_log2(degree) + 1;
+}
+} // namespace
+
+// Fills `_buckets` and `_number_of_buckets`. Expects `_buckets` to be all
+// zero on entry. After the final prefix sum, `_buckets[b]` is the first node
+// of bucket b and `_buckets[b + 1]` is one past its last node.
+void DistributedCompressedGraph::init_degree_buckets() {
+  KASSERT(std::all_of(_buckets.begin(), _buckets.end(), [](const auto n) { return n == 0; }));
+
+  if (_sorted) {
+    // Count the nodes of each bucket with thread-local counters. Counts are
+    // stored shifted by one slot so that the prefix sum below yields bucket
+    // start offsets.
+    parallel::vector_ets buckets_ets(_buckets.size());
+    tbb::parallel_for(tbb::blocked_range(0, n()), [&](const auto &r) {
+      auto &buckets = buckets_ets.local();
+      for (NodeID u = r.begin(); u != r.end(); ++u) {
+        auto bucket = degree_bucket(degree(u)) + 1;
+        ++buckets[bucket];
+      }
+    });
+    const auto buckets = buckets_ets.combine(std::plus{});
+    std::copy(buckets.begin(), buckets.end(), _buckets.begin());
+
+    // The index of the last non-zero (shifted) counter equals the number of
+    // buckets in use.
+    auto last_nonempty_bucket =
+        std::find_if(_buckets.rbegin(), _buckets.rend(), [](const auto n) { return n > 0; });
+    _number_of_buckets = std::distance(_buckets.begin(), (last_nonempty_bucket + 1).base());
+  } else {
+    // Unsorted graph: a single bucket that contains all owned nodes.
+    _buckets[1] = n();
+    _number_of_buckets = 1;
+  }
+
+  std::partial_sum(_buckets.begin(), _buckets.end(), _buckets.begin());
+}
+
+// Computes the local total/maximum node weight and the total edge weight, then
+// reduces them across the communicator (sum, max, sum). Only the first n()
+// entries of `_node_weights` (the owned nodes) are taken into account.
+void DistributedCompressedGraph::init_total_weights() {
+  if (is_node_weighted()) {
+    const auto begin_node_weights = _node_weights.begin();
+    const auto end_node_weights = begin_node_weights + static_cast(n());
+
+    _total_node_weight = parallel::accumulate(begin_node_weights, end_node_weights, 0);
+    _max_node_weight = parallel::max_element(begin_node_weights, end_node_weights);
+  } else {
+    // Unweighted nodes count as weight 1.
+    _total_node_weight = n();
+    _max_node_weight = 1;
+  }
+
+  if (is_edge_weighted()) {
+    _total_edge_weight = parallel::accumulate(_edge_weights.begin(), _edge_weights.end(), 0);
+  } else {
+    // Unweighted edges count as weight 1.
+    _total_edge_weight = m();
+  }
+
+  _global_total_node_weight =
+      mpi::allreduce(_total_node_weight, MPI_SUM, communicator());
+  _global_max_node_weight =
+      mpi::allreduce(_max_node_weight, MPI_MAX, communicator());
+  _global_total_edge_weight =
+      mpi::allreduce(_total_edge_weight, MPI_SUM, communicator());
+}
+
+// Computes the per-PE values returned by edge_cut_to_pe() and comm_vol_to_pe():
+//  - edge cut: number of edges from owned nodes to ghost nodes owned by a PE,
+//  - communication volume: number of owned nodes with at least one neighbor
+//    owned by that PE.
+void DistributedCompressedGraph::init_communication_metrics() {
+  const PEID size = mpi::get_comm_size(_communicator);
+
+  // One counter vector per thread, summed up sequentially afterwards.
+  tbb::enumerable_thread_specific> edge_cut_to_pe_ets{[&] {
+    return std::vector(size);
+  }};
+  tbb::enumerable_thread_specific> comm_vol_to_pe_ets{[&] {
+    return std::vector(size);
+  }};
+
+  pfor_nodes_range([&](const auto r) {
+    auto &edge_cut_to_pe = edge_cut_to_pe_ets.local();
+    auto &comm_vol_to_pe = comm_vol_to_pe_ets.local();
+    // Remembers which PEs were already counted for the current node.
+    Marker<> counted_pe{static_cast(size)};
+
+    for (NodeID u = r.begin(); u < r.end(); ++u) {
+      adjacent_nodes(u, [&](const NodeID v) {
+        if (is_ghost_node(v)) {
+          const PEID owner = ghost_owner(v);
+          KASSERT(static_cast(owner) < edge_cut_to_pe.size());
+          ++edge_cut_to_pe[owner];
+
+          if (!counted_pe.get(owner)) {
+            KASSERT(static_cast(owner) < counted_pe.size());
+            counted_pe.set(owner);
+
+            KASSERT(static_cast(owner) < comm_vol_to_pe.size());
+            ++comm_vol_to_pe[owner];
+          }
+        }
+      });
+      // Start with a clean marker for the next node.
+      counted_pe.reset();
+    }
+  });
+
+  _edge_cut_to_pe.clear();
+  _edge_cut_to_pe.resize(size);
+  for (const auto &edge_cut_to_pe : edge_cut_to_pe_ets) { // PE x THREADS
+    for (std::size_t i = 0; i < edge_cut_to_pe.size(); ++i) {
+      _edge_cut_to_pe[i] += edge_cut_to_pe[i];
+    }
+  }
+
+  _comm_vol_to_pe.clear();
+  _comm_vol_to_pe.resize(size);
+  for (const auto &comm_vol_to_pe : comm_vol_to_pe_ets) {
+    for (std::size_t i = 0; i < comm_vol_to_pe.size(); ++i) {
+      _comm_vol_to_pe[i] += comm_vol_to_pe[i];
+    }
+  }
+}
+
+} // namespace kaminpar::dist
diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h
new file mode 100644
index 00000000..5d2ccba1
--- /dev/null
+++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h
@@ -0,0 +1,555 @@
+/*******************************************************************************
+ * Static distributed compressed graph data structure.
+ * + * @file: distributed_compressed_graph.h + * @author: Daniel Salwasser + * @date: 07.06.2024 + ******************************************************************************/ +#pragma once + +#include + +#include "kaminpar-mpi/utils.h" + +#include "kaminpar-dist/datastructures/abstract_distributed_graph.h" +#include "kaminpar-dist/datastructures/growt.h" +#include "kaminpar-dist/dkaminpar.h" + +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/degree_buckets.h" +#include "kaminpar-common/graph-compression/compressed_edges.h" + +namespace kaminpar::dist { + +class DistributedCompressedGraph : public AbstractDistributedGraph { +public: + // Data types used for this graph + using AbstractDistributedGraph::EdgeID; + using AbstractDistributedGraph::EdgeWeight; + using AbstractDistributedGraph::GlobalEdgeID; + using AbstractDistributedGraph::GlobalEdgeWeight; + using AbstractDistributedGraph::GlobalNodeID; + using AbstractDistributedGraph::GlobalNodeWeight; + using AbstractDistributedGraph::NodeID; + using AbstractDistributedGraph::NodeWeight; + + using CompressedEdges = CompressedEdges; + + DistributedCompressedGraph( + StaticArray node_distribution, + StaticArray edge_distribution, + StaticArray nodes, + CompressedEdges compressed_edges, + StaticArray ghost_owner, + StaticArray ghost_to_global, + growt::StaticGhostNodeMapping global_to_ghost, + const bool sorted, + MPI_Comm comm + ) + : DistributedCompressedGraph( + std::move(node_distribution), + std::move(edge_distribution), + std::move(nodes), + std::move(compressed_edges), + {}, + {}, + std::move(ghost_owner), + std::move(ghost_to_global), + std::move(global_to_ghost), + sorted, + comm + ) {} + + DistributedCompressedGraph( + StaticArray node_distribution, + StaticArray edge_distribution, + StaticArray nodes, + CompressedEdges compressed_edges, + StaticArray node_weights, + StaticArray edge_weights, + StaticArray ghost_owner, + StaticArray ghost_to_global, + 
growt::StaticGhostNodeMapping global_to_ghost, + const bool sorted, + MPI_Comm comm + ) + : _node_distribution(std::move(node_distribution)), + _edge_distribution(std::move(edge_distribution)), + _nodes(std::move(nodes)), + _compressed_edges(std::move(compressed_edges)), + _node_weights(std::move(node_weights)), + _edge_weights(std::move(edge_weights)), + _ghost_owner(std::move(ghost_owner)), + _ghost_to_global(std::move(ghost_to_global)), + _global_to_ghost(std::move(global_to_ghost)), + _sorted(sorted), + _communicator(comm) { + const PEID rank = mpi::get_comm_rank(communicator()); + + _n = _nodes.size() - 1; + _m = _compressed_edges.num_edges(); + _ghost_n = _ghost_to_global.size(); + _offset_n = _node_distribution[rank]; + _offset_m = _edge_distribution[rank]; + _global_n = _node_distribution.back(); + _global_m = _edge_distribution.back(); + + init_total_weights(); + init_communication_metrics(); + init_degree_buckets(); + } + + DistributedCompressedGraph(const DistributedCompressedGraph &) = delete; + DistributedCompressedGraph &operator=(const DistributedCompressedGraph &) = delete; + + DistributedCompressedGraph(DistributedCompressedGraph &&) noexcept = default; + DistributedCompressedGraph &operator=(DistributedCompressedGraph &&) noexcept = default; + + ~DistributedCompressedGraph() override = default; + + // + // Size of the graph + // + + [[nodiscard]] inline GlobalNodeID global_n() const final { + return _global_n; + } + + [[nodiscard]] inline GlobalEdgeID global_m() const final { + return _global_m; + } + + [[nodiscard]] inline NodeID n() const final { + return _n; + } + + [[nodiscard]] inline NodeID n(const PEID pe) const final { + KASSERT(pe < static_cast(_node_distribution.size())); + return _node_distribution[pe + 1] - _node_distribution[pe]; + } + + [[nodiscard]] inline NodeID ghost_n() const final { + return _ghost_n; + } + + [[nodiscard]] inline NodeID total_n() const final { + return ghost_n() + n(); + } + + [[nodiscard]] inline EdgeID m() 
const final { + return _m; + } + + [[nodiscard]] inline EdgeID m(const PEID pe) const final { + KASSERT(pe < static_cast(_edge_distribution.size())); + return _edge_distribution[pe + 1] - _edge_distribution[pe]; + } + + [[nodiscard]] inline GlobalNodeID offset_n() const final { + return _offset_n; + } + + [[nodiscard]] inline GlobalNodeID offset_n(const PEID pe) const final { + return _node_distribution[pe]; + } + + [[nodiscard]] inline GlobalEdgeID offset_m() const final { + return _offset_m; + } + + [[nodiscard]] inline GlobalEdgeID offset_m(const PEID pe) const final { + return _edge_distribution[pe]; + } + + // + // Node and edge weights + // + + [[nodiscard]] inline bool is_node_weighted() const final { + return !_node_weights.empty(); + } + + [[nodiscard]] inline NodeWeight node_weight(const NodeID u) const final { + return is_node_weighted() ? _node_weights[u] : 1; + } + + [[nodiscard]] inline NodeWeight max_node_weight() const final { + return _max_node_weight; + } + + [[nodiscard]] inline NodeWeight global_max_node_weight() const final { + return _global_max_node_weight; + } + + [[nodiscard]] inline NodeWeight total_node_weight() const final { + return _total_node_weight; + } + + [[nodiscard]] inline GlobalNodeWeight global_total_node_weight() const final { + return _global_total_node_weight; + } + + [[nodiscard]] inline bool is_edge_weighted() const final { + return !_edge_weights.empty(); + } + + [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { + return is_edge_weighted() ? 
_edge_weights[e] : 1; + } + + [[nodiscard]] inline EdgeWeight total_edge_weight() const final { + return _total_edge_weight; + } + + [[nodiscard]] inline GlobalEdgeWeight global_total_edge_weight() const final { + return _global_total_edge_weight; + } + + // + // Node ownership + // + + [[nodiscard]] inline bool is_owned_global_node(const GlobalNodeID global_u) const final { + return (offset_n() <= global_u && global_u < offset_n() + n()); + } + + [[nodiscard]] inline bool contains_global_node(const GlobalNodeID global_u) const final { + return is_owned_global_node(global_u) || + (_global_to_ghost.find(global_u + 1) != _global_to_ghost.end()); + } + + [[nodiscard]] inline bool contains_local_node(const NodeID local_u) const final { + return local_u < total_n(); + } + + // + // Node type + // + + [[nodiscard]] inline bool is_ghost_node(const NodeID u) const final { + KASSERT(u < total_n()); + return u >= n(); + } + + [[nodiscard]] inline bool is_owned_node(const NodeID u) const final { + KASSERT(u < total_n()); + return u < n(); + } + + [[nodiscard]] inline PEID ghost_owner(const NodeID u) const final { + KASSERT(is_ghost_node(u)); + KASSERT(u - n() < _ghost_owner.size()); + KASSERT(_ghost_owner[u - n()] >= 0); + KASSERT(_ghost_owner[u - n()] < mpi::get_comm_size(communicator())); + return _ghost_owner[u - n()]; + } + + [[nodiscard]] inline NodeID + map_remote_node(const NodeID their_lnode, const PEID owner) const final { + const auto gnode = static_cast(their_lnode + offset_n(owner)); + return global_to_local_node(gnode); + } + + [[nodiscard]] inline GlobalNodeID local_to_global_node(const NodeID local_u) const final { + KASSERT(contains_local_node(local_u)); + return is_owned_node(local_u) ? 
_offset_n + local_u : _ghost_to_global[local_u - n()]; + } + + [[nodiscard]] inline NodeID global_to_local_node(const GlobalNodeID global_u) const final { + KASSERT(contains_global_node(global_u)); + + if (offset_n() <= global_u && global_u < offset_n() + n()) { + return global_u - offset_n(); + } else { + KASSERT(_global_to_ghost.find(global_u + 1) != _global_to_ghost.end()); + return (*_global_to_ghost.find(global_u + 1)).second; + } + } + + // + // Iterators for nodes / edges + // + + [[nodiscard]] inline IotaRange nodes(const NodeID from, const NodeID to) const final { + return {from, to}; + } + + [[nodiscard]] inline IotaRange nodes() const final { + return nodes(0, n()); + } + + [[nodiscard]] inline IotaRange ghost_nodes() const final { + return {n(), total_n()}; + } + + [[nodiscard]] inline IotaRange all_nodes() const final { + return {static_cast(0), total_n()}; + } + + [[nodiscard]] inline IotaRange edges() const final { + return {static_cast(0), m()}; + } + + [[nodiscard]] inline IotaRange incident_edges(const NodeID u) const final { + return _compressed_edges.incident_edges(u, _nodes[u], _nodes[u + 1]); + } + + // + // Graph operations + // + + template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { + _compressed_edges.decode_neighborhood( + u, + _nodes[u], + _nodes[u + 1], + [&](const EdgeID incident_edge, const NodeID adjacent_node) { return l(adjacent_node); } + ); + } + + template inline void neighbors(const NodeID u, Lambda &&l) const { + _compressed_edges.decode_neighborhood(u, _nodes[u], _nodes[u + 1], std::forward(l)); + } + + template + inline void neighbors(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { + _compressed_edges.decode_neighborhood(u, _nodes[u], _nodes[u + 1], std::forward(l)); + } + + // + // Parallel iteration + // + + template + inline void pfor_nodes(const NodeID from, const NodeID to, Lambda &&l) const { + tbb::parallel_for(from, to, std::forward(l)); + } + + template + inline void 
pfor_nodes_range(const NodeID from, const NodeID to, Lambda &&l) const { + tbb::parallel_for(tbb::blocked_range(from, to), std::forward(l)); + } + + template inline void pfor_ghost_nodes(Lambda &&l) const { + pfor_nodes(n(), total_n(), std::forward(l)); + } + + template inline void pfor_nodes(Lambda &&l) const { + pfor_nodes(0, n(), std::forward(l)); + } + + template inline void pfor_all_nodes(Lambda &&l) const { + pfor_nodes(0, total_n(), std::forward(l)); + } + + template inline void pfor_nodes_range(Lambda &&l) const { + pfor_nodes_range(0, n(), std::forward(l)); + } + + template inline void pfor_all_nodes_range(Lambda &&l) const { + pfor_nodes_range(0, total_n(), std::forward(l)); + } + + template inline void pfor_edges(Lambda &&l) const { + pfor_nodes([&](const NodeID u) { neighbors(u, std::forward(l)); }); + } + + // + // Access methods + // + + [[nodiscard]] inline NodeID degree(const NodeID u) const final { + KASSERT(is_owned_node(u)); + return _compressed_edges.degree(u, _nodes[u], _nodes[u + 1]); + } + + [[nodiscard]] inline const StaticArray &node_weights() const final { + return _node_weights; + } + + [[nodiscard]] inline const StaticArray &edge_weights() const final { + return _edge_weights; + } + + inline void set_ghost_node_weight(const NodeID ghost_node, const NodeWeight weight) final { + KASSERT(is_ghost_node(ghost_node)); + KASSERT(is_node_weighted()); + _node_weights[ghost_node] = weight; + } + + [[nodiscard]] inline const StaticArray &node_distribution() const final { + return _node_distribution; + } + + [[nodiscard]] inline GlobalNodeID node_distribution(const PEID pe) const final { + KASSERT(static_cast(pe) < _node_distribution.size()); + return _node_distribution[pe]; + } + + [[nodiscard]] inline PEID find_owner_of_global_node(const GlobalNodeID u) const final { + KASSERT(u < global_n()); + auto it = std::upper_bound(_node_distribution.begin() + 1, _node_distribution.end(), u); + KASSERT(it != _node_distribution.end()); + return 
static_cast(std::distance(_node_distribution.begin(), it) - 1); + } + + [[nodiscard]] inline const StaticArray &edge_distribution() const final { + return _edge_distribution; + } + + [[nodiscard]] inline GlobalEdgeID edge_distribution(const PEID pe) const final { + KASSERT(static_cast(pe) < _edge_distribution.size()); + return _edge_distribution[pe]; + } + + // + // Cached inter-PE metrics + // + + [[nodiscard]] inline EdgeID edge_cut_to_pe(const PEID pe) const final { + KASSERT(static_cast(pe) < _edge_cut_to_pe.size()); + return _edge_cut_to_pe[pe]; + } + + [[nodiscard]] inline EdgeID comm_vol_to_pe(const PEID pe) const final { + KASSERT(static_cast(pe) < _comm_vol_to_pe.size()); + return _comm_vol_to_pe[pe]; + } + + [[nodiscard]] inline MPI_Comm communicator() const final { + return _communicator; + } + + // + // High degree classification + // + + void init_high_degree_info(const EdgeID high_degree_threshold) const final; + + [[nodiscard]] bool is_high_degree_node(const NodeID node) const final { + KASSERT(_high_degree_ghost_node.size() == ghost_n()); + KASSERT(!is_ghost_node(node) || node - n() < _high_degree_ghost_node.size()); + return is_ghost_node(node) ? 
_high_degree_ghost_node[node - n()] + : degree(node) > _high_degree_threshold; + } + + // + // Graph permutation + // + + void set_permutation(StaticArray permutation) final { + _permutation = std::move(permutation); + } + + [[nodiscard]] inline bool permuted() const final { + return !_permutation.empty(); + } + + [[nodiscard]] inline NodeID map_original_node(const NodeID u) const final { + KASSERT(permuted()); + KASSERT(u < _permutation.size()); + return _permutation[u]; + } + + // + // Degree buckets + // + + [[nodiscard]] inline bool sorted() const final { + return _sorted; + } + + [[nodiscard]] inline std::size_t number_of_buckets() const final { + return _number_of_buckets; + } + + [[nodiscard]] inline std::size_t bucket_size(const std::size_t bucket) const final { + return _buckets[bucket + 1] - _buckets[bucket]; + } + + [[nodiscard]] inline NodeID first_node_in_bucket(const std::size_t bucket) const final { + return _buckets[bucket]; + } + + [[nodiscard]] inline NodeID first_invalid_node_in_bucket(const std::size_t bucket) const final { + return first_node_in_bucket(bucket + 1); + } + + // + // Graph permutation by coloring + // + + inline void set_color_sorted(StaticArray color_sizes) final { + KASSERT(color_sizes.front() == 0u); + KASSERT(color_sizes.back() == n()); + _color_sizes = std::move(color_sizes); + } + + [[nodiscard]] inline bool color_sorted() const final { + return !_color_sizes.empty(); + } + + [[nodiscard]] inline std::size_t number_of_colors() const final { + return _color_sizes.size() - 1; + } + + [[nodiscard]] inline NodeID color_size(const std::size_t c) const final { + KASSERT(c < number_of_colors()); + return _color_sizes[c + 1] - _color_sizes[c]; + } + + [[nodiscard]] inline const StaticArray &get_color_sizes() const final { + return _color_sizes; + } + +private: + void init_degree_buckets(); + void init_total_weights(); + void init_communication_metrics(); + + NodeID _n; + EdgeID _m; + NodeID _ghost_n; + GlobalNodeID _offset_n; + 
GlobalEdgeID _offset_m; + GlobalNodeID _global_n; + GlobalEdgeID _global_m; + + NodeWeight _total_node_weight{}; + GlobalNodeWeight _global_total_node_weight{}; + NodeWeight _max_node_weight{}; + NodeWeight _global_max_node_weight{}; + + EdgeWeight _total_edge_weight{}; + GlobalEdgeWeight _global_total_edge_weight{}; + + StaticArray _node_distribution{}; + StaticArray _edge_distribution{}; + + StaticArray _nodes{}; + CompressedEdges _compressed_edges; + StaticArray _node_weights{}; + StaticArray _edge_weights{}; + + StaticArray _ghost_owner{}; + StaticArray _ghost_to_global{}; + growt::StaticGhostNodeMapping _global_to_ghost{}; + + // mutable for lazy initialization + mutable StaticArray _high_degree_ghost_node{}; + mutable EdgeID _high_degree_threshold = 0; + + std::vector _edge_cut_to_pe{}; + std::vector _comm_vol_to_pe{}; + + StaticArray _permutation; + bool _sorted = false; + std::vector _buckets = std::vector(kNumberOfDegreeBuckets + 1); + std::size_t _number_of_buckets = 0; + + StaticArray _color_sizes{}; + + MPI_Comm _communicator; +}; + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc b/kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc new file mode 100644 index 00000000..d818ed11 --- /dev/null +++ b/kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc @@ -0,0 +1,157 @@ +/******************************************************************************* + * Sequential builder for distributed compressed graphs. 
+ * + * @file: distributed_compressed_graph_builder.h + * @author: Daniel Salwasser + * @date: 07.06.2024 + ******************************************************************************/ +#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" + +#include "kaminpar-dist/datastructures/ghost_node_mapper.h" +#include "kaminpar-dist/graphutils/synchronization.h" + +#include "kaminpar-common/assert.h" + +namespace kaminpar::dist { + +DistributedCompressedGraph +DistributedCompressedGraphBuilder::compress(const DistributedCSRGraph &graph) { + const mpi::PEID size = mpi::get_comm_size(graph.communicator()); + const mpi::PEID rank = mpi::get_comm_rank(graph.communicator()); + + StaticArray node_distribution( + graph.node_distribution().begin(), graph.node_distribution().end() + ); + StaticArray edge_distribution( + graph.edge_distribution().begin(), graph.edge_distribution().end() + ); + + graph::GhostNodeMapper mapper(rank, node_distribution); + DistributedCompressedGraphBuilder builder( + graph.n(), graph.m(), graph.is_node_weighted(), graph.is_edge_weighted(), graph.sorted() + ); + + const NodeID first_node = node_distribution[rank]; + const NodeID last_node = node_distribution[rank + 1]; + + const auto &raw_nodes = graph.raw_nodes(); + const auto &raw_edges = graph.raw_nodes(); + const auto &raw_node_weights = graph.raw_nodes(); + + std::vector> neighbourhood; + for (const NodeID u : graph.nodes()) { + graph.neighbors(u, [&](const EdgeID e, const NodeID adjacent_node) { + const EdgeWeight edge_weight = graph.is_edge_weighted() ? 
graph.edge_weight(e) : 1; + + if (graph.is_owned_node(adjacent_node)) { + neighbourhood.emplace_back(adjacent_node, edge_weight); + } else { + const NodeID original_adjacent_node = graph.local_to_global_node(adjacent_node); + neighbourhood.emplace_back(mapper.new_ghost_node(original_adjacent_node), edge_weight); + } + }); + + builder.add_node(u, neighbourhood); + neighbourhood.clear(); + } + + StaticArray node_weights; + if (graph.is_node_weighted()) { + node_weights.resize(graph.n() + mapper.next_ghost_node(), static_array::noinit); + + tbb::parallel_for(tbb::blocked_range(0, graph.n()), [&](const auto &r) { + for (NodeID u = r.begin(); u != r.end(); ++u) { + node_weights[u] = raw_node_weights[first_node + u]; + } + }); + } + + auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); + auto [nodes, edges, edge_weights] = builder.build(); + + DistributedCompressedGraph compressed_graph( + std::move(node_distribution), + std::move(edge_distribution), + std::move(nodes), + std::move(edges), + std::move(node_weights), + std::move(edge_weights), + std::move(ghost_owner), + std::move(ghost_to_global), + std::move(global_to_ghost), + graph.sorted(), + graph.communicator() + ); + return compressed_graph; +} + +DistributedCompressedGraphBuilder::DistributedCompressedGraphBuilder( + const NodeID num_nodes, + const EdgeID num_edges, + const bool has_node_weights, + const bool has_edge_weights, + const bool sorted +) + : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights, _edge_weights) { + _sorted = sorted; + _nodes.resize(num_nodes + 1, static_array::noinit); + + _num_edges = num_edges; + _compressed_edges_builder.init(0); + + if (has_edge_weights) { + _edge_weights.resize(num_edges, static_array::noinit); + } +} + +void DistributedCompressedGraphBuilder::add_node( + const NodeID node, std::vector> &neighbourhood +) { + KASSERT(node + 1 < _nodes.size()); + + const EdgeID offset = _compressed_edges_builder.add(node, neighbourhood); + 
_nodes[node] = offset; +} + +std::tuple, CompressedEdges, StaticArray> +DistributedCompressedGraphBuilder::build() { + std::size_t compressed_edges_size = _compressed_edges_builder.size(); + heap_profiler::unique_ptr wrapped_compressed_edges = + _compressed_edges_builder.take_compressed_data(); + + // Store in the last entry of the node array the offset one after the last byte belonging to the + // last node. + _nodes[_nodes.size() - 1] = static_cast(compressed_edges_size); + + // Store at the end of the compressed edge array the (gap of the) id of the last edge. This + // ensures that the the degree of the last node can be computed from the difference between + // the last two first edge ids. + const EdgeID last_edge = _num_edges; + std::uint8_t *compressed_edges_end = wrapped_compressed_edges.get() + compressed_edges_size; + if constexpr (CompressedEdges::kIntervalEncoding) { + compressed_edges_size += marked_varint_encode(last_edge, false, compressed_edges_end); + } else { + compressed_edges_size += varint_encode(last_edge, compressed_edges_end); + } + + // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to + // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. 
+ if constexpr (CompressedEdges::kStreamEncoding) { + compressed_edges_size += 15; + } + + if constexpr (kHeapProfiling) { + heap_profiler::HeapProfiler::global().record_alloc( + wrapped_compressed_edges.get(), compressed_edges_size + ); + } + + StaticArray raw_compressed_edges( + compressed_edges_size, std::move(wrapped_compressed_edges) + ); + CompressedEdges compressed_edges(_num_edges, std::move(raw_compressed_edges)); + + return std::make_tuple(std::move(_nodes), std::move(compressed_edges), std::move(_edge_weights)); +} + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph_builder.h b/kaminpar-dist/datastructures/distributed_compressed_graph_builder.h new file mode 100644 index 00000000..80ea25ce --- /dev/null +++ b/kaminpar-dist/datastructures/distributed_compressed_graph_builder.h @@ -0,0 +1,70 @@ +/******************************************************************************* + * Sequential builder for distributed compressed graphs. + * + * @file: distributed_compressed_graph_builder.h + * @author: Daniel Salwasser + * @date: 07.06.2024 + ******************************************************************************/ +#pragma once + +#include + +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" +#include "kaminpar-dist/dkaminpar.h" + +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/graph-compression/compressed_edges_builder.h" + +namespace kaminpar::dist { + +/*! + * A sequential builder that constructs compressed graphs. + */ +class DistributedCompressedGraphBuilder { +public: + [[nodiscard]] static DistributedCompressedGraph compress(const DistributedCSRGraph &graph); + + /*! + * Constructs a new DistributedCompressedGraphBuilder. + * + * @param num_nodes The number of nodes of the graph to compress. + * @param num_edges The number of edges of the graph to compress. 
+ * @param has_node_weights Whether node weights are stored. + * @param has_edge_weights Whether edge weights are stored. + * @param sorted Whether the nodes to add are stored in degree-bucket order. + */ + DistributedCompressedGraphBuilder( + const NodeID num_nodes, + const EdgeID num_edges, + const bool has_node_weights, + const bool has_edge_weights, + const bool sorted + ); + + /*! + * Adds a node to the compressed graph. Note that the neighbourhood vector is modified. + * + * @param node The node to add. + * @param neighbourhood The neighbourhood of the node to add. + */ + void add_node(const NodeID node, std::vector> &neighbourhood); + + /*! + * Builds the compressed graph. The builder must then be reinitialized in order to compress + * another graph. + * + * @return The components of the compressed graph that has been build. + */ + std::tuple, CompressedEdges, StaticArray> build(); + +private: + bool _sorted; // Whether the nodes of the graph are stored in degree-bucket order + StaticArray _nodes; + + EdgeID _num_edges; + CompressedEdgesBuilder _compressed_edges_builder; + StaticArray _edge_weights; +}; + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_csr_graph.cc b/kaminpar-dist/datastructures/distributed_csr_graph.cc new file mode 100644 index 00000000..6c2f73b9 --- /dev/null +++ b/kaminpar-dist/datastructures/distributed_csr_graph.cc @@ -0,0 +1,163 @@ +/******************************************************************************* + * Static distributed graph data structure. 
/**
 * Lazily determines for every ghost node whether it is a high-degree node with respect to
 * `high_degree_threshold` and caches the answer in `_high_degree_ghost_node`.
 *
 * The degree of a ghost node is not available locally (`degree()` asserts that the node is
 * owned), so the flag is computed from the local degree of owned nodes and sent to other PEs.
 *
 * Although the method is `const`, it overwrites `_high_degree_threshold` and
 * `_high_degree_ghost_node`.
 *
 * @param high_degree_threshold A node is flagged if its degree is strictly greater than this.
 */
void DistributedCSRGraph::init_high_degree_info(const EdgeID high_degree_threshold) const {
  // Nothing to do if the cached flags were already computed for this threshold.
  // NOTE(review): the class definition initializes `_high_degree_threshold` to 0, so a first call
  // with a threshold of 0 returns here without ever sizing `_high_degree_ghost_node` -- confirm
  // that callers never pass 0 before the flags are queried.
  if (_high_degree_threshold == high_degree_threshold) {
    return;
  }

  _high_degree_threshold = high_degree_threshold;
  _high_degree_ghost_node.resize(ghost_n());

  // Payload per node: the sender's local node ID and its high-degree flag.
  struct Message {
    NodeID node;
    std::uint8_t high_degree;
  };

  // NOTE(review): presumably builds one message per owned interface node and delivers it to the
  // PEs that hold the node as a ghost -- verify against graphutils/communication.h.
  mpi::graph::sparse_alltoall_interface_to_pe(
      *this,
      // Sender side: evaluate the flag from the locally known degree of u.
      [&](const NodeID u) -> Message {
        return {.node = u, .high_degree = degree(u) > _high_degree_threshold};
      },
      // Receiver side: translate the sender's local ID into our ghost ID and store the flag.
      [&](const auto &recv_buffer, const PEID pe) {
        tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) {
          const auto &[remote_node, high_degree] = recv_buffer[i];
          const NodeID local_node = map_remote_node(remote_node, pe);
          // Ghost flags are indexed relative to the first ghost node, i.e., n().
          _high_degree_ghost_node[local_node - n()] = high_degree;
        });
      }
  );
}
0 : math::floor_log2(degree) + 1; +} +} // namespace + +void DistributedCSRGraph::init_degree_buckets() { + KASSERT(std::all_of(_buckets.begin(), _buckets.end(), [](const auto n) { return n == 0; })); + + if (_sorted) { + parallel::vector_ets buckets_ets(_buckets.size()); + tbb::parallel_for(tbb::blocked_range(0, n()), [&](const auto &r) { + auto &buckets = buckets_ets.local(); + for (NodeID u = r.begin(); u != r.end(); ++u) { + auto bucket = degree_bucket(degree(u)) + 1; + ++buckets[bucket]; + } + }); + const auto buckets = buckets_ets.combine(std::plus{}); + std::copy(buckets.begin(), buckets.end(), _buckets.begin()); + + auto last_nonempty_bucket = + std::find_if(_buckets.rbegin(), _buckets.rend(), [](const auto n) { return n > 0; }); + _number_of_buckets = std::distance(_buckets.begin(), (last_nonempty_bucket + 1).base()); + } else { + _buckets[1] = n(); + _number_of_buckets = 1; + } + + std::partial_sum(_buckets.begin(), _buckets.end(), _buckets.begin()); +} + +void DistributedCSRGraph::init_total_weights() { + if (is_node_weighted()) { + const auto begin_node_weights = _node_weights.begin(); + const auto end_node_weights = begin_node_weights + static_cast(n()); + + _total_node_weight = parallel::accumulate(begin_node_weights, end_node_weights, 0); + _max_node_weight = parallel::max_element(begin_node_weights, end_node_weights); + } else { + _total_node_weight = n(); + _max_node_weight = 1; + } + + if (is_edge_weighted()) { + _total_edge_weight = parallel::accumulate(_edge_weights.begin(), _edge_weights.end(), 0); + } else { + _total_edge_weight = m(); + } + + _global_total_node_weight = + mpi::allreduce(_total_node_weight, MPI_SUM, communicator()); + _global_max_node_weight = + mpi::allreduce(_max_node_weight, MPI_MAX, communicator()); + _global_total_edge_weight = + mpi::allreduce(_total_edge_weight, MPI_SUM, communicator()); +} + +void DistributedCSRGraph::init_communication_metrics() { + const PEID size = mpi::get_comm_size(_communicator); + + 
tbb::enumerable_thread_specific> edge_cut_to_pe_ets{[&] { + return std::vector(size); + }}; + tbb::enumerable_thread_specific> comm_vol_to_pe_ets{[&] { + return std::vector(size); + }}; + + pfor_nodes_range([&](const auto r) { + auto &edge_cut_to_pe = edge_cut_to_pe_ets.local(); + auto &comm_vol_to_pe = comm_vol_to_pe_ets.local(); + Marker<> counted_pe{static_cast(size)}; + + for (NodeID u = r.begin(); u < r.end(); ++u) { + adjacent_nodes(u, [&](const NodeID v) { + if (is_ghost_node(v)) { + const PEID owner = ghost_owner(v); + KASSERT(static_cast(owner) < edge_cut_to_pe.size()); + ++edge_cut_to_pe[owner]; + + if (!counted_pe.get(owner)) { + KASSERT(static_cast(owner) < counted_pe.size()); + counted_pe.set(owner); + + KASSERT(static_cast(owner) < comm_vol_to_pe.size()); + ++comm_vol_to_pe[owner]; + } + } + }); + counted_pe.reset(); + } + }); + + _edge_cut_to_pe.clear(); + _edge_cut_to_pe.resize(size); + for (const auto &edge_cut_to_pe : edge_cut_to_pe_ets) { // PE x THREADS + for (std::size_t i = 0; i < edge_cut_to_pe.size(); ++i) { + _edge_cut_to_pe[i] += edge_cut_to_pe[i]; + } + } + + _comm_vol_to_pe.clear(); + _comm_vol_to_pe.resize(size); + for (const auto &comm_vol_to_pe : comm_vol_to_pe_ets) { + for (std::size_t i = 0; i < comm_vol_to_pe.size(); ++i) { + _comm_vol_to_pe[i] += comm_vol_to_pe[i]; + } + } +} + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_csr_graph.h b/kaminpar-dist/datastructures/distributed_csr_graph.h new file mode 100644 index 00000000..ae305672 --- /dev/null +++ b/kaminpar-dist/datastructures/distributed_csr_graph.h @@ -0,0 +1,632 @@ +/******************************************************************************* + * Static distributed CSR graph data structure. 
+ * + * @file: distributed_csr_graph.h + * @author: Daniel Seemaier + * @date: 27.10.2021 + ******************************************************************************/ +#pragma once + +#include +#include + +#include "kaminpar-mpi/utils.h" + +#include "kaminpar-dist/datastructures/abstract_distributed_graph.h" +#include "kaminpar-dist/datastructures/growt.h" +#include "kaminpar-dist/dkaminpar.h" + +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/degree_buckets.h" +#include "kaminpar-common/ranges.h" + +namespace kaminpar::dist { + +class DistributedCSRGraph : public AbstractDistributedGraph { +public: + // Data types used for this graph + using AbstractDistributedGraph::EdgeID; + using AbstractDistributedGraph::EdgeWeight; + using AbstractDistributedGraph::GlobalEdgeID; + using AbstractDistributedGraph::GlobalEdgeWeight; + using AbstractDistributedGraph::GlobalNodeID; + using AbstractDistributedGraph::GlobalNodeWeight; + using AbstractDistributedGraph::NodeID; + using AbstractDistributedGraph::NodeWeight; + + DistributedCSRGraph() = default; + + DistributedCSRGraph( + StaticArray node_distribution, + StaticArray edge_distribution, + StaticArray nodes, + StaticArray edges, + StaticArray ghost_owner, + StaticArray ghost_to_global, + growt::StaticGhostNodeMapping global_to_ghost, + const bool sorted, + MPI_Comm comm + ) + : DistributedCSRGraph( + std::move(node_distribution), + std::move(edge_distribution), + std::move(nodes), + std::move(edges), + {}, + {}, + std::move(ghost_owner), + std::move(ghost_to_global), + std::move(global_to_ghost), + sorted, + comm + ) {} + + DistributedCSRGraph( + StaticArray node_distribution, + StaticArray edge_distribution, + StaticArray nodes, + StaticArray edges, + StaticArray node_weights, + StaticArray edge_weights, + StaticArray ghost_owner, + StaticArray ghost_to_global, + growt::StaticGhostNodeMapping global_to_ghost, + const bool sorted, + MPI_Comm comm + ) + : 
_node_distribution(std::move(node_distribution)), + _edge_distribution(std::move(edge_distribution)), + _nodes(std::move(nodes)), + _edges(std::move(edges)), + _node_weights(std::move(node_weights)), + _edge_weights(std::move(edge_weights)), + _ghost_owner(std::move(ghost_owner)), + _ghost_to_global(std::move(ghost_to_global)), + _global_to_ghost(std::move(global_to_ghost)), + _sorted(sorted), + _communicator(comm) { + const PEID rank = mpi::get_comm_rank(communicator()); + + _n = _nodes.size() - 1; + _m = _edges.size(); + _ghost_n = _ghost_to_global.size(); + _offset_n = _node_distribution[rank]; + _offset_m = _edge_distribution[rank]; + _global_n = _node_distribution.back(); + _global_m = _edge_distribution.back(); + + init_total_weights(); + init_communication_metrics(); + init_degree_buckets(); + } + + DistributedCSRGraph(const DistributedCSRGraph &) = delete; + DistributedCSRGraph &operator=(const DistributedCSRGraph &) = delete; + + DistributedCSRGraph(DistributedCSRGraph &&) noexcept = default; + DistributedCSRGraph &operator=(DistributedCSRGraph &&) noexcept = default; + + ~DistributedCSRGraph() override = default; + + // + // Size of the graph + // + + [[nodiscard]] inline GlobalNodeID global_n() const final { + return _global_n; + } + + [[nodiscard]] inline GlobalEdgeID global_m() const final { + return _global_m; + } + + [[nodiscard]] inline NodeID n() const final { + return _n; + } + + [[nodiscard]] inline NodeID n(const PEID pe) const final { + KASSERT(pe < static_cast(_node_distribution.size())); + return _node_distribution[pe + 1] - _node_distribution[pe]; + } + + [[nodiscard]] inline NodeID ghost_n() const final { + return _ghost_n; + } + + [[nodiscard]] inline NodeID total_n() const final { + return ghost_n() + n(); + } + + [[nodiscard]] inline EdgeID m() const final { + return _m; + } + + [[nodiscard]] inline EdgeID m(const PEID pe) const final { + KASSERT(pe < static_cast(_edge_distribution.size())); + return _edge_distribution[pe + 1] - 
_edge_distribution[pe]; + } + + [[nodiscard]] inline GlobalNodeID offset_n() const final { + return _offset_n; + } + + [[nodiscard]] inline GlobalNodeID offset_n(const PEID pe) const final { + return _node_distribution[pe]; + } + + [[nodiscard]] inline GlobalEdgeID offset_m() const final { + return _offset_m; + } + + [[nodiscard]] inline GlobalEdgeID offset_m(const PEID pe) const final { + return _edge_distribution[pe]; + } + + // + // Node and edge weights + // + + [[nodiscard]] inline bool is_node_weighted() const final { + return !_node_weights.empty(); + } + + [[nodiscard]] inline NodeWeight node_weight(const NodeID u) const final { + return is_node_weighted() ? _node_weights[u] : 1; + } + + [[nodiscard]] inline NodeWeight max_node_weight() const final { + return _max_node_weight; + } + + [[nodiscard]] inline NodeWeight global_max_node_weight() const final { + return _global_max_node_weight; + } + + [[nodiscard]] inline NodeWeight total_node_weight() const final { + return _total_node_weight; + } + + [[nodiscard]] inline GlobalNodeWeight global_total_node_weight() const final { + return _global_total_node_weight; + } + + [[nodiscard]] inline bool is_edge_weighted() const final { + return !_edge_weights.empty(); + } + + [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { + return is_edge_weighted() ? 
_edge_weights[e] : 1; + } + + [[nodiscard]] inline EdgeWeight total_edge_weight() const final { + return _total_edge_weight; + } + + [[nodiscard]] inline GlobalEdgeWeight global_total_edge_weight() const final { + return _global_total_edge_weight; + } + + // + // Node ownership + // + + [[nodiscard]] inline bool is_owned_global_node(const GlobalNodeID global_u) const final { + return (offset_n() <= global_u && global_u < offset_n() + n()); + } + + [[nodiscard]] inline bool contains_global_node(const GlobalNodeID global_u) const final { + return is_owned_global_node(global_u) || + (_global_to_ghost.find(global_u + 1) != _global_to_ghost.end()); + } + + [[nodiscard]] inline bool contains_local_node(const NodeID local_u) const final { + return local_u < total_n(); + } + + // + // Node type + // + + [[nodiscard]] inline bool is_ghost_node(const NodeID u) const final { + KASSERT(u < total_n()); + return u >= n(); + } + + [[nodiscard]] inline bool is_owned_node(const NodeID u) const final { + KASSERT(u < total_n()); + return u < n(); + } + + [[nodiscard]] inline PEID ghost_owner(const NodeID u) const final { + KASSERT(is_ghost_node(u)); + KASSERT(u - n() < _ghost_owner.size()); + KASSERT(_ghost_owner[u - n()] >= 0); + KASSERT(_ghost_owner[u - n()] < mpi::get_comm_size(communicator())); + return _ghost_owner[u - n()]; + } + + [[nodiscard]] inline NodeID + map_remote_node(const NodeID their_lnode, const PEID owner) const final { + const auto gnode = static_cast(their_lnode + offset_n(owner)); + return global_to_local_node(gnode); + } + + [[nodiscard]] inline GlobalNodeID local_to_global_node(const NodeID local_u) const final { + KASSERT(contains_local_node(local_u)); + return is_owned_node(local_u) ? 
_offset_n + local_u : _ghost_to_global[local_u - n()]; + } + + [[nodiscard]] inline NodeID global_to_local_node(const GlobalNodeID global_u) const final { + KASSERT(contains_global_node(global_u)); + + if (offset_n() <= global_u && global_u < offset_n() + n()) { + return global_u - offset_n(); + } else { + KASSERT(_global_to_ghost.find(global_u + 1) != _global_to_ghost.end()); + return (*_global_to_ghost.find(global_u + 1)).second; + } + } + + // + // Iterators for nodes / edges + // + + [[nodiscard]] inline IotaRange nodes(const NodeID from, const NodeID to) const final { + return {from, to}; + } + [[nodiscard]] inline IotaRange nodes() const final { + return nodes(0, n()); + } + [[nodiscard]] inline IotaRange ghost_nodes() const final { + return {n(), total_n()}; + } + [[nodiscard]] inline IotaRange all_nodes() const final { + return {static_cast(0), total_n()}; + } + [[nodiscard]] inline IotaRange edges() const final { + return {static_cast(0), m()}; + } + [[nodiscard]] inline IotaRange incident_edges(const NodeID u) const final { + return {_nodes[u], _nodes[u + 1]}; + } + + // + // Graph operations + // + + template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { + constexpr bool non_stoppable = std::is_invocable_r_v; + static_assert(non_stoppable || std::is_invocable_r_v); + + const EdgeID from = _nodes[u]; + const EdgeID to = _nodes[u + 1]; + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (non_stoppable) { + l(_edges[edge]); + } else { + if (l(_edges[edge])) { + return; + } + } + } + } + + template inline void neighbors(const NodeID u, Lambda &&l) const { + constexpr bool non_stoppable = std::is_invocable_r_v; + static_assert(non_stoppable || std::is_invocable_r_v); + + const EdgeID from = _nodes[u]; + const EdgeID to = _nodes[u + 1]; + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (non_stoppable) { + l(edge, _edges[edge]); + } else { + if (l(edge, _edges[edge])) { + return; + } + } + } + } + + template + inline 
void neighbors(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { + constexpr bool non_stoppable = std::is_invocable_r_v; + static_assert(non_stoppable || std::is_invocable_r_v); + + const EdgeID from = _nodes[u]; + const EdgeID degree = _nodes[u + 1] - from; + const EdgeID to = from + std::min(degree, max_num_neighbors); + + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (non_stoppable) { + l(edge, _edges[edge]); + } else { + if (l(edge, _edges[edge])) { + return; + } + } + } + } + + // + // Parallel iteration + // + + template + inline void pfor_nodes(const NodeID from, const NodeID to, Lambda &&l) const { + tbb::parallel_for(from, to, std::forward(l)); + } + + template + inline void pfor_nodes_range(const NodeID from, const NodeID to, Lambda &&l) const { + tbb::parallel_for(tbb::blocked_range(from, to), std::forward(l)); + } + + template inline void pfor_ghost_nodes(Lambda &&l) const { + pfor_nodes(n(), total_n(), std::forward(l)); + } + + template inline void pfor_nodes(Lambda &&l) const { + pfor_nodes(0, n(), std::forward(l)); + } + + template inline void pfor_all_nodes(Lambda &&l) const { + pfor_nodes(0, total_n(), std::forward(l)); + } + + template inline void pfor_nodes_range(Lambda &&l) const { + pfor_nodes_range(0, n(), std::forward(l)); + } + + template inline void pfor_all_nodes_range(Lambda &&l) const { + pfor_nodes_range(0, total_n(), std::forward(l)); + } + + template inline void pfor_edges(Lambda &&l) const { + tbb::parallel_for(0, m(), [&](const EdgeID e) { l(e, _edges[e]); }); + } + + // + // Access methods + // + + [[nodiscard]] inline NodeID degree(const NodeID u) const final { + KASSERT(is_owned_node(u)); + return _nodes[u + 1] - _nodes[u]; + } + + [[nodiscard]] inline const StaticArray &node_weights() const final { + return _node_weights; + } + + [[nodiscard]] inline const StaticArray &edge_weights() const final { + return _edge_weights; + } + + inline void set_ghost_node_weight(const NodeID ghost_node, const 
NodeWeight weight) final { + KASSERT(is_ghost_node(ghost_node)); + KASSERT(is_node_weighted()); + _node_weights[ghost_node] = weight; + } + + [[nodiscard]] inline const StaticArray &node_distribution() const final { + return _node_distribution; + } + + [[nodiscard]] inline GlobalNodeID node_distribution(const PEID pe) const final { + KASSERT(static_cast(pe) < _node_distribution.size()); + return _node_distribution[pe]; + } + + [[nodiscard]] inline PEID find_owner_of_global_node(const GlobalNodeID u) const final { + KASSERT(u < global_n()); + auto it = std::upper_bound(_node_distribution.begin() + 1, _node_distribution.end(), u); + KASSERT(it != _node_distribution.end()); + return static_cast(std::distance(_node_distribution.begin(), it) - 1); + } + + [[nodiscard]] inline const StaticArray &edge_distribution() const final { + return _edge_distribution; + } + + [[nodiscard]] inline GlobalEdgeID edge_distribution(const PEID pe) const final { + KASSERT(static_cast(pe) < _edge_distribution.size()); + return _edge_distribution[pe]; + } + + // + // Cached inter-PE metrics + // + + [[nodiscard]] inline EdgeID edge_cut_to_pe(const PEID pe) const final { + KASSERT(static_cast(pe) < _edge_cut_to_pe.size()); + return _edge_cut_to_pe[pe]; + } + + [[nodiscard]] inline EdgeID comm_vol_to_pe(const PEID pe) const final { + KASSERT(static_cast(pe) < _comm_vol_to_pe.size()); + return _comm_vol_to_pe[pe]; + } + + [[nodiscard]] inline MPI_Comm communicator() const final { + return _communicator; + } + + // + // High degree classification + // + + void init_high_degree_info(const EdgeID high_degree_threshold) const final; + + [[nodiscard]] bool is_high_degree_node(const NodeID node) const final { + KASSERT(_high_degree_ghost_node.size() == ghost_n()); + KASSERT(!is_ghost_node(node) || node - n() < _high_degree_ghost_node.size()); + return is_ghost_node(node) ? 
_high_degree_ghost_node[node - n()] + : degree(node) > _high_degree_threshold; + } + + // + // Graph permutation + // + + void set_permutation(StaticArray permutation) final { + _permutation = std::move(permutation); + } + + [[nodiscard]] inline bool permuted() const final { + return !_permutation.empty(); + } + + [[nodiscard]] inline NodeID map_original_node(const NodeID u) const final { + KASSERT(permuted()); + KASSERT(u < _permutation.size()); + return _permutation[u]; + } + + // + // Degree buckets + // + + [[nodiscard]] inline bool sorted() const final { + return _sorted; + } + + [[nodiscard]] inline std::size_t number_of_buckets() const final { + return _number_of_buckets; + } + + [[nodiscard]] inline std::size_t bucket_size(const std::size_t bucket) const final { + return _buckets[bucket + 1] - _buckets[bucket]; + } + + [[nodiscard]] inline NodeID first_node_in_bucket(const std::size_t bucket) const final { + return _buckets[bucket]; + } + + [[nodiscard]] inline NodeID first_invalid_node_in_bucket(const std::size_t bucket) const final { + return first_node_in_bucket(bucket + 1); + } + + // + // Graph permutation by coloring + // + + inline void set_color_sorted(StaticArray color_sizes) final { + KASSERT(color_sizes.front() == 0u); + KASSERT(color_sizes.back() == n()); + _color_sizes = std::move(color_sizes); + } + + [[nodiscard]] inline bool color_sorted() const final { + return !_color_sizes.empty(); + } + + [[nodiscard]] inline std::size_t number_of_colors() const final { + return _color_sizes.size() - 1; + } + + [[nodiscard]] inline NodeID color_size(const std::size_t c) const final { + KASSERT(c < number_of_colors()); + return _color_sizes[c + 1] - _color_sizes[c]; + } + + [[nodiscard]] inline const StaticArray &get_color_sizes() const final { + return _color_sizes; + } + + // + // Functions to access/steal raw members of this graph + // + + [[nodiscard]] const auto &raw_nodes() const { + return _nodes; + } + [[nodiscard]] const auto &raw_node_weights() 
const { + return _node_weights; + } + [[nodiscard]] const auto &raw_edges() const { + return _edges; + } + [[nodiscard]] const auto &raw_edge_weights() const { + return _edge_weights; + } + + auto &&take_node_distribution() { + return std::move(_node_distribution); + } + auto &&take_edge_distribution() { + return std::move(_edge_distribution); + } + auto &&take_nodes() { + return std::move(_nodes); + } + auto &&take_edges() { + return std::move(_edges); + } + auto &&take_node_weights() { + return std::move(_node_weights); + } + auto &&take_edge_weights() { + return std::move(_edge_weights); + } + auto &&take_ghost_owner() { + return std::move(_ghost_owner); + } + auto &&take_ghost_to_global() { + return std::move(_ghost_to_global); + } + auto &&take_global_to_ghost() { + return std::move(_global_to_ghost); + } + +private: + void init_degree_buckets(); + void init_total_weights(); + void init_communication_metrics(); + + NodeID _n; + EdgeID _m; + NodeID _ghost_n; + GlobalNodeID _offset_n; + GlobalEdgeID _offset_m; + GlobalNodeID _global_n; + GlobalEdgeID _global_m; + + NodeWeight _total_node_weight{}; + GlobalNodeWeight _global_total_node_weight{}; + NodeWeight _max_node_weight{}; + NodeWeight _global_max_node_weight{}; + + EdgeWeight _total_edge_weight{}; + GlobalEdgeWeight _global_total_edge_weight{}; + + StaticArray _node_distribution{}; + StaticArray _edge_distribution{}; + + StaticArray _nodes{}; + StaticArray _edges{}; + StaticArray _node_weights{}; + StaticArray _edge_weights{}; + + StaticArray _ghost_owner{}; + StaticArray _ghost_to_global{}; + growt::StaticGhostNodeMapping _global_to_ghost{}; + + // mutable for lazy initialization + mutable StaticArray _high_degree_ghost_node{}; + mutable EdgeID _high_degree_threshold = 0; + + std::vector _edge_cut_to_pe{}; + std::vector _comm_vol_to_pe{}; + + StaticArray _permutation; + bool _sorted = false; + std::vector _buckets = std::vector(kNumberOfDegreeBuckets + 1); + std::size_t _number_of_buckets = 0; + + 
StaticArray _color_sizes{}; + + MPI_Comm _communicator; +}; + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_graph.cc b/kaminpar-dist/datastructures/distributed_graph.cc index 52bb1b02..5f8af086 100644 --- a/kaminpar-dist/datastructures/distributed_graph.cc +++ b/kaminpar-dist/datastructures/distributed_graph.cc @@ -1,5 +1,9 @@ /******************************************************************************* - * Static distributed graph data structure. + * Wrapper class that delegates all function calls to a concrete graph object. + * + * Most function calls are resolved via dynamic binding. Thus, they should not + * be used when performance is critical. Instead, use an downcast and templatize + * tight loops. * * @file: distributed_graph.cc * @author: Daniel Seemaier @@ -21,144 +25,13 @@ #include "kaminpar-common/parallel/algorithm.h" #include "kaminpar-common/parallel/vector_ets.h" -namespace kaminpar::dist { -void DistributedGraph::init_high_degree_info(const EdgeID high_degree_threshold) const { - if (_high_degree_threshold == high_degree_threshold) { - return; - } - - _high_degree_threshold = high_degree_threshold; - _high_degree_ghost_node.resize(ghost_n()); - - struct Message { - NodeID node; - std::uint8_t high_degree; - }; - - mpi::graph::sparse_alltoall_interface_to_pe( - *this, - [&](const NodeID u) -> Message { - return {.node = u, .high_degree = degree(u) > _high_degree_threshold}; - }, - [&](const auto &recv_buffer, const PEID pe) { - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto &[remote_node, high_degree] = recv_buffer[i]; - const NodeID local_node = map_remote_node(remote_node, pe); - _high_degree_ghost_node[local_node - n()] = high_degree; - }); - } - ); -} - namespace { -inline EdgeID degree_bucket(const EdgeID degree) { - return (degree == 0) ? 
/**
 * Checks whether all elements of a range compare equal to their successor.
 *
 * Empty ranges and ranges with a single element are considered all-equal.
 *
 * @param r The range to inspect; must provide begin() and end().
 * @return false as soon as two neighbouring elements differ, true otherwise.
 */
template <typename R> bool all_equal(const R &r) {
  auto previous = r.begin();
  const auto last = r.end();
  if (previous == last) {
    return true;
  }

  auto current = previous;
  ++current;
  while (current != last) {
    if (*previous != *current) {
      return false;
    }
    ++previous;
    ++current;
  }

  return true;
}
DistributedGraph::init_communication_metrics() { - const PEID size = mpi::get_comm_size(_communicator); - - tbb::enumerable_thread_specific> edge_cut_to_pe_ets{[&] { - return std::vector(size); - }}; - tbb::enumerable_thread_specific> comm_vol_to_pe_ets{[&] { - return std::vector(size); - }}; - - pfor_nodes_range([&](const auto r) { - auto &edge_cut_to_pe = edge_cut_to_pe_ets.local(); - auto &comm_vol_to_pe = comm_vol_to_pe_ets.local(); - Marker<> counted_pe{static_cast(size)}; - - for (NodeID u = r.begin(); u < r.end(); ++u) { - for (const auto v : adjacent_nodes(u)) { - if (is_ghost_node(v)) { - const PEID owner = ghost_owner(v); - KASSERT(static_cast(owner) < edge_cut_to_pe.size()); - ++edge_cut_to_pe[owner]; - - if (!counted_pe.get(owner)) { - KASSERT(static_cast(owner) < counted_pe.size()); - counted_pe.set(owner); - - KASSERT(static_cast(owner) < comm_vol_to_pe.size()); - ++comm_vol_to_pe[owner]; - } - } - } - counted_pe.reset(); - } - }); - - _edge_cut_to_pe.clear(); - _edge_cut_to_pe.resize(size); - for (const auto &edge_cut_to_pe : edge_cut_to_pe_ets) { // PE x THREADS - for (std::size_t i = 0; i < edge_cut_to_pe.size(); ++i) { - _edge_cut_to_pe[i] += edge_cut_to_pe[i]; - } - } - - _comm_vol_to_pe.clear(); - _comm_vol_to_pe.resize(size); - for (const auto &comm_vol_to_pe : comm_vol_to_pe_ets) { - for (std::size_t i = 0; i < comm_vol_to_pe.size(); ++i) { - _comm_vol_to_pe[i] += comm_vol_to_pe[i]; - } - } -} +namespace kaminpar::dist { void print_graph_summary(const DistributedGraph &graph) { const auto [n_min, n_avg, n_max, n_sum] = mpi::gather_statistics(graph.n(), graph.communicator()); @@ -202,12 +75,12 @@ void print_graph(const DistributedGraph &graph) { if (graph.is_owned_node(u)) { buf << " | "; - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { const char v_prefix = graph.is_owned_node(v) ? 
' ' : '!'; buf << v_prefix << "L" << std::setw(w) << v << " G" << std::setw(w) << graph.local_to_global_node(v) << " EW" << std::setw(w) << graph.edge_weight(e) << " NW" << std::setw(w) << graph.node_weight(v) << "\t"; - } + }); if (graph.degree(u) == 0) { buf << ""; } @@ -232,13 +105,13 @@ void print_local_graph_stats(const DistributedGraph &graph) { EdgeID local_m = 0, nonlocal_m = 0; NodeID min_deg = std::numeric_limits::max(), max_deg = 0; for (NodeID u = 0; u < graph.n(); ++u) { - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (graph.is_owned_node(v)) { ++local_m; } else { ++nonlocal_m; } - } + }); if (graph.degree(u) == 0) { ++buckets[0]; } else { @@ -261,12 +134,6 @@ void print_local_graph_stats(const DistributedGraph &graph) { DLOG << ss.str(); } -namespace { -template bool all_equal(const R &r) { - return std::adjacent_find(r.begin(), r.end(), std::not_equal_to{}) == r.end(); -} -} // namespace - bool validate_graph(const DistributedGraph &graph) { MPI_Comm comm = graph.communicator(); @@ -431,21 +298,24 @@ bool validate_graph(const DistributedGraph &graph) { } bool found = false; - for (const auto v : graph.adjacent_nodes(local_owned_node)) { + graph.adjacent_nodes(local_owned_node, [&](const NodeID v) { if (v == local_ghost_node) { found = true; - break; + return true; } - } + + return false; + }); if (!found) { LOG_ERROR << "PE " << pe << " expects a local edge " << local_owned_node << " (owned, global node " << owned << ") --> " << local_ghost_node << " (ghost, global node " << ghost << ") on this PE, but the edge does not exist"; LOG_ERROR << "Outgoing edges from local node " << local_owned_node << " are:"; - for (const auto v : graph.adjacent_nodes(local_owned_node)) { + + graph.adjacent_nodes(local_owned_node, [&](const NodeID v) { LOG_ERROR << "\t- " << v << " (global " << graph.local_to_global_node(v) << ")"; - } + }); return false; } } diff --git 
a/kaminpar-dist/datastructures/distributed_graph.h b/kaminpar-dist/datastructures/distributed_graph.h index 7b2a7b40..1bb1655b 100644 --- a/kaminpar-dist/datastructures/distributed_graph.h +++ b/kaminpar-dist/datastructures/distributed_graph.h @@ -1,5 +1,9 @@ /******************************************************************************* - * Static distributed graph data structure. + * Wrapper class that delegates all function calls to a concrete graph object. + * + * Most function calls are resolved via dynamic binding. Thus, they should not + * be used when performance is critical. Instead, use an downcast and templatize + * tight loops. * * @file: distributed_graph.h * @author: Daniel Seemaier @@ -8,12 +12,14 @@ #pragma once #include +#include #include -#include - #include "kaminpar-mpi/utils.h" +#include "kaminpar-dist/datastructures/abstract_distributed_graph.h" +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/datastructures/growt.h" #include "kaminpar-dist/dkaminpar.h" @@ -22,83 +28,23 @@ #include "kaminpar-common/ranges.h" namespace kaminpar::dist { -class DistributedGraph { + +class DistributedGraph : public AbstractDistributedGraph { public: // Data types used for this graph - using NodeID = dist::NodeID; - using EdgeID = dist::EdgeID; - using GlobalNodeID = dist::GlobalNodeID; - using GlobalEdgeID = dist::GlobalEdgeID; - using NodeWeight = dist::NodeWeight; - using EdgeWeight = dist::EdgeWeight; - using GlobalNodeWeight = dist::GlobalNodeWeight; - using GlobalEdgeWeight = dist::GlobalEdgeWeight; + using AbstractDistributedGraph::EdgeID; + using AbstractDistributedGraph::EdgeWeight; + using AbstractDistributedGraph::GlobalEdgeID; + using AbstractDistributedGraph::GlobalEdgeWeight; + using AbstractDistributedGraph::GlobalNodeID; + using AbstractDistributedGraph::GlobalNodeWeight; + using AbstractDistributedGraph::NodeID; + using 
AbstractDistributedGraph::NodeWeight; DistributedGraph() = default; - DistributedGraph( - StaticArray node_distribution, - StaticArray edge_distribution, - StaticArray nodes, - StaticArray edges, - StaticArray ghost_owner, - StaticArray ghost_to_global, - growt::StaticGhostNodeMapping global_to_ghost, - const bool sorted, - MPI_Comm comm - ) - : DistributedGraph( - std::move(node_distribution), - std::move(edge_distribution), - std::move(nodes), - std::move(edges), - {}, - {}, - std::move(ghost_owner), - std::move(ghost_to_global), - std::move(global_to_ghost), - sorted, - comm - ) {} - - DistributedGraph( - StaticArray node_distribution, - StaticArray edge_distribution, - StaticArray nodes, - StaticArray edges, - StaticArray node_weights, - StaticArray edge_weights, - StaticArray ghost_owner, - StaticArray ghost_to_global, - growt::StaticGhostNodeMapping global_to_ghost, - const bool sorted, - MPI_Comm comm - ) - : _node_distribution(std::move(node_distribution)), - _edge_distribution(std::move(edge_distribution)), - _nodes(std::move(nodes)), - _edges(std::move(edges)), - _node_weights(std::move(node_weights)), - _edge_weights(std::move(edge_weights)), - _ghost_owner(std::move(ghost_owner)), - _ghost_to_global(std::move(ghost_to_global)), - _global_to_ghost(std::move(global_to_ghost)), - _sorted(sorted), - _communicator(comm) { - const PEID rank = mpi::get_comm_rank(communicator()); - - _n = _nodes.size() - 1; - _m = _edges.size(); - _ghost_n = _ghost_to_global.size(); - _offset_n = _node_distribution[rank]; - _offset_m = _edge_distribution[rank]; - _global_n = _node_distribution.back(); - _global_m = _edge_distribution.back(); - - init_total_weights(); - init_communication_metrics(); - init_degree_buckets(); - } + DistributedGraph(std::unique_ptr graph) + : _underlying_graph(std::move(graph)) {} DistributedGraph(const DistributedGraph &) = delete; DistributedGraph &operator=(const DistributedGraph &) = delete; @@ -106,467 +52,396 @@ class DistributedGraph { 
DistributedGraph(DistributedGraph &&) noexcept = default; DistributedGraph &operator=(DistributedGraph &&) noexcept = default; + ~DistributedGraph() override = default; + + // + // Underlying graph + // + + [[nodiscard]] AbstractDistributedGraph *underlying_graph() { + return _underlying_graph.get(); + } + + [[nodiscard]] const AbstractDistributedGraph *underlying_graph() const { + return _underlying_graph.get(); + } + + [[nodiscard]] AbstractDistributedGraph *take_underlying_graph() { + return _underlying_graph.release(); + } + + // // Size of the graph - [[nodiscard]] inline GlobalNodeID global_n() const { - return _global_n; + // + + [[nodiscard]] inline GlobalNodeID global_n() const final { + return _underlying_graph->global_n(); } - [[nodiscard]] inline GlobalEdgeID global_m() const { - return _global_m; + [[nodiscard]] inline GlobalEdgeID global_m() const final { + return _underlying_graph->global_m(); } - [[nodiscard]] inline NodeID n() const { - return _n; + [[nodiscard]] inline NodeID n() const final { + return _underlying_graph->n(); } - [[nodiscard]] inline NodeID n(const PEID pe) const { - KASSERT(pe < static_cast(_node_distribution.size())); - return _node_distribution[pe + 1] - _node_distribution[pe]; + [[nodiscard]] inline NodeID n(const PEID pe) const final { + return _underlying_graph->n(pe); } - [[nodiscard]] inline NodeID ghost_n() const { - return _ghost_n; + [[nodiscard]] inline NodeID ghost_n() const final { + return _underlying_graph->ghost_n(); } - [[nodiscard]] inline NodeID total_n() const { - return ghost_n() + n(); + [[nodiscard]] inline NodeID total_n() const final { + return _underlying_graph->total_n(); } - [[nodiscard]] inline EdgeID m() const { - return _m; + [[nodiscard]] inline EdgeID m() const final { + return _underlying_graph->m(); } - [[nodiscard]] inline EdgeID m(const PEID pe) const { - KASSERT(pe < static_cast(_edge_distribution.size())); - return _edge_distribution[pe + 1] - _edge_distribution[pe]; + [[nodiscard]] inline 
EdgeID m(const PEID pe) const final { + return _underlying_graph->m(pe); } - [[nodiscard]] inline GlobalNodeID offset_n() const { - return _offset_n; + [[nodiscard]] inline GlobalNodeID offset_n() const final { + return _underlying_graph->offset_n(); } - [[nodiscard]] inline GlobalNodeID offset_n(const PEID pe) const { - return _node_distribution[pe]; + [[nodiscard]] inline GlobalNodeID offset_n(const PEID pe) const final { + return _underlying_graph->offset_n(pe); } - [[nodiscard]] inline GlobalEdgeID offset_m() const { - return _offset_m; + [[nodiscard]] inline GlobalEdgeID offset_m() const final { + return _underlying_graph->offset_m(); } - [[nodiscard]] inline GlobalEdgeID offset_m(const PEID pe) const { - return _edge_distribution[pe]; + [[nodiscard]] inline GlobalEdgeID offset_m(const PEID pe) const final { + return _underlying_graph->offset_m(pe); } + // // Node and edge weights - [[nodiscard]] inline bool is_node_weighted() const { - return !_node_weights.empty(); + // + + [[nodiscard]] inline bool is_node_weighted() const final { + return _underlying_graph->is_node_weighted(); } - [[nodiscard]] inline NodeWeight node_weight(const NodeID u) const { - return is_node_weighted() ? 
_node_weights[u] : 1; + [[nodiscard]] inline NodeWeight node_weight(const NodeID u) const final { + return _underlying_graph->node_weight(u); } - [[nodiscard]] inline NodeWeight max_node_weight() const { - return _max_node_weight; + [[nodiscard]] inline NodeWeight max_node_weight() const final { + return _underlying_graph->max_node_weight(); } - [[nodiscard]] inline NodeWeight global_max_node_weight() const { - return _global_max_node_weight; + [[nodiscard]] inline NodeWeight global_max_node_weight() const final { + return _underlying_graph->global_max_node_weight(); } - [[nodiscard]] inline NodeWeight total_node_weight() const { - return _total_node_weight; + [[nodiscard]] inline NodeWeight total_node_weight() const final { + return _underlying_graph->total_node_weight(); } - [[nodiscard]] inline GlobalNodeWeight global_total_node_weight() const { - return _global_total_node_weight; + [[nodiscard]] inline GlobalNodeWeight global_total_node_weight() const final { + return _underlying_graph->global_total_node_weight(); } - [[nodiscard]] inline bool is_edge_weighted() const { - return !_edge_weights.empty(); + [[nodiscard]] inline bool is_edge_weighted() const final { + return _underlying_graph->is_edge_weighted(); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const { - return is_edge_weighted() ? 
_edge_weights[e] : 1; + [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { + return _underlying_graph->edge_weight(e); } - [[nodiscard]] inline EdgeWeight total_edge_weight() const { - return _total_edge_weight; + [[nodiscard]] inline EdgeWeight total_edge_weight() const final { + return _underlying_graph->total_edge_weight(); } - [[nodiscard]] inline GlobalEdgeWeight global_total_edge_weight() const { - return _global_total_edge_weight; + [[nodiscard]] inline GlobalEdgeWeight global_total_edge_weight() const final { + return _underlying_graph->global_total_edge_weight(); } + // // Node ownership - [[nodiscard]] inline bool is_owned_global_node(const GlobalNodeID global_u) const { - return (offset_n() <= global_u && global_u < offset_n() + n()); + // + + [[nodiscard]] inline bool is_owned_global_node(const GlobalNodeID global_u) const final { + return _underlying_graph->is_owned_global_node(global_u); } - [[nodiscard]] inline bool contains_global_node(const GlobalNodeID global_u) const { - return is_owned_global_node(global_u) || - (_global_to_ghost.find(global_u + 1) != _global_to_ghost.end()); + [[nodiscard]] inline bool contains_global_node(const GlobalNodeID global_u) const final { + return _underlying_graph->contains_global_node(global_u); } - [[nodiscard]] inline bool contains_local_node(const NodeID local_u) const { - return local_u < total_n(); + [[nodiscard]] inline bool contains_local_node(const NodeID local_u) const final { + return _underlying_graph->contains_local_node(local_u); } + // // Node type - [[nodiscard]] inline bool is_ghost_node(const NodeID u) const { - KASSERT(u < total_n()); - return u >= n(); + // + + [[nodiscard]] inline bool is_ghost_node(const NodeID u) const final { + return _underlying_graph->is_ghost_node(u); } - [[nodiscard]] inline bool is_owned_node(const NodeID u) const { - KASSERT(u < total_n()); - return u < n(); + [[nodiscard]] inline bool is_owned_node(const NodeID u) const final { + return 
_underlying_graph->is_owned_node(u); } - [[nodiscard]] inline PEID ghost_owner(const NodeID u) const { - KASSERT(is_ghost_node(u)); - KASSERT(u - n() < _ghost_owner.size()); - KASSERT(_ghost_owner[u - n()] >= 0); - KASSERT(_ghost_owner[u - n()] < mpi::get_comm_size(communicator())); - return _ghost_owner[u - n()]; + [[nodiscard]] inline PEID ghost_owner(const NodeID u) const final { + return _underlying_graph->ghost_owner(u); } - [[nodiscard]] inline NodeID map_remote_node(const NodeID their_lnode, const PEID owner) const { - const GlobalNodeID gnode = static_cast(their_lnode + offset_n(owner)); - return global_to_local_node(gnode); + [[nodiscard]] inline NodeID + map_remote_node(const NodeID their_lnode, const PEID owner) const final { + return _underlying_graph->map_remote_node(their_lnode, owner); } - [[nodiscard]] inline GlobalNodeID local_to_global_node(const NodeID local_u) const { - KASSERT(contains_local_node(local_u)); - return is_owned_node(local_u) ? _offset_n + local_u : _ghost_to_global[local_u - n()]; + [[nodiscard]] inline GlobalNodeID local_to_global_node(const NodeID local_u) const final { + return _underlying_graph->local_to_global_node(local_u); } - [[nodiscard]] inline NodeID global_to_local_node(const GlobalNodeID global_u) const { - KASSERT(contains_global_node(global_u)); + [[nodiscard]] inline NodeID global_to_local_node(const GlobalNodeID global_u) const final { + return _underlying_graph->global_to_local_node(global_u); + } - if (offset_n() <= global_u && global_u < offset_n() + n()) { - return global_u - offset_n(); - } else { - KASSERT(_global_to_ghost.find(global_u + 1) != _global_to_ghost.end()); - return (*_global_to_ghost.find(global_u + 1)).second; - } + // + // Iterators for nodes / edges + // + + [[nodiscard]] inline IotaRange nodes(const NodeID from, const NodeID to) const final { + return _underlying_graph->nodes(from, to); } - // Access methods - [[nodiscard]] inline const auto &node_weights() const { - return _node_weights; + 
[[nodiscard]] inline IotaRange nodes() const final { + return _underlying_graph->nodes(); } - // convenient to have this for ghost nodes - void set_ghost_node_weight(const NodeID ghost_node, const NodeWeight weight) { - KASSERT(is_ghost_node(ghost_node)); - KASSERT(is_node_weighted()); - _node_weights[ghost_node] = weight; + [[nodiscard]] inline IotaRange ghost_nodes() const final { + return _underlying_graph->ghost_nodes(); } - [[nodiscard]] inline const auto &edge_weights() const { - return _edge_weights; + [[nodiscard]] inline IotaRange all_nodes() const final { + return _underlying_graph->all_nodes(); } - // Low-level access to the graph structure - [[nodiscard]] inline EdgeID first_edge(const NodeID u) const { - KASSERT(u < n()); - return _nodes[u]; + [[nodiscard]] inline IotaRange edges() const final { + return _underlying_graph->edges(); } - [[nodiscard]] inline EdgeID first_invalid_edge(const NodeID u) const { - KASSERT(u < n()); - return _nodes[u + 1]; + [[nodiscard]] inline IotaRange incident_edges(const NodeID u) const final { + return _underlying_graph->incident_edges(u); } - [[nodiscard]] inline NodeID edge_target(const EdgeID e) const { - KASSERT(e < m()); - return _edges[e]; + // + // Access methods + // + + [[nodiscard]] inline NodeID degree(const NodeID u) const final { + return _underlying_graph->degree(u); } - [[nodiscard]] inline NodeID degree(const NodeID u) const { - KASSERT(is_owned_node(u)); - return _nodes[u + 1] - _nodes[u]; + [[nodiscard]] inline const StaticArray &node_weights() const final { + return _underlying_graph->node_weights(); } - [[nodiscard]] const auto &node_distribution() const { - return _node_distribution; + [[nodiscard]] inline const StaticArray &edge_weights() const final { + return _underlying_graph->edge_weights(); } - [[nodiscard]] GlobalNodeID node_distribution(const PEID pe) const { - KASSERT(static_cast(pe) < _node_distribution.size()); - return _node_distribution[pe]; + inline void set_ghost_node_weight(const 
NodeID ghost_node, const NodeWeight weight) final { + _underlying_graph->set_ghost_node_weight(ghost_node, weight); } - PEID find_owner_of_global_node(const GlobalNodeID u) const { - KASSERT(u < global_n()); - auto it = std::upper_bound(_node_distribution.begin() + 1, _node_distribution.end(), u); - KASSERT(it != _node_distribution.end()); - return static_cast(std::distance(_node_distribution.begin(), it) - 1); + [[nodiscard]] inline const StaticArray &node_distribution() const final { + return _underlying_graph->node_distribution(); } - [[nodiscard]] const auto &edge_distribution() const { - return _edge_distribution; + [[nodiscard]] inline GlobalNodeID node_distribution(const PEID pe) const final { + return _underlying_graph->node_distribution(pe); } - [[nodiscard]] GlobalEdgeID edge_distribution(const PEID pe) const { - KASSERT(static_cast(pe) < _edge_distribution.size()); - return _edge_distribution[pe]; + [[nodiscard]] inline PEID find_owner_of_global_node(const GlobalNodeID u) const final { + return _underlying_graph->find_owner_of_global_node(u); } - [[nodiscard]] const auto &raw_nodes() const { - return _nodes; + [[nodiscard]] inline const StaticArray &edge_distribution() const final { + return _underlying_graph->edge_distribution(); } - [[nodiscard]] const auto &raw_node_weights() const { - return _node_weights; + + [[nodiscard]] inline GlobalEdgeID edge_distribution(const PEID pe) const final { + return _underlying_graph->edge_distribution(pe); } - [[nodiscard]] const auto &raw_edges() const { - return _edges; + + // + // Graph operations + // + + template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { + reified([&](auto &graph) { graph.adjacent_nodes(u, std::forward(l)); }); } - [[nodiscard]] const auto &raw_edge_weights() const { - return _edge_weights; + + template inline void neighbors(const NodeID u, Lambda &&l) const { + reified([&](auto &graph) { graph.neighbors(u, std::forward(l)); }); } + template + inline void neighbors(const 
NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { + reified([&](auto &graph) { graph.neighbors(u, max_num_neighbors, std::forward(l)); }); + } + + // // Parallel iteration + // + template inline void pfor_nodes(const NodeID from, const NodeID to, Lambda &&l) const { - tbb::parallel_for(from, to, std::forward(l)); + reified([&](auto &graph) { graph.pfor_nodes(from, to, std::forward(l)); }); } template inline void pfor_nodes_range(const NodeID from, const NodeID to, Lambda &&l) const { - tbb::parallel_for(tbb::blocked_range(from, to), std::forward(l)); + reified([&](auto &graph) { graph.pfor_nodes_range(from, to, std::forward(l)); }); } template inline void pfor_ghost_nodes(Lambda &&l) const { - pfor_nodes(n(), total_n(), std::forward(l)); + reified([&](auto &graph) { graph.pfor_ghost_nodes(std::forward(l)); }); } template inline void pfor_nodes(Lambda &&l) const { - pfor_nodes(0, n(), std::forward(l)); + reified([&](auto &graph) { graph.pfor_nodes(std::forward(l)); }); } template inline void pfor_all_nodes(Lambda &&l) const { - pfor_nodes(0, total_n(), std::forward(l)); + reified([&](auto &graph) { graph.pfor_all_nodes(std::forward(l)); }); } template inline void pfor_nodes_range(Lambda &&l) const { - pfor_nodes_range(0, n(), std::forward(l)); + reified([&](auto &graph) { graph.pfor_nodes_range(std::forward(l)); }); } template inline void pfor_all_nodes_range(Lambda &&l) const { - pfor_nodes_range(0, total_n(), std::forward(l)); + reified([&](auto &graph) { graph.pfor_all_nodes_range(std::forward(l)); }); } template inline void pfor_edges(Lambda &&l) const { - tbb::parallel_for(static_cast(0), m(), std::forward(l)); - } - - // Iterators for nodes / edges - [[nodiscard]] inline auto nodes(const NodeID from, const NodeID to) const { - return IotaRange(from, to); - } - [[nodiscard]] inline auto nodes() const { - return nodes(0, n()); - } - [[nodiscard]] inline auto ghost_nodes() const { - return IotaRange(n(), total_n()); - } - [[nodiscard]] inline auto 
all_nodes() const { - return IotaRange(static_cast(0), total_n()); - } - [[nodiscard]] inline auto edges() const { - return IotaRange(static_cast(0), m()); - } - [[nodiscard]] inline auto incident_edges(const NodeID u) const { - return IotaRange(_nodes[u], _nodes[u + 1]); - } - - [[nodiscard]] inline auto adjacent_nodes(const NodeID u) const { - return TransformedIotaRange(_nodes[u], _nodes[u + 1], [this](const EdgeID e) { - return this->edge_target(e); - }); - } - - [[nodiscard]] inline auto neighbors(const NodeID u) const { - return TransformedIotaRange(_nodes[u], _nodes[u + 1], [this](const EdgeID e) { - return std::make_pair(e, this->edge_target(e)); - }); + reified([&](auto &graph) { graph.pfor_edges(std::forward(l)); }); } + // // Cached inter-PE metrics - [[nodiscard]] inline EdgeID edge_cut_to_pe(const PEID pe) const { - KASSERT(static_cast(pe) < _edge_cut_to_pe.size()); - return _edge_cut_to_pe[pe]; - } + // - [[nodiscard]] inline EdgeID comm_vol_to_pe(const PEID pe) const { - KASSERT(static_cast(pe) < _comm_vol_to_pe.size()); - return _comm_vol_to_pe[pe]; + [[nodiscard]] inline EdgeID edge_cut_to_pe(const PEID pe) const final { + return _underlying_graph->edge_cut_to_pe(pe); } - [[nodiscard]] inline MPI_Comm communicator() const { - return _communicator; + [[nodiscard]] inline EdgeID comm_vol_to_pe(const PEID pe) const final { + return _underlying_graph->comm_vol_to_pe(pe); } - // Functions to steal members of this graph - - auto &&take_node_distribution() { - return std::move(_node_distribution); - } - auto &&take_edge_distribution() { - return std::move(_edge_distribution); - } - auto &&take_nodes() { - return std::move(_nodes); - } - auto &&take_edges() { - return std::move(_edges); - } - auto &&take_node_weights() { - return std::move(_node_weights); - } - auto &&take_edge_weights() { - return std::move(_edge_weights); - } - auto &&take_ghost_owner() { - return std::move(_ghost_owner); - } - auto &&take_ghost_to_global() { - return 
std::move(_ghost_to_global); - } - auto &&take_global_to_ghost() { - return std::move(_global_to_ghost); + [[nodiscard]] inline MPI_Comm communicator() const final { + return _underlying_graph->communicator(); } + // // High degree classification + // - void init_high_degree_info(EdgeID high_degree_threshold) const; + void init_high_degree_info(const EdgeID high_degree_threshold) const final { + _underlying_graph->init_high_degree_info(high_degree_threshold); + } - [[nodiscard]] bool is_high_degree_node(const NodeID node) const { - KASSERT(_high_degree_ghost_node.size() == ghost_n()); - KASSERT(!is_ghost_node(node) || node - n() < _high_degree_ghost_node.size()); - return is_ghost_node(node) ? _high_degree_ghost_node[node - n()] - : degree(node) > _high_degree_threshold; + [[nodiscard]] bool is_high_degree_node(const NodeID node) const final { + return _underlying_graph->is_high_degree_node(node); } // // Graph permutation // - void set_permutation(StaticArray permutation) { - _permutation = std::move(permutation); + void set_permutation(StaticArray permutation) final { + _underlying_graph->set_permutation(std::move(permutation)); } - inline bool permuted() const { - return !_permutation.empty(); + [[nodiscard]] inline bool permuted() const final { + return _underlying_graph->permuted(); } - inline NodeID map_original_node(const NodeID u) const { - KASSERT(permuted()); - KASSERT(u < _permutation.size()); - return _permutation[u]; + [[nodiscard]] inline NodeID map_original_node(const NodeID u) const final { + return _underlying_graph->map_original_node(u); } // // Degree buckets // - [[nodiscard]] inline bool sorted() const { - return _sorted; + [[nodiscard]] inline bool sorted() const final { + return _underlying_graph->sorted(); } - [[nodiscard]] inline std::size_t bucket_size(const std::size_t bucket) const { - return _buckets[bucket + 1] - _buckets[bucket]; + [[nodiscard]] inline std::size_t number_of_buckets() const final { + return 
_underlying_graph->number_of_buckets(); } - [[nodiscard]] inline NodeID first_node_in_bucket(const std::size_t bucket) const { - return _buckets[bucket]; + [[nodiscard]] inline std::size_t bucket_size(const std::size_t bucket) const final { + return _underlying_graph->bucket_size(bucket); } - [[nodiscard]] inline NodeID first_invalid_node_in_bucket(const std::size_t bucket) const { - return first_node_in_bucket(bucket + 1); + [[nodiscard]] inline NodeID first_node_in_bucket(const std::size_t bucket) const final { + return _underlying_graph->first_node_in_bucket(bucket); } - [[nodiscard]] inline std::size_t number_of_buckets() const { - return _number_of_buckets; + [[nodiscard]] inline NodeID first_invalid_node_in_bucket(const std::size_t bucket) const final { + return _underlying_graph->first_invalid_node_in_bucket(bucket); } // // Graph permutation by coloring // - void set_color_sorted(StaticArray color_sizes) { - KASSERT(color_sizes.front() == 0u); - KASSERT(color_sizes.back() == n()); - _color_sizes = std::move(color_sizes); + void set_color_sorted(StaticArray color_sizes) final { + _underlying_graph->set_color_sorted(std::move(color_sizes)); } - inline bool color_sorted() const { - return !_color_sizes.empty(); + [[nodiscard]] inline bool color_sorted() const final { + return _underlying_graph->color_sorted(); } - std::size_t number_of_colors() const { - return _color_sizes.size() - 1; + [[nodiscard]] std::size_t number_of_colors() const final { + return _underlying_graph->number_of_colors(); } - NodeID color_size(const std::size_t c) const { - KASSERT(c < number_of_colors()); - return _color_sizes[c + 1] - _color_sizes[c]; + [[nodiscard]] NodeID color_size(const std::size_t c) const final { + return _underlying_graph->color_size(c); } - const auto &get_color_sizes() const { - return _color_sizes; + [[nodiscard]] const StaticArray &get_color_sizes() const final { + return _underlying_graph->get_color_sizes(); } private: - void init_degree_buckets(); - void 
init_total_weights(); - void init_communication_metrics(); - - NodeID _n; - EdgeID _m; - NodeID _ghost_n; - GlobalNodeID _offset_n; - GlobalEdgeID _offset_m; - GlobalNodeID _global_n; - GlobalEdgeID _global_m; + std::unique_ptr _underlying_graph; - NodeWeight _total_node_weight{}; - GlobalNodeWeight _global_total_node_weight{}; - NodeWeight _max_node_weight{}; - NodeWeight _global_max_node_weight{}; + template decltype(auto) reified(Lambda &&l) const { + const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); - EdgeWeight _total_edge_weight{}; - GlobalEdgeWeight _global_total_edge_weight{}; - - StaticArray _node_distribution{}; - StaticArray _edge_distribution{}; - - StaticArray _nodes{}; - StaticArray _edges{}; - StaticArray _node_weights{}; - StaticArray _edge_weights{}; - - StaticArray _ghost_owner{}; - StaticArray _ghost_to_global{}; - growt::StaticGhostNodeMapping _global_to_ghost{}; - - // mutable for lazy initialization - mutable StaticArray _high_degree_ghost_node{}; - mutable EdgeID _high_degree_threshold = 0; - - std::vector _edge_cut_to_pe{}; - std::vector _comm_vol_to_pe{}; - - StaticArray _permutation; - bool _sorted = false; - std::vector _buckets = std::vector(kNumberOfDegreeBuckets + 1); - std::size_t _number_of_buckets = 0; - - StaticArray _color_sizes{}; + if (const auto *graph = dynamic_cast(abstract_graph); + graph != nullptr) { + return l(*graph); + } else if (const auto *graph = dynamic_cast(abstract_graph); + graph != nullptr) { + return l(*graph); + } - MPI_Comm _communicator; + __builtin_unreachable(); + } }; /** @@ -595,4 +470,5 @@ void print_local_graph_stats(const DistributedGraph &graph); */ bool validate_graph(const DistributedGraph &graph); } // namespace debug + } // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_partitioned_graph.h b/kaminpar-dist/datastructures/distributed_partitioned_graph.h index c5145eba..1764838d 100644 --- 
a/kaminpar-dist/datastructures/distributed_partitioned_graph.h +++ b/kaminpar-dist/datastructures/distributed_partitioned_graph.h @@ -103,18 +103,11 @@ class DistributedPartitionedGraph { [[nodiscard]] inline NodeID global_to_local_node(const GlobalNodeID global_u) const { return _graph->global_to_local_node(global_u); } [[nodiscard]] inline NodeWeight node_weight(const NodeID u) const { return _graph->node_weight(u); } [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const { return _graph->edge_weight(e); } - [[nodiscard]] inline EdgeID first_edge(const NodeID u) const { return _graph->first_edge(u); } - [[nodiscard]] inline EdgeID first_invalid_edge(const NodeID u) const { return _graph->first_invalid_edge(u); } - [[nodiscard]] inline NodeID edge_target(const EdgeID e) const { return _graph->edge_target(e); } [[nodiscard]] inline NodeID degree(const NodeID u) const { return _graph->degree(u); } [[nodiscard]] inline const auto &node_distribution() const { return _graph->node_distribution(); } [[nodiscard]] inline GlobalNodeID node_distribution(const PEID pe) const { return _graph->node_distribution(pe); } [[nodiscard]] inline const auto &edge_distribution() const { return _graph->edge_distribution(); } [[nodiscard]] inline GlobalEdgeID edge_distribution(const PEID pe) const { return _graph->edge_distribution(pe); } - [[nodiscard]] const auto &raw_nodes() const { return _graph->raw_nodes(); } - [[nodiscard]] const auto &raw_node_weights() const { return _graph->raw_node_weights(); } - [[nodiscard]] const auto &raw_edges() const { return _graph->raw_edges(); } - [[nodiscard]] const auto &raw_edge_weights() const { return _graph->raw_edge_weights(); } template inline void pfor_nodes(const NodeID from, const NodeID to, Lambda &&l) const { _graph->pfor_nodes(from, to, std::forward(l)); } template inline void pfor_nodes_range(const NodeID from, const NodeID to, Lambda &&l) const { _graph->pfor_nodes_range(from, to, std::forward(l)); } template inline void 
pfor_all_nodes(Lambda &&l) const { _graph->pfor_all_nodes(std::forward(l)); } @@ -128,8 +121,9 @@ class DistributedPartitionedGraph { [[nodiscard]] inline auto all_nodes() const { return _graph->all_nodes(); } [[nodiscard]] inline auto edges() const { return _graph->edges(); } [[nodiscard]] inline auto incident_edges(const NodeID u) const { return _graph->incident_edges(u); } - [[nodiscard]] inline auto adjacent_nodes(const NodeID u) const { return _graph->adjacent_nodes(u); } - [[nodiscard]] inline auto neighbors(const NodeID u) const { return _graph->neighbors(u); } + template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { _graph->adjacent_nodes(u, std::forward(l)); } + template inline void neighbors(const NodeID u, Lambda &&l) const { _graph->neighbors(u, std::forward(l)); } + template inline void neighbors(const NodeID u, NodeID max_num_neighbors, const Lambda &&l) const { _graph->neighbors(u, max_num_neighbors, std::forward(l)); } [[nodiscard]] inline std::size_t bucket_size(const std::size_t bucket) const { return _graph->bucket_size(bucket); } [[nodiscard]] inline NodeID first_node_in_bucket(const std::size_t bucket) const { return _graph->first_node_in_bucket(bucket); } [[nodiscard]] inline NodeID first_invalid_node_in_bucket(const std::size_t bucket) const { return _graph->first_invalid_node_in_bucket(bucket); } @@ -203,9 +197,14 @@ class DistributedPartitionedGraph { [[nodiscard]] inline bool check_border_node(const NodeID u) const { const BlockID u_block = block(u); - return std::any_of(adjacent_nodes(u).begin(), adjacent_nodes(u).end(), [&](const NodeID v) { - return u_block != block(v); + + bool is_border_node = false; + adjacent_nodes(u, [&](const NodeID v) { + is_border_node = u_block != block(v); + return is_border_node; }); + + return is_border_node; } private: diff --git a/kaminpar-dist/datastructures/ghost_node_mapper.h b/kaminpar-dist/datastructures/ghost_node_mapper.h index db259a69..b066ec83 100644 --- 
a/kaminpar-dist/datastructures/ghost_node_mapper.h +++ b/kaminpar-dist/datastructures/ghost_node_mapper.h @@ -86,7 +86,7 @@ class GhostNodeMapper { return { .global_to_ghost = std::move(global_to_ghost), .ghost_to_global = std::move(ghost_to_global), - .ghost_owner = std::move(ghost_owner) + .ghost_owner = std::move(ghost_owner), }; } diff --git a/kaminpar-dist/debug.cc b/kaminpar-dist/debug.cc index 25199316..58edbcd7 100644 --- a/kaminpar-dist/debug.cc +++ b/kaminpar-dist/debug.cc @@ -64,12 +64,13 @@ void write_metis_graph(const std::string &filename, const DistributedGraph &grap if (graph.is_node_weighted()) { out << graph.node_weight(lu) << " "; } - for (const auto &[e, lv] : graph.neighbors(lu)) { + + graph.neighbors(lu, [&](const EdgeID e, const NodeID lv) { out << graph.local_to_global_node(lv) + 1 << " "; if (graph.is_edge_weighted()) { out << graph.edge_weight(e) << " "; } - } + }); out << "\n"; } } diff --git a/kaminpar-dist/distributed_label_propagation.h b/kaminpar-dist/distributed_label_propagation.h index 116dd383..31d3ff98 100644 --- a/kaminpar-dist/distributed_label_propagation.h +++ b/kaminpar-dist/distributed_label_propagation.h @@ -282,31 +282,26 @@ template class LabelPropagation { bool is_interface_node = false; - auto add_to_rating_map = [&](const EdgeID e, const NodeID v) { + _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID e, const NodeID v) { if (derived_accept_neighbor(u, v)) { const ClusterID v_cluster = derived_cluster(v); const EdgeWeight rating = _graph->edge_weight(e); + map[v_cluster] += rating; + if constexpr (Config::kUseLocalActiveSetStrategy) { is_interface_node |= v >= _num_active_nodes; } } - }; - - const EdgeID from = _graph->first_edge(u); - const EdgeID to = from + std::min(_graph->degree(u), _max_num_neighbors); - for (EdgeID e = from; e < to; ++e) { - add_to_rating_map(e, _graph->edge_target(e)); - } + }); - if constexpr (Config::kUseLocalActiveSetStrategy) { + if constexpr (Config::kUseActiveSetStrategy) { + 
_active[u] = 0; + } else if constexpr (Config::kUseLocalActiveSetStrategy) { if (!is_interface_node) { _active[u] = 0; } } - if constexpr (Config::kUseActiveSetStrategy) { - _active[u] = 0; - } // After LP, we might want to use 2-hop clustering to merge nodes that // could not find any cluster to join for this, we store a favored cluster @@ -360,7 +355,7 @@ template class LabelPropagation { * @param u Node that was moved. */ void activate_neighbors(const NodeID u) { - for (const NodeID v : _graph->adjacent_nodes(u)) { + _graph->adjacent_nodes(u, [&](const NodeID v) { // call derived_activate_neighbor() even if we do not use the active set // strategy since the function might have side effects; the compiler // should remove it if it does not side effects @@ -369,7 +364,7 @@ template class LabelPropagation { _active[v].store(1, std::memory_order_relaxed); } } - } + }); } void match_isolated_nodes( diff --git a/kaminpar-dist/dkaminpar.cc b/kaminpar-dist/dkaminpar.cc index 5e180055..ef2a5e6b 100644 --- a/kaminpar-dist/dkaminpar.cc +++ b/kaminpar-dist/dkaminpar.cc @@ -7,6 +7,7 @@ ******************************************************************************/ #include "kaminpar-dist/dkaminpar.h" +#include #include #include @@ -15,6 +16,7 @@ #include #include "kaminpar-dist/context_io.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/datastructures/distributed_graph.h" #include "kaminpar-dist/datastructures/ghost_node_mapper.h" #include "kaminpar-dist/factories.h" @@ -99,9 +101,12 @@ void print_input_summary( if (root && parseable) { LOG << "EXECUTION_MODE num_mpis=" << ctx.parallel.num_mpis << " num_threads=" << ctx.parallel.num_threads; - LOG << "INPUT_GRAPH " << "global_n=" << graph.global_n() << " " - << "global_m=" << graph.global_m() << " " << "n=[" << n_str << "] " << "m=[" << m_str - << "] " << "ghost_n=[" << ghost_n_str << "]"; + LOG << "INPUT_GRAPH " + << "global_n=" << graph.global_n() << " " + << "global_m=" << 
graph.global_m() << " " + << "n=[" << n_str << "] " + << "m=[" << m_str << "] " + << "ghost_n=[" << ghost_n_str << "]"; } // Output @@ -235,7 +240,7 @@ void dKaMinPar::import_graph( auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); - _graph_ptr = std::make_unique( + import_graph({std::make_unique( std::move(node_distribution), std::move(edge_distribution), std::move(nodes), @@ -247,7 +252,7 @@ void dKaMinPar::import_graph( std::move(global_to_ghost), false, _comm - ); + )}); // Fill in ghost node weights if (vwgt != nullptr) { @@ -255,6 +260,11 @@ void dKaMinPar::import_graph( } } +void dKaMinPar::import_graph(DistributedGraph graph) { + _was_rearranged = false; + _graph_ptr = std::make_unique(std::move(graph)); +} + GlobalEdgeWeight dKaMinPar::compute_partition(const BlockID k, BlockID *partition) { DistributedGraph &graph = *_graph_ptr; @@ -284,8 +294,12 @@ GlobalEdgeWeight dKaMinPar::compute_partition(const BlockID k, BlockID *partitio } START_TIMER("Partitioning"); - if (!_was_rearranged) { - graph = graph::rearrange(std::move(graph), _ctx); + if (!_was_rearranged && _ctx.rearrange_by != GraphOrdering::NATURAL) { + DistributedCSRGraph &csr_graph = + *dynamic_cast(_graph_ptr->take_underlying_graph()); + graph = DistributedGraph( + std::make_unique(graph::rearrange(std::move(csr_graph), _ctx)) + ); _was_rearranged = true; } auto p_graph = factory::create_partitioner(_ctx, graph)->partition(); diff --git a/kaminpar-dist/dkaminpar.h b/kaminpar-dist/dkaminpar.h index cc57cb53..a47e9643 100644 --- a/kaminpar-dist/dkaminpar.h +++ b/kaminpar-dist/dkaminpar.h @@ -304,6 +304,10 @@ struct RefinementContext { [[nodiscard]] bool includes_algorithm(RefinementAlgorithm algorithm) const; }; +struct GraphCompressionContext { + bool enabled; +}; + struct PartitionContext { PartitionContext(BlockID k, BlockID K, double epsilon); @@ -327,6 +331,7 @@ struct DebugContext { struct Context { GraphOrdering rearrange_by; + GraphCompressionContext compression; 
PartitioningMode mode; @@ -374,6 +379,8 @@ class dKaMinPar { dist::GlobalEdgeWeight *edge_weights ); + void import_graph(dist::DistributedGraph graph); + dist::GlobalEdgeWeight compute_partition(dist::BlockID k, dist::BlockID *partition); private: diff --git a/kaminpar-dist/graphutils/bfs_extractor.cc b/kaminpar-dist/graphutils/bfs_extractor.cc index 362c467b..e8a74b19 100644 --- a/kaminpar-dist/graphutils/bfs_extractor.cc +++ b/kaminpar-dist/graphutils/bfs_extractor.cc @@ -222,7 +222,7 @@ auto BfsExtractor::exchange_explored_subgraphs( std::move(node_weights_recvbufs[pe]), std::move(edge_weights_recvbufs[pe]), std::move(node_mapping_recvbufs[pe]), - std::move(partition_recvbufs[pe]) + std::move(partition_recvbufs[pe]), }; }); @@ -409,28 +409,30 @@ void BfsExtractor::explore_outgoing_edges(const NodeID node, Lambda &&lambda) { const bool is_high_degree_node = _graph->degree(node) >= _high_degree_threshold; if (!is_high_degree_node || _high_degree_strategy == HighDegreeStrategy::TAKE_ALL) { - for (const auto [e, v] : _graph->neighbors(node)) { - if (!lambda(e, v)) { - break; - } - } + _graph->neighbors(node, [&](const EdgeID e, const NodeID v) { + const bool abort = !lambda(e, v); + return abort; + }); } else if (_high_degree_strategy == HighDegreeStrategy::CUT) { - for (EdgeID e = _graph->first_edge(node); e < _graph->first_edge(node) + _high_degree_threshold; - ++e) { - if (!lambda(e, _graph->edge_target(e))) { - break; - } - } + _graph->neighbors(node, _high_degree_threshold, [&](const EdgeID e, const NodeID v) { + const bool abort = !lambda(e, v); + return abort; + }); } else if (_high_degree_strategy == HighDegreeStrategy::SAMPLE) { const double skip_prob = 1.0 * _high_degree_threshold / _graph->degree(node); std::geometric_distribution skip_dist(skip_prob); - for (EdgeID e = _graph->first_edge(node); e < _graph->first_invalid_edge(node); - ++e) { // e += skip_dist(gen)) { // @todo - if (!lambda(e, _graph->edge_target(e))) { - break; - } - } + 
_graph->neighbors(node, [&](const EdgeID e, const NodeID v) { + const bool abort = !lambda(e, v); + return abort; + }); + // @todo + // for (EdgeID e = _graph->first_edge(node); e < _graph->first_invalid_edge(node); + // ++e) { // e += skip_dist(gen)) { + // if (!lambda(e, _graph->edge_target(e))) { + // break; + // } + // } } else { // do nothing for HighDegreeStrategy::IGNORE } @@ -586,11 +588,11 @@ void BfsExtractor::init_external_degrees() { }); _graph->pfor_nodes([&](const NodeID u) { - for (const auto [e, v] : _graph->neighbors(u)) { + _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { const BlockID v_block = _p_graph->block(v); const EdgeWeight e_weight = _graph->edge_weight(e); external_degree(u, v_block) += e_weight; - } + }); }); } diff --git a/kaminpar-dist/graphutils/communication.h b/kaminpar-dist/graphutils/communication.h index 45e42446..5fc06ebd 100644 --- a/kaminpar-dist/graphutils/communication.h +++ b/kaminpar-dist/graphutils/communication.h @@ -14,7 +14,6 @@ #include "kaminpar-mpi/sparse_alltoall.h" #include "kaminpar-mpi/utils.h" -#include "kaminpar-dist/datastructures/distributed_graph.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-dist/timer.h" @@ -109,12 +108,13 @@ template void inclusive_col_prefix_sum(Data &data) { template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Mapper, typename Filter, typename Builder, typename Receiver> void sparse_alltoall_interface_to_ghost_custom_range( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Mapper &&mapper, @@ -165,18 +165,18 @@ void sparse_alltoall_interface_to_ghost_custom_range( const PEID thread = omp_get_thread_num(); - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (graph.is_ghost_node(v)) { if constexpr (filter_invocable_with_edge) { if (!filter(u, e, v)) { - continue; + return; } } const PEID owner = graph.ghost_owner(v); 
++num_messages[thread][owner]; } - } + }); } // Offset messages for each thread @@ -200,12 +200,11 @@ void sparse_alltoall_interface_to_ghost_custom_range( } const PEID thread = omp_get_thread_num(); - - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (graph.is_ghost_node(v)) { if constexpr (filter_invocable_with_edge) { if (!filter(u, e, v)) { - continue; + return; } } @@ -217,7 +216,7 @@ void sparse_alltoall_interface_to_ghost_custom_range( send_buffers[pe][slot] = builder(u, e, v); } } - } + }); } // STOP_TIMER(); @@ -230,11 +229,12 @@ void sparse_alltoall_interface_to_ghost_custom_range( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder, typename Receiver> void sparse_alltoall_interface_to_ghost( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Filter &&filter, @@ -255,11 +255,12 @@ void sparse_alltoall_interface_to_ghost( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Mapper, typename Filter, typename Builder> std::vector sparse_alltoall_interface_to_ghost_custom_range_get( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Mapper &&mapper, @@ -282,11 +283,12 @@ std::vector sparse_alltoall_interface_to_ghost_custom_range_get( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder, typename Receiver> void sparse_alltoall_interface_to_ghost( - const DistributedGraph &graph, Filter &&filter, Builder &&builder, Receiver &&receiver + const Graph &graph, Filter &&filter, Builder &&builder, Receiver &&receiver ) { sparse_alltoall_interface_to_ghost( graph, @@ -301,10 +303,11 @@ void sparse_alltoall_interface_to_ghost( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Builder, typename Receiver> void 
sparse_alltoall_interface_to_ghost( - const DistributedGraph &graph, Builder &&builder, Receiver &&receiver + const Graph &graph, Builder &&builder, Receiver &&receiver ) { sparse_alltoall_interface_to_ghost( graph, @@ -317,14 +320,11 @@ void sparse_alltoall_interface_to_ghost( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder> std::vector sparse_alltoall_interface_to_ghost_get( - const DistributedGraph &graph, - const NodeID from, - const NodeID to, - Filter &&filter, - Builder &&builder + const Graph &graph, const NodeID from, const NodeID to, Filter &&filter, Builder &&builder ) { std::vector recv_buffers(mpi::get_comm_size(graph.communicator())); sparse_alltoall_interface_to_ghost( @@ -341,11 +341,11 @@ std::vector sparse_alltoall_interface_to_ghost_get( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder> -std::vector sparse_alltoall_interface_to_ghost_get( - const DistributedGraph &graph, Filter &&filter, Builder &&builder -) { +std::vector +sparse_alltoall_interface_to_ghost_get(const Graph &graph, Filter &&filter, Builder &&builder) { std::vector recv_buffers(mpi::get_comm_size(graph.communicator())); sparse_alltoall_interface_to_ghost( graph, @@ -356,9 +356,12 @@ std::vector sparse_alltoall_interface_to_ghost_get( return recv_buffers; } -template , typename Builder> -std::vector -sparse_alltoall_interface_to_ghost_get(const DistributedGraph &graph, Builder &&builder) { +template < + typename Message, + typename Buffer = NoinitVector, + typename Graph, + typename Builder> +std::vector sparse_alltoall_interface_to_ghost_get(const Graph &graph, Builder &&builder) { std::vector recv_buffers(mpi::get_comm_size(graph.communicator())); sparse_alltoall_interface_to_ghost( graph, @@ -429,12 +432,13 @@ sparse_alltoall_interface_to_ghost_get(const DistributedGraph &graph, Builder && template < typename Message, typename Buffer = NoinitVector, + 
typename Graph, typename Mapper, typename Filter, typename Builder, typename Receiver> void sparse_alltoall_interface_to_pe_custom_range( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Mapper &&mapper, @@ -490,20 +494,20 @@ void sparse_alltoall_interface_to_pe_custom_range( } } - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (!graph.is_ghost_node(v)) { - continue; + return; } const PEID pe = graph.ghost_owner(v); if (created_message_for_pe.get(pe)) { - continue; + return; } created_message_for_pe.set(pe); ++num_messages[thread][pe]; - } + }); created_message_for_pe.reset(); } @@ -539,15 +543,15 @@ void sparse_alltoall_interface_to_pe_custom_range( } } - for (const NodeID v : graph.adjacent_nodes(u)) { + graph.adjacent_nodes(u, [&](const NodeID v) { if (!graph.is_ghost_node(v)) { - continue; + return; } const PEID pe = graph.ghost_owner(v); if (created_message_for_pe.get(pe)) { - continue; + return; } created_message_for_pe.set(pe); @@ -560,7 +564,7 @@ void sparse_alltoall_interface_to_pe_custom_range( } else { send_buffers[pe][slot] = builder(u); } - } + }); created_message_for_pe.reset(); } @@ -576,11 +580,12 @@ void sparse_alltoall_interface_to_pe_custom_range( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder, typename Receiver> void sparse_alltoall_interface_to_pe( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Filter &&filter, @@ -601,11 +606,12 @@ void sparse_alltoall_interface_to_pe( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder, typename Receiver> void sparse_alltoall_interface_to_pe( - const DistributedGraph &graph, Filter &&filter, Builder &&builder, Receiver &&receiver + const Graph &graph, Filter &&filter, Builder &&builder, Receiver &&receiver ) { 
sparse_alltoall_interface_to_pe( graph, @@ -620,11 +626,10 @@ void sparse_alltoall_interface_to_pe( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Builder, typename Receiver> -void sparse_alltoall_interface_to_pe( - const DistributedGraph &graph, Builder &&builder, Receiver &&receiver -) { +void sparse_alltoall_interface_to_pe(const Graph &graph, Builder &&builder, Receiver &&receiver) { sparse_alltoall_interface_to_pe( graph, SPARSE_ALLTOALL_NOFILTER, @@ -636,14 +641,11 @@ void sparse_alltoall_interface_to_pe( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder> std::vector sparse_alltoall_interface_to_pe_get( - const DistributedGraph &graph, - const NodeID from, - const NodeID to, - Filter &&filter, - Builder &&builder + const Graph &graph, const NodeID from, const NodeID to, Filter &&filter, Builder &&builder ) { std::vector recv_buffers(mpi::get_comm_size(graph.communicator())); sparse_alltoall_interface_to_pe( @@ -660,11 +662,12 @@ std::vector sparse_alltoall_interface_to_pe_get( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Mapper, typename Filter, typename Builder> std::vector sparse_alltoall_interface_to_pe_custom_range_get( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Mapper &&mapper, @@ -687,11 +690,11 @@ std::vector sparse_alltoall_interface_to_pe_custom_range_get( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename Builder> -std::vector sparse_alltoall_interface_to_pe_get( - const DistributedGraph &graph, Filter &&filter, Builder &&builder -) { +std::vector +sparse_alltoall_interface_to_pe_get(const Graph &graph, Filter &&filter, Builder &&builder) { std::vector recv_buffers(mpi::get_comm_size(graph.communicator())); sparse_alltoall_interface_to_pe( graph, @@ -704,9 +707,12 @@ std::vector 
sparse_alltoall_interface_to_pe_get( return recv_buffers; } -template , typename Builder> -std::vector -sparse_alltoall_interface_to_pe_get(const DistributedGraph &graph, Builder &&builder) { +template < + typename Message, + typename Buffer = NoinitVector, + typename Graph, + typename Builder> +std::vector sparse_alltoall_interface_to_pe_get(const Graph &graph, Builder &&builder) { std::vector recv_buffers(mpi::get_comm_size(graph.communicator())); sparse_alltoall_interface_to_pe( graph, @@ -722,12 +728,13 @@ sparse_alltoall_interface_to_pe_get(const DistributedGraph &graph, Builder &&bui template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename PEGetter, typename Builder, typename Receiver> void sparse_alltoall_custom( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Filter &&filter, @@ -796,11 +803,12 @@ void sparse_alltoall_custom( template < typename Message, typename Buffer = NoinitVector, + typename Graph, typename Filter, typename PEGetter, typename Builder> std::vector sparse_alltoall_custom( - const DistributedGraph &graph, + const Graph &graph, const NodeID from, const NodeID to, Filter &&filter, diff --git a/kaminpar-dist/graphutils/rearrangement.cc b/kaminpar-dist/graphutils/rearrangement.cc index be1ec1ad..1bd8bc7c 100644 --- a/kaminpar-dist/graphutils/rearrangement.cc +++ b/kaminpar-dist/graphutils/rearrangement.cc @@ -19,7 +19,7 @@ #include "kaminpar-common/timer.h" namespace kaminpar::dist::graph { -DistributedGraph rearrange(DistributedGraph graph, const Context &ctx) { +DistributedCSRGraph rearrange(DistributedCSRGraph graph, const Context &ctx) { if (ctx.rearrange_by == GraphOrdering::NATURAL) { // nothing to do } else if (ctx.rearrange_by == GraphOrdering::DEGREE_BUCKETS) { @@ -28,15 +28,17 @@ DistributedGraph rearrange(DistributedGraph graph, const Context &ctx) { graph = graph::rearrange_by_coloring(std::move(graph), ctx); } + /* KASSERT( 
debug::validate_graph(graph), "input graph verification failed after rearranging graph", assert::heavy ); + */ return graph; } -DistributedGraph rearrange_by_degree_buckets(DistributedGraph graph) { +DistributedCSRGraph rearrange_by_degree_buckets(DistributedCSRGraph graph) { SCOPED_TIMER("Rearrange graph", "By degree buckets"); auto permutations = shm::graph::sort_by_degree_buckets(graph.raw_nodes()); return rearrange_by_permutation( @@ -47,7 +49,7 @@ DistributedGraph rearrange_by_degree_buckets(DistributedGraph graph) { ); } -DistributedGraph rearrange_by_coloring(DistributedGraph graph, const Context &ctx) { +DistributedCSRGraph rearrange_by_coloring(DistributedCSRGraph graph, const Context &ctx) { SCOPED_TIMER("Rearrange graph", "By coloring"); auto coloring = compute_node_coloring_sequentially( @@ -87,15 +89,14 @@ DistributedGraph rearrange_by_coloring(DistributedGraph graph, const Context &ct return graph; } -DistributedGraph rearrange_by_permutation( - DistributedGraph graph, +DistributedCSRGraph rearrange_by_permutation( + DistributedCSRGraph graph, StaticArray old_to_new, StaticArray new_to_old, const bool degree_sorted ) { shm::graph::NodePermutations permutations{ - std::move(old_to_new), std::move(new_to_old) - }; + std::move(old_to_new), std::move(new_to_old)}; const auto &old_nodes = graph.raw_nodes(); const auto &old_edges = graph.raw_edges(); @@ -159,7 +160,7 @@ DistributedGraph rearrange_by_permutation( new_ghost_to_global[ghost_node - n] = new_node_global; }); - DistributedGraph new_graph( + DistributedCSRGraph new_graph( graph.take_node_distribution(), graph.take_edge_distribution(), std::move(new_nodes), diff --git a/kaminpar-dist/graphutils/rearrangement.h b/kaminpar-dist/graphutils/rearrangement.h index 8b3f7420..cd735348 100644 --- a/kaminpar-dist/graphutils/rearrangement.h +++ b/kaminpar-dist/graphutils/rearrangement.h @@ -7,20 +7,20 @@ ******************************************************************************/ #pragma once -#include 
"kaminpar-dist/datastructures/distributed_graph.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-common/datastructures/static_array.h" namespace kaminpar::dist::graph { -DistributedGraph rearrange(DistributedGraph graph, const Context &ctx); +DistributedCSRGraph rearrange(DistributedCSRGraph graph, const Context &ctx); -DistributedGraph rearrange_by_degree_buckets(DistributedGraph graph); +DistributedCSRGraph rearrange_by_degree_buckets(DistributedCSRGraph graph); -DistributedGraph rearrange_by_coloring(DistributedGraph graph, const Context &ctx); +DistributedCSRGraph rearrange_by_coloring(DistributedCSRGraph graph, const Context &ctx); -DistributedGraph rearrange_by_permutation( - DistributedGraph graph, +DistributedCSRGraph rearrange_by_permutation( + DistributedCSRGraph graph, StaticArray old_to_new, StaticArray new_to_old, bool degree_sorted diff --git a/kaminpar-dist/graphutils/replicator.cc b/kaminpar-dist/graphutils/replicator.cc index 240ec58a..640b6da3 100644 --- a/kaminpar-dist/graphutils/replicator.cc +++ b/kaminpar-dist/graphutils/replicator.cc @@ -68,7 +68,7 @@ allgather_graph(const DistributedPartitionedGraph &p_graph) { return {std::move(shm_graph), std::move(shm_p_graph)}; } -shm::Graph replicate_graph_everywhere(const DistributedGraph &graph) { +shm::Graph replicate_graph_everywhere(const DistributedCSRGraph &graph) { KASSERT( graph.global_n() < std::numeric_limits::max(), "number of nodes exceeds int size", @@ -84,9 +84,9 @@ shm::Graph replicate_graph_everywhere(const DistributedGraph &graph) { // copy edges array with global node IDs StaticArray remapped_edges(graph.m()); graph.pfor_nodes([&](const NodeID u) { - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { remapped_edges[e] = graph.local_to_global_node(v); - } + }); }); // gather graph @@ -193,7 +193,18 @@ shm::Graph replicate_graph_everywhere(const 
DistributedGraph &graph) { )}; } -DistributedGraph replicate_graph(const DistributedGraph &graph, const int num_replications) { +shm::Graph replicate_graph_everywhere(const DistributedGraph &graph) { + const AbstractDistributedGraph *underlying_graph = graph.underlying_graph(); + + if (const auto *csr_graph = dynamic_cast(graph.underlying_graph()); + csr_graph != nullptr) { + return replicate_graph_everywhere(*csr_graph); + } + + __builtin_unreachable(); +} + +DistributedGraph replicate_graph(const DistributedCSRGraph &graph, const int num_replications) { const PEID size = mpi::get_comm_size(graph.communicator()); const PEID rank = mpi::get_comm_rank(graph.communicator()); @@ -246,9 +257,8 @@ DistributedGraph replicate_graph(const DistributedGraph &graph, const int num_re // Create edges array with global node IDs const GlobalEdgeID my_tmp_global_edges_offset = edges_displs[primary_rank]; NoinitVector tmp_global_edges(edges_displs.back() + secondary_num_edges); - graph.pfor_edges([&](const EdgeID e) { - tmp_global_edges[my_tmp_global_edges_offset + e] = - graph.local_to_global_node(graph.edge_target(e)); + graph.pfor_edges([&](const EdgeID e, const NodeID v) { + tmp_global_edges[my_tmp_global_edges_offset + e] = graph.local_to_global_node(v); }); const bool is_node_weighted = @@ -419,7 +429,7 @@ DistributedGraph replicate_graph(const DistributedGraph &graph, const int num_re DBG << "Have mapping " << k << " --> " << v; } - DistributedGraph new_graph( + DistributedGraph new_graph(std::make_unique( std::move(node_distribution), std::move(edge_distribution), std::move(nodes), @@ -431,7 +441,7 @@ DistributedGraph replicate_graph(const DistributedGraph &graph, const int num_re std::move(ghost_node_info.global_to_ghost), false, new_comm - ); + )); // Fix weights of ghost nodes if (is_node_weighted) { @@ -448,6 +458,17 @@ DistributedGraph replicate_graph(const DistributedGraph &graph, const int num_re return new_graph; } +DistributedGraph replicate_graph(const 
DistributedGraph &graph, const int num_replications) { + const AbstractDistributedGraph *underlying_graph = graph.underlying_graph(); + + if (const auto *csr_graph = dynamic_cast(graph.underlying_graph()); + csr_graph != nullptr) { + return replicate_graph(*csr_graph, num_replications); + } + + __builtin_unreachable(); +} + DistributedPartitionedGraph distribute_best_partition(const DistributedGraph &dist_graph, DistributedPartitionedGraph p_graph) { // Create group with one PE of each replication diff --git a/kaminpar-dist/graphutils/subgraph_extractor.cc b/kaminpar-dist/graphutils/subgraph_extractor.cc index f95cbd18..81e65769 100644 --- a/kaminpar-dist/graphutils/subgraph_extractor.cc +++ b/kaminpar-dist/graphutils/subgraph_extractor.cc @@ -49,11 +49,11 @@ auto count_block_induced_subgraph_sizes(const DistributedPartitionedGraph &p_gra for (NodeID u = r.begin(); u != r.end(); ++u) { const BlockID u_block = p_graph.block(u); ++num_nodes_per_block[u_block]; - for (const auto [e, v] : p_graph.neighbors(u)) { + p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (u_block == p_graph.block(v)) { ++num_edges_per_block[u_block]; } - } + }); } }); @@ -207,15 +207,15 @@ extract_local_block_induced_subgraphs(const DistributedPartitionedGraph &p_graph const NodeID pos = n0 + u; const NodeID u_prime = shared_nodes[pos]; - for (const auto [e_prime, v_prime] : p_graph.neighbors(u_prime)) { + p_graph.neighbors(u_prime, [&](const EdgeID e_prime, const NodeID v_prime) { if (p_graph.block(v_prime) != b) { - continue; + return; } shared_edge_weights[e0 + e] = p_graph.edge_weight(e_prime); shared_edges[e0 + e] = mapping[v_prime]; ++e; - } + }); shared_nodes[pos] = e; shared_node_weights[pos] = p_graph.node_weight(u_prime); @@ -607,7 +607,7 @@ extract_and_scatter_block_induced_subgraphs(const DistributedPartitionedGraph &p return { std::move(gathered_subgraphs), std::move(offsets), - std::move(extracted_local_subgraphs.mapping) + 
std::move(extracted_local_subgraphs.mapping), }; } diff --git a/kaminpar-dist/graphutils/synchronization.cc b/kaminpar-dist/graphutils/synchronization.cc index 3f8d2728..f63247b4 100644 --- a/kaminpar-dist/graphutils/synchronization.cc +++ b/kaminpar-dist/graphutils/synchronization.cc @@ -7,9 +7,6 @@ ******************************************************************************/ #include "kaminpar-dist/graphutils/synchronization.h" -#include "kaminpar-dist/datastructures/distributed_graph.h" -#include "kaminpar-dist/datastructures/distributed_partitioned_graph.h" - namespace kaminpar::dist::graph { void synchronize_ghost_node_block_ids(DistributedPartitionedGraph &p_graph) { struct Message { @@ -31,24 +28,4 @@ void synchronize_ghost_node_block_ids(DistributedPartitionedGraph &p_graph) { } ); } - -void synchronize_ghost_node_weights(DistributedGraph &graph) { - struct Message { - NodeID node; - NodeWeight weight; - }; - - mpi::graph::sparse_alltoall_interface_to_pe( - graph, - [&](const NodeID u) -> Message { return {.node = u, .weight = graph.node_weight(u)}; }, - [&](const auto &recv_buffer, const PEID pe) { - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto [local_node_on_pe, weight] = recv_buffer[i]; - const auto global_node = static_cast(graph.offset_n(pe) + local_node_on_pe); - const NodeID local_node = graph.global_to_local_node(global_node); - graph.set_ghost_node_weight(local_node, weight); - }); - } - ); -} } // namespace kaminpar::dist::graph diff --git a/kaminpar-dist/graphutils/synchronization.h b/kaminpar-dist/graphutils/synchronization.h index 770ac80e..f11c97b0 100644 --- a/kaminpar-dist/graphutils/synchronization.h +++ b/kaminpar-dist/graphutils/synchronization.h @@ -20,5 +20,23 @@ namespace kaminpar::dist::graph { */ void synchronize_ghost_node_block_ids(DistributedPartitionedGraph &p_graph); -void synchronize_ghost_node_weights(DistributedGraph &graph); +template void synchronize_ghost_node_weights(Graph &graph) { + 
struct Message { + NodeID node; + NodeWeight weight; + }; + + mpi::graph::sparse_alltoall_interface_to_pe( + graph, + [&](const NodeID u) -> Message { return {.node = u, .weight = graph.node_weight(u)}; }, + [&](const auto &recv_buffer, const PEID pe) { + tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { + const auto [local_node_on_pe, weight] = recv_buffer[i]; + const auto global_node = static_cast(graph.offset_n(pe) + local_node_on_pe); + const NodeID local_node = graph.global_to_local_node(global_node); + graph.set_ghost_node_weight(local_node, weight); + }); + } + ); +} } // namespace kaminpar::dist::graph diff --git a/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc b/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc index 47f7ec5b..44bb687c 100644 --- a/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc +++ b/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc @@ -46,9 +46,7 @@ shm::PartitionedGraph MtKaHyParInitialPartitioner::initial_partition( NoinitVector edge_position(2 * num_edges); graph.pfor_nodes([&](const NodeID u) { - for (const auto [e, v] : graph.neighbors(u)) { - edge_position[e] = u < v; - } + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { edge_position[e] = u < v; }); }); parallel::prefix_sum(edge_position.begin(), edge_position.end(), edge_position.begin()); @@ -61,16 +59,16 @@ shm::PartitionedGraph MtKaHyParInitialPartitioner::initial_partition( graph.pfor_nodes([&](const NodeID u) { vertex_weights[u] = static_cast(graph.node_weight(u)); - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (v < u) { // Only need edges in one direction - continue; + return; } EdgeID position = edge_position[e] - 1; edges[2 * position] = static_cast(u); edges[2 * position + 1] = static_cast(v); edge_weights[position] = static_cast(graph.edge_weight(e)); - } + }); }); mt_kahypar_hypergraph_t mt_kahypar_graph 
= mt_kahypar_create_graph( diff --git a/kaminpar-dist/metrics.cc b/kaminpar-dist/metrics.cc index 6d28f989..023f8d6b 100644 --- a/kaminpar-dist/metrics.cc +++ b/kaminpar-dist/metrics.cc @@ -23,11 +23,11 @@ GlobalEdgeWeight local_edge_cut(const DistributedPartitionedGraph &p_graph) { auto &cut = cut_ets.local(); for (NodeID u = r.begin(); u < r.end(); ++u) { const BlockID u_block = p_graph.block(u); - for (const auto [e, v] : p_graph.neighbors(u)) { + p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (u_block != p_graph.block(v)) { cut += p_graph.edge_weight(e); } - } + }); } }); diff --git a/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc b/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc index 92a391ef..98b54c5d 100644 --- a/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc +++ b/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc @@ -104,9 +104,7 @@ bool MtKaHyParRefiner::refine() { StaticArray edge_position(2 * num_edges); shm_graph->pfor_nodes([&](const NodeID u) { - for (const auto [e, v] : shm_graph->neighbors(u)) { - edge_position[e] = u < v; - } + shm_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { edge_position[e] = u < v; }); }); parallel::prefix_sum(edge_position.begin(), edge_position.end(), edge_position.begin()); @@ -117,9 +115,9 @@ bool MtKaHyParRefiner::refine() { shm_graph->pfor_nodes([&](const NodeID u) { vertex_weights[u] = static_cast(shm_graph->node_weight(u)); - for (const auto [e, v] : shm_graph->neighbors(u)) { + shm_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { if (v < u) { // Only need edges in one direction - continue; + return; } EdgeID position = edge_position[e] - 1; @@ -127,7 +125,7 @@ bool MtKaHyParRefiner::refine() { edges[2 * position + 1] = asserting_cast(v); edge_weights[position] = asserting_cast(shm_graph->edge_weight(e)); - } + }); }); mt_kahypar_hypergraph_t mt_kahypar_graph = mt_kahypar_create_graph( diff --git a/kaminpar-dist/refinement/balancer/cluster_balancer.cc 
b/kaminpar-dist/refinement/balancer/cluster_balancer.cc index dc6fa1be..13354427 100644 --- a/kaminpar-dist/refinement/balancer/cluster_balancer.cc +++ b/kaminpar-dist/refinement/balancer/cluster_balancer.cc @@ -736,7 +736,7 @@ void ClusterBalancer::perform_moves( // @todo set blocks before updating other data structures to avoid max gainer changes? _p_graph.set_block(u, candidate.to); - for (const auto &[e, v] : _p_graph.neighbors(u)) { + _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (_p_graph.is_ghost_node(v)) { const PEID pe = _p_graph.ghost_owner(v); if (!created_message_for_pe.get(pe)) { @@ -747,7 +747,7 @@ void ClusterBalancer::perform_moves( created_message_for_pe.set(pe); } - continue; + return; } // !is_overloaded(.) is not a sufficient condition, since parallel moves might overload @@ -756,7 +756,7 @@ void ClusterBalancer::perform_moves( if (_clusters.contains(v)) { update_adjacent_cluster(_clusters.cluster_of(v)); } - } + }); created_message_for_pe.reset(); } diff --git a/kaminpar-dist/refinement/balancer/clusters.cc b/kaminpar-dist/refinement/balancer/clusters.cc index 3371b34b..229a3ef0 100644 --- a/kaminpar-dist/refinement/balancer/clusters.cc +++ b/kaminpar-dist/refinement/balancer/clusters.cc @@ -93,13 +93,13 @@ void Clusters::init_ghost_node_adjacency() { for (const NodeID cluster : clusters()) { for (const NodeID u : nodes(cluster)) { - for (const auto [e, v] : _p_graph->neighbors(u)) { + _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { if (!_p_graph->is_ghost_node(v)) { - continue; + return; } weight_to_ghost[v - _p_graph->n()] += _p_graph->edge_weight(e); - } + }); } for (const auto &[ghost, weight] : weight_to_ghost.entries()) { @@ -219,11 +219,11 @@ bool Clusters::dbg_check_conns(const NodeID cluster) const { std::vector actual(_p_graph->k()); for (const NodeID u : nodes(cluster)) { - for (const auto &[e, v] : _p_graph->neighbors(u)) { + _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { if 
(!_p_graph->is_owned_node(v) || cluster_of(v) != cluster_of(u)) { actual[_p_graph->block(v)] += _p_graph->edge_weight(e); } - } + }); } for (const BlockID b : _p_graph->blocks()) { @@ -309,7 +309,7 @@ class BatchedClusterBuilder { add_to_cluster(u); - for (const auto [e, v] : _p_graph.neighbors(u)) { + _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (_p_graph.is_owned_node(v) && _node_to_cluster[v] == kInvalidBlockID && _p_graph.block(v) == bu) { if (_frontier.contains(v)) { @@ -318,7 +318,7 @@ class BatchedClusterBuilder { _frontier.push(v, _p_graph.edge_weight(e)); } } - } + }); } finish_cluster(); @@ -338,7 +338,7 @@ class BatchedClusterBuilder { _clusters[_cur_pos] = u; ++_cur_pos; - for (const auto [e, v] : _p_graph.neighbors(u)) { + _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (_p_graph.is_owned_node(v) && _node_to_cluster[v] == _cur_cluster) { _cur_block_conn -= _p_graph.edge_weight(e); } else { @@ -351,7 +351,7 @@ class BatchedClusterBuilder { _cur_conns.change_priority(bv, -1); } } - } + }); _stopping_policy.update(_cur_conns.peek_key() - _cur_block_conn); @@ -372,13 +372,13 @@ class BatchedClusterBuilder { // @todo should do this when updating _best_* for (NodeID pos = _cluster_indices[_cur_cluster]; pos < _best_prefix_pos; ++pos) { const NodeID u = _clusters[pos]; - for (const auto &[e, v] : _p_graph.neighbors(u)) { + _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { if (_p_graph.is_owned_node(v) && _node_to_cluster[v] == _cur_cluster) { - continue; + return; } const BlockID bv = _p_graph.block(v); _conns[_cur_cluster * _p_graph.k() + bv] += _p_graph.edge_weight(e); - } + }); } _cluster_indices[++_cur_cluster] = _best_prefix_pos; @@ -478,12 +478,12 @@ Clusters build_singleton_clusters( for (const BlockID k : p_graph.blocks()) { m_ctx.cluster_conns.push_back(0); } - for (const auto [e, v] : p_graph.neighbors(u)) { + p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { const BlockID bv = p_graph.block(v); 
const std::size_t idx = cur_move_set * p_graph.k() + bv; KASSERT(idx < m_ctx.cluster_conns.size()); m_ctx.cluster_conns[idx] += p_graph.edge_weight(e); - } + }); ++cur_move_set; } else { @@ -554,13 +554,13 @@ Clusters build_local_clusters( m_ctx.clusters[cluster_sizes[clustering[u]]++] = u; m_ctx.cluster_indices[ms + 1] = cluster_sizes[clustering[u]]; - for (const auto [e, v] : p_graph.neighbors(u)) { + p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { // We may not access clustering[.] for ghost vertices if (!p_graph.is_owned_node(v) || clustering[v] != clustering[u]) { const BlockID bv = p_graph.block(v); m_ctx.cluster_conns[ms * p_graph.k() + bv] += p_graph.edge_weight(e); } - } + }); } else { m_ctx.node_to_cluster[u] = kInvalidNodeID; } diff --git a/kaminpar-dist/refinement/balancer/clusters.h b/kaminpar-dist/refinement/balancer/clusters.h index c8f9dc20..97f24766 100644 --- a/kaminpar-dist/refinement/balancer/clusters.h +++ b/kaminpar-dist/refinement/balancer/clusters.h @@ -160,20 +160,20 @@ class Clusters { for (const NodeID u : nodes(set)) { KASSERT(_p_graph->is_owned_node(u)); - for (const auto [e, v] : _p_graph->neighbors(u)) { + _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { if (!_p_graph->is_owned_node(v)) { - continue; + return; } const NodeID set_v = _node_to_cluster[v]; if (set_v == kInvalidNodeID || set_v == set) { - continue; + return; } const EdgeWeight delta = _p_graph->edge_weight(e); _cluster_conns[set_v * _p_graph->k() + from] -= delta; _cluster_conns[set_v * _p_graph->k() + to] += delta; - } + }); } } diff --git a/kaminpar-dist/refinement/balancer/node_balancer.cc b/kaminpar-dist/refinement/balancer/node_balancer.cc index 829ab89b..cff206c4 100644 --- a/kaminpar-dist/refinement/balancer/node_balancer.cc +++ b/kaminpar-dist/refinement/balancer/node_balancer.cc @@ -81,8 +81,7 @@ void NodeBalancer::reinit() { tbb::enumerable_thread_specific>> local_pq_ets{ [&] { return std::vector>(_p_graph.k()); - } - }; + }}; 
tbb::enumerable_thread_specific> local_pq_weight_ets{[&] { return std::vector(_p_graph.k()); }}; @@ -325,16 +324,16 @@ void NodeBalancer::perform_move(const Candidate &move, const bool update_block_w _pq_weight[from] -= weight; // Activate neighbors - for (const NodeID v : _p_graph.adjacent_nodes(u)) { + _p_graph.adjacent_nodes(u, [&, from = from](const NodeID v) { if (!_p_graph.is_owned_node(v)) { - continue; + return; } if (!_marker.get(v) && _p_graph.block(v) == from) { try_pq_insertion(from, v); _marker.set(v); } - } + }); } if (update_block_weights) { @@ -377,8 +376,7 @@ std::vector NodeBalancer::pick_sequential_candidates() if (relative_gain == actual_relative_gain) { Candidate candidate{ - _p_graph.local_to_global_node(u), from, to, u_weight, actual_relative_gain - }; + _p_graph.local_to_global_node(u), from, to, u_weight, actual_relative_gain}; candidates.push_back(candidate); } else { try_pq_insertion(from, u, u_weight, actual_relative_gain); @@ -573,8 +571,9 @@ bool NodeBalancer::perform_parallel_round(const int round) { reassigned, "could not find a feasible target block for node " << candidate.id << ", weight " << candidate.weight << ", deltas: [" - << block_weight_deltas_to << "]" << ", max block weights: " - << _p_ctx.graph->max_block_weights << ", block weights: " + << block_weight_deltas_to << "]" + << ", max block weights: " << _p_ctx.graph->max_block_weights + << ", block weights: " << std::vector( _p_graph.block_weights().begin(), _p_graph.block_weights().end() ) diff --git a/kaminpar-dist/refinement/gain_calculator.h b/kaminpar-dist/refinement/gain_calculator.h index 30f5f71e..c599be25 100644 --- a/kaminpar-dist/refinement/gain_calculator.h +++ b/kaminpar-dist/refinement/gain_calculator.h @@ -87,14 +87,14 @@ template class GainCalculator { BlockID max_target = b_u; auto action = [&](auto &map) { - for (const auto [e, v] : _p_graph->neighbors(u)) { + _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { const BlockID b_v = 
_p_graph->block(v); if (b_u != b_v && weight_checker(b_v, _p_graph->block_weight(b_v) + w_u)) { map[b_v] += _p_graph->edge_weight(e); } else if (b_u == b_v) { int_conn += _p_graph->edge_weight(e); } - } + }); for (const auto [target, conn] : map.entries()) { if (conn > max_ext_conn || (randomize && conn == max_ext_conn && rand.random_bool())) { diff --git a/kaminpar-dist/refinement/jet/jet_refiner.cc b/kaminpar-dist/refinement/jet/jet_refiner.cc index 60ae23cd..47579102 100644 --- a/kaminpar-dist/refinement/jet/jet_refiner.cc +++ b/kaminpar-dist/refinement/jet/jet_refiner.cc @@ -306,7 +306,7 @@ void JetRefiner::filter_bad_moves() { EdgeWeight projected_gain = 0; - for (const auto &[e, v] : _p_graph.neighbors(u)) { + _p_graph.neighbors(u, [&, gain_u = gain_u, to_u = to_u](const EdgeID e, const NodeID v) { const auto [gain_v, to_v] = _gains_and_targets[v]; const BlockID projected_b_v = (gain_v > gain_u || (gain_v == gain_u && v < u)) ? to_v : _p_graph.block(v); @@ -316,7 +316,7 @@ void JetRefiner::filter_bad_moves() { } else if (projected_b_v == from_u) { projected_gain -= _p_graph.edge_weight(e); } - } + }); // Locking the node here means that the move // will be executed by move_locked_nodes() diff --git a/kaminpar-dist/refinement/lp/clp_refiner.cc b/kaminpar-dist/refinement/lp/clp_refiner.cc index cfabb25b..9bb433e3 100644 --- a/kaminpar-dist/refinement/lp/clp_refiner.cc +++ b/kaminpar-dist/refinement/lp/clp_refiner.cc @@ -392,8 +392,8 @@ NodeID ColoredLPRefiner::perform_best_moves(const ColorID c) { return num_local_moved_nodes; } -auto ColoredLPRefiner::reduce_move_candidates(std::vector &&candidates -) -> std::vector { +auto ColoredLPRefiner::reduce_move_candidates(std::vector &&candidates) + -> std::vector { const int size = mpi::get_comm_size(_p_graph.communicator()); const int rank = mpi::get_comm_rank(_p_graph.communicator()); KASSERT(math::is_power_of_2(size), "#PE must be a power of two", assert::always); @@ -822,12 +822,12 @@ NodeID 
ColoredLPRefiner::find_moves(const ColorID c) { auto action = [&](auto &map) { bool is_interface_node = false; - for (const auto [e, v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { const BlockID b = _p_graph.block(v); const EdgeWeight weight = graph.edge_weight(e); map[b] += weight; is_interface_node |= graph.is_ghost_node(v); - } + }); const BlockID u_block = _p_graph.block(u); const NodeWeight u_weight = graph.node_weight(u); @@ -885,9 +885,7 @@ void ColoredLPRefiner::activate_neighbors(const NodeID u) { return; } - for (const auto &[e, v] : _p_graph.neighbors(u)) { - _is_active[v] = 1; - } + _p_graph.adjacent_nodes(u, [&](const NodeID v) { _is_active[v] = 1; }); } void ColoredLPRefiner::GainStatistics::initialize(const ColorID c) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 7b125c43..20a3544f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -116,6 +116,11 @@ if (KAMINPAR_BUILD_DISTRIBUTED) FILES dist/coarsening/internal_cluster_contraction_test.cc CORES 1) + # dKaMinPar -> Datastructures + kaminpar_add_dist_test(test_dist_compressed_graph + FILES dist/datastructures/distributed_compressed_graph_test.cc + CORES 1) + # dKaMinPar -> Graph Utils kaminpar_add_dist_test(test_dist_graphutils_block_extractor FILES dist/graphutils/block_extractor_test.cc @@ -155,4 +160,3 @@ if (KAMINPAR_BUILD_DISTRIBUTED) FILES mpi/sparse_allreduce_test.cc CORES 1 4 8) endif () - diff --git a/tests/dist/algorithms/greedy_node_coloring_test.cc b/tests/dist/algorithms/greedy_node_coloring_test.cc index a138d96d..1b9bab17 100644 --- a/tests/dist/algorithms/greedy_node_coloring_test.cc +++ b/tests/dist/algorithms/greedy_node_coloring_test.cc @@ -24,18 +24,16 @@ namespace kaminpar::dist { using namespace kaminpar::dist::testing; namespace { -template +template void validate_node_coloring( - const DistributedGraph &graph, + const Graph &graph, const Coloring &coloring, const ColorID max_num_colors = 
std::numeric_limits::max() ) { ASSERT_GE(coloring.size(), graph.total_n()); for (const NodeID u : graph.nodes()) { EXPECT_LT(coloring[u], max_num_colors); - for (const NodeID v : graph.adjacent_nodes(u)) { - EXPECT_NE(coloring[u], coloring[v]); - } + graph.adjacent_nodes(u, [&](const NodeID v) { EXPECT_NE(coloring[u], coloring[v]); }); } } } // namespace diff --git a/tests/dist/algorithms/independent_set_test.cc b/tests/dist/algorithms/independent_set_test.cc index 6c045aea..22514cbe 100644 --- a/tests/dist/algorithms/independent_set_test.cc +++ b/tests/dist/algorithms/independent_set_test.cc @@ -77,9 +77,7 @@ void expect_nonempty_independent_set( continue; } - for (const NodeID v : p_graph.adjacent_nodes(u)) { - EXPECT_FALSE(is_in_independent_set[v]); - } + p_graph.adjacent_nodes(u, [&](const NodeID v) { EXPECT_FALSE(is_in_independent_set[v]); }); } } } // namespace diff --git a/tests/dist/datastructures/distributed_compressed_graph_test.cc b/tests/dist/datastructures/distributed_compressed_graph_test.cc new file mode 100644 index 00000000..be4d782b --- /dev/null +++ b/tests/dist/datastructures/distributed_compressed_graph_test.cc @@ -0,0 +1,210 @@ +/******************************************************************************* + * @file: distributed_compressed_graph_test.cc + * @author: Daniel Salwasser + * @date: 08.06.2024 + * @brief: Unit tests for the distributed compressed graph. 
+ ******************************************************************************/ +#include + +#include "tests/dist/distributed_graph_factories.h" + +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" +#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" + +#define TEST_ON_ALL_GRAPHS(test_function) \ + test_function(testing::make_csr_empty_graph()); \ + test_function(testing::make_csr_circle_graph()); \ + test_function(testing::make_csr_path(1000)); \ + test_function(testing::make_csr_isolated_nodes_graph(1000)); \ + test_function(testing::make_csr_isolated_edges_graph(1000)); \ + test_function(testing::make_csr_cut_edge_graph(1000)); \ + test_function(testing::make_csr_circle_clique_graph(1000)); \ + test_function(testing::make_csr_local_complete_graph(100)); \ + test_function(testing::make_csr_local_complete_bipartite_graph(100)); \ + test_function(testing::make_csr_global_complete_graph(100)); + +namespace kaminpar::dist { + +template static bool operator==(const IotaRange &a, const IotaRange &b) { + return a.begin() == b.begin() && a.end() == b.end(); +}; + +static void test_compressed_graph_size(const DistributedCSRGraph &graph) { + const mpi::PEID size = mpi::get_comm_size(graph.communicator()); + const mpi::PEID rank = mpi::get_comm_rank(graph.communicator()); + + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + EXPECT_EQ(graph.global_n(), compressed_graph.global_n()); + EXPECT_EQ(graph.global_m(), compressed_graph.global_m()); + + EXPECT_EQ(graph.n(), compressed_graph.n()); + EXPECT_EQ(graph.m(), compressed_graph.m()); + + EXPECT_EQ(graph.ghost_n(), compressed_graph.ghost_n()); + EXPECT_EQ(graph.total_n(), compressed_graph.total_n()); + + EXPECT_EQ(graph.offset_n(), compressed_graph.offset_n()); + EXPECT_EQ(graph.offset_m(), compressed_graph.offset_m()); + + for (mpi::PEID pe = 0; pe < size; ++pe) { + EXPECT_EQ(graph.n(pe), compressed_graph.n(pe)); + EXPECT_EQ(graph.m(pe), 
compressed_graph.m(pe)); + + EXPECT_EQ(graph.offset_n(pe), compressed_graph.offset_n(pe)); + EXPECT_EQ(graph.offset_m(pe), compressed_graph.offset_m(pe)); + } +} + +TEST(DistributedCompressedGraphTest, compressed_graph_size) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_size); +} + +static void test_compressed_graph_node_ownership(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + for (const NodeID u : IotaRange(0, graph.global_n())) { + EXPECT_EQ(graph.is_owned_global_node(u), compressed_graph.is_owned_global_node(u)); + EXPECT_EQ(graph.contains_global_node(u), compressed_graph.contains_global_node(u)); + } +} + +TEST(DistributedCompressedGraphTest, compressed_graph_node_ownership) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_node_ownership); +} + +static void test_compressed_graph_node_type(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + for (const NodeID u : graph.all_nodes()) { + EXPECT_EQ(graph.is_ghost_node(u), compressed_graph.is_ghost_node(u)); + EXPECT_EQ(graph.is_owned_node(u), compressed_graph.is_owned_node(u)); + EXPECT_EQ(graph.local_to_global_node(u), compressed_graph.local_to_global_node(u)); + } + + for (const NodeID u : graph.ghost_nodes()) { + EXPECT_EQ(graph.ghost_owner(u), compressed_graph.ghost_owner(u)); + } + + for (const NodeID u : IotaRange(0, graph.global_n())) { + if (graph.contains_global_node(u)) { + EXPECT_EQ(graph.global_to_local_node(u), compressed_graph.global_to_local_node(u)); + } + } +} + +TEST(DistributedCompressedGraphTest, compressed_graph_node_type) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_node_type); +} + +static void test_compressed_graph_iterators(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + EXPECT_TRUE(graph.nodes() == compressed_graph.nodes()); + EXPECT_TRUE(graph.ghost_nodes() == 
compressed_graph.ghost_nodes()); + EXPECT_TRUE(graph.all_nodes() == compressed_graph.all_nodes()); + + EXPECT_TRUE(graph.edges() == compressed_graph.edges()); + for (const NodeID u : graph.nodes()) { + EXPECT_TRUE(graph.incident_edges(u) == compressed_graph.incident_edges(u)); + } +} + +TEST(DistributedCompressedGraphTest, compressed_graph_iterators) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_iterators); +} + +static void test_compressed_graph_cached_inter_pe_metrics(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + const mpi::PEID size = mpi::get_comm_size(graph.communicator()); + for (mpi::PEID pe = 0; pe < size; ++pe) { + EXPECT_EQ(graph.edge_cut_to_pe(pe), compressed_graph.edge_cut_to_pe(pe)); + EXPECT_EQ(graph.comm_vol_to_pe(pe), compressed_graph.comm_vol_to_pe(pe)); + } + + EXPECT_EQ(graph.communicator(), compressed_graph.communicator()); +} + +TEST(DistributedCompressedGraphTest, compressed_graph_cached_inter_pe_metrics) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_cached_inter_pe_metrics); +} + +static void test_compressed_graph_degree_operation(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + for (const NodeID u : graph.nodes()) { + EXPECT_EQ(graph.degree(u), compressed_graph.degree(u)); + } +} + +TEST(DistributedCompressedGraphTest, compressed_graph_degree_operation) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_degree_operation); +} + +static void test_compressed_graph_adjacent_nodes_operation(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + std::vector graph_neighbours; + std::vector compressed_graph_neighbours; + for (const NodeID u : graph.nodes()) { + graph.adjacent_nodes(u, [&](const NodeID v) { graph_neighbours.push_back(v); }); + + compressed_graph.adjacent_nodes(u, [&](const NodeID v) { + 
compressed_graph_neighbours.push_back(v); + }); + + EXPECT_EQ(graph_neighbours.size(), compressed_graph_neighbours.size()); + + std::sort(graph_neighbours.begin(), graph_neighbours.end()); + std::sort(compressed_graph_neighbours.begin(), compressed_graph_neighbours.end()); + EXPECT_TRUE(graph_neighbours == compressed_graph_neighbours); + + graph_neighbours.clear(); + compressed_graph_neighbours.clear(); + } +} + +TEST(DistributedCompressedGraphTest, compressed_graph_adjacent_nodes_operation) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_adjacent_nodes_operation); +} + +static void test_compressed_graph_neighbors_operation(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + std::vector graph_incident_edges; + std::vector graph_adjacent_node; + std::vector compressed_graph_incident_edges; + std::vector compressed_graph_adjacent_node; + for (const NodeID u : graph.nodes()) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph_incident_edges.push_back(e); + graph_adjacent_node.push_back(v); + }); + + compressed_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + compressed_graph_incident_edges.push_back(e); + compressed_graph_adjacent_node.push_back(v); + }); + + EXPECT_EQ(graph_incident_edges.size(), compressed_graph_incident_edges.size()); + + std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); + std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); + std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); + std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); + EXPECT_TRUE(graph_incident_edges == compressed_graph_incident_edges); + EXPECT_TRUE(graph_adjacent_node == compressed_graph_adjacent_node); + + graph_incident_edges.clear(); + graph_adjacent_node.clear(); + compressed_graph_incident_edges.clear(); + compressed_graph_adjacent_node.clear(); + } +} + 
+TEST(DistributedCompressedGraphTest, compressed_graph_neighbors_operation) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_neighbors_operation); +} + +} // namespace kaminpar::dist diff --git a/tests/dist/distributed_graph_builder.h b/tests/dist/distributed_graph_builder.h index 1b983467..3b4bc813 100644 --- a/tests/dist/distributed_graph_builder.h +++ b/tests/dist/distributed_graph_builder.h @@ -10,6 +10,7 @@ #include +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/datastructures/ghost_node_mapper.h" #include "kaminpar-dist/datastructures/growt.h" #include "kaminpar-dist/dkaminpar.h" @@ -81,7 +82,7 @@ class Builder { return *this; } - DistributedGraph finalize() { + DistributedCSRGraph finalize() { _nodes.push_back(_edges.size()); // First step: use unit node weights for ghost nodes @@ -94,7 +95,7 @@ class Builder { const EdgeID m = _edges.size(); auto edge_distribution = mpi::build_distribution_from_local_count(m, _comm); - DistributedGraph graph( + DistributedCSRGraph graph( static_array::create(_node_distribution), static_array::create(edge_distribution), static_array::create(_nodes), @@ -105,7 +106,8 @@ class Builder { static_array::create(_ghost_to_global), build_static_ghost_node_mapping(_global_to_ghost), false, - _comm); + _comm + ); // If the graph does not have unit node weights, exchange ghost node weights // now diff --git a/tests/dist/distributed_graph_factories.h b/tests/dist/distributed_graph_factories.h index 911c35e2..2d0a9e95 100644 --- a/tests/dist/distributed_graph_factories.h +++ b/tests/dist/distributed_graph_factories.h @@ -14,6 +14,7 @@ #include "kaminpar-mpi/wrapper.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/datastructures/distributed_graph.h" #include "kaminpar-dist/dkaminpar.h" @@ -22,9 +23,9 @@ namespace kaminpar::dist::testing { * Creates a distributed path with `num_nodes_per_pe` nodes per PE. * * @param num_nodes_per_pe Number of nodes per PE. 
- * @return Distributed graph with `num_nodes_per_pe` nodes per PE. + * @return Distributed CSR graph with `num_nodes_per_pe` nodes per PE. */ -inline DistributedGraph make_path(const NodeID num_nodes_per_pe) { +inline DistributedCSRGraph make_csr_path(const NodeID num_nodes_per_pe) { const auto [size, rank] = mpi::get_comm_info(MPI_COMM_WORLD); const NodeID n0 = num_nodes_per_pe * rank; @@ -44,13 +45,23 @@ inline DistributedGraph make_path(const NodeID num_nodes_per_pe) { return builder.finalize(); } +/*! + * Creates a distributed path with `num_nodes_per_pe` nodes per PE. + * + * @param num_nodes_per_pe Number of nodes per PE. + * @return Distributed graph with `num_nodes_per_pe` nodes per PE. + */ +inline DistributedGraph make_path(const NodeID num_nodes_per_pe) { + return {std::make_unique(make_csr_path(num_nodes_per_pe))}; +} + /*! * Creates a distributed circle with one node on each PE. * - * @return Distributed graph with one node on each PE, nodes are connected in a + * @return Distributed CSR graph with one node on each PE, nodes are connected in a * circle. */ -inline DistributedGraph make_circle_graph() { +inline DistributedCSRGraph make_csr_circle_graph() { const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const PEID size = mpi::get_comm_size(MPI_COMM_WORLD); @@ -71,14 +82,24 @@ inline DistributedGraph make_circle_graph() { return builder.finalize(); } +/*! + * Creates a distributed circle with one node on each PE. + * + * @return Distributed graph with one node on each PE, nodes are connected in a + * circle. + */ +inline DistributedGraph make_circle_graph() { + return {std::make_unique(make_csr_circle_graph())}; +} + /*! * Creates a distributed graph with `num_nodes_per_pe` nodes per PE and zero * edges. * * @param num_nodes_per_pe Number of nodes on each PE. - * @return Distributed graph with `num_nodes_per_pe` nodes per PE. + * @return Distributed CSR graph with `num_nodes_per_pe` nodes per PE. 
*/ -inline DistributedGraph make_isolated_nodes_graph(const NodeID num_nodes_per_pe) { +inline DistributedCSRGraph make_csr_isolated_nodes_graph(const NodeID num_nodes_per_pe) { graph::Builder builder(MPI_COMM_WORLD); builder.initialize(num_nodes_per_pe); for (NodeID u = 0; u < num_nodes_per_pe; ++u) { @@ -87,13 +108,33 @@ inline DistributedGraph make_isolated_nodes_graph(const NodeID num_nodes_per_pe) return builder.finalize(); } +/*! + * Creates a distributed graph with `num_nodes_per_pe` nodes per PE and zero + * edges. + * + * @param num_nodes_per_pe Number of nodes on each PE. + * @return Distributed graph with `num_nodes_per_pe` nodes per PE. + */ +inline DistributedGraph make_isolated_nodes_graph(const NodeID num_nodes_per_pe) { + return {std::make_unique(make_csr_isolated_nodes_graph(num_nodes_per_pe))}; +} + +/*! + * Creates a distributed graph without any nodes. + * + * @return Distributed CSR graph without any nodes. + */ +inline DistributedCSRGraph make_csr_empty_graph() { + return make_csr_isolated_nodes_graph(0); +} + /*! * Creates a distributed graph without any nodes. * * @return Distributed graph without any nodes. */ inline DistributedGraph make_empty_graph() { - return make_isolated_nodes_graph(0); + return {std::make_unique(make_csr_empty_graph())}; } /*! @@ -101,10 +142,10 @@ inline DistributedGraph make_empty_graph() { * each pair connected by an edge. * * @param num_edges_per_pe Number of edges on each PE, with distinct endpoints. - * @return Distributed graph with `2 * num_edges_per_pe` nodes and + * @return Distributed CSR graph with `2 * num_edges_per_pe` nodes and * `num_edges_per_pe` edges per PE. 
*/ -inline DistributedGraph make_isolated_edges_graph(const NodeID num_edges_per_pe) { +inline DistributedCSRGraph make_csr_isolated_edges_graph(const NodeID num_edges_per_pe) { const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const NodeID n0 = rank * num_edges_per_pe * 2; @@ -119,7 +160,19 @@ inline DistributedGraph make_isolated_edges_graph(const NodeID num_edges_per_pe) return builder.finalize(); } -inline DistributedGraph make_local_complete_graph(const NodeID num_nodes_per_pe) { +/*! + * Creates a distributed graph with `2 * num_edges_per_pe` nodes on each PE, + * each pair connected by an edge. + * + * @param num_edges_per_pe Number of edges on each PE, with distinct endpoints. + * @return Distributed graph with `2 * num_edges_per_pe` nodes and + * `num_edges_per_pe` edges per PE. + */ +inline DistributedGraph make_isolated_edges_graph(const NodeID num_edges_per_pe) { + return {std::make_unique(make_csr_isolated_edges_graph(num_edges_per_pe))}; +} + +inline DistributedCSRGraph make_csr_local_complete_graph(const NodeID num_nodes_per_pe) { const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const GlobalNodeID n0 = rank * num_nodes_per_pe; @@ -136,7 +189,11 @@ inline DistributedGraph make_local_complete_graph(const NodeID num_nodes_per_pe) return builder.finalize(); } -inline DistributedGraph make_local_complete_bipartite_graph(const NodeID set_size_per_pe) { +inline DistributedGraph make_local_complete_graph(const NodeID num_nodes_per_pe) { + return {std::make_unique(make_csr_local_complete_graph(num_nodes_per_pe))}; +} + +inline DistributedCSRGraph make_csr_local_complete_bipartite_graph(const NodeID set_size_per_pe) { const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const GlobalNodeID n0 = rank * set_size_per_pe * 2; @@ -156,7 +213,13 @@ inline DistributedGraph make_local_complete_bipartite_graph(const NodeID set_siz return builder.finalize(); } -inline DistributedGraph make_global_complete_graph(const NodeID nodes_per_pe) { +inline
DistributedGraph make_local_complete_bipartite_graph(const NodeID set_size_per_pe) { + return { + std::make_unique(make_csr_local_complete_bipartite_graph(set_size_per_pe) + )}; +} + +inline DistributedCSRGraph make_csr_global_complete_graph(const NodeID nodes_per_pe) { const PEID size = mpi::get_comm_size(MPI_COMM_WORLD); const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const GlobalNodeID n0 = rank * nodes_per_pe; @@ -175,16 +238,20 @@ inline DistributedGraph make_global_complete_graph(const NodeID nodes_per_pe) { return builder.finalize(); } +inline DistributedGraph make_global_complete_graph(const NodeID nodes_per_pe) { + return {std::make_unique(make_csr_global_complete_graph(nodes_per_pe))}; +} + /*! * Creates a distributed graph with `num_nodes_per_pe` nodes on each PE. * The nodes on a single PE are connected to a clique. * Globally, nodes with the same local ID are connected to a circle. * * @param num_nodes_per_pe Number of nodes per PE. - * @return Distributed graph with a clique on `num_nodes_per_pe` nodes on each + * @return Distributed CSR graph with a clique on `num_nodes_per_pe` nodes on each * PE and `num_nodes_per_pe` global circles. */ -inline DistributedGraph make_circle_clique_graph(const NodeID num_nodes_per_pe) { +inline DistributedCSRGraph make_csr_circle_clique_graph(const NodeID num_nodes_per_pe) { const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const PEID size = mpi::get_comm_size(MPI_COMM_WORLD); @@ -219,6 +286,19 @@ inline DistributedGraph make_circle_clique_graph(const NodeID num_nodes_per_pe) return builder.finalize(); } +/*! + * Creates a distributed graph with `num_nodes_per_pe` nodes on each PE. + * The nodes on a single PE are connected to a clique. + * Globally, nodes with the same local ID are connected to a circle. + * + * @param num_nodes_per_pe Number of nodes per PE. + * @return Distributed graph with a clique on `num_nodes_per_pe` nodes on each + * PE and `num_nodes_per_pe` global circles. 
+ */ +inline DistributedGraph make_circle_clique_graph(const NodeID num_nodes_per_pe) { + return {std::make_unique(make_csr_circle_clique_graph(num_nodes_per_pe))}; +} + /*! * Creates a distributed graph with `2 * num_nodes_per_pe` nodes on each PE, * that are connected to a node on the next / previous PE: @@ -228,9 +308,9 @@ inline DistributedGraph make_circle_clique_graph(const NodeID num_nodes_per_pe) * +-------------+ * * @param num_nodes_per_pe Number of nodes on each side of each PE. - * @return Distributed graph as described above. + * @return Distributed CSR graph as described above. */ -inline DistributedGraph make_cut_edge_graph(const NodeID num_nodes_per_pe) { +inline DistributedCSRGraph make_csr_cut_edge_graph(const NodeID num_nodes_per_pe) { const PEID rank = mpi::get_comm_rank(MPI_COMM_WORLD); const PEID size = mpi::get_comm_size(MPI_COMM_WORLD); @@ -267,4 +347,19 @@ inline DistributedGraph make_cut_edge_graph(const NodeID num_nodes_per_pe) { return builder.finalize(); } + +/*! + * Creates a distributed graph with `2 * num_nodes_per_pe` nodes on each PE, + * that are connected to a node on the next / previous PE: + * + * O O-#-O O-#-O O + * | ####### | + * +-------------+ + * + * @param num_nodes_per_pe Number of nodes on each side of each PE. + * @return Distributed graph as described above. 
+ */ +inline DistributedGraph make_cut_edge_graph(const NodeID num_nodes_per_pe) { + return {std::make_unique(make_csr_cut_edge_graph(num_nodes_per_pe))}; +} } // namespace kaminpar::dist::testing diff --git a/tests/dist/distributed_graph_helpers.h b/tests/dist/distributed_graph_helpers.h index da39b85e..a3171097 100644 --- a/tests/dist/distributed_graph_helpers.h +++ b/tests/dist/distributed_graph_helpers.h @@ -10,6 +10,7 @@ #include "kaminpar-mpi/wrapper.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" #include "kaminpar-dist/datastructures/distributed_graph.h" #include "kaminpar-dist/datastructures/distributed_partitioned_graph.h" #include "kaminpar-dist/dkaminpar.h" @@ -22,25 +23,21 @@ namespace kaminpar::dist::testing { inline std::vector local_neighbors(const shm::Graph &graph, const NodeID u) { std::vector neighbors; - for (const auto &[e, v] : graph.neighbors(u)) { - neighbors.push_back(v); - } + graph.adjacent_nodes(u, [&](const NodeID v) { neighbors.push_back(v); }); return neighbors; } inline std::vector local_neighbors(const DistributedGraph &graph, const NodeID u) { std::vector neighbors; - for (const auto &[e, v] : graph.neighbors(u)) { - neighbors.push_back(v); - } + graph.adjacent_nodes(u, [&](const NodeID v) { neighbors.push_back(v); }); return neighbors; } inline std::vector global_neighbors(const DistributedGraph &graph, const NodeID u) { std::vector neighbors; - for (const auto &[e, v] : graph.neighbors(u)) { + graph.adjacent_nodes(u, [&](const NodeID v) { neighbors.push_back(graph.local_to_global_node(v)); - } + }); return neighbors; } @@ -87,27 +84,32 @@ inline DistributedPartitionedGraph make_partitioned_graph_by_rank(const Distribu //! Return the id of the edge connecting two adjacent nodes \c u and \c v in \c //! graph, found by linear search. 
+template inline std::pair -get_edge_by_endpoints(const DistributedGraph &graph, const NodeID u, const NodeID v) { +get_edge_by_endpoints(const Graph &graph, const NodeID u, const NodeID v) { EdgeID forward_edge = kInvalidEdgeID; EdgeID backward_edge = kInvalidEdgeID; if (graph.is_owned_node(u)) { - for (const auto [cur_e, cur_v] : graph.neighbors(u)) { + graph.neighbors(u, [&](const EdgeID cur_e, const NodeID cur_v) { if (cur_v == v) { forward_edge = cur_e; - break; + return true; } - } + + return false; + }); } if (graph.is_owned_node(v)) { - for (const auto [cur_e, cur_u] : graph.neighbors(v)) { + graph.neighbors(v, [&](const EdgeID cur_e, const NodeID cur_u) { if (cur_u == u) { backward_edge = cur_e; - break; + return true; } - } + + return false; + }); } // one of those edges might now exist due to ghost nodes @@ -116,15 +118,15 @@ get_edge_by_endpoints(const DistributedGraph &graph, const NodeID u, const NodeI //! Return the id of the edge connecting two adjacent nodes \c u and \c v given //! by their global id in \c graph, found by linear search -inline std::pair get_edge_by_endpoints_global( - const DistributedGraph &graph, const GlobalNodeID u, const GlobalNodeID v -) { +template +inline std::pair +get_edge_by_endpoints_global(const Graph &graph, const GlobalNodeID u, const GlobalNodeID v) { return get_edge_by_endpoints(graph, graph.global_to_local_node(u), graph.global_to_local_node(v)); } //! Based on some graph, build a new graph with modified edge weights.
inline DistributedGraph change_edge_weights( - DistributedGraph graph, const std::vector> &changes + DistributedCSRGraph graph, const std::vector> &changes ) { auto edge_weights = graph.take_edge_weights(); if (edge_weights.empty()) { @@ -137,7 +139,7 @@ inline DistributedGraph change_edge_weights( } } - return { + return {std::make_unique( graph.take_node_distribution(), graph.take_edge_distribution(), graph.take_nodes(), @@ -148,11 +150,12 @@ inline DistributedGraph change_edge_weights( graph.take_ghost_to_global(), graph.take_global_to_ghost(), false, - graph.communicator()}; + graph.communicator() + )}; } inline DistributedGraph change_edge_weights_by_endpoints( - DistributedGraph graph, const std::vector> &changes + DistributedCSRGraph graph, const std::vector> &changes ) { std::vector> edge_id_changes; for (const auto &[u, v, weight] : changes) { @@ -165,7 +168,7 @@ inline DistributedGraph change_edge_weights_by_endpoints( } inline DistributedGraph change_edge_weights_by_global_endpoints( - DistributedGraph graph, + DistributedCSRGraph graph, const std::vector> &changes ) { std::vector> edge_id_changes; @@ -182,7 +185,7 @@ inline DistributedGraph change_edge_weights_by_global_endpoints( //! Based on some graph, build a new graph with modified node weights. 
inline DistributedGraph change_node_weights( - DistributedGraph graph, const std::vector> &changes + DistributedCSRGraph graph, const std::vector> &changes ) { auto node_weights = graph.take_node_weights(); if (node_weights.empty()) { @@ -193,7 +196,7 @@ inline DistributedGraph change_node_weights( node_weights[u] = weight; } - return { + return {std::make_unique( graph.take_node_distribution(), graph.take_edge_distribution(), graph.take_nodes(), @@ -204,6 +207,7 @@ inline DistributedGraph change_node_weights( graph.take_ghost_to_global(), graph.take_global_to_ghost(), false, - graph.communicator()}; + graph.communicator() + )}; } } // namespace kaminpar::dist::testing diff --git a/tests/dist/graphutils/block_extractor_test.cc b/tests/dist/graphutils/block_extractor_test.cc index d45e7951..698ded4b 100644 --- a/tests/dist/graphutils/block_extractor_test.cc +++ b/tests/dist/graphutils/block_extractor_test.cc @@ -372,17 +372,17 @@ TEST(GlobalGraphExtractionTest, extract_node_weights_in_circle_clique_graph) { const auto [size, rank] = mpi::get_comm_info(MPI_COMM_WORLD); // create clique/circle graph with rank as node weight - auto graph = make_circle_clique_graph(2 * size); + auto csr_graph = make_csr_circle_clique_graph(2 * size); std::vector> node_weights; std::vector local_partition; - for (const NodeID u : graph.nodes()) { + for (const NodeID u : csr_graph.nodes()) { node_weights.emplace_back(u, rank + 1); local_partition.push_back(u); } - for (const NodeID u : graph.ghost_nodes()) { - node_weights.emplace_back(u, graph.ghost_owner(u) + 1); + for (const NodeID u : csr_graph.ghost_nodes()) { + node_weights.emplace_back(u, csr_graph.ghost_owner(u) + 1); } - graph = change_node_weights(std::move(graph), node_weights); + auto graph = change_node_weights(std::move(csr_graph), node_weights); auto p_graph = make_partitioned_graph(graph, 2 * size, local_partition); auto subgraphs = extract_global_subgraphs(p_graph); @@ -409,13 +409,13 @@ TEST(GlobalGraphExtractionTest, 
extract_local_edge_weights_in_circle_clique_grap const auto [size, rank] = mpi::get_comm_info(MPI_COMM_WORLD); // create clique/circle graph with rank as node weight - auto graph = make_circle_clique_graph(2); + auto csr_graph = make_csr_circle_clique_graph(2); std::vector> edge_weights; edge_weights.emplace_back(0, 1, rank); edge_weights.emplace_back(1, 0, rank); - graph = change_edge_weights_by_endpoints(std::move(graph), edge_weights); + auto graph = change_edge_weights_by_endpoints(std::move(csr_graph), edge_weights); auto p_graph = make_partitioned_graph_by_rank(graph); auto subgraphs = extract_global_subgraphs(p_graph); @@ -601,17 +601,17 @@ TEST(GlobalGraphExtractionBlockAssignment, test_first_block_computation_P7_k3) { TEST(GlobalGraphExtractionTest, extract_from_circle_clique_graph_fewer_blocks_than_pes) { const auto [size, rank] = mpi::get_comm_info(MPI_COMM_WORLD); - auto graph = make_circle_clique_graph(size / 2); + auto csr_graph = make_csr_circle_clique_graph(size / 2); std::vector local_partition(size / 2); std::iota(local_partition.begin(), local_partition.end(), 0); // Use global node IDs as node weights std::vector> node_weights; - for (const NodeID u : graph.all_nodes()) { - node_weights.emplace_back(u, graph.local_to_global_node(u) + 1); + for (const NodeID u : csr_graph.all_nodes()) { + node_weights.emplace_back(u, csr_graph.local_to_global_node(u) + 1); } - graph = change_node_weights(std::move(graph), node_weights); + auto graph = change_node_weights(std::move(csr_graph), node_weights); auto p_graph = make_partitioned_graph(graph, size / 2, local_partition); auto subgraphs = extract_global_subgraphs(p_graph); diff --git a/tests/dist/graphutils/rearrangement_test.cc b/tests/dist/graphutils/rearrangement_test.cc index 13bb1d17..db5ba305 100644 --- a/tests/dist/graphutils/rearrangement_test.cc +++ b/tests/dist/graphutils/rearrangement_test.cc @@ -19,7 +19,7 @@ using namespace kaminpar::dist::testing; TEST(GraphRearrangementTest, 
sort_path_by_degree_buckets) { const auto [size, rank] = mpi::get_comm_info(MPI_COMM_WORLD); - auto graph = make_path(2); // two nodes per PE + auto graph = make_csr_path(2); // two nodes per PE auto sorted_graph = graph::rearrange_by_degree_buckets(std::move(graph)); // Check weights From 6cef2650c8eedde33a4ec9dac6ae1b1566d5346a Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 15 Jun 2024 16:08:47 +0200 Subject: [PATCH 02/54] feat(kaminpar-dist): templatize code and downcast to avoid virtual function calls --- .../algorithms/greedy_node_coloring.cc | 206 --- .../clustering/hem/hem_clusterer.cc | 804 ++++++----- .../coarsening/clustering/hem/hem_clusterer.h | 24 +- .../clustering/hem/hem_lp_clusterer.cc | 12 +- .../clustering/lp/global_lp_clusterer.cc | 58 +- .../clustering/lp/global_lp_clusterer.h | 2 +- .../clustering/lp/local_lp_clusterer.cc | 63 +- .../clustering/lp/local_lp_clusterer.h | 2 +- .../contraction/global_cluster_contraction.cc | 182 ++- .../contraction/local_cluster_contraction.cc | 21 +- .../distributed_compressed_graph.h | 2 +- .../datastructures/distributed_graph.h | 17 +- .../distributed_partitioned_graph.h | 1 + kaminpar-dist/distributed_label_propagation.h | 19 +- .../refinement/balancer/node_balancer.cc | 1185 +++++++++-------- .../refinement/balancer/node_balancer.h | 74 - kaminpar-dist/refinement/gain_calculator.h | 20 +- kaminpar-dist/refinement/jet/jet_refiner.cc | 642 ++++----- kaminpar-dist/refinement/jet/jet_refiner.h | 45 - kaminpar-dist/refinement/lp/lp_refiner.cc | 86 +- kaminpar-dist/refinement/lp/lp_refiner.h | 4 +- 21 files changed, 1734 insertions(+), 1735 deletions(-) delete mode 100644 kaminpar-dist/algorithms/greedy_node_coloring.cc diff --git a/kaminpar-dist/algorithms/greedy_node_coloring.cc b/kaminpar-dist/algorithms/greedy_node_coloring.cc deleted file mode 100644 index ebac098c..00000000 --- a/kaminpar-dist/algorithms/greedy_node_coloring.cc +++ /dev/null @@ -1,206 +0,0 @@ 
-/******************************************************************************* - * Basic implementation of a distributed vertex coloring algorithm. - * - * @file: greedy_node_coloring.cc - * @author: Daniel Seemaier - * @date: 11.11.2022 - ******************************************************************************/ -#include "kaminpar-dist/algorithms/greedy_node_coloring.h" - -/* -#include "kaminpar-mpi/wrapper.h" - -#include "kaminpar-dist/datastructures/distributed_csr_graph.h" -#include "kaminpar-dist/graphutils/communication.h" - -#include "kaminpar-common/assert.h" -#include "kaminpar-common/datastructures/marker.h" -#include "kaminpar-common/datastructures/noinit_vector.h" -#include "kaminpar-common/logger.h" -#include "kaminpar-common/math.h" -#include "kaminpar-common/parallel/algorithm.h" -#include "kaminpar-common/ranges.h" -#include "kaminpar-common/timer.h" - -namespace kaminpar::dist { -namespace { -SET_DEBUG(false); -} - -template -NoinitVector -compute_node_coloring_sequentially(const Graph &graph, const NodeID number_of_supersteps) { - KASSERT(number_of_supersteps > 0u, "bad parameter", assert::light); - SCOPED_TIMER("Compute greedy node coloring"); - - // Initialize coloring to 0 == no color picked yet - NoinitVector coloring(graph.total_n()); - graph.pfor_all_nodes([&](const NodeID u) { coloring[u] = 0; }); - - // Use max degree in the graph as an upper bound on the number of colors - // required - TransformedIotaRange degrees(static_cast(0), graph.n(), [&](const NodeID u) { - return graph.degree(u); - }); - const EdgeID max_degree = parallel::max_element(degrees.begin(), degrees.end()); - const ColorID max_colors = mpi::allreduce(max_degree, MPI_MAX, graph.communicator()) + 1; - - // Marker to keep track of the colors already incident to the current node - Marker<> incident_colors(max_colors); - - // Keep track of nodes that still need a color - NoinitVector active(graph.n()); - graph.pfor_nodes([&](const NodeID u) { active[u] = 1; }); - - 
bool converged; - do { - converged = true; - - for (NodeID superstep = 0; superstep < number_of_supersteps; ++superstep) { - const auto [from, to] = math::compute_local_range(graph.n(), number_of_supersteps, superstep); - - // Color all nodes in [from, to) - for (const NodeID u : graph.nodes(from, to)) { - if (!active[u]) { - continue; - } - - bool is_interface_node = false; - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { - is_interface_node = is_interface_node || graph.is_ghost_node(v); - - // @todo replace v < u with random numbers r(v) < r(u) - if (coloring[v] != 0 && (coloring[u] == 0 || !(coloring[v] == coloring[u] && - graph.local_to_global_node(u) < - graph.local_to_global_node(v)))) { - incident_colors.set(coloring[v] - 1); - } - }); - - if (coloring[u] == 0) { - coloring[u] = incident_colors.first_unmarked_element() + 1; - DBGC(u == 156543 || u == 262712) << "setting " << u << " to " << coloring[u] << " A"; - if (!is_interface_node) { - active[u] = 0; - } - } else if (incident_colors.get(coloring[u] - 1)) { - coloring[u] = incident_colors.first_unmarked_element() + 1; - DBGC(u == 156543 || u == 262712 || graph.local_to_global_node(u) == 681015) - << "setting " << u << " to " << coloring[u] << " B, global " - << graph.local_to_global_node(u); - } else { - active[u] = 0; - } - - incident_colors.reset(); - } - - // Synchronize coloring of interface <-> ghost nodes - struct Message { - NodeID node; - ColorID color; - }; - - mpi::graph::sparse_alltoall_interface_to_pe( - graph, - from, - to, - [&](const NodeID u) { return active[u]; }, - [&](const NodeID u) -> Message { - DBGC(u == 156543) << "Sending " << u << " --> " << coloring[u]; - return {.node = u, .color = coloring[u]}; - }, - [&](const auto &recv_buffer, const PEID pe) { - converged &= recv_buffer.empty(); - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto [local_node_on_pe, color] = recv_buffer[i]; - const GlobalNodeID global_node = - 
static_cast(graph.offset_n(pe) + local_node_on_pe); - const NodeID local_node = graph.global_to_local_node(global_node); - coloring[local_node] = color; - DBGC(local_node == 156543 || local_node == 262712) - << "setting " << local_node << " to " << coloring[local_node] << " C, global " - << graph.local_to_global_node(local_node); - }); - } - ); - } - } while (!mpi::allreduce(converged, MPI_LAND, graph.communicator())); - - // Check that all nodes have a color assigned (i.e., coloring[u] >= 1) - KASSERT( - [&] { - for (const NodeID u : graph.all_nodes()) { - if (coloring[u] == 0) { - return false; - } - } - return true; - }(), - "node coloring is incomplete", - assert::heavy - ); - - // Check that adjacent nodes have different colores - KASSERT( - [&] { - for (const NodeID u : graph.nodes()) { - bool fail = false; - - graph.adjacent_nodes(u, [&](const NodeID v) { - if (coloring[u] == coloring[v]) { - LOG_WARNING << "bad color for node " << u << " with neighbor " << v << ": " - << coloring[u]; - fail = true; - } - - return fail; - }); - - if (fail) { - return false; - } - } - return true; - }(), - "local node coloring is invalid", - assert::heavy - ); - - // Check that interface and ghost nodes have the same colors - KASSERT( - [&] { - struct Message { - GlobalNodeID node; - ColorID color; - }; - bool inconsistent = false; - mpi::graph::sparse_alltoall_interface_to_pe( - graph, - [&](const NodeID u) -> Message { - return {.node = graph.local_to_global_node(u), .color = coloring[u]}; - }, - [&](const auto &recv_buffer) { - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto [node, color] = recv_buffer[i]; - const NodeID local_node = graph.global_to_local_node(node); - if (coloring[local_node] != color) { - inconsistent = true; - } - }); - } - ); - return !inconsistent; - }(), - "global node coloring inconsistent", - assert::heavy - ); - - // Make colors start at 0 - tbb::parallel_for(0, graph.total_n(), [&](const NodeID u) { coloring[u] -= 
1; }); - - return coloring; -} - -} // namespace kaminpar::dist -*/ diff --git a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc index 521e6502..830a5269 100644 --- a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc @@ -18,437 +18,505 @@ namespace { SET_DEBUG(true); } -HEMClusterer::HEMClusterer(const Context &ctx) : _input_ctx(ctx), _ctx(ctx.coarsening.hem) {} - -void HEMClusterer::initialize_coloring() { - SCOPED_TIMER("Initialize HEM clustering"); - - const auto coloring = [&] { - // Graph is already sorted by a coloring -> reconstruct this coloring - // @todo if we always want to do this, optimize this refiner - if (_graph->color_sorted()) { - LOG << "Graph sorted by colors: using precomputed coloring"; - - // We do not actually need the colors for ghost nodes - NoinitVector coloring(_graph->n()); - - // @todo parallelize - NodeID pos = 0; - for (ColorID c = 0; c < _graph->number_of_colors(); ++c) { - const std::size_t size = _graph->color_size(c); - std::fill(coloring.begin() + pos, coloring.begin() + pos + size, c); - pos += size; - } +// +// Implementation +// - return coloring; - } +template class HEMClustererImpl { +public: + HEMClustererImpl(const Context &ctx) : _input_ctx(ctx), _ctx(ctx.coarsening.hem) {} - // Otherwise, compute a coloring now - LOG << "Computing new coloring"; - return compute_node_coloring_sequentially(*_graph, _ctx.chunks.compute(_input_ctx.parallel)); - }(); + void set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) { + _max_cluster_weight = max_cluster_weight; + } - const ColorID num_local_colors = *std::max_element(coloring.begin(), coloring.end()) + 1; - const ColorID num_colors = mpi::allreduce(num_local_colors, MPI_MAX, _graph->communicator()); + void cluster(StaticArray &matching, const Graph &graph) { + _matching = std::move(matching); + _graph = &graph; - 
TIMED_SCOPE("Allocation") { - _color_sorted_nodes.resize(_graph->n()); - _color_sizes.resize(num_colors + 1); - _color_blacklist.resize(num_colors); - tbb::parallel_for(0, _color_sorted_nodes.size(), [&](const std::size_t i) { - _color_sorted_nodes[i] = 0; - }); - tbb::parallel_for(0, _color_sizes.size(), [&](const std::size_t i) { - _color_sizes[i] = 0; - }); - tbb::parallel_for(0, _color_blacklist.size(), [&](const std::size_t i) { - _color_blacklist[i] = 0; + initialize_coloring(); + + SCOPED_TIMER("Compute HEM clustering"); + + tbb::parallel_for(0, graph.total_n(), [&](const NodeID u) { + matching[u] = kInvalidGlobalNodeID; }); - }; - - TIMED_SCOPE("Count color sizes") { - if (_graph->color_sorted()) { - const auto &color_sizes = _graph->get_color_sizes(); - _color_sizes.assign(color_sizes.begin(), color_sizes.end()); - } else { - _graph->pfor_nodes([&](const NodeID u) { - const ColorID c = coloring[u]; - KASSERT(c < num_colors); - __atomic_fetch_add(&_color_sizes[c], 1, __ATOMIC_RELAXED); - }); - parallel::prefix_sum(_color_sizes.begin(), _color_sizes.end(), _color_sizes.begin()); - } - }; - - TIMED_SCOPE("Sort nodes") { - if (_graph->color_sorted()) { - // @todo parallelize - std::iota(_color_sorted_nodes.begin(), _color_sorted_nodes.end(), 0); - } else { - _graph->pfor_nodes([&](const NodeID u) { - const ColorID c = coloring[u]; - const std::size_t i = __atomic_sub_fetch(&_color_sizes[c], 1, __ATOMIC_SEQ_CST); - KASSERT(i < _color_sorted_nodes.size()); - _color_sorted_nodes[i] = u; - }); - } - }; - TIMED_SCOPE("Compute color blacklist") { - if (_ctx.small_color_blacklist == 0 || - (_ctx.only_blacklist_input_level && - _graph->global_n() != _input_ctx.partition.graph->global_n)) { - return; + for (ColorID c = 0; c + 1 < _color_sizes.size(); ++c) { + compute_local_matching(c, _max_cluster_weight); + resolve_global_conflicts(c); } - NoinitVector global_color_sizes(num_colors); - tbb::parallel_for(0, num_colors, [&](const ColorID c) { - global_color_sizes[c] = 
_color_sizes[c + 1] - _color_sizes[c]; + _graph->pfor_all_nodes([&](const NodeID u) { + if (matching[u] == kInvalidGlobalNodeID) { + matching[u] = _graph->local_to_global_node(u); + } }); - MPI_Allreduce( - MPI_IN_PLACE, - global_color_sizes.data(), - asserting_cast(num_colors), - mpi::type::get(), - MPI_SUM, - _graph->communicator() - ); - // @todo parallelize the rest of this section - std::vector sorted_by_size(num_colors); - std::iota(sorted_by_size.begin(), sorted_by_size.end(), 0); - std::sort( - sorted_by_size.begin(), - sorted_by_size.end(), - [&](const ColorID lhs, const ColorID rhs) { - return global_color_sizes[lhs] < global_color_sizes[rhs]; + KASSERT(validate_matching(), "matching in inconsistent state", assert::always); + + matching = std::move(_matching); + } + +private: + void initialize_coloring() { + SCOPED_TIMER("Initialize HEM clustering"); + + const auto coloring = [&] { + // Graph is already sorted by a coloring -> reconstruct this coloring + // @todo if we always want to do this, optimize this refiner + if (_graph->color_sorted()) { + LOG << "Graph sorted by colors: using precomputed coloring"; + + // We do not actually need the colors for ghost nodes + NoinitVector coloring(_graph->n()); + + // @todo parallelize + NodeID pos = 0; + for (ColorID c = 0; c < _graph->number_of_colors(); ++c) { + const std::size_t size = _graph->color_size(c); + std::fill(coloring.begin() + pos, coloring.begin() + pos + size, c); + pos += size; } - ); - GlobalNodeID excluded_so_far = 0; - for (const ColorID c : sorted_by_size) { - excluded_so_far += global_color_sizes[c]; - const double percentage = 1.0 * excluded_so_far / _graph->global_n(); - if (percentage <= _ctx.small_color_blacklist) { - _color_blacklist[c] = 1; - } else { - break; + return coloring; } - } - }; - KASSERT(_color_sizes.front() == 0u); - KASSERT(_color_sizes.back() == _graph->n()); -} + // Otherwise, compute a coloring now + LOG << "Computing new coloring"; + return 
compute_node_coloring_sequentially(*_graph, _ctx.chunks.compute(_input_ctx.parallel)); + }(); -void HEMClusterer::set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) { - _max_cluster_weight = max_cluster_weight; -} + const ColorID num_local_colors = *std::max_element(coloring.begin(), coloring.end()) + 1; + const ColorID num_colors = mpi::allreduce(num_local_colors, MPI_MAX, _graph->communicator()); -void HEMClusterer::cluster(StaticArray &matching, const DistributedGraph &graph) { - _matching = std::move(matching); - _graph = &graph; + TIMED_SCOPE("Allocation") { + _color_sorted_nodes.resize(_graph->n()); + _color_sizes.resize(num_colors + 1); + _color_blacklist.resize(num_colors); + tbb::parallel_for(0, _color_sorted_nodes.size(), [&](const std::size_t i) { + _color_sorted_nodes[i] = 0; + }); + tbb::parallel_for(0, _color_sizes.size(), [&](const std::size_t i) { + _color_sizes[i] = 0; + }); + tbb::parallel_for(0, _color_blacklist.size(), [&](const std::size_t i) { + _color_blacklist[i] = 0; + }); + }; + + TIMED_SCOPE("Count color sizes") { + if (_graph->color_sorted()) { + const auto &color_sizes = _graph->get_color_sizes(); + _color_sizes.assign(color_sizes.begin(), color_sizes.end()); + } else { + _graph->pfor_nodes([&](const NodeID u) { + const ColorID c = coloring[u]; + KASSERT(c < num_colors); + __atomic_fetch_add(&_color_sizes[c], 1, __ATOMIC_RELAXED); + }); + parallel::prefix_sum(_color_sizes.begin(), _color_sizes.end(), _color_sizes.begin()); + } + }; + + TIMED_SCOPE("Sort nodes") { + if (_graph->color_sorted()) { + // @todo parallelize + std::iota(_color_sorted_nodes.begin(), _color_sorted_nodes.end(), 0); + } else { + _graph->pfor_nodes([&](const NodeID u) { + const ColorID c = coloring[u]; + const std::size_t i = __atomic_sub_fetch(&_color_sizes[c], 1, __ATOMIC_SEQ_CST); + KASSERT(i < _color_sorted_nodes.size()); + _color_sorted_nodes[i] = u; + }); + } + }; - initialize_coloring(); + TIMED_SCOPE("Compute color blacklist") { + if 
(_ctx.small_color_blacklist == 0 || + (_ctx.only_blacklist_input_level && + _graph->global_n() != _input_ctx.partition.graph->global_n)) { + return; + } - SCOPED_TIMER("Compute HEM clustering"); + NoinitVector global_color_sizes(num_colors); + tbb::parallel_for(0, num_colors, [&](const ColorID c) { + global_color_sizes[c] = _color_sizes[c + 1] - _color_sizes[c]; + }); + MPI_Allreduce( + MPI_IN_PLACE, + global_color_sizes.data(), + asserting_cast(num_colors), + mpi::type::get(), + MPI_SUM, + _graph->communicator() + ); - tbb::parallel_for(0, graph.total_n(), [&](const NodeID u) { - matching[u] = kInvalidGlobalNodeID; - }); + // @todo parallelize the rest of this section + std::vector sorted_by_size(num_colors); + std::iota(sorted_by_size.begin(), sorted_by_size.end(), 0); + std::sort( + sorted_by_size.begin(), + sorted_by_size.end(), + [&](const ColorID lhs, const ColorID rhs) { + return global_color_sizes[lhs] < global_color_sizes[rhs]; + } + ); + + GlobalNodeID excluded_so_far = 0; + for (const ColorID c : sorted_by_size) { + excluded_so_far += global_color_sizes[c]; + const double percentage = 1.0 * excluded_so_far / _graph->global_n(); + if (percentage <= _ctx.small_color_blacklist) { + _color_blacklist[c] = 1; + } else { + break; + } + } + }; - for (ColorID c = 0; c + 1 < _color_sizes.size(); ++c) { - compute_local_matching(c, _max_cluster_weight); - resolve_global_conflicts(c); + KASSERT(_color_sizes.front() == 0u); + KASSERT(_color_sizes.back() == _graph->n()); } - _graph->pfor_all_nodes([&](const NodeID u) { - if (matching[u] == kInvalidGlobalNodeID) { - matching[u] = _graph->local_to_global_node(u); - } - }); + void compute_local_matching(ColorID c, GlobalNodeWeight max_cluster_weight) { + const NodeID seq_from = _color_sizes[c]; + const NodeID seq_to = _color_sizes[c + 1]; - KASSERT(validate_matching(), "matching in inconsistent state", assert::always); + _graph->pfor_nodes(seq_from, seq_to, [&](const NodeID seq_u) { + const NodeID u = 
_color_sorted_nodes[seq_u]; + if (_matching[u] != kInvalidGlobalNodeID) { + return; // Node already matched + } - matching = std::move(_matching); -} + const NodeWeight u_weight = _graph->node_weight(u); -bool HEMClusterer::validate_matching() { - for (const NodeID u : _graph->nodes()) { - const GlobalNodeID u_partner = _matching[u]; - - KASSERT(_graph->contains_global_node(u_partner), "invalid matching partner for node " << u); - if (_graph->is_owned_global_node(u_partner)) { - const NodeID local_partner = _graph->global_to_local_node(u_partner); - const GlobalNodeID u_global = _graph->local_to_global_node(u); - KASSERT( - u == local_partner || _matching[local_partner] == u_partner, - "invalid clustering structure for node " - << u << " (global " << u_global << ") matched to node " << local_partner - << ", which is matched to global node " << _matching[local_partner] - ); - } - } + // @todo if matching fails due to a race condition, we could try again - // Check matched edges between PEs - struct MatchedEdge { - GlobalNodeID u; - GlobalNodeID v; - }; - mpi::graph::sparse_alltoall_interface_to_ghost( - *_graph, - [&](const NodeID u, EdgeID, const NodeID v) -> bool { - return _matching[u] == _graph->local_to_global_node(v); - }, - [&](const NodeID u, EdgeID, NodeID) -> MatchedEdge { - return {_graph->local_to_global_node(u), _matching[u]}; - }, - [&](const auto &r, const PEID pe) { - for (const auto &[u, v] : r) { - KASSERT(_graph->contains_global_node(u)); - KASSERT( - _graph->is_owned_global_node(v), "PE " << pe << " thinks that this PE owns " << v - ); - const NodeID local_u = _graph->global_to_local_node(u); - const NodeID local_v = _graph->global_to_local_node(v); - - KASSERT( - _matching[local_v] == v, - "invalid clustering structure for edge " - << u << " <-> " << v << " (local " << local_u << " <-> " << local_v - << "): expected " << v << " to be the leader, but " << v << " is in cluster " - << _matching[local_v] - ); + NodeID best_neighbor = 0; + EdgeWeight 
best_weight = 0; + _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + // v already matched? + if (_matching[v] != kInvalidGlobalNodeID) { + return; } - } - ); - return true; -} + // v too heavy? + const NodeWeight v_weight = _graph->node_weight(v); + if (u_weight + v_weight > max_cluster_weight && !_ctx.ignore_weight_limit) { + return; + } -void HEMClusterer::compute_local_matching( - const ColorID c, const GlobalNodeWeight max_cluster_weight -) { - const NodeID seq_from = _color_sizes[c]; - const NodeID seq_to = _color_sizes[c + 1]; - _graph->pfor_nodes(seq_from, seq_to, [&](const NodeID seq_u) { - const NodeID u = _color_sorted_nodes[seq_u]; - if (_matching[u] != kInvalidGlobalNodeID) { - return; // Node already matched - } + // Already found a better neighbor? + const EdgeWeight e_weight = _graph->edge_weight(e); + if (e_weight < best_weight) { + return; + } + + // Match with v + best_weight = e_weight; + best_neighbor = v; + }); - const NodeWeight u_weight = _graph->node_weight(u); + // If we found a good neighbor, try to match with it + if (best_weight > 0) { + const GlobalNodeID neighbor_global = _graph->local_to_global_node(best_neighbor); + GlobalNodeID unmatched = kInvalidGlobalNodeID; + if (__atomic_compare_exchange_n( + &_matching[best_neighbor], + &unmatched, + neighbor_global, + true, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST + )) { + // @todo if we merge small colors, also use CAS to match our own node + // and revert matching of best_neighbor if our CAS failed + __atomic_store_n(&_matching[u], neighbor_global, __ATOMIC_RELAXED); + } + } + }); + } - // @todo if matching fails due to a race condition, we could try again + void resolve_global_conflicts(ColorID c) { + struct MatchRequest { + NodeID mine; + NodeID theirs; + EdgeWeight weight; + }; + + const NodeID seq_from = _color_sizes[c]; + const NodeID seq_to = _color_sizes[c + 1]; + + // @todo avoid O(m), use same "trick" as below? 
+ auto all_requests = + mpi::graph::sparse_alltoall_interface_to_ghost_custom_range_get( + *_graph, + seq_from, + seq_to, + [&](const NodeID seq_u) { return _color_sorted_nodes[seq_u]; }, + [&](const NodeID u, EdgeID, const NodeID v) { + return _matching[u] == _graph->local_to_global_node(v); + }, + [&](const NodeID u, const EdgeID e, const NodeID v, const PEID pe) -> MatchRequest { + const GlobalNodeID v_global = _graph->local_to_global_node(v); + const NodeID their_v = static_cast(v_global - _graph->offset_n(pe)); + return {u, their_v, _graph->edge_weight(e)}; + } + ); + + parallel::chunked_for(all_requests, [&](MatchRequest &req, PEID) { + std::swap(req.theirs, req.mine); // Swap roles of theirs and mine + + if (_matching[req.mine] != kInvalidGlobalNodeID) { + req.mine = kInvalidNodeID; // Reject: local node matched to node + } + }); - NodeID best_neighbor = 0; - EdgeWeight best_weight = 0; - _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { - // v already matched? - if (_matching[v] != kInvalidGlobalNodeID) { + parallel::chunked_for(all_requests, [&](MatchRequest &req, const PEID pe) { + if (req.mine == kInvalidNodeID) { return; } - // v too heavy? - const NodeWeight v_weight = _graph->node_weight(v); - if (u_weight + v_weight > max_cluster_weight && !_ctx.ignore_weight_limit) { + KASSERT(_graph->contains_global_node(req.theirs + _graph->offset_n(pe))); + req.theirs = _graph->global_to_local_node(req.theirs + _graph->offset_n(pe)); + KASSERT(_graph->is_ghost_node(req.theirs)); + + GlobalNodeID current_partner = _matching[req.mine]; + GlobalNodeID new_partner = current_partner; + do { + const EdgeWeight current_weight = current_partner == kInvalidGlobalNodeID + ? 
0 + : static_cast(current_partner >> 32); + if (req.weight <= current_weight) { + break; + } + new_partner = (static_cast(req.weight) << 32) | req.theirs; + } while (__atomic_compare_exchange_n( + &_matching[req.mine], + ¤t_partner, + new_partner, + true, + __ATOMIC_SEQ_CST, + __ATOMIC_SEQ_CST + )); + }); + + // Create response messages + parallel::chunked_for(all_requests, [&](MatchRequest &req, const PEID pe) { + if (req.mine == kInvalidNodeID) { return; } - // Already found a better neighbor? - const EdgeWeight e_weight = _graph->edge_weight(e); - if (e_weight < best_weight) { - return; + const NodeID winner = _matching[req.mine] & 0xFFFF'FFFF; + if (req.theirs != winner) { + // Indicate that the matching failed + req.mine = kInvalidNodeID; } - // Match with v - best_weight = e_weight; - best_neighbor = v; + req.theirs = + static_cast(_graph->local_to_global_node(req.theirs) - _graph->offset_n(pe)); }); - // If we found a good neighbor, try to match with it - if (best_weight > 0) { - const GlobalNodeID neighbor_global = _graph->local_to_global_node(best_neighbor); - GlobalNodeID unmatched = kInvalidGlobalNodeID; - if (__atomic_compare_exchange_n( - &_matching[best_neighbor], - &unmatched, - neighbor_global, - true, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST - )) { - // @todo if we merge small colors, also use CAS to match our own node - // and revert matching of best_neighbor if our CAS failed - __atomic_store_n(&_matching[u], neighbor_global, __ATOMIC_RELAXED); + // Normalize our _matching array + parallel::chunked_for(all_requests, [&](const MatchRequest &req) { + if (req.mine != kInvalidNodeID) { // Due to the previous step, this should + // only happen once per node + _matching[req.mine] = + _graph->local_to_global_node(req.mine); // We become the leader of this cluster } - } - }); -} + }); + + // Exchange response messages + auto all_responses = + mpi::sparse_alltoall_get(all_requests, _graph->communicator()); -void HEMClusterer::resolve_global_conflicts(const 
ColorID c) { - struct MatchRequest { - NodeID mine; - NodeID theirs; - EdgeWeight weight; - }; - - const NodeID seq_from = _color_sizes[c]; - const NodeID seq_to = _color_sizes[c + 1]; - - // @todo avoid O(m), use same "trick" as below? - auto all_requests = mpi::graph::sparse_alltoall_interface_to_ghost_custom_range_get( - *_graph, - seq_from, - seq_to, - [&](const NodeID seq_u) { return _color_sorted_nodes[seq_u]; }, - [&](const NodeID u, EdgeID, const NodeID v) { - return _matching[u] == _graph->local_to_global_node(v); - }, - [&](const NodeID u, const EdgeID e, const NodeID v, const PEID pe) -> MatchRequest { - const GlobalNodeID v_global = _graph->local_to_global_node(v); - const NodeID their_v = static_cast(v_global - _graph->offset_n(pe)); - return {u, their_v, _graph->edge_weight(e)}; + parallel::chunked_for(all_responses, [&](MatchRequest &rsp) { + std::swap(rsp.mine, rsp.theirs); // Swap roles of theirs and mine + + if (rsp.theirs == kInvalidNodeID) { + // We have to unmatch the ghost node + _matching[rsp.mine] = kInvalidGlobalNodeID; } - ); + }); - parallel::chunked_for(all_requests, [&](MatchRequest &req, PEID) { - std::swap(req.theirs, req.mine); // Swap roles of theirs and mine + // Synchronize matching: + // - nodes that where active during this round + // - their matching partners + // - interface nodes that got matched by nodes on other PEs + struct MatchedMessage { + NodeID node; + GlobalNodeID partner; + }; + + const PEID size = mpi::get_comm_size(_graph->communicator()); + std::vector> sync_msgs(size); + Marker<> marked(size); + + auto add_node = [&](const NodeID u) { + marked.reset(); + _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + if (!_graph->is_ghost_node(v)) { + return; + } - if (_matching[req.mine] != kInvalidGlobalNodeID) { - req.mine = kInvalidNodeID; // Reject: local node matched to node + const PEID owner = _graph->ghost_owner(v); + if (!marked.get(owner)) { + sync_msgs[owner].push_back({u, _matching[u]}); + 
marked.set(owner); + } + }); + }; + + for (const NodeID seq_u : _graph->nodes(seq_from, seq_to)) { + const NodeID u = _color_sorted_nodes[seq_u]; + const GlobalNodeID partner = _matching[u]; + if (partner != kInvalidGlobalNodeID) { + add_node(u); + + if (_graph->is_owned_global_node(partner)) { + const NodeID local_partner = _graph->global_to_local_node(partner); + if (u != local_partner) { + add_node(local_partner); + } + } + } } - }); - parallel::chunked_for(all_requests, [&](MatchRequest &req, const PEID pe) { - if (req.mine == kInvalidNodeID) { - return; + for (const auto &pe_requests : all_requests) { + for (const auto &req : pe_requests) { + if (req.mine != kInvalidNodeID) { + add_node(req.mine); + } + } } - KASSERT(_graph->contains_global_node(req.theirs + _graph->offset_n(pe))); - req.theirs = _graph->global_to_local_node(req.theirs + _graph->offset_n(pe)); - KASSERT(_graph->is_ghost_node(req.theirs)); - - GlobalNodeID current_partner = _matching[req.mine]; - GlobalNodeID new_partner = current_partner; - do { - const EdgeWeight current_weight = current_partner == kInvalidGlobalNodeID - ? 
0 - : static_cast(current_partner >> 32); - if (req.weight <= current_weight) { - break; + mpi::sparse_alltoall( + sync_msgs, + [&](const auto &r, const PEID pe) { + tbb::parallel_for(0, r.size(), [&](const std::size_t i) { + const auto [local_node_on_pe, partner] = r[i]; + const auto global_node = + static_cast(_graph->offset_n(pe) + local_node_on_pe); + const NodeID local_node = _graph->global_to_local_node(global_node); + _matching[local_node] = partner; + }); + }, + _graph->communicator() + ); + } + + bool validate_matching() { + for (const NodeID u : _graph->nodes()) { + const GlobalNodeID u_partner = _matching[u]; + + KASSERT(_graph->contains_global_node(u_partner), "invalid matching partner for node " << u); + if (_graph->is_owned_global_node(u_partner)) { + const NodeID local_partner = _graph->global_to_local_node(u_partner); + const GlobalNodeID u_global = _graph->local_to_global_node(u); + KASSERT( + u == local_partner || _matching[local_partner] == u_partner, + "invalid clustering structure for node " + << u << " (global " << u_global << ") matched to node " << local_partner + << ", which is matched to global node " << _matching[local_partner] + ); } - new_partner = (static_cast(req.weight) << 32) | req.theirs; - } while (__atomic_compare_exchange_n( - &_matching[req.mine], - &current_partner, - new_partner, - true, - __ATOMIC_SEQ_CST, - __ATOMIC_SEQ_CST - )); - }); - - // Create response messages - parallel::chunked_for(all_requests, [&](MatchRequest &req, const PEID pe) { - if (req.mine == kInvalidNodeID) { - return; } - const NodeID winner = _matching[req.mine] & 0xFFFF'FFFF; - if (req.theirs != winner) { - // Indicate that the matching failed - req.mine = kInvalidNodeID; - } + // Check matched edges between PEs + struct MatchedEdge { + GlobalNodeID u; + GlobalNodeID v; + }; + mpi::graph::sparse_alltoall_interface_to_ghost( + *_graph, + [&](const NodeID u, EdgeID, const NodeID v) -> bool { + return _matching[u] == _graph->local_to_global_node(v); + }, +
[&](const NodeID u, EdgeID, NodeID) -> MatchedEdge { + return {_graph->local_to_global_node(u), _matching[u]}; + }, + [&](const auto &r, const PEID pe) { + for (const auto &[u, v] : r) { + KASSERT(_graph->contains_global_node(u)); + KASSERT( + _graph->is_owned_global_node(v), "PE " << pe << " thinks that this PE owns " << v + ); + const NodeID local_u = _graph->global_to_local_node(u); + const NodeID local_v = _graph->global_to_local_node(v); + + KASSERT( + _matching[local_v] == v, + "invalid clustering structure for edge " + << u << " <-> " << v << " (local " << local_u << " <-> " << local_v + << "): expected " << v << " to be the leader, but " << v << " is in cluster " + << _matching[local_v] + ); + } + } + ); - req.theirs = - static_cast(_graph->local_to_global_node(req.theirs) - _graph->offset_n(pe)); - }); + return true; + } - // Normalize our _matching array - parallel::chunked_for(all_requests, [&](const MatchRequest &req) { - if (req.mine != kInvalidNodeID) { // Due to the previous step, this should - // only happen once per node - _matching[req.mine] = - _graph->local_to_global_node(req.mine); // We become the leader of this cluster - } - }); + const Context &_input_ctx; + const HEMCoarseningContext &_ctx; - // Exchange response messages - auto all_responses = mpi::sparse_alltoall_get(all_requests, _graph->communicator()); + const Graph *_graph; - parallel::chunked_for(all_responses, [&](MatchRequest &rsp) { - std::swap(rsp.mine, rsp.theirs); // Swap roles of theirs and mine + NoinitVector _color_blacklist; + NoinitVector _color_sizes; + NoinitVector _color_sorted_nodes; - if (rsp.theirs == kInvalidNodeID) { - // We have to unmatch the ghost node - _matching[rsp.mine] = kInvalidGlobalNodeID; - } - }); - - // Synchronize matching: - // - nodes that where active during this round - // - their matching partners - // - interface nodes that got matched by nodes on other PEs - struct MatchedMessage { - NodeID node; - GlobalNodeID partner; - }; - - const PEID 
size = mpi::get_comm_size(_graph->communicator()); - std::vector> sync_msgs(size); - Marker<> marked(size); - - auto add_node = [&](const NodeID u) { - marked.reset(); - _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { - if (!_graph->is_ghost_node(v)) { - return; - } + GlobalNodeWeight _max_cluster_weight = 0; + StaticArray _matching; +}; - const PEID owner = _graph->ghost_owner(v); - if (!marked.get(owner)) { - sync_msgs[owner].push_back({u, _matching[u]}); - marked.set(owner); - } - }); - }; - - for (const NodeID seq_u : _graph->nodes(seq_from, seq_to)) { - const NodeID u = _color_sorted_nodes[seq_u]; - const GlobalNodeID partner = _matching[u]; - if (partner != kInvalidGlobalNodeID) { - add_node(u); - - if (_graph->is_owned_global_node(partner)) { - const NodeID local_partner = _graph->global_to_local_node(partner); - if (u != local_partner) { - add_node(local_partner); - } - } - } +// +// Private interface +// + +class HEMClustererImplWrapper { +public: + HEMClustererImplWrapper(const Context &ctx) + : _csr_impl(std::make_unique>(ctx)), + _compressed_impl(std::make_unique>(ctx)) {} + + void set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) { + _csr_impl->set_max_cluster_weight(max_cluster_weight); + _compressed_impl->set_max_cluster_weight(max_cluster_weight); } - for (const auto &pe_requests : all_requests) { - for (const auto &req : pe_requests) { - if (req.mine != kInvalidNodeID) { - add_node(req.mine); - } - } + void cluster(StaticArray &matching, const DistributedGraph &graph) { + graph.reified( + [&](const DistributedCSRGraph &csr_graph) { _csr_impl->cluster(matching, csr_graph); }, + [&](const DistributedCompressedGraph &compressed_graph) { + _compressed_impl->cluster(matching, compressed_graph); + } + ); } - mpi::sparse_alltoall( - sync_msgs, - [&](const auto &r, const PEID pe) { - tbb::parallel_for(0, r.size(), [&](const std::size_t i) { - const auto [local_node_on_pe, partner] = r[i]; - const auto global_node = - 
static_cast(_graph->offset_n(pe) + local_node_on_pe); - const NodeID local_node = _graph->global_to_local_node(global_node); - _matching[local_node] = partner; - }); - }, - _graph->communicator() - ); +private: + std::unique_ptr> _csr_impl; + std::unique_ptr> _compressed_impl; +}; + +// +// Public interface +// + +HEMClusterer::HEMClusterer(const Context &ctx) + : _impl_wrapper(std::make_unique(ctx)) {} + +HEMClusterer::~HEMClusterer() = default; + +void HEMClusterer::set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) { + _impl_wrapper->set_max_cluster_weight(max_cluster_weight); } + +void HEMClusterer::cluster(StaticArray &matching, const DistributedGraph &graph) { + _impl_wrapper->cluster(matching, graph); +} + } // namespace kaminpar::dist diff --git a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.h b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.h index 5345e884..581dcd79 100644 --- a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.h +++ b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.h @@ -7,7 +7,6 @@ ******************************************************************************/ #pragma once -#include "kaminpar-dist/algorithms/greedy_node_coloring.h" #include "kaminpar-dist/coarsening/clusterer.h" #include "kaminpar-dist/context.h" #include "kaminpar-dist/dkaminpar.h" @@ -15,7 +14,7 @@ namespace kaminpar::dist { class HEMClusterer : public Clusterer { public: - HEMClusterer(const Context &ctx); + explicit HEMClusterer(const Context &ctx); HEMClusterer(const HEMClusterer &) = delete; HEMClusterer &operator=(const HEMClusterer &) = delete; @@ -23,28 +22,13 @@ class HEMClusterer : public Clusterer { HEMClusterer(HEMClusterer &&) noexcept = default; HEMClusterer &operator=(HEMClusterer &&) = delete; + ~HEMClusterer() override; + void set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) final; void cluster(StaticArray &matching, const DistributedGraph &graph) final; private: - void initialize_coloring(); - 
- void compute_local_matching(ColorID c, GlobalNodeWeight max_cluster_weight); - void resolve_global_conflicts(ColorID c); - - bool validate_matching(); - - const Context &_input_ctx; - const HEMCoarseningContext &_ctx; - - const DistributedGraph *_graph; - - NoinitVector _color_blacklist; - NoinitVector _color_sizes; - NoinitVector _color_sorted_nodes; - - GlobalNodeWeight _max_cluster_weight = 0; - StaticArray _matching; + std::unique_ptr _impl_wrapper; }; } // namespace kaminpar::dist diff --git a/kaminpar-dist/coarsening/clustering/hem/hem_lp_clusterer.cc b/kaminpar-dist/coarsening/clustering/hem/hem_lp_clusterer.cc index 50fffda8..48e5fe25 100644 --- a/kaminpar-dist/coarsening/clustering/hem/hem_lp_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/hem/hem_lp_clusterer.cc @@ -48,13 +48,15 @@ GlobalNodeID HEMLPClusterer::compute_size_after_matching_contraction(const StaticArray &clustering ) { tbb::enumerable_thread_specific num_matched_edges_ets; - _graph->pfor_nodes([&](const NodeID u) { - if (clustering[u] != _graph->local_to_global_node(u)) { - ++num_matched_edges_ets.local(); - } + _graph->reified([&](const auto &graph) { + graph.pfor_nodes([&](const NodeID u) { + if (clustering[u] != graph.local_to_global_node(u)) { + ++num_matched_edges_ets.local(); + } + }); }); - const NodeID num_matched_edges = num_matched_edges_ets.combine(std::plus{}); + const NodeID num_matched_edges = num_matched_edges_ets.combine(std::plus{}); const GlobalNodeID num_matched_edges_globally = mpi::allreduce(num_matched_edges, MPI_SUM, _graph->communicator()); diff --git a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc index fdd01350..ea31a82f 100644 --- a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc @@ -34,15 +34,23 @@ struct GlobalLPClusteringConfig : public LabelPropagationConfig { }; } // namespace -class 
GlobalLPClusteringImpl final - : public ChunkRandomdLabelPropagation, - public NonatomicClusterVectorRef { +template +class GlobalLPClusteringImpl final : public ChunkRandomdLabelPropagation< + GlobalLPClusteringImpl, + GlobalLPClusteringConfig, + Graph>, + public NonatomicClusterVectorRef { SET_DEBUG(false); - using Base = ChunkRandomdLabelPropagation; + using Base = + ChunkRandomdLabelPropagation, GlobalLPClusteringConfig, Graph>; using ClusterBase = NonatomicClusterVectorRef; using WeightDeltaMap = growt::GlobalNodeIDMap; + using Config = GlobalLPClusteringConfig; + using ClusterID = Config::ClusterID; + using ClusterWeight = Config::ClusterWeight; + public: explicit GlobalLPClusteringImpl(const Context &ctx) : _ctx(ctx), @@ -52,11 +60,11 @@ class GlobalLPClusteringImpl final _local_cluster_weights(ctx.partition.graph->n), _passive_high_degree_threshold(_c_ctx.global_lp.passive_high_degree_threshold) { set_max_num_iterations(_c_ctx.global_lp.num_iterations); - set_max_degree(_c_ctx.global_lp.active_high_degree_threshold); - set_max_num_neighbors(_c_ctx.global_lp.max_num_neighbors); + Base::set_max_degree(_c_ctx.global_lp.active_high_degree_threshold); + Base::set_max_num_neighbors(_c_ctx.global_lp.max_num_neighbors); } - void initialize(const DistributedGraph &graph) { + void initialize(const Graph &graph) { TIMER_BARRIER(graph.communicator()); SCOPED_TIMER("Label propagation"); @@ -90,7 +98,7 @@ class GlobalLPClusteringImpl final _max_cluster_weight = weight; } - void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) { + void compute_clustering(StaticArray &clustering, const Graph &graph) { TIMER_BARRIER(graph.communicator()); SCOPED_TIMER("Label propagation"); @@ -301,7 +309,7 @@ class GlobalLPClusteringImpl final GlobalNodeID process_chunk(const NodeID from, const NodeID to) { TIMER_BARRIER(_graph->communicator()); START_TIMER("Chunk iteration"); - const NodeID local_num_moved_nodes = perform_iteration(from, to); + const NodeID 
local_num_moved_nodes = Base::perform_iteration(from, to); STOP_TIMER(); const GlobalNodeID global_num_moved_nodes = @@ -320,7 +328,7 @@ class GlobalLPClusteringImpl final return global_num_moved_nodes; } - void allocate(const DistributedGraph &graph) { + void allocate(const Graph &graph) { const NodeID allocated_num_active_nodes = _changed_label.size(); if (allocated_num_active_nodes < graph.n()) { @@ -644,12 +652,40 @@ class GlobalLPClusteringImpl final }}; }; +class GlobalLPClusteringImplWrapper { +public: + GlobalLPClusteringImplWrapper(const Context &ctx) + : _csr_impl(std::make_unique>(ctx)), + _compressed_impl(std::make_unique>(ctx) + ) {} + + void set_max_cluster_weight(const GlobalNodeWeight weight) { + _csr_impl->set_max_cluster_weight(weight); + _compressed_impl->set_max_cluster_weight(weight); + } + + void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) { + graph.reified( + [&](const DistributedCSRGraph &csr_graph) { + _csr_impl->compute_clustering(clustering, csr_graph); + }, + [&](const DistributedCompressedGraph &compressed_graph) { + _compressed_impl->compute_clustering(clustering, compressed_graph); + } + ); + } + +private: + std::unique_ptr> _csr_impl; + std::unique_ptr> _compressed_impl; +}; + // // Public interface // GlobalLPClusterer::GlobalLPClusterer(const Context &ctx) - : _impl(std::make_unique(ctx)) {} + : _impl(std::make_unique(ctx)) {} GlobalLPClusterer::~GlobalLPClusterer() = default; diff --git a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.h b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.h index e02aa7b5..d8d7eb85 100644 --- a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.h +++ b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.h @@ -29,6 +29,6 @@ class GlobalLPClusterer : public Clusterer { void cluster(StaticArray &clustering, const DistributedGraph &graph) final; private: - std::unique_ptr _impl; + std::unique_ptr _impl; }; } // namespace kaminpar::dist 
diff --git a/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc b/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc index e8ab2095..aca7beb4 100644 --- a/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc @@ -19,25 +19,32 @@ struct LocalLPClusteringConfig : public LabelPropagationConfig { static constexpr bool kUseTwoHopClustering = true; }; -class LocalLPClusteringImpl final - : public ChunkRandomdLabelPropagation, - public NonatomicClusterVectorRef, - public OwnedRelaxedClusterWeightVector { +template +class LocalLPClusteringImpl final : public ChunkRandomdLabelPropagation< + LocalLPClusteringImpl, + LocalLPClusteringConfig, + Graph>, + public NonatomicClusterVectorRef, + public OwnedRelaxedClusterWeightVector { SET_DEBUG(false); - using Base = ChunkRandomdLabelPropagation; + using Base = + ChunkRandomdLabelPropagation, LocalLPClusteringConfig, Graph>; using ClusterBase = NonatomicClusterVectorRef; using ClusterWeightBase = OwnedRelaxedClusterWeightVector; + using Config = LocalLPClusteringConfig; + using ClusterID = Config::ClusterID; + public: LocalLPClusteringImpl(const NodeID max_n, const CoarseningContext &c_ctx) : _ignore_ghost_nodes(c_ctx.local_lp.ignore_ghost_nodes), _keep_ghost_clusters(c_ctx.local_lp.keep_ghost_clusters) { - allocate_cluster_weights(max_n); - allocate(max_n, max_n); set_max_num_iterations(c_ctx.local_lp.num_iterations); - set_max_degree(c_ctx.local_lp.active_high_degree_threshold); - set_max_num_neighbors(c_ctx.local_lp.max_num_neighbors); + Base::set_max_degree(c_ctx.local_lp.active_high_degree_threshold); + Base::set_max_num_neighbors(c_ctx.local_lp.max_num_neighbors); + Base::allocate(max_n, max_n); + ClusterWeightBase::allocate_cluster_weights(max_n); } void initialize(const DistributedGraph &graph) { @@ -64,7 +71,7 @@ class LocalLPClusteringImpl final std::size_t iteration; for (iteration = 0; iteration < _max_num_iterations; 
++iteration) { - if (perform_iteration() == 0) { + if (Base::perform_iteration() == 0) { break; } } @@ -149,12 +156,42 @@ class LocalLPClusteringImpl final const BlockID *_partition = nullptr; }; +class LocalLPClusteringImplWrapper { +public: + LocalLPClusteringImplWrapper(const NodeID max_n, const CoarseningContext &c_ctx) + : _csr_impl(std::make_unique>(max_n, c_ctx)), + _compressed_impl( + std::make_unique>(max_n, c_ctx) + ) {} + + void set_communities(const StaticArray &communities) { + _csr_impl->_partition = communities.data(); + _compressed_impl->_partition = communities.data(); + } + + void clear_communities() { + _csr_impl->_partition = nullptr; + _compressed_impl->_partition = nullptr; + } + + void set_max_cluster_weight(const GlobalNodeWeight weight) { + _csr_impl->set_max_cluster_weight(weight); + _compressed_impl->set_max_cluster_weight(weight); + } + + void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) {} + +private: + std::unique_ptr> _csr_impl; + std::unique_ptr> _compressed_impl; +}; + // // Interface // LocalLPClusterer::LocalLPClusterer(const Context &ctx) - : _impl(std::make_unique( + : _impl(std::make_unique( ctx.coarsening.local_lp.ignore_ghost_nodes ? 
ctx.partition.graph->n : ctx.partition.graph->total_n, ctx.coarsening @@ -163,11 +200,11 @@ LocalLPClusterer::LocalLPClusterer(const Context &ctx) LocalLPClusterer::~LocalLPClusterer() = default; void LocalLPClusterer::set_communities(const StaticArray &communities) { - _impl->_partition = communities.data(); + _impl->set_communities(communities); } void LocalLPClusterer::clear_communities() { - _impl->_partition = nullptr; + _impl->clear_communities(); } void LocalLPClusterer::set_max_cluster_weight(GlobalNodeWeight weight) { diff --git a/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.h b/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.h index f19baba0..128df553 100644 --- a/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.h +++ b/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.h @@ -33,6 +33,6 @@ class LocalLPClusterer : public Clusterer { void cluster(StaticArray &clustering, const DistributedGraph &graph) final; private: - std::unique_ptr _impl; + std::unique_ptr _impl; }; } // namespace kaminpar::dist diff --git a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc index c61a8b57..dd98b99f 100644 --- a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc +++ b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc @@ -38,20 +38,6 @@ SET_STATISTICS_FROM_GLOBAL(); SET_DEBUG(false); } // namespace -std::unique_ptr contract_clustering( - const DistributedGraph &graph, - StaticArray &clustering, - const CoarseningContext &c_ctx -) { - return contract_clustering( - graph, - clustering, - c_ctx.max_cnode_imbalance, - c_ctx.migrate_cnode_prefix, - c_ctx.force_perfect_cnode_balance - ); -} - namespace { // Stores technical mappings necessary to project a partition of the coarse graph to the fine graph. // Part of the contraction result and should not be used outside the `project_partition()` function. 
@@ -100,11 +86,13 @@ class GlobalCoarseGraphImpl : public CoarseGraph { ); TIMED_SCOPE("Exchange migrated node blocks") { - tbb::parallel_for(0, migrated_nodes_sendbuf.size(), [&](const std::size_t i) { - const NodeID lcnode = _migration.nodes[i]; - const BlockID block = c_partition[lcnode]; - const GlobalNodeID gcnode = _c_graph.local_to_global_node(lcnode); - migrated_nodes_sendbuf[i] = {.gcnode = gcnode, .block = block}; + _c_graph.reified([&](const auto &graph) { + tbb::parallel_for(0, migrated_nodes_sendbuf.size(), [&](const std::size_t i) { + const NodeID lcnode = _migration.nodes[i]; + const BlockID block = c_partition[lcnode]; + const GlobalNodeID gcnode = graph.local_to_global_node(lcnode); + migrated_nodes_sendbuf[i] = {.gcnode = gcnode, .block = block}; + }); }); MPI_Alltoallv( @@ -135,20 +123,22 @@ class GlobalCoarseGraphImpl : public CoarseGraph { } ); - _f_graph.pfor_nodes_range([&](const auto &r) { - auto &gcnode_to_block_handle = gcnode_to_block_handle_ets.local(); + _c_graph.reified([&](const auto &graph) { + _f_graph.pfor_nodes_range([&](const auto &r) { + auto &gcnode_to_block_handle = gcnode_to_block_handle_ets.local(); - for (NodeID u = r.begin(); u != r.end(); ++u) { - const GlobalNodeID gcnode = _mapping[u]; - if (_c_graph.is_owned_global_node(gcnode)) { - const NodeID lcnode = _c_graph.global_to_local_node(gcnode); - f_partition[u] = c_partition[lcnode]; - } else { - auto it = gcnode_to_block_handle.find(gcnode + 1); - KASSERT(it != gcnode_to_block_handle.end(), V(gcnode)); - f_partition[u] = (*it).second; + for (NodeID u = r.begin(); u != r.end(); ++u) { + const GlobalNodeID gcnode = _mapping[u]; + if (graph.is_owned_global_node(gcnode)) { + const NodeID lcnode = graph.global_to_local_node(gcnode); + f_partition[u] = c_partition[lcnode]; + } else { + auto it = gcnode_to_block_handle.find(gcnode + 1); + KASSERT(it != gcnode_to_block_handle.end(), V(gcnode)); + f_partition[u] = (*it).second; + } } - } + }); }); }; @@ -157,20 +147,22 @@ class 
GlobalCoarseGraphImpl : public CoarseGraph { BlockID block; }; - mpi::graph::sparse_alltoall_interface_to_pe( - _f_graph, - [&](const NodeID lnode) -> GhostNodeLabel { - return {lnode, f_partition[lnode]}; - }, - [&](const auto buffer, const PEID pe) { - tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { - const auto &[sender_lnode, block] = buffer[i]; - const GlobalNodeID gnode = _f_graph.offset_n(pe) + sender_lnode; - const NodeID lnode = _f_graph.global_to_local_node(gnode); - f_partition[lnode] = block; - }); - } - ); + _f_graph.reified([&](const auto &graph) { + mpi::graph::sparse_alltoall_interface_to_pe( + graph, + [&](const NodeID lnode) -> GhostNodeLabel { + return {lnode, f_partition[lnode]}; + }, + [&](const auto buffer, const PEID pe) { + tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { + const auto &[sender_lnode, block] = buffer[i]; + const GlobalNodeID gnode = graph.offset_n(pe) + sender_lnode; + const NodeID lnode = graph.global_to_local_node(gnode); + f_partition[lnode] = block; + }); + } + ); + }); } private: @@ -218,9 +210,9 @@ struct MigratedNodesMapping { StaticArray their_req_to_lcnode; }; -StaticArray find_nonlocal_nodes( - const DistributedGraph &graph, const StaticArray &lnode_to_gcluster -) { +template +StaticArray +find_nonlocal_nodes(const Graph &graph, const StaticArray &lnode_to_gcluster) { SCOPED_TIMER("Collect nonlocal nodes"); StaticArray node_position_buffer(graph.n() + 1); @@ -245,9 +237,9 @@ StaticArray find_nonlocal_nodes( return nonlocal_nodes; } -StaticArray find_nonlocal_edges( - const DistributedGraph &graph, const StaticArray &lnode_to_gcluster -) { +template +StaticArray +find_nonlocal_edges(const Graph &graph, const StaticArray &lnode_to_gcluster) { SCOPED_TIMER("Collect nonlocal edges"); StaticArray edge_position_buffer(graph.n() + 1); @@ -349,7 +341,7 @@ void sort_node_list(StaticArray &nodes) { }); } -void update_ghost_node_weights(DistributedGraph &graph) { +template void 
update_ghost_node_weights(Graph &graph) { SCOPED_TIMER("Update ghost node weights"); struct Message { @@ -401,8 +393,9 @@ template double compute_distribution_imbalance(const StaticArray return 1.0 * max / (1.0 * distribution.back() / (distribution.size() - 1)); } +template StaticArray build_lcluster_to_lcnode_mapping( - const DistributedGraph &graph, + const Graph &graph, const StaticArray &lnode_to_gcluster, const StaticArray &local_nodes ) { @@ -450,8 +443,9 @@ void localize_global_edge_list( }); } +template std::pair, StaticArray> build_node_buckets( - const DistributedGraph &graph, + const Graph &graph, const StaticArray &lcluster_to_lcnode, const GlobalNodeID c_n, const StaticArray &local_edges, @@ -559,8 +553,9 @@ MigrationResult migrate_elements( .rdispls = std::move(rdispls)}; } +template MigrationResult -migrate_nodes(const DistributedGraph &graph, const StaticArray &nonlocal_nodes) { +migrate_nodes(const Graph &graph, const StaticArray &nonlocal_nodes) { SCOPED_TIMER("Exchange nonlocal nodes"); const PEID size = mpi::get_comm_size(graph.communicator()); @@ -579,8 +574,9 @@ migrate_nodes(const DistributedGraph &graph, const StaticArray &nonl return migrate_elements(num_nodes_for_pe, nonlocal_nodes, graph.communicator()); } +template MigrationResult -migrate_edges(const DistributedGraph &graph, const StaticArray &nonlocal_edges) { +migrate_edges(const Graph &graph, const StaticArray &nonlocal_edges) { SCOPED_TIMER("Exchange nonlocal edges"); const PEID size = mpi::get_comm_size(graph.communicator()); @@ -614,8 +610,9 @@ migrate_edges(const DistributedGraph &graph, const StaticArray &nonl return migrate_elements(num_edges_for_pe, nonlocal_edges, graph.communicator()); } +template MigratedNodesMapping exchange_migrated_nodes_mapping( - const DistributedGraph &graph, + const Graph &graph, const StaticArray &nonlocal_nodes, const MigrationResult &local_nodes, const StaticArray &lcluster_to_lcnode, @@ -861,8 +858,9 @@ AssignmentShifts compute_assignment_shifts( 
}; } +template void rebalance_cluster_placement( - const DistributedGraph &graph, + const Graph &graph, const StaticArray &current_cnode_distribution, const StaticArray &lcluster_to_lcnode, const StaticArray &nonlocal_gcluster_to_gcnode, @@ -1010,12 +1008,14 @@ bool validate_clustering( } } // namespace debug +template std::unique_ptr contract_clustering( - const DistributedGraph &graph, + const DistributedGraph &fine_graph, + const Graph &graph, StaticArray &lnode_to_gcluster, - const double max_cnode_imbalance, - const bool migrate_cnode_prefix, - const bool force_perfect_cnode_balance + const double max_cnode_imbalance = std::numeric_limits::max(), + const bool migrate_cnode_prefix = false, + const bool force_perfect_cnode_balance = true ) { TIMER_BARRIER(graph.communicator()); START_TIMER("Contract clustering"); @@ -1101,7 +1101,7 @@ std::unique_ptr contract_clustering( // max_imbalance (this is because the subgraph of a PE cannot grow in size during coarsening). // Thus, we accept any imbalance for the "rebalanced try" to avoid an infinite loop. // @todo can this actually happen? 
- return contract_clustering(graph, lnode_to_gcluster); + return contract_clustering(fine_graph, graph, lnode_to_gcluster); } auto nonlocal_edges = find_nonlocal_edges(graph, lnode_to_gcluster); @@ -1476,7 +1476,7 @@ std::unique_ptr contract_clustering( } }); - DistributedGraph c_graph(std::make_unique( + DistributedCSRGraph coarse_csr_graph( std::move(c_node_distribution), std::move(c_edge_distribution), std::move(c_nodes), @@ -1488,16 +1488,16 @@ std::unique_ptr contract_clustering( std::move(c_global_to_ghost), false, graph.communicator() - )); + ); STOP_TIMER(); - update_ghost_node_weights(c_graph); + update_ghost_node_weights(coarse_csr_graph); STOP_TIMER(); // Contract clustering timer return std::make_unique( - graph, - std::move(c_graph), + fine_graph, + DistributedGraph(std::make_unique(std::move(coarse_csr_graph))), std::move(lnode_to_gcnode), MigratedNodes{ .nodes = std::move(their_req_to_lcnode), @@ -1508,4 +1508,50 @@ std::unique_ptr contract_clustering( } ); } + +std::unique_ptr contract_clustering( + const DistributedGraph &graph, + StaticArray &clustering, + const CoarseningContext &c_ctx +) { + return contract_clustering( + graph, + clustering, + c_ctx.max_cnode_imbalance, + c_ctx.migrate_cnode_prefix, + c_ctx.force_perfect_cnode_balance + ); +} + +std::unique_ptr contract_clustering( + const DistributedGraph &graph, + StaticArray &clustering, + double max_cnode_imbalance, + bool migrate_cnode_prefix, + bool force_perfect_cnode_balance +) { + return graph.reified( + [&](const DistributedCSRGraph &csr_graph) { + return contract_clustering( + graph, + csr_graph, + clustering, + max_cnode_imbalance, + migrate_cnode_prefix, + force_perfect_cnode_balance + ); + }, + [&](const DistributedCompressedGraph &compressed_graph) { + return contract_clustering( + graph, + compressed_graph, + clustering, + max_cnode_imbalance, + migrate_cnode_prefix, + force_perfect_cnode_balance + ); + } + ); +} + } // namespace kaminpar::dist diff --git 
a/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc index aaefcf38..4fe53a28 100644 --- a/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc +++ b/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc @@ -63,8 +63,10 @@ class LocalCoarseGraphImpl : public CoarseGraph { }; } // namespace -std::unique_ptr -contract_local_clustering(const DistributedGraph &graph, const StaticArray &clustering) { +template +std::unique_ptr contract_local_clustering( + const DistributedGraph &fine_graph, const Graph &graph, const StaticArray &clustering +) { KASSERT( clustering.size() >= graph.n(), "clustering array is too small for the given graph", @@ -284,6 +286,19 @@ contract_local_clustering(const DistributedGraph &graph, const StaticArray(graph, std::move(c_graph), std::move(mapping)); + return std::make_unique(fine_graph, std::move(c_graph), std::move(mapping)); +} + +std::unique_ptr +contract_local_clustering(const DistributedGraph &graph, const StaticArray &clustering) { + return graph.reified( + [&](const DistributedCSRGraph &csr_graph) { + return contract_local_clustering(graph, csr_graph, clustering); + }, + [&](const DistributedCompressedGraph &compressed_graph) { + return contract_local_clustering(graph, compressed_graph, clustering); + } + ); } + } // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index 5d2ccba1..75d18a2a 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -33,7 +33,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { using AbstractDistributedGraph::NodeID; using AbstractDistributedGraph::NodeWeight; - using CompressedEdges = CompressedEdges; + using CompressedEdges = kaminpar::CompressedEdges; DistributedCompressedGraph( 
StaticArray node_distribution, diff --git a/kaminpar-dist/datastructures/distributed_graph.h b/kaminpar-dist/datastructures/distributed_graph.h index 1bb1655b..60006d96 100644 --- a/kaminpar-dist/datastructures/distributed_graph.h +++ b/kaminpar-dist/datastructures/distributed_graph.h @@ -426,22 +426,27 @@ class DistributedGraph : public AbstractDistributedGraph { return _underlying_graph->get_color_sizes(); } -private: - std::unique_ptr _underlying_graph; - - template decltype(auto) reified(Lambda &&l) const { + template + decltype(auto) reified(Lambda1 &&l1, Lambda2 &&l2) const { const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); if (const auto *graph = dynamic_cast(abstract_graph); graph != nullptr) { - return l(*graph); + return l1(*graph); } else if (const auto *graph = dynamic_cast(abstract_graph); graph != nullptr) { - return l(*graph); + return l2(*graph); } __builtin_unreachable(); } + + template decltype(auto) reified(Lambda &&l) const { + return reified(std::forward(l), std::forward(l)); + } + +private: + std::unique_ptr _underlying_graph; }; /** diff --git a/kaminpar-dist/datastructures/distributed_partitioned_graph.h b/kaminpar-dist/datastructures/distributed_partitioned_graph.h index 1764838d..12e518c4 100644 --- a/kaminpar-dist/datastructures/distributed_partitioned_graph.h +++ b/kaminpar-dist/datastructures/distributed_partitioned_graph.h @@ -134,6 +134,7 @@ class DistributedPartitionedGraph { [[nodiscard]] inline MPI_Comm communicator() const { return _graph->communicator(); } [[nodiscard]] inline bool permuted() const { return _graph->permuted(); } [[nodiscard]] inline NodeID map_original_node(const NodeID u) const { return _graph->map_original_node(u); } + template decltype(auto) reified(Lambda1 &&l1, Lambda2 &&l2) const { return _graph->reified(std::forward(l1), std::forward(l2)); } // clang-format on [[nodiscard]] BlockID k() const { diff --git a/kaminpar-dist/distributed_label_propagation.h 
b/kaminpar-dist/distributed_label_propagation.h index 31d3ff98..9d233410 100644 --- a/kaminpar-dist/distributed_label_propagation.h +++ b/kaminpar-dist/distributed_label_propagation.h @@ -27,8 +27,6 @@ namespace kaminpar::dist { struct LabelPropagationConfig { - using Graph = DistributedGraph; - // Data structure used to accumulate edge weights for gain value calculation using RatingMap = ::kaminpar::RatingMap; @@ -68,7 +66,7 @@ struct LabelPropagationConfig { * @tparam Derived Derived class for static polymorphism. * @tparam Config Algorithmic configuration and data types. */ -template class LabelPropagation { +template class LabelPropagation { static_assert(std::is_base_of_v); SET_DEBUG(false); @@ -76,7 +74,6 @@ template class LabelPropagation { protected: using RatingMap = typename Config::RatingMap; - using Graph = typename Config::Graph; using NodeID = typename Graph::NodeID; using NodeWeight = typename Graph::NodeWeight; using EdgeID = typename Graph::EdgeID; @@ -849,15 +846,14 @@ template class LabelPropagation { * @tparam Derived Derived subclass for static polymorphism. * @tparam Config Algorithmic configuration and data types. */ -template -class InOrderLabelPropagation : public LabelPropagation { +template +class InOrderLabelPropagation : public LabelPropagation { static_assert(std::is_base_of_v); SET_DEBUG(true); protected: - using Base = LabelPropagation; + using Base = LabelPropagation; - using Graph = typename Base::Graph; using ClusterID = typename Base::ClusterID; using ClusterWeight = typename Base::ClusterWeight; using EdgeID = typename Base::EdgeID; @@ -933,15 +929,14 @@ class InOrderLabelPropagation : public LabelPropagation { * @tparam Derived Derived subclass for static polymorphism. * @tparam Config Algorithmic configuration and data types. 
*/ -template -class ChunkRandomdLabelPropagation : public LabelPropagation { - using Base = LabelPropagation; +template +class ChunkRandomdLabelPropagation : public LabelPropagation { + using Base = LabelPropagation; static_assert(std::is_base_of_v); SET_DEBUG(false); protected: - using Graph = typename Base::Graph; using ClusterID = typename Base::ClusterID; using ClusterWeight = typename Base::ClusterWeight; using EdgeID = typename Base::EdgeID; diff --git a/kaminpar-dist/refinement/balancer/node_balancer.cc b/kaminpar-dist/refinement/balancer/node_balancer.cc index cff206c4..94c83222 100644 --- a/kaminpar-dist/refinement/balancer/node_balancer.cc +++ b/kaminpar-dist/refinement/balancer/node_balancer.cc @@ -14,8 +14,12 @@ #include "kaminpar-dist/logger.h" #include "kaminpar-dist/metrics.h" #include "kaminpar-dist/refinement/balancer/reductions.h" +#include "kaminpar-dist/refinement/balancer/weight_buckets.h" +#include "kaminpar-dist/refinement/gain_calculator.h" #include "kaminpar-dist/timer.h" +#include "kaminpar-common/datastructures/binary_heap.h" +#include "kaminpar-common/datastructures/marker.h" #include "kaminpar-common/random.h" #define HEAVY assert::heavy @@ -26,696 +30,755 @@ SET_STATISTICS_FROM_GLOBAL(); SET_DEBUG(false); } // namespace -NodeBalancerFactory::NodeBalancerFactory(const Context &ctx) : _ctx(ctx) {} - -std::unique_ptr -NodeBalancerFactory::create(DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx) { - return std::make_unique(_ctx, p_graph, p_ctx); -} - -NodeBalancer::NodeBalancer( - const Context &ctx, DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx -) - : _p_graph(p_graph), - _ctx(ctx), - _nb_ctx(ctx.refinement.node_balancer), - _p_ctx(p_ctx), - _pq(p_graph.n(), p_graph.k()), - _pq_weight(p_graph.k()), - _marker(p_graph.n()), - _buckets( - p_graph, p_ctx, _nb_ctx.par_enable_positive_gain_buckets, _nb_ctx.par_gain_bucket_base - ), - _cached_cutoff_buckets(_p_graph.k()), - _gain_calculator(_p_ctx.k), - 
_target_blocks(_p_graph.n()), - _tmp_gains(!_nb_ctx.par_update_pq_gains * _p_graph.n()) { - _gain_calculator.init(_p_graph); -} +// +// Implementation +// + +template class NodeBalancer : public GlobalRefiner { + struct Candidate { + GlobalNodeID id; + BlockID from; + BlockID to; + NodeWeight weight; + double gain; + }; + +public: + NodeBalancer( + const Context &ctx, + DistributedPartitionedGraph &p_graph, + const Graph &graph, + const PartitionContext &p_ctx + ) + : _p_graph(p_graph), + _graph(graph), + _ctx(ctx), + _nb_ctx(ctx.refinement.node_balancer), + _p_ctx(p_ctx), + _pq(_graph.n(), p_graph.k()), + _pq_weight(p_graph.k()), + _marker(p_graph.n()), + _buckets( + p_graph, p_ctx, _nb_ctx.par_enable_positive_gain_buckets, _nb_ctx.par_gain_bucket_base + ), + _cached_cutoff_buckets(_p_graph.k()), + _gain_calculator(_p_ctx.k), + _target_blocks(_graph.n()), + _tmp_gains(!_nb_ctx.par_update_pq_gains * _graph.n()) { + _gain_calculator.init(_p_graph, _graph); + } -void NodeBalancer::initialize() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Node balancer"); + NodeBalancer(const NodeBalancer &) = delete; + NodeBalancer &operator=(const NodeBalancer &) = delete; - START_TIMER("Initialization"); - reinit(); - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); -} + NodeBalancer(NodeBalancer &&) noexcept = default; + NodeBalancer &operator=(NodeBalancer &&) = delete; -void NodeBalancer::reinit() { - // debug::print_local_graph_stats(_p_graph.graph()); + void initialize() final { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Node balancer"); - // Only initialize the balancer is the partition is actually imbalanced - if (metrics::is_feasible(_p_graph, _p_ctx)) { - return; + START_TIMER("Initialization"); + reinit(); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); } - // Allocate _marker memory - _marker.reset(); - if (_marker.capacity() < _p_graph.n()) { - _marker.resize(_p_graph.n()); - } + bool refine() final { + 
TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Node balancer"); - // Allocate helper PQs - tbb::enumerable_thread_specific>> local_pq_ets{ - [&] { - return std::vector>(_p_graph.k()); - }}; - tbb::enumerable_thread_specific> local_pq_weight_ets{[&] { - return std::vector(_p_graph.k()); - }}; - - // Build thread-local PQs: one PQ for each thread and block, each PQ for block - // b has at most roughly |overload[b]| weight - tbb::parallel_for(static_cast(0), _p_graph.n(), [&](const NodeID u) { - if (_p_graph.degree(u) > _nb_ctx.par_high_degree_insertion_threshold) { - return; + // Only balance the partition if it is infeasible + if (metrics::is_feasible(_p_graph, _p_ctx)) { + return false; } - auto &pq = local_pq_ets.local(); - auto &pq_weight = local_pq_weight_ets.local(); + KASSERT(debug::validate_partition(_p_graph), "invalid partition before balancing", HEAVY); - const BlockID from = _p_graph.block(u); - const BlockWeight overload = block_overload(from); + const PEID size = mpi::get_comm_size(_graph.communicator()); + const PEID rank = mpi::get_comm_rank(_graph.communicator()); - if (overload > 0) { // Node in overloaded block - const auto max_gainer = _gain_calculator.compute_max_gainer(u, _p_ctx); - const double rel_gain = max_gainer.relative_gain(); - _target_blocks[u] = max_gainer.block; + double previous_imbalance_distance = + is_sequential_balancing_enabled() ? 
metrics::imbalance_l1(_p_graph, _p_ctx) : 0.0; - const bool need_more_nodes = (pq_weight[from] < overload); - if (need_more_nodes || pq[from].empty() || rel_gain > pq[from].peek_key()) { - if (!need_more_nodes) { - const NodeWeight u_weight = _p_graph.node_weight(u); - const NodeWeight min_weight = _p_graph.node_weight(pq[from].peek_id()); - if (pq_weight[from] + u_weight - min_weight >= overload) { - pq[from].pop(); - } + for (int round = 0; round < _nb_ctx.max_num_rounds; round++) { + TIMER_BARRIER(_graph.communicator()); + DBG0 << "Starting rebalancing round " << round << " of (at most) " << _nb_ctx.max_num_rounds; + + if (metrics::is_feasible(_p_graph, _p_ctx)) { + DBG0 << "Partition is feasible ==> terminating"; + break; + } + + if (is_sequential_balancing_enabled() && !perform_sequential_round()) { + if (!_stalled) { + DBG0 << "Sequential round stalled: switch to stalled mode"; + switch_to_stalled(); + continue; + } else { + DBG0 << "Terminated by sequential round"; + break; } - pq[from].push(u, rel_gain); - _marker.set(u); } - } - }); - // Build global PQ: one PQ per block, block-level parallelism - _pq.clear(); - if (_pq.capacity() < _p_graph.n()) { - _pq = DynamicBinaryMinMaxForest(_p_graph.n(), _ctx.partition.k); - } + if (is_parallel_balancing_enabled()) { + const double current_imbalance_distance = metrics::imbalance_l1(_p_graph, _p_ctx); + const double seq_rebalance_rate = + (previous_imbalance_distance - current_imbalance_distance) / + previous_imbalance_distance; + + DBG0 << "Sequential rebalancing changed imbalance: " << previous_imbalance_distance + << " --> " << current_imbalance_distance << " = by " << seq_rebalance_rate + << "; threshold: " << _ctx.refinement.node_balancer.par_threshold; + + if (seq_rebalance_rate < _nb_ctx.par_threshold || !is_sequential_balancing_enabled()) { + if (!perform_parallel_round(round)) { + DBG0 << "Parallel round stalled: switch to stalled mode"; + switch_to_stalled(); + continue; + } - 
_p_graph.pfor_blocks([&](const BlockID block) { - _pq_weight[block] = 0; + const double next_imbalance_distance = metrics::imbalance_l1(_p_graph, _p_ctx); + [[maybe_unused]] const double par_rebalance_rate = + (current_imbalance_distance - next_imbalance_distance) / current_imbalance_distance; + DBG0 << "Parallel rebalancing changed imbalance: " << current_imbalance_distance + << " --> " << next_imbalance_distance << " = by " << par_rebalance_rate; - for (auto &pq : local_pq_ets) { - for (const auto &[u, rel_gain] : pq[block].elements()) { - try_pq_insertion(block, u, _p_graph.node_weight(u), rel_gain); - } - } - }); + if (current_imbalance_distance == next_imbalance_distance) { + DBG0 << "Parallel round stalled: switch to stalled mode"; + switch_to_stalled(); + // no continue -> update previous_imbalance_distance + } - _stalled = false; -} + previous_imbalance_distance = next_imbalance_distance; + } else { + previous_imbalance_distance = current_imbalance_distance; + } + } -bool NodeBalancer::refine() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Node balancer"); + KASSERT( + debug::validate_partition(_p_graph), "invalid partition after balancing round", HEAVY + ); + } - // Only balance the partition if it is infeasible - if (metrics::is_feasible(_p_graph, _p_ctx)) { + KASSERT(debug::validate_partition(_p_graph), "invalid partition after balancing", HEAVY); return false; } - KASSERT(debug::validate_partition(_p_graph), "invalid partition before balancing", HEAVY); - - const PEID size = mpi::get_comm_size(_p_graph.communicator()); - const PEID rank = mpi::get_comm_rank(_p_graph.communicator()); - - double previous_imbalance_distance = - is_sequential_balancing_enabled() ? 
metrics::imbalance_l1(_p_graph, _p_ctx) : 0.0; - - for (int round = 0; round < _nb_ctx.max_num_rounds; round++) { - TIMER_BARRIER(_p_graph.communicator()); - DBG0 << "Starting rebalancing round " << round << " of (at most) " << _nb_ctx.max_num_rounds; +private: + void reinit() { + // debug::print_local_graph_stats(_p_graph.graph()); + // Only initialize the balancer is the partition is actually imbalanced if (metrics::is_feasible(_p_graph, _p_ctx)) { - DBG0 << "Partition is feasible ==> terminating"; - break; + return; } - if (is_sequential_balancing_enabled() && !perform_sequential_round()) { - if (!_stalled) { - DBG0 << "Sequential round stalled: switch to stalled mode"; - switch_to_stalled(); - continue; - } else { - DBG0 << "Terminated by sequential round"; - break; - } + // Allocate _marker memory + _marker.reset(); + if (_marker.capacity() < _graph.n()) { + _marker.resize(_graph.n()); } - if (is_parallel_balancing_enabled()) { - const double current_imbalance_distance = metrics::imbalance_l1(_p_graph, _p_ctx); - const double seq_rebalance_rate = - (previous_imbalance_distance - current_imbalance_distance) / previous_imbalance_distance; + // Allocate helper PQs + tbb::enumerable_thread_specific>> local_pq_ets{ + [&] { + return std::vector>(_p_graph.k()); + }}; + tbb::enumerable_thread_specific> local_pq_weight_ets{[&] { + return std::vector(_p_graph.k()); + }}; + + // Build thread-local PQs: one PQ for each thread and block, each PQ for block + // b has at most roughly |overload[b]| weight + tbb::parallel_for(static_cast(0), _graph.n(), [&](const NodeID u) { + if (_graph.degree(u) > _nb_ctx.par_high_degree_insertion_threshold) { + return; + } - DBG0 << "Sequential rebalancing changed imbalance: " << previous_imbalance_distance << " --> " - << current_imbalance_distance << " = by " << seq_rebalance_rate - << "; threshold: " << _ctx.refinement.node_balancer.par_threshold; + auto &pq = local_pq_ets.local(); + auto &pq_weight = local_pq_weight_ets.local(); - if 
(seq_rebalance_rate < _nb_ctx.par_threshold || !is_sequential_balancing_enabled()) { - if (!perform_parallel_round(round)) { - DBG0 << "Parallel round stalled: switch to stalled mode"; - switch_to_stalled(); - continue; - } + const BlockID from = _p_graph.block(u); + const BlockWeight overload = block_overload(from); - const double next_imbalance_distance = metrics::imbalance_l1(_p_graph, _p_ctx); - [[maybe_unused]] const double par_rebalance_rate = - (current_imbalance_distance - next_imbalance_distance) / current_imbalance_distance; - DBG0 << "Parallel rebalancing changed imbalance: " << current_imbalance_distance << " --> " - << next_imbalance_distance << " = by " << par_rebalance_rate; + if (overload > 0) { // Node in overloaded block + const auto max_gainer = _gain_calculator.compute_max_gainer(u, _p_ctx); + const double rel_gain = max_gainer.relative_gain(); + _target_blocks[u] = max_gainer.block; - if (current_imbalance_distance == next_imbalance_distance) { - DBG0 << "Parallel round stalled: switch to stalled mode"; - switch_to_stalled(); - // no continue -> update previous_imbalance_distance + const bool need_more_nodes = (pq_weight[from] < overload); + if (need_more_nodes || pq[from].empty() || rel_gain > pq[from].peek_key()) { + if (!need_more_nodes) { + const NodeWeight u_weight = _graph.node_weight(u); + const NodeWeight min_weight = _graph.node_weight(pq[from].peek_id()); + if (pq_weight[from] + u_weight - min_weight >= overload) { + pq[from].pop(); + } + } + pq[from].push(u, rel_gain); + _marker.set(u); } - - previous_imbalance_distance = next_imbalance_distance; - } else { - previous_imbalance_distance = current_imbalance_distance; } - } + }); - KASSERT(debug::validate_partition(_p_graph), "invalid partition after balancing round", HEAVY); - } + // Build global PQ: one PQ per block, block-level parallelism + _pq.clear(); + if (_pq.capacity() < _graph.n()) { + _pq = DynamicBinaryMinMaxForest(_graph.n(), _ctx.partition.k); + } - 
KASSERT(debug::validate_partition(_p_graph), "invalid partition after balancing", HEAVY); - return false; -} + _p_graph.pfor_blocks([&](const BlockID block) { + _pq_weight[block] = 0; -void NodeBalancer::switch_to_stalled() { - TIMER_BARRIER(_p_graph.communicator()); + for (auto &pq : local_pq_ets) { + for (const auto &[u, rel_gain] : pq[block].elements()) { + try_pq_insertion(block, u, _graph.node_weight(u), rel_gain); + } + } + }); - _stalled = true; + _stalled = false; + } - // Reinit the balancer to fix blocks that were not overloaded in the beginning, but are - // overloaded now due to imbalanced parallel moves - START_TIMER("Reinitialize"); - reinit(); - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); -} + bool is_sequential_balancing_enabled() const { + return _stalled || _nb_ctx.enable_sequential_balancing; + } + bool is_parallel_balancing_enabled() const { + return !_stalled && _nb_ctx.enable_parallel_balancing; + } -bool NodeBalancer::perform_sequential_round() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Sequential round"); + bool perform_sequential_round() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Sequential round"); - const PEID rank = mpi::get_comm_rank(_p_graph.communicator()); + const PEID rank = mpi::get_comm_rank(_graph.communicator()); - START_TIMER("Pick and reduce move candidates"); - auto candidates = reduce_candidates( - pick_sequential_candidates(), - _ctx.refinement.node_balancer.seq_num_nodes_per_block, - _p_graph, - _p_ctx - ); - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - START_TIMER("Perform moves on root PE"); - if (rank == 0) { - // Move nodes that already have a target block - for (const auto &move : candidates) { - if (move.from != move.to) { - perform_move(move, true); + START_TIMER("Pick and reduce move candidates"); + auto candidates = reduce_candidates( + pick_sequential_candidates(), + _ctx.refinement.node_balancer.seq_num_nodes_per_block, + _p_graph, + _p_ctx + ); + 
STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); + + START_TIMER("Perform moves on root PE"); + if (rank == 0) { + // Move nodes that already have a target block + for (const auto &move : candidates) { + if (move.from != move.to) { + perform_move(move, true); + } } - } - // Move nodes that do not have a target block - BlockID cur = 0; - for (auto &candidate : candidates) { - auto &[node, from, to, weight, rel_gain] = candidate; - - if (from == to) { - // Look for next block that can take node - while (cur == from || - _p_graph.block_weight(cur) + weight > _p_ctx.graph->max_block_weight(cur)) { - ++cur; - if (cur >= _p_ctx.k) { - cur = 0; + // Move nodes that do not have a target block + BlockID cur = 0; + for (auto &candidate : candidates) { + auto &[node, from, to, weight, rel_gain] = candidate; + + if (from == to) { + // Look for next block that can take node + while (cur == from || + _p_graph.block_weight(cur) + weight > _p_ctx.graph->max_block_weight(cur)) { + ++cur; + if (cur >= _p_ctx.k) { + cur = 0; + } } - } - to = cur; - perform_move(candidate, true); + to = cur; + perform_move(candidate, true); + } } } - } - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - // Broadcast winners - START_TIMER("Broadcast winners"); - const std::size_t num_winners = mpi::bcast(candidates.size(), 0, _p_graph.communicator()); - candidates.resize(num_winners); - mpi::bcast(candidates.data(), num_winners, 0, _p_graph.communicator()); - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - START_TIMER("Perform moves"); - if (rank != 0) { - perform_moves(candidates, true); - } - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - KASSERT(debug::validate_partition(_p_graph), "balancer produced invalid partition", HEAVY); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); - return num_winners > 0; -} + // Broadcast winners + START_TIMER("Broadcast winners"); + const std::size_t num_winners = mpi::bcast(candidates.size(), 0, _graph.communicator()); + 
candidates.resize(num_winners); + mpi::bcast(candidates.data(), num_winners, 0, _graph.communicator()); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); -void NodeBalancer::perform_moves( - const std::vector &moves, const bool update_block_weights -) { - for (const auto &move : moves) { - perform_move(move, update_block_weights); - } -} + START_TIMER("Perform moves"); + if (rank != 0) { + perform_moves(candidates, true); + } + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); -void NodeBalancer::perform_move(const Candidate &move, const bool update_block_weights) { - const auto &[node, from, to, weight, rel_gain] = move; + KASSERT(debug::validate_partition(_p_graph), "balancer produced invalid partition", HEAVY); - if (from == to) { // Should only happen on root - KASSERT(mpi::get_comm_rank(_p_graph.communicator()) == 0); - return; + return num_winners > 0; } - if (_p_graph.contains_global_node(node)) { - const NodeID u = _p_graph.global_to_local_node(node); + std::vector pick_sequential_candidates() { + std::vector candidates; - if (_p_graph.is_owned_global_node(node)) { // Move node on this PE - KASSERT(u < _p_graph.n()); - KASSERT(_pq.contains(u)); - - _pq.remove(from, u); - _pq_weight[from] -= weight; + for (const BlockID from : _p_graph.blocks()) { + if (block_overload(from) == 0) { + continue; + } - // Activate neighbors - _p_graph.adjacent_nodes(u, [&, from = from](const NodeID v) { - if (!_p_graph.is_owned_node(v)) { - return; + // Fetch up to `num_nodes_per_block` move candidates from the PQ, + // but keep them in the PQ, since they might not get moved + NodeID num = 0; + for (num = 0; num < _nb_ctx.seq_num_nodes_per_block; ++num) { + if (_pq.empty(from)) { + break; } - if (!_marker.get(v) && _p_graph.block(v) == from) { - try_pq_insertion(from, v); - _marker.set(v); + const NodeID u = _pq.peek_max_id(from); + const double relative_gain = _pq.peek_max_key(from); + const NodeWeight u_weight = _graph.node_weight(u); + _pq.pop_max(from); + 
_pq_weight[from] -= u_weight; + + const auto max_gainer = _gain_calculator.compute_max_gainer(u, _p_ctx); + const double actual_relative_gain = max_gainer.relative_gain(); + const BlockID to = max_gainer.block; + _target_blocks[u] = to; + + if (relative_gain == actual_relative_gain) { + Candidate candidate{ + _graph.local_to_global_node(u), from, to, u_weight, actual_relative_gain}; + candidates.push_back(candidate); + } else { + try_pq_insertion(from, u, u_weight, actual_relative_gain); + --num; // Retry } - }); + } + + for (NodeID rnum = 0; rnum < num; ++rnum) { + KASSERT(candidates.size() > rnum); + const auto &candidate = candidates[candidates.size() - rnum - 1]; + _pq.push(from, _graph.global_to_local_node(candidate.id), candidate.gain); + _pq_weight[from] += candidate.weight; + } } - if (update_block_weights) { - _p_graph.set_block(u, to); - } else { - _p_graph.set_block(u, to); + return candidates; + } + + void perform_moves(const std::vector &moves, bool update_block_weights) { + for (const auto &move : moves) { + perform_move(move, update_block_weights); } - } else if (update_block_weights) { // Only update block weight - _p_graph.set_block_weight(from, _p_graph.block_weight(from) - weight); - _p_graph.set_block_weight(to, _p_graph.block_weight(to) + weight); } -} -std::vector NodeBalancer::pick_sequential_candidates() { - std::vector candidates; + void perform_move(const Candidate &move, bool update_block_weights) { + const auto &[node, from, to, weight, rel_gain] = move; - for (const BlockID from : _p_graph.blocks()) { - if (block_overload(from) == 0) { - continue; + if (from == to) { // Should only happen on root + KASSERT(mpi::get_comm_rank(_graph.communicator()) == 0); + return; } - // Fetch up to `num_nodes_per_block` move candidates from the PQ, - // but keep them in the PQ, since they might not get moved - NodeID num = 0; - for (num = 0; num < _nb_ctx.seq_num_nodes_per_block; ++num) { - if (_pq.empty(from)) { - break; + if 
(_graph.contains_global_node(node)) { + const NodeID u = _graph.global_to_local_node(node); + + if (_graph.is_owned_global_node(node)) { // Move node on this PE + KASSERT(u < _graph.n()); + KASSERT(_pq.contains(u)); + + _pq.remove(from, u); + _pq_weight[from] -= weight; + + // Activate neighbors + _graph.adjacent_nodes(u, [&, from = from](const NodeID v) { + if (!_graph.is_owned_node(v)) { + return; + } + + if (!_marker.get(v) && _p_graph.block(v) == from) { + try_pq_insertion(from, v); + _marker.set(v); + } + }); } - const NodeID u = _pq.peek_max_id(from); - const double relative_gain = _pq.peek_max_key(from); - const NodeWeight u_weight = _p_graph.node_weight(u); - _pq.pop_max(from); - _pq_weight[from] -= u_weight; - - const auto max_gainer = _gain_calculator.compute_max_gainer(u, _p_ctx); - const double actual_relative_gain = max_gainer.relative_gain(); - const BlockID to = max_gainer.block; - _target_blocks[u] = to; - - if (relative_gain == actual_relative_gain) { - Candidate candidate{ - _p_graph.local_to_global_node(u), from, to, u_weight, actual_relative_gain}; - candidates.push_back(candidate); + if (update_block_weights) { + _p_graph.set_block(u, to); } else { - try_pq_insertion(from, u, u_weight, actual_relative_gain); - --num; // Retry + _p_graph.set_block(u, to); } - } - - for (NodeID rnum = 0; rnum < num; ++rnum) { - KASSERT(candidates.size() > rnum); - const auto &candidate = candidates[candidates.size() - rnum - 1]; - _pq.push(from, _p_graph.global_to_local_node(candidate.id), candidate.gain); - _pq_weight[from] += candidate.weight; + } else if (update_block_weights) { // Only update block weight + _p_graph.set_block_weight(from, _p_graph.block_weight(from) - weight); + _p_graph.set_block_weight(to, _p_graph.block_weight(to) + weight); } } - return candidates; -} - -BlockWeight NodeBalancer::block_overload(const BlockID block) const { - static_assert( - std::numeric_limits::is_signed, - "This must be changed when using an unsigned data type for " - 
"block weights!" - ); - KASSERT(block < _p_graph.k()); - return std::max( - 0, _p_graph.block_weight(block) - _p_ctx.graph->max_block_weight(block) - ); -} - -BlockWeight NodeBalancer::block_underload(const BlockID block) const { - static_assert( - std::numeric_limits::is_signed, - "This must be changed when using an unsigned data type for " - "block weights!" - ); - KASSERT(block < _p_graph.k()); - return std::max( - 0, _p_ctx.graph->max_block_weight(block) - _p_graph.block_weight(block) - ); -} - -bool NodeBalancer::try_pq_insertion(const BlockID b_u, const NodeID u) { - KASSERT(b_u == _p_graph.block(u)); + BlockWeight block_overload(BlockID b) const { + static_assert( + std::numeric_limits::is_signed, + "This must be changed when using an unsigned data type for " + "block weights!" + ); + KASSERT(b < _p_graph.k()); + return std::max(0, _p_graph.block_weight(b) - _p_ctx.graph->max_block_weight(b)); + } - if (_p_graph.degree(u) > _nb_ctx.par_high_degree_insertion_threshold) { - return false; + BlockWeight block_underload(BlockID b) const { + static_assert( + std::numeric_limits::is_signed, + "This must be changed when using an unsigned data type for " + "block weights!" 
+ ); + KASSERT(b < _p_graph.k()); + return std::max(0, _p_ctx.graph->max_block_weight(b) - _p_graph.block_weight(b)); } - const auto max_gainer = _gain_calculator.compute_max_gainer(u, _p_ctx); - _target_blocks[u] = max_gainer.block; - return try_pq_insertion(b_u, u, _p_graph.node_weight(u), max_gainer.relative_gain()); -} + bool try_pq_insertion(BlockID b_u, NodeID u) { + KASSERT(b_u == _p_graph.block(u)); -bool NodeBalancer::try_pq_insertion( - const BlockID b_u, const NodeID u, const NodeWeight w_u, const double rel_gain -) { - KASSERT(w_u == _p_graph.node_weight(u)); - KASSERT(b_u == _p_graph.block(u)); + if (_graph.degree(u) > _nb_ctx.par_high_degree_insertion_threshold) { + return false; + } - if (_p_graph.degree(u) > _nb_ctx.par_high_degree_insertion_threshold) { - return false; + const auto max_gainer = _gain_calculator.compute_max_gainer(u, _p_ctx); + _target_blocks[u] = max_gainer.block; + return try_pq_insertion(b_u, u, _graph.node_weight(u), max_gainer.relative_gain()); } - if (_pq_weight[b_u] < block_overload(b_u) || _pq.empty(b_u) || rel_gain > _pq.peek_min_key(b_u)) { - _pq.push(b_u, u, rel_gain); - _pq_weight[b_u] += w_u; + bool try_pq_insertion(BlockID b_u, NodeID u, NodeWeight u_weight, double rel_gain) { + KASSERT(u_weight == _graph.node_weight(u)); + KASSERT(b_u == _p_graph.block(u)); - if (rel_gain > _pq.peek_min_key(b_u)) { - const NodeID min_node = _pq.peek_min_id(b_u); - const NodeWeight min_weight = _p_graph.node_weight(min_node); - if (_pq_weight[b_u] - min_weight >= block_overload(b_u)) { - _pq.pop_min(b_u); - _pq_weight[b_u] -= min_weight; + if (_graph.degree(u) > _nb_ctx.par_high_degree_insertion_threshold) { + return false; + } + + if (_pq_weight[b_u] < block_overload(b_u) || _pq.empty(b_u) || + rel_gain > _pq.peek_min_key(b_u)) { + _pq.push(b_u, u, rel_gain); + _pq_weight[b_u] += u_weight; + + if (rel_gain > _pq.peek_min_key(b_u)) { + const NodeID min_node = _pq.peek_min_id(b_u); + const NodeWeight min_weight = 
_p_graph.node_weight(min_node); + if (_pq_weight[b_u] - min_weight >= block_overload(b_u)) { + _pq.pop_min(b_u); + _pq_weight[b_u] -= min_weight; + } } + + return true; } - return true; + return false; } - return false; -} - -bool NodeBalancer::perform_parallel_round(const int round) { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Parallel round"); + bool perform_parallel_round(int round) { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Parallel round"); - const PEID rank = mpi::get_comm_rank(_p_graph.communicator()); + const PEID rank = mpi::get_comm_rank(_graph.communicator()); - // Postpone PQ updates until after the iteration - std::vector> pq_updates; + // Postpone PQ updates until after the iteration + std::vector> pq_updates; - START_TIMER("Computing weight buckets"); - _buckets.clear(); - for (const BlockID from : _p_graph.blocks()) { - for (const auto &[node, pq_gain] : _pq.elements(from)) { - KASSERT(_p_graph.block(node) == from); + START_TIMER("Computing weight buckets"); + _buckets.clear(); + for (const BlockID from : _p_graph.blocks()) { + for (const auto &[node, pq_gain] : _pq.elements(from)) { + KASSERT(_p_graph.block(node) == from); - // For high-degree nodes, assume that the PQ gain is up-to-date and skip recomputation - if (_p_graph.degree(node) > _nb_ctx.par_high_degree_update_thresold && - ((round + 1) % _nb_ctx.par_high_degree_update_interval) == 0) { - _buckets.add(from, _p_graph.node_weight(node), pq_gain); - if (!_nb_ctx.par_update_pq_gains) { - _tmp_gains[node] = pq_gain; + // For high-degree nodes, assume that the PQ gain is up-to-date and skip recomputation + if (_graph.degree(node) > _nb_ctx.par_high_degree_update_thresold && + ((round + 1) % _nb_ctx.par_high_degree_update_interval) == 0) { + _buckets.add(from, _graph.node_weight(node), pq_gain); + if (!_nb_ctx.par_update_pq_gains) { + _tmp_gains[node] = pq_gain; + } + continue; } - continue; - } - // For low-degree nodes, recalculate gain and update PQ - const auto 
max_gainer = _gain_calculator.compute_max_gainer(node, _p_ctx); - const double actual_gain = max_gainer.relative_gain(); - const BlockID to = max_gainer.block; + // For low-degree nodes, recalculate gain and update PQ + const auto max_gainer = _gain_calculator.compute_max_gainer(node, _p_ctx); + const double actual_gain = max_gainer.relative_gain(); + const BlockID to = max_gainer.block; + + if (_nb_ctx.par_update_pq_gains && pq_gain != actual_gain) { + pq_updates.emplace_back(from, node, actual_gain); + } else if (!_nb_ctx.par_update_pq_gains) { + _tmp_gains[node] = actual_gain; + } - if (_nb_ctx.par_update_pq_gains && pq_gain != actual_gain) { - pq_updates.emplace_back(from, node, actual_gain); - } else if (!_nb_ctx.par_update_pq_gains) { - _tmp_gains[node] = actual_gain; + _buckets.add(from, _graph.node_weight(node), actual_gain); + _target_blocks[node] = to; } + } + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); - _buckets.add(from, _p_graph.node_weight(node), actual_gain); - _target_blocks[node] = to; + START_TIMER("Apply PQ updates"); + for (const auto &[from, node, gain] : pq_updates) { + _pq.change_priority(from, node, gain); } - } - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); - START_TIMER("Apply PQ updates"); - for (const auto &[from, node, gain] : pq_updates) { - _pq.change_priority(from, node, gain); - } - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - START_TIMER("Computing cut-off buckets"); - const auto &cutoff_buckets = - _buckets.compute_cutoff_buckets(reduce_buckets_mpireduce(_buckets, _p_graph)); - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - // Find move candidates - std::vector candidates; - std::vector block_weight_deltas_to(_p_graph.k()); - std::vector block_weight_deltas_from(_p_graph.k()); - - START_TIMER("Find move candidates"); - for (const BlockID from : _p_graph.blocks()) { - for (const auto &pq_element : _pq.elements(from)) { - const 
NodeID &node = pq_element.id; - const double &gain = (_nb_ctx.par_update_pq_gains ? pq_element.key : _tmp_gains[node]); - - if (block_overload(from) <= block_weight_deltas_from[from]) { - break; - } + START_TIMER("Computing cut-off buckets"); + const auto &cutoff_buckets = + _buckets.compute_cutoff_buckets(reduce_buckets_mpireduce(_buckets, _p_graph)); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); - const BlockID to = _target_blocks[node]; - const auto bucket = _buckets.compute_bucket(gain); + // Find move candidates + std::vector candidates; + std::vector block_weight_deltas_to(_p_graph.k()); + std::vector block_weight_deltas_from(_p_graph.k()); - KASSERT( - [&] { - const auto max_gainer = _gain_calculator.compute_max_gainer(node, _p_ctx); + START_TIMER("Find move candidates"); + for (const BlockID from : _p_graph.blocks()) { + for (const auto &pq_element : _pq.elements(from)) { + const NodeID &node = pq_element.id; + const double &gain = (_nb_ctx.par_update_pq_gains ? pq_element.key : _tmp_gains[node]); - if (gain != max_gainer.relative_gain()) { - LOG_WARNING << "bad relative gain for node " << node << ": " << gain - << " != " << max_gainer.relative_gain(); - return false; - } - // Skip check: does not work when using the randomized gain calculator - /*if (to != max_gainer.block) { - LOG_WARNING << "bad target block for node " << node << ": " << to - << " != " << max_gainer.block; - return false; - }*/ - return true; - }(), - "inconsistent PQ gains", - HEAVY - ); + if (block_overload(from) <= block_weight_deltas_from[from]) { + break; + } - if (!_nb_ctx.par_partial_buckets || bucket < cutoff_buckets[from]) { - Candidate candidate = { - .id = _p_graph.local_to_global_node(node), - .from = from, - .to = to, - .weight = _p_graph.node_weight(node), - .gain = gain, - }; + const BlockID to = _target_blocks[node]; + const auto bucket = _buckets.compute_bucket(gain); + + KASSERT( + [&] { + const auto max_gainer = _gain_calculator.compute_max_gainer(node, 
_p_ctx); + + if (gain != max_gainer.relative_gain()) { + LOG_WARNING << "bad relative gain for node " << node << ": " << gain + << " != " << max_gainer.relative_gain(); + return false; + } + // Skip check: does not work when using the randomized gain calculator + /*if (to != max_gainer.block) { + LOG_WARNING << "bad target block for node " << node << ": " << to + << " != " << max_gainer.block; + return false; + }*/ + return true; + }(), + "inconsistent PQ gains", + HEAVY + ); + + if (!_nb_ctx.par_partial_buckets || bucket < cutoff_buckets[from]) { + Candidate candidate = { + .id = _graph.local_to_global_node(node), + .from = from, + .to = to, + .weight = _graph.node_weight(node), + .gain = gain, + }; + + if (candidate.from == candidate.to) { + [[maybe_unused]] const bool reassigned = + assign_feasible_target_block(candidate, block_weight_deltas_to); + KASSERT( + reassigned, + "could not find a feasible target block for node " + << candidate.id << ", weight " << candidate.weight << ", deltas: [" + << block_weight_deltas_to << "]" + << ", max block weights: " << _p_ctx.graph->max_block_weights + << ", block weights: " + << std::vector( + _p_graph.block_weights().begin(), _p_graph.block_weights().end() + ) + ); + } - if (candidate.from == candidate.to) { - [[maybe_unused]] const bool reassigned = - assign_feasible_target_block(candidate, block_weight_deltas_to); - KASSERT( - reassigned, - "could not find a feasible target block for node " - << candidate.id << ", weight " << candidate.weight << ", deltas: [" - << block_weight_deltas_to << "]" - << ", max block weights: " << _p_ctx.graph->max_block_weights - << ", block weights: " - << std::vector( - _p_graph.block_weights().begin(), _p_graph.block_weights().end() - ) - ); + block_weight_deltas_to[candidate.to] += candidate.weight; + block_weight_deltas_from[candidate.from] += candidate.weight; + candidates.push_back(candidate); } - - block_weight_deltas_to[candidate.to] += candidate.weight; - 
block_weight_deltas_from[candidate.from] += candidate.weight; - candidates.push_back(candidate); - } - } - } - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - - // Compute total weight to each block - START_TIMER("Allreduce weight to block"); - MPI_Allreduce( - MPI_IN_PLACE, - block_weight_deltas_to.data(), - asserting_cast(_p_graph.k()), - mpi::type::get(), - MPI_SUM, - _p_graph.communicator() - ); - STOP_TIMER(); - - // Perform moves - START_TIMER("Attempt to move candidates"); - Random &rand = Random::instance(); - - std::size_t num_rejected_candidates; - std::vector actual_block_weight_deltas; - bool balanced_moves = false; - - for (int attempt = 0; - !balanced_moves && attempt < std::max(1, _nb_ctx.par_num_dicing_attempts); - ++attempt) { - num_rejected_candidates = 0; - actual_block_weight_deltas.clear(); - actual_block_weight_deltas.resize(_p_graph.k()); - - for (std::size_t i = 0; i < candidates.size() - num_rejected_candidates; ++i) { - const auto &candidate = candidates[i]; - const double probability = - 1.0 * block_underload(candidate.to) / block_weight_deltas_to[candidate.to]; - if (rand.random_bool(probability)) { - actual_block_weight_deltas[candidate.to] += candidate.weight; - actual_block_weight_deltas[candidate.from] -= candidate.weight; - } else { - ++num_rejected_candidates; - std::swap(candidates[i], candidates[candidates.size() - num_rejected_candidates]); - --i; } } + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); + // Compute total weight to each block + START_TIMER("Allreduce weight to block"); MPI_Allreduce( MPI_IN_PLACE, - actual_block_weight_deltas.data(), - asserting_cast(actual_block_weight_deltas.size()), + block_weight_deltas_to.data(), + asserting_cast(_p_graph.k()), mpi::type::get(), MPI_SUM, - _p_graph.communicator() + _graph.communicator() ); + STOP_TIMER(); - // Check that the moves do not overload a previously non-overloaded block - balanced_moves = true; - for (const BlockID block : _p_graph.blocks()) { - if 
(block_overload(block) == 0 && - block_underload(block) < actual_block_weight_deltas[block]) { - balanced_moves = false; - break; + // Perform moves + START_TIMER("Attempt to move candidates"); + Random &rand = Random::instance(); + + std::size_t num_rejected_candidates; + std::vector actual_block_weight_deltas; + bool balanced_moves = false; + + for (int attempt = 0; + !balanced_moves && attempt < std::max(1, _nb_ctx.par_num_dicing_attempts); + ++attempt) { + num_rejected_candidates = 0; + actual_block_weight_deltas.clear(); + actual_block_weight_deltas.resize(_p_graph.k()); + + for (std::size_t i = 0; i < candidates.size() - num_rejected_candidates; ++i) { + const auto &candidate = candidates[i]; + const double probability = + 1.0 * block_underload(candidate.to) / block_weight_deltas_to[candidate.to]; + if (rand.random_bool(probability)) { + actual_block_weight_deltas[candidate.to] += candidate.weight; + actual_block_weight_deltas[candidate.from] -= candidate.weight; + } else { + ++num_rejected_candidates; + std::swap(candidates[i], candidates[candidates.size() - num_rejected_candidates]); + --i; + } } - } - } - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); - if (balanced_moves || _nb_ctx.par_accept_imbalanced_moves) { - for (const BlockID block : _p_graph.blocks()) { - _p_graph.set_block_weight( - block, _p_graph.block_weight(block) + actual_block_weight_deltas[block] + MPI_Allreduce( + MPI_IN_PLACE, + actual_block_weight_deltas.data(), + asserting_cast(actual_block_weight_deltas.size()), + mpi::type::get(), + MPI_SUM, + _graph.communicator() ); + + // Check that the moves do not overload a previously non-overloaded block + balanced_moves = true; + for (const BlockID block : _p_graph.blocks()) { + if (block_overload(block) == 0 && + block_underload(block) < actual_block_weight_deltas[block]) { + balanced_moves = false; + break; + } + } } + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); - candidates.resize(candidates.size() - 
num_rejected_candidates); + if (balanced_moves || _nb_ctx.par_accept_imbalanced_moves) { + for (const BlockID block : _p_graph.blocks()) { + _p_graph.set_block_weight( + block, _p_graph.block_weight(block) + actual_block_weight_deltas[block] + ); + } - START_TIMER("Perform moves"); - perform_moves(candidates, false); - STOP_TIMER(); - TIMER_BARRIER(_p_graph.communicator()); + candidates.resize(candidates.size() - num_rejected_candidates); + + START_TIMER("Perform moves"); + perform_moves(candidates, false); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); + + TIMED_SCOPE("Synchronize partition state after fast rebalance round") { + struct Message { + NodeID node; + BlockID block; + }; - TIMED_SCOPE("Synchronize partition state after fast rebalance round") { - struct Message { - NodeID node; - BlockID block; + mpi::graph::sparse_alltoall_interface_to_pe_custom_range( + _graph, + 0, + candidates.size(), + [&](const NodeID i) -> NodeID { return _graph.global_to_local_node(candidates[i].id); }, + [&](NodeID) -> bool { return true; }, + [&](const NodeID u) -> Message { return {.node = u, .block = _p_graph.block(u)}; }, + [&](const auto &recv_buffer, const PEID pe) { + tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { + const auto [their_lnode, to] = recv_buffer[i]; + const NodeID lnode = _graph.map_remote_node(their_lnode, pe); + _p_graph.set_block(lnode, to); + }); + } + ); }; - mpi::graph::sparse_alltoall_interface_to_pe_custom_range( - _p_graph.graph(), - 0, - candidates.size(), - [&](const NodeID i) -> NodeID { return _p_graph.global_to_local_node(candidates[i].id); }, - [&](NodeID) -> bool { return true; }, - [&](const NodeID u) -> Message { return {.node = u, .block = _p_graph.block(u)}; }, - [&](const auto &recv_buffer, const PEID pe) { - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto [their_lnode, to] = recv_buffer[i]; - const NodeID lnode = _p_graph.map_remote_node(their_lnode, pe); - 
_p_graph.set_block(lnode, to); - }); - } - ); - }; + TIMER_BARRIER(_graph.communicator()); + return true; + } + + // Parallel rebalancing stalled + return false; + } + + bool + assign_feasible_target_block(Candidate &candidate, const std::vector &deltas) const { + do { + ++candidate.to; + if (candidate.to == _p_ctx.k) { + candidate.to = 0; + } + } while (candidate.from != candidate.to && + block_underload(candidate.to) < candidate.weight + deltas[candidate.to]); - TIMER_BARRIER(_p_graph.communicator()); - return true; + return candidate.from != candidate.to; } - // Parallel rebalancing stalled - return false; -} + void switch_to_stalled() { + TIMER_BARRIER(_graph.communicator()); -bool NodeBalancer::is_sequential_balancing_enabled() const { - return _stalled || _nb_ctx.enable_sequential_balancing; -} + _stalled = true; -bool NodeBalancer::is_parallel_balancing_enabled() const { - return !_stalled && _nb_ctx.enable_parallel_balancing; -} + // Reinit the balancer to fix blocks that were not overloaded in the beginning, but are + // overloaded now due to imbalanced parallel moves + START_TIMER("Reinitialize"); + reinit(); + STOP_TIMER(); + TIMER_BARRIER(_graph.communicator()); + } -bool NodeBalancer::assign_feasible_target_block( - Candidate &candidate, const std::vector &deltas -) const { - do { - ++candidate.to; - if (candidate.to == _p_ctx.k) { - candidate.to = 0; - } - } while (candidate.from != candidate.to && - block_underload(candidate.to) < candidate.weight + deltas[candidate.to]); + DistributedPartitionedGraph &_p_graph; + const Graph &_graph; + + const Context &_ctx; + const NodeBalancerContext &_nb_ctx; + const PartitionContext &_p_ctx; + + DynamicBinaryMinMaxForest _pq; + std::vector _pq_weight; + Marker<> _marker; - return candidate.from != candidate.to; + Buckets _buckets; + RandomizedGainCalculator _gain_calculator; + + bool _stalled = false; + + std::vector _cached_cutoff_buckets; + + StaticArray _target_blocks; + StaticArray _tmp_gains; +}; + +// +// 
Public interface +// + +NodeBalancerFactory::NodeBalancerFactory(const Context &ctx) : _ctx(ctx) {} + +std::unique_ptr +NodeBalancerFactory::create(DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx) { + return p_graph.graph().reified( + [&](const DistributedCSRGraph &csr_graph) { + std::unique_ptr refiner = + std::make_unique>(_ctx, p_graph, csr_graph, p_ctx); + return refiner; + }, + [&](const DistributedCompressedGraph &compressed_graph) { + std::unique_ptr refiner = + std::make_unique>( + _ctx, p_graph, compressed_graph, p_ctx + ); + return refiner; + } + ); } + } // namespace kaminpar::dist diff --git a/kaminpar-dist/refinement/balancer/node_balancer.h b/kaminpar-dist/refinement/balancer/node_balancer.h index 7d1cde04..15e5a075 100644 --- a/kaminpar-dist/refinement/balancer/node_balancer.h +++ b/kaminpar-dist/refinement/balancer/node_balancer.h @@ -10,13 +10,8 @@ #include "kaminpar-dist/context.h" #include "kaminpar-dist/datastructures/distributed_partitioned_graph.h" #include "kaminpar-dist/dkaminpar.h" -#include "kaminpar-dist/refinement/balancer/weight_buckets.h" -#include "kaminpar-dist/refinement/gain_calculator.h" #include "kaminpar-dist/refinement/refiner.h" -#include "kaminpar-common/datastructures/binary_heap.h" -#include "kaminpar-common/datastructures/marker.h" - namespace kaminpar::dist { class NodeBalancerFactory : public GlobalRefinerFactory { public: @@ -34,73 +29,4 @@ class NodeBalancerFactory : public GlobalRefinerFactory { private: const Context &_ctx; }; - -class NodeBalancer : public GlobalRefiner { - struct Candidate { - GlobalNodeID id; - BlockID from; - BlockID to; - NodeWeight weight; - double gain; - }; - -public: - NodeBalancer( - const Context &ctx, DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx - ); - - NodeBalancer(const NodeBalancer &) = delete; - NodeBalancer &operator=(const NodeBalancer &) = delete; - - NodeBalancer(NodeBalancer &&) noexcept = default; - NodeBalancer &operator=(NodeBalancer 
&&) = delete; - - void initialize() final; - bool refine() final; - -private: - void reinit(); - - bool is_sequential_balancing_enabled() const; - bool is_parallel_balancing_enabled() const; - - bool perform_sequential_round(); - std::vector pick_sequential_candidates(); - - void perform_moves(const std::vector &moves, bool update_block_weights); - void perform_move(const Candidate &move, bool update_block_weights); - - BlockWeight block_overload(BlockID b) const; - BlockWeight block_underload(BlockID b) const; - - bool try_pq_insertion(BlockID b, NodeID u); - bool try_pq_insertion(BlockID b, NodeID u, NodeWeight u_weight, double rel_gain); - - bool perform_parallel_round(int round); - - bool - assign_feasible_target_block(Candidate &candidate, const std::vector &deltas) const; - - void switch_to_stalled(); - - DistributedPartitionedGraph &_p_graph; - - const Context &_ctx; - const NodeBalancerContext &_nb_ctx; - const PartitionContext &_p_ctx; - - DynamicBinaryMinMaxForest _pq; - std::vector _pq_weight; - Marker<> _marker; - - Buckets _buckets; - RandomizedGainCalculator _gain_calculator; - - bool _stalled = false; - - std::vector _cached_cutoff_buckets; - - StaticArray _target_blocks; - StaticArray _tmp_gains; -}; }; // namespace kaminpar::dist diff --git a/kaminpar-dist/refinement/gain_calculator.h b/kaminpar-dist/refinement/gain_calculator.h index c599be25..954a2091 100644 --- a/kaminpar-dist/refinement/gain_calculator.h +++ b/kaminpar-dist/refinement/gain_calculator.h @@ -20,7 +20,7 @@ #include "kaminpar-common/random.h" namespace kaminpar::dist { -template class GainCalculator { +template class GainCalculator { public: GainCalculator(const BlockID max_k) : _rating_map_ets([max_k] { return RatingMap(max_k); }) {} @@ -44,8 +44,9 @@ template class GainCalculator { } }; - void init(const DistributedPartitionedGraph &p_graph) { + void init(const DistributedPartitionedGraph &p_graph, const Graph &graph) { _p_graph = &p_graph; + _graph = &graph; } MaxGainer 
compute_max_gainer(const NodeID u, const PartitionContext &p_ctx) const { @@ -79,7 +80,7 @@ template class GainCalculator { Random &rand = Random::instance(); - const NodeWeight w_u = _p_graph->node_weight(u); + const NodeWeight w_u = _graph->node_weight(u); const BlockID b_u = _p_graph->block(u); EdgeWeight int_conn = 0; @@ -87,12 +88,12 @@ template class GainCalculator { BlockID max_target = b_u; auto action = [&](auto &map) { - _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { const BlockID b_v = _p_graph->block(v); if (b_u != b_v && weight_checker(b_v, _p_graph->block_weight(b_v) + w_u)) { - map[b_v] += _p_graph->edge_weight(e); + map[b_v] += _graph->edge_weight(e); } else if (b_u == b_v) { - int_conn += _p_graph->edge_weight(e); + int_conn += _graph->edge_weight(e); } }); @@ -106,7 +107,7 @@ template class GainCalculator { map.clear(); }; - _rating_map_ets.local().execute(std::min(_p_graph->k(), _p_graph->degree(u)), action); + _rating_map_ets.local().execute(std::min(_p_graph->k(), _graph->degree(u)), action); return { .int_degree = int_conn, @@ -117,9 +118,10 @@ template class GainCalculator { } const DistributedPartitionedGraph *_p_graph = nullptr; + const Graph *_graph = nullptr; mutable tbb::enumerable_thread_specific> _rating_map_ets; }; -using DeterministicGainCalculator = GainCalculator; -using RandomizedGainCalculator = GainCalculator; +template using DeterministicGainCalculator = GainCalculator; +template using RandomizedGainCalculator = GainCalculator; } // namespace kaminpar::dist diff --git a/kaminpar-dist/refinement/jet/jet_refiner.cc b/kaminpar-dist/refinement/jet/jet_refiner.cc index 47579102..6784f311 100644 --- a/kaminpar-dist/refinement/jet/jet_refiner.cc +++ b/kaminpar-dist/refinement/jet/jet_refiner.cc @@ -8,6 +8,7 @@ ******************************************************************************/ #include "kaminpar-dist/refinement/jet/jet_refiner.h" +#include #include 
#include "kaminpar-dist/context.h" @@ -28,371 +29,418 @@ SET_STATISTICS_FROM_GLOBAL(); SET_DEBUG(false); } // namespace -JetRefinerFactory::JetRefinerFactory(const Context &ctx) : _ctx(ctx) {} - -std::unique_ptr -JetRefinerFactory::create(DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx) { - return std::make_unique(_ctx, p_graph, p_ctx); -} - -JetRefiner::JetRefiner( - const Context &ctx, DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx -) - : _ctx(ctx), - _jet_ctx(ctx.refinement.jet), - _p_graph(p_graph), - _p_ctx(p_ctx), - _snapshooter(p_graph.total_n(), p_graph.k()), - _gain_calculator(p_graph.k()), - _gains_and_targets(p_graph.total_n()), - _block_weight_deltas(p_graph.k()), - _locked(p_graph.n()), - _balancer(factory::create_refiner(_ctx, _ctx.refinement.jet.balancing_algorithm) - ->create(_p_graph, _p_ctx)) {} - -void JetRefiner::initialize() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Jet Refiner"); - SCOPED_TIMER("Initialization"); - - if (_jet_ctx.dynamic_negative_gain_factor && - (_jet_ctx.num_fine_rounds <= 1 || _jet_ctx.num_coarse_rounds <= 1)) { - if (mpi::get_comm_rank(_p_graph.communicator()) == 0) { - LOG_WARNING << "dynamic negative gain factors are enabled, but only one round is configured"; +// +// Implementation +// + +template class JetRefiner : public GlobalRefiner { +public: + JetRefiner( + const Context &ctx, + DistributedPartitionedGraph &p_graph, + const Graph &graph, + const PartitionContext &p_ctx + ) + : _ctx(ctx), + _jet_ctx(ctx.refinement.jet), + _p_graph(p_graph), + _graph(graph), + _p_ctx(p_ctx), + _snapshooter(p_graph.total_n(), p_graph.k()), + _gain_calculator(p_graph.k()), + _gains_and_targets(p_graph.total_n()), + _block_weight_deltas(p_graph.k()), + _locked(p_graph.n()), + _balancer(factory::create_refiner(_ctx, _ctx.refinement.jet.balancing_algorithm) + ->create(_p_graph, _p_ctx)) {} + + JetRefiner(const JetRefiner &) = delete; + JetRefiner &operator=(const JetRefiner &) = 
delete; + + JetRefiner(JetRefiner &&) noexcept = default; + JetRefiner &operator=(JetRefiner &&) = delete; + + void initialize() final { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Jet Refiner"); + SCOPED_TIMER("Initialization"); + + if (_jet_ctx.dynamic_negative_gain_factor && + (_jet_ctx.num_fine_rounds <= 1 || _jet_ctx.num_coarse_rounds <= 1)) { + if (mpi::get_comm_rank(_graph.communicator()) == 0) { + LOG_WARNING + << "dynamic negative gain factors are enabled, but only one round is configured"; + } } - } - - _gain_calculator.init(_p_graph); - reset(); - - TIMER_BARRIER(_p_graph.communicator()); -} - -void JetRefiner::reset() { - _snapshooter.init(_p_graph, _p_ctx); - KASSERT(_locked.size() >= _p_graph.n(), "locked vector is too small", assert::light); - KASSERT( - _gains_and_targets.size() >= _p_graph.total_n(), - "gains_and_targets vector is too small", - assert::light - ); - KASSERT( - _block_weight_deltas.size() >= _p_graph.k(), - "block_weight_deltas vector is too small", - assert::light - ); + _gain_calculator.init(_p_graph, _graph); + reset(); - tbb::parallel_invoke( - [&] { _p_graph.pfor_nodes([&](const NodeID u) { _locked[u] = 0; }); }, - [&] { - _p_graph.pfor_all_nodes([&](const NodeID u) { - _gains_and_targets[u] = {0, _p_graph.block(u)}; - }); - }, - [&] { _p_graph.pfor_blocks([&](const BlockID b) { _block_weight_deltas[b] = 0; }); } - ); -} + TIMER_BARRIER(_graph.communicator()); + } -bool JetRefiner::refine() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Jet Refiner"); + bool refine() final { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Jet Refiner"); - KASSERT( - [&] { - for (const NodeID u : _p_graph.nodes()) { - if (_locked[u]) { - LOG_WARNING << "node " << u << " already locked: refiner was not properly initialized"; - return false; + KASSERT( + [&] { + for (const NodeID u : _graph.nodes()) { + if (_locked[u]) { + LOG_WARNING << "node " << u + << " already locked: refiner was not properly initialized"; + 
return false; + } } - } - for (const BlockID block : _p_graph.blocks()) { - if (_block_weight_deltas[block] != 0) { - LOG_WARNING << "block " << block << " has nonzero initial block weight delta"; - return false; + for (const BlockID block : _p_graph.blocks()) { + if (_block_weight_deltas[block] != 0) { + LOG_WARNING << "block " << block << " has nonzero initial block weight delta"; + return false; + } } - } - return true; - }(), - "refiner was not properly initialized", - HEAVY - ); + return true; + }(), + "refiner was not properly initialized", + HEAVY + ); - const bool toplevel = (_p_graph.global_n() == _ctx.partition.graph->global_n); - const int max_num_rounds = - toplevel ? _ctx.refinement.jet.num_fine_rounds : _ctx.refinement.jet.num_coarse_rounds; - const int max_num_fruitless_iterations = (_ctx.refinement.jet.num_fruitless_iterations == 0) - ? std::numeric_limits::max() - : _ctx.refinement.jet.num_fruitless_iterations; - const int max_num_iterations = (_ctx.refinement.jet.num_iterations == 0) - ? std::numeric_limits::max() - : _ctx.refinement.jet.num_iterations; - DBG0 << "Running JET refinement for " << max_num_rounds << " rounds, each with at most " - << max_num_iterations << " iterations or " << max_num_fruitless_iterations - << " fruitless iterations"; - - for (int round = 0; round < max_num_rounds; ++round) { - if (_jet_ctx.dynamic_negative_gain_factor) { - if (max_num_rounds >= 1) { - _negative_gain_factor = - _jet_ctx.initial_negative_gain_factor + - (1.0 * round / (max_num_rounds - 1.0)) * - (_jet_ctx.final_negative_gain_factor - _jet_ctx.initial_negative_gain_factor); + const bool toplevel = (_graph.global_n() == _ctx.partition.graph->global_n); + const int max_num_rounds = + toplevel ? _ctx.refinement.jet.num_fine_rounds : _ctx.refinement.jet.num_coarse_rounds; + const int max_num_fruitless_iterations = (_ctx.refinement.jet.num_fruitless_iterations == 0) + ? 
std::numeric_limits::max() + : _ctx.refinement.jet.num_fruitless_iterations; + const int max_num_iterations = (_ctx.refinement.jet.num_iterations == 0) + ? std::numeric_limits::max() + : _ctx.refinement.jet.num_iterations; + DBG0 << "Running JET refinement for " << max_num_rounds << " rounds, each with at most " + << max_num_iterations << " iterations or " << max_num_fruitless_iterations + << " fruitless iterations"; + + for (int round = 0; round < max_num_rounds; ++round) { + if (_jet_ctx.dynamic_negative_gain_factor) { + if (max_num_rounds >= 1) { + _negative_gain_factor = + _jet_ctx.initial_negative_gain_factor + + (1.0 * round / (max_num_rounds - 1.0)) * + (_jet_ctx.final_negative_gain_factor - _jet_ctx.initial_negative_gain_factor); + } else { + _negative_gain_factor = + (_jet_ctx.final_negative_gain_factor + _jet_ctx.initial_negative_gain_factor) / 2.0; + } } else { _negative_gain_factor = - (_jet_ctx.final_negative_gain_factor + _jet_ctx.initial_negative_gain_factor) / 2.0; + !toplevel ? _jet_ctx.coarse_negative_gain_factor : _jet_ctx.fine_negative_gain_factor; } - } else { - _negative_gain_factor = - !toplevel ? 
_jet_ctx.coarse_negative_gain_factor : _jet_ctx.fine_negative_gain_factor; - } - DBG0 << "Starting round " << (round + 1) << " of " << max_num_rounds - << " with negative gain factor " << _negative_gain_factor; + DBG0 << "Starting round " << (round + 1) << " of " << max_num_rounds + << " with negative gain factor " << _negative_gain_factor; - if (round > 0) { - reset(); - } + if (round > 0) { + reset(); + } - int cur_fruitless_iteration = 0; - int cur_iteration = 0; + int cur_fruitless_iteration = 0; + int cur_iteration = 0; - const EdgeWeight initial_cut = metrics::edge_cut(_p_graph); - EdgeWeight best_cut = initial_cut; + const EdgeWeight initial_cut = metrics::edge_cut(_p_graph); + EdgeWeight best_cut = initial_cut; - do { - TIMER_BARRIER(_p_graph.communicator()); + do { + TIMER_BARRIER(_graph.communicator()); - find_moves(); - synchronize_ghost_node_move_candidates(); - filter_bad_moves(); - move_locked_nodes(); - synchronize_ghost_node_labels(); - apply_block_weight_deltas(); + find_moves(); + synchronize_ghost_node_move_candidates(); + filter_bad_moves(); + move_locked_nodes(); + synchronize_ghost_node_labels(); + apply_block_weight_deltas(); - KASSERT( - debug::validate_partition(_p_graph), - "graph partition is in an inconsistent state after JET iterations " << cur_iteration, - HEAVY - ); + KASSERT( + debug::validate_partition(_p_graph), + "graph partition is in an inconsistent state after JET iterations " << cur_iteration, + HEAVY + ); - const EdgeWeight before_rebalance_cut = IFDBG(metrics::edge_cut(_p_graph)); - const double before_rebalance_l1 = IFDBG(metrics::imbalance_l1(_p_graph, _p_ctx)); - DBG0 << "Partition *before* rebalancing: cut=" << before_rebalance_cut - << ", l1=" << before_rebalance_l1; + const EdgeWeight before_rebalance_cut = IFDBG(metrics::edge_cut(_p_graph)); + const double before_rebalance_l1 = IFDBG(metrics::imbalance_l1(_p_graph, _p_ctx)); + DBG0 << "Partition *before* rebalancing: cut=" << before_rebalance_cut + << ", l1=" << 
before_rebalance_l1; - _balancer->initialize(); - _balancer->refine(); + _balancer->initialize(); + _balancer->refine(); - const EdgeWeight final_cut = metrics::edge_cut(_p_graph); - const double final_l1 = metrics::imbalance_l1(_p_graph, _p_ctx); - DBG0 << "Partition *after* rebalancing: cut=" << final_cut << ", l1=" << final_l1; + const EdgeWeight final_cut = metrics::edge_cut(_p_graph); + const double final_l1 = metrics::imbalance_l1(_p_graph, _p_ctx); + DBG0 << "Partition *after* rebalancing: cut=" << final_cut << ", l1=" << final_l1; - TIMED_SCOPE("Update best partition") { - _snapshooter.update(_p_graph, _p_ctx, final_cut, final_l1); + TIMED_SCOPE("Update best partition") { + _snapshooter.update(_p_graph, _p_ctx, final_cut, final_l1); + }; + + ++cur_iteration; + ++cur_fruitless_iteration; + + if (best_cut - final_cut > (1.0 - _ctx.refinement.jet.fruitless_threshold) * best_cut) { + DBG0 << "Improved cut from " << initial_cut << " to " << best_cut << " to " << final_cut + << ": resetting number of fruitless iterations (threshold: " + << _ctx.refinement.jet.fruitless_threshold << ")"; + best_cut = final_cut; + cur_fruitless_iteration = 0; + } else { + DBG0 << "Fruitless edge cut change from " << initial_cut << " to " << best_cut << " to " + << final_cut << " (threshold: " << _ctx.refinement.jet.fruitless_threshold + << "): incrementing fruitless iterations counter to " << cur_fruitless_iteration; + } + } while (cur_iteration < max_num_iterations && + cur_fruitless_iteration < max_num_fruitless_iterations); + + TIMED_SCOPE("Rollback") { + _snapshooter.rollback(_p_graph); }; - ++cur_iteration; - ++cur_fruitless_iteration; + KASSERT( + debug::validate_partition(_p_graph), + "graph partition is in an inconsistent state after JET refinement", + HEAVY + ); + } - if (best_cut - final_cut > (1.0 - _ctx.refinement.jet.fruitless_threshold) * best_cut) { - DBG0 << "Improved cut from " << initial_cut << " to " << best_cut << " to " << final_cut - << ": resetting number of 
fruitless iterations (threshold: " - << _ctx.refinement.jet.fruitless_threshold << ")"; - best_cut = final_cut; - cur_fruitless_iteration = 0; - } else { - DBG0 << "Fruitless edge cut change from " << initial_cut << " to " << best_cut << " to " - << final_cut << " (threshold: " << _ctx.refinement.jet.fruitless_threshold - << "): incrementing fruitless iterations counter to " << cur_fruitless_iteration; - } - } while (cur_iteration < max_num_iterations && - cur_fruitless_iteration < max_num_fruitless_iterations); + TIMER_BARRIER(_graph.communicator()); + return false; + } - TIMED_SCOPE("Rollback") { - _snapshooter.rollback(_p_graph); - }; +private: + void reset() { + _snapshooter.init(_p_graph, _p_ctx); + KASSERT(_locked.size() >= _graph.n(), "locked vector is too small", assert::light); KASSERT( - debug::validate_partition(_p_graph), - "graph partition is in an inconsistent state after JET refinement", - HEAVY + _gains_and_targets.size() >= _graph.total_n(), + "gains_and_targets vector is too small", + assert::light + ); + KASSERT( + _block_weight_deltas.size() >= _p_graph.k(), + "block_weight_deltas vector is too small", + assert::light ); - } - TIMER_BARRIER(_p_graph.communicator()); - return false; -} + tbb::parallel_invoke( + [&] { _graph.pfor_nodes([&](const NodeID u) { _locked[u] = 0; }); }, + [&] { + _graph.pfor_all_nodes([&](const NodeID u) { + _gains_and_targets[u] = {0, _p_graph.block(u)}; + }); + }, + [&] { _p_graph.pfor_blocks([&](const BlockID b) { _block_weight_deltas[b] = 0; }); } + ); + } -void JetRefiner::find_moves() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Find moves"); + void find_moves() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Find moves"); - _p_graph.pfor_nodes([&](const NodeID u) { - const BlockID b_u = _p_graph.block(u); - const NodeWeight w_u = _p_graph.node_weight(u); + _graph.pfor_nodes([&](const NodeID u) { + const BlockID b_u = _p_graph.block(u); + const NodeWeight w_u = _graph.node_weight(u); - if 
(_locked[u]) { - _gains_and_targets[u] = {0, b_u}; - return; - } + if (_locked[u]) { + _gains_and_targets[u] = {0, b_u}; + return; + } - const auto max_gainer = _gain_calculator.compute_max_gainer(u); + const auto max_gainer = _gain_calculator.compute_max_gainer(u); - if ( // Is a border node ... + if ( // Is a border node ... max_gainer.block != b_u && // ... and the move is not too bad max_gainer.absolute_gain() > -std::floor(_negative_gain_factor * max_gainer.int_degree) ) { - _gains_and_targets[u] = {max_gainer.absolute_gain(), max_gainer.block}; - } else { - _gains_and_targets[u] = {0, b_u}; - } - }); -} + _gains_and_targets[u] = {max_gainer.absolute_gain(), max_gainer.block}; + } else { + _gains_and_targets[u] = {0, b_u}; + } + }); + } -void JetRefiner::synchronize_ghost_node_move_candidates() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Exchange moves"); + void filter_bad_moves() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Filter moves"); - _p_graph.pfor_ghost_nodes([&](const NodeID ghost) { - _gains_and_targets[ghost] = {0, _p_graph.block(ghost)}; - }); + _graph.pfor_nodes([&](const NodeID u) { + _locked[u] = 0; - struct Message { - NodeID node; - EdgeWeight gain; - BlockID target; - }; + const BlockID from_u = _p_graph.block(u); + const auto [gain_u, to_u] = _gains_and_targets[u]; - mpi::graph::sparse_alltoall_interface_to_pe( - _p_graph.graph(), + if (from_u == to_u) { + return; + } - // Only consider vertices for which we found a new target block - [&](const NodeID u) { return _gains_and_targets[u].second != _p_graph.block(u); }, + EdgeWeight projected_gain = 0; - [&](const NodeID u) -> Message { - return { - .node = u, - .gain = _gains_and_targets[u].first, - .target = _gains_and_targets[u].second, - }; - }, + _graph.neighbors(u, [&, gain_u = gain_u, to_u = to_u](const EdgeID e, const NodeID v) { + const auto [gain_v, to_v] = _gains_and_targets[v]; + const BlockID projected_b_v = + (gain_v > gain_u || (gain_v == gain_u && v 
< u)) ? to_v : _p_graph.block(v); - [&](const auto &recv_buffer, const PEID pe) { - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto [their_lnode, gain, target] = recv_buffer[i]; - const NodeID lnode = _p_graph.map_remote_node(their_lnode, pe); - _gains_and_targets[lnode] = {gain, target}; - }); - } - ); -} + if (projected_b_v == to_u) { + projected_gain += _graph.edge_weight(e); + } else if (projected_b_v == from_u) { + projected_gain -= _graph.edge_weight(e); + } + }); -void JetRefiner::filter_bad_moves() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Filter moves"); + // Locking the node here means that the move + // will be executed by move_locked_nodes() + if (projected_gain >= 0) { + _locked[u] = 1; + } + }); + } - _p_graph.pfor_nodes([&](const NodeID u) { - _locked[u] = 0; + void move_locked_nodes() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Execute moves"); - const BlockID from_u = _p_graph.block(u); - const auto [gain_u, to_u] = _gains_and_targets[u]; + _graph.pfor_nodes([&](const NodeID u) { + if (!_locked[u]) { + return; + } - if (from_u == to_u) { - return; - } + const BlockID from = _p_graph.block(u); + const BlockID to = _gains_and_targets[u].second; + _p_graph.set_block(u, to); - EdgeWeight projected_gain = 0; + const NodeWeight w_u = _graph.node_weight(u); + __atomic_fetch_sub(&_block_weight_deltas[from], w_u, __ATOMIC_RELAXED); + __atomic_fetch_add(&_block_weight_deltas[to], w_u, __ATOMIC_RELAXED); + }); + } - _p_graph.neighbors(u, [&, gain_u = gain_u, to_u = to_u](const EdgeID e, const NodeID v) { - const auto [gain_v, to_v] = _gains_and_targets[v]; - const BlockID projected_b_v = - (gain_v > gain_u || (gain_v == gain_u && v < u)) ? 
to_v : _p_graph.block(v); + void synchronize_ghost_node_move_candidates() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Exchange moves"); - if (projected_b_v == to_u) { - projected_gain += _p_graph.edge_weight(e); - } else if (projected_b_v == from_u) { - projected_gain -= _p_graph.edge_weight(e); - } + _graph.pfor_ghost_nodes([&](const NodeID ghost) { + _gains_and_targets[ghost] = {0, _p_graph.block(ghost)}; }); - // Locking the node here means that the move - // will be executed by move_locked_nodes() - if (projected_gain >= 0) { - _locked[u] = 1; - } - }); -} + struct Message { + NodeID node; + EdgeWeight gain; + BlockID target; + }; -void JetRefiner::move_locked_nodes() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Execute moves"); + mpi::graph::sparse_alltoall_interface_to_pe( + _graph, + // Only consider vertices for which we found a new target block + [&](const NodeID u) { return _gains_and_targets[u].second != _p_graph.block(u); }, + [&](const NodeID u) -> Message { + return { + .node = u, + .gain = _gains_and_targets[u].first, + .target = _gains_and_targets[u].second, + }; + }, + [&](const auto &recv_buffer, const PEID pe) { + tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { + const auto [their_lnode, gain, target] = recv_buffer[i]; + const NodeID lnode = _graph.map_remote_node(their_lnode, pe); + _gains_and_targets[lnode] = {gain, target}; + }); + } + ); + } + void synchronize_ghost_node_labels() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Synchronize ghost node labels"); - _p_graph.pfor_nodes([&](const NodeID u) { - if (!_locked[u]) { - return; - } + struct Message { + NodeID node; + BlockID block; + }; - const BlockID from = _p_graph.block(u); - const BlockID to = _gains_and_targets[u].second; - _p_graph.set_block(u, to); + mpi::graph::sparse_alltoall_interface_to_pe( + _graph, + // Only exchange messages for nodes that were moved during the last round + [&](const NodeID u) { return _locked[u]; 
}, + [&](const NodeID u) -> Message { + return { + .node = u, + .block = _p_graph.block(u), + }; + }, + [&](const auto &recv_buffer, const PEID pe) { + tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { + const auto [their_lnode, block] = recv_buffer[i]; + const NodeID lnode = _graph.map_remote_node(their_lnode, pe); + _p_graph.set_block(lnode, block); + }); + } + ); + } - const NodeWeight w_u = _p_graph.node_weight(u); - __atomic_fetch_sub(&_block_weight_deltas[from], w_u, __ATOMIC_RELAXED); - __atomic_fetch_add(&_block_weight_deltas[to], w_u, __ATOMIC_RELAXED); - }); -} + void apply_block_weight_deltas() { + TIMER_BARRIER(_graph.communicator()); + SCOPED_TIMER("Apply block weight deltas"); + + MPI_Allreduce( + MPI_IN_PLACE, + _block_weight_deltas.data(), + asserting_cast(_p_graph.k()), + mpi::type::get(), + MPI_SUM, + _graph.communicator() + ); -void JetRefiner::synchronize_ghost_node_labels() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Synchronize ghost node labels"); + _p_graph.pfor_blocks([&](const BlockID block) { + _p_graph.set_block_weight(block, _p_graph.block_weight(block) + _block_weight_deltas[block]); + _block_weight_deltas[block] = 0; + }); + } - struct Message { - NodeID node; - BlockID block; - }; + const Context &_ctx; + const JetRefinementContext &_jet_ctx; + DistributedPartitionedGraph &_p_graph; + const Graph &_graph; + const PartitionContext &_p_ctx; - mpi::graph::sparse_alltoall_interface_to_pe( - _p_graph.graph(), + BestPartitionSnapshooter _snapshooter; + RandomizedGainCalculator _gain_calculator; + StaticArray> _gains_and_targets; + StaticArray _block_weight_deltas; + StaticArray _locked; - // Only exchange messages for nodes that were moved during the last round - [&](const NodeID u) { return _locked[u]; }, + std::unique_ptr _balancer; - [&](const NodeID u) -> Message { - return { - .node = u, - .block = _p_graph.block(u), - }; - }, + double _negative_gain_factor; +}; + +// +// Public interface +// + 
+JetRefinerFactory::JetRefinerFactory(const Context &ctx) : _ctx(ctx) {} - [&](const auto &recv_buffer, const PEID pe) { - tbb::parallel_for(0, recv_buffer.size(), [&](const std::size_t i) { - const auto [their_lnode, block] = recv_buffer[i]; - const NodeID lnode = _p_graph.map_remote_node(their_lnode, pe); - _p_graph.set_block(lnode, block); - }); +std::unique_ptr +JetRefinerFactory::create(DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx) { + return p_graph.graph().reified( + [&](const DistributedCSRGraph &csr_graph) { + std::unique_ptr refiner = + std::make_unique>(_ctx, p_graph, csr_graph, p_ctx); + return refiner; + }, + [&](const DistributedCompressedGraph &compressed_graph) { + std::unique_ptr refiner = + std::make_unique>( + _ctx, p_graph, compressed_graph, p_ctx + ); + return refiner; } ); } -void JetRefiner::apply_block_weight_deltas() { - TIMER_BARRIER(_p_graph.communicator()); - SCOPED_TIMER("Apply block weight deltas"); - - MPI_Allreduce( - MPI_IN_PLACE, - _block_weight_deltas.data(), - asserting_cast(_p_graph.k()), - mpi::type::get(), - MPI_SUM, - _p_graph.communicator() - ); - - _p_graph.pfor_blocks([&](const BlockID block) { - _p_graph.set_block_weight(block, _p_graph.block_weight(block) + _block_weight_deltas[block]); - _block_weight_deltas[block] = 0; - }); -} } // namespace kaminpar::dist diff --git a/kaminpar-dist/refinement/jet/jet_refiner.h b/kaminpar-dist/refinement/jet/jet_refiner.h index e5f759f6..973d3532 100644 --- a/kaminpar-dist/refinement/jet/jet_refiner.h +++ b/kaminpar-dist/refinement/jet/jet_refiner.h @@ -8,13 +8,9 @@ ******************************************************************************/ #pragma once -#include - #include "kaminpar-dist/context.h" #include "kaminpar-dist/datastructures/distributed_partitioned_graph.h" -#include "kaminpar-dist/refinement/gain_calculator.h" #include "kaminpar-dist/refinement/refiner.h" -#include "kaminpar-dist/refinement/snapshooter.h" namespace kaminpar::dist { class 
JetRefinerFactory : public GlobalRefinerFactory { @@ -33,45 +29,4 @@ class JetRefinerFactory : public GlobalRefinerFactory { private: const Context &_ctx; }; - -class JetRefiner : public GlobalRefiner { -public: - JetRefiner( - const Context &ctx, DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx - ); - - JetRefiner(const JetRefiner &) = delete; - JetRefiner &operator=(const JetRefiner &) = delete; - - JetRefiner(JetRefiner &&) noexcept = default; - JetRefiner &operator=(JetRefiner &&) = delete; - - void initialize() final; - bool refine() final; - -private: - void reset(); - - void find_moves(); - void filter_bad_moves(); - void move_locked_nodes(); - void synchronize_ghost_node_move_candidates(); - void synchronize_ghost_node_labels(); - void apply_block_weight_deltas(); - - const Context &_ctx; - const JetRefinementContext &_jet_ctx; - DistributedPartitionedGraph &_p_graph; - const PartitionContext &_p_ctx; - - BestPartitionSnapshooter _snapshooter; - RandomizedGainCalculator _gain_calculator; - StaticArray> _gains_and_targets; - StaticArray _block_weight_deltas; - StaticArray _locked; - - std::unique_ptr _balancer; - - double _negative_gain_factor; -}; } // namespace kaminpar::dist diff --git a/kaminpar-dist/refinement/lp/lp_refiner.cc b/kaminpar-dist/refinement/lp/lp_refiner.cc index 80ac1e56..8db12ad8 100644 --- a/kaminpar-dist/refinement/lp/lp_refiner.cc +++ b/kaminpar-dist/refinement/lp/lp_refiner.cc @@ -38,11 +38,13 @@ struct LPRefinerConfig : public LabelPropagationConfig { static constexpr bool kUseLocalActiveSetStrategy = true; }; -class LPRefinerImpl final : public ChunkRandomdLabelPropagation { +template +class LPRefinerImpl final + : public ChunkRandomdLabelPropagation, LPRefinerConfig, Graph> { SET_STATISTICS_FROM_GLOBAL(); SET_DEBUG(false); - using Base = ChunkRandomdLabelPropagation; + using Base = ChunkRandomdLabelPropagation, LPRefinerConfig, Graph>; using Config = LPRefinerConfig; struct Statistics { @@ -123,27 +125,28 @@ 
class LPRefinerImpl final : public ChunkRandomdLabelPropagationk); // needs access to _p_graph + Base::initialize(&graph, _p_ctx->k); IFSTATS(_statistics = Statistics{_p_graph->communicator()}); IFSTATS(_statistics.cut_before = metrics::edge_cut(*_p_graph)); @@ -153,7 +156,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation(_p_graph->n(), num_chunks, chunk); + const auto [from, to] = math::compute_local_range(graph.n(), num_chunks, chunk); num_moved_nodes += process_chunk(from, to); } if (num_moved_nodes == 0) { @@ -175,7 +178,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation(num_moved_nodes, MPI_SUM, _graph->communicator()); STOP_TIMER(); @@ -189,13 +192,13 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation weight_to_block_ets(_p_ctx->k); parallel::vector_ets gain_to_block_ets(_p_ctx->k); - _p_graph->pfor_nodes_range(from, to, [&](const auto r) { + _graph->pfor_nodes_range(from, to, [&](const auto r) { auto &weight_to_block = weight_to_block_ets.local(); auto &gain_to_block = gain_to_block_ets.local(); for (NodeID u = r.begin(); u < r.end(); ++u) { if (_p_graph->block(u) != _next_partition[u]) { - weight_to_block[_next_partition[u]] += _p_graph->node_weight(u); + weight_to_block[_next_partition[u]] += _graph->node_weight(u); gain_to_block[_next_partition[u]] += _gains[u]; } } @@ -234,9 +237,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagationpfor_nodes(from, to, [&](const NodeID u) { - _next_partition[u] = _p_graph->block(u); - }); + _graph->pfor_nodes(from, to, [&](const NodeID u) { _next_partition[u] = _p_graph->block(u); }); STOP_TIMER(); // _next_partition should be in a consistent state at this point @@ -271,7 +272,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation moves; - _p_graph->pfor_nodes_range(from, to, [&](const auto &r) { + _graph->pfor_nodes_range(from, to, [&](const auto &r) { auto &rand = Random::instance(); for (NodeID u = r.begin(); u < r.end(); ++u) { @@ 
-291,7 +292,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation(residual_block_weights[b]) / _p_graph->node_weight(u)); + (static_cast(residual_block_weights[b]) / _graph->node_weight(u)); IFSTATS(_statistics.expected_gain += probability * _gains[u]); // perform move with probability @@ -300,7 +301,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagationblock(u); const BlockID to = _next_partition[u]; - const NodeWeight u_weight = _p_graph->node_weight(u); + const NodeWeight u_weight = _graph->node_weight(u); moves.emplace_back(u, from); __atomic_fetch_sub(&block_weight_deltas[from], u_weight, __ATOMIC_RELAXED); @@ -320,9 +321,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagationk, MPI_SUM, _p_graph->communicator() - ); + mpi::inplace_sparse_allreduce(block_weight_deltas, _p_ctx->k, MPI_SUM, _graph->communicator()); // check for balance violations parallel::Atomic feasible = 1; @@ -399,8 +398,8 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation(_p_graph->offset_n(pe) + local_node_on_pe); - const NodeID local_node = _p_graph->global_to_local_node(global_node); + static_cast(_graph->offset_n(pe) + local_node_on_pe); + const NodeID local_node = _graph->global_to_local_node(global_node); KASSERT(new_block != _p_graph->block(local_node)); // otherwise, we should not // have gotten this message @@ -422,17 +421,17 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagationn()); + KASSERT(u < _graph->n()); return _p_graph->block(u); } [[nodiscard]] BlockID cluster(const NodeID u) { - KASSERT(u < _p_graph->total_n()); - return _p_graph->is_owned_node(u) ? _next_partition[u] : _p_graph->block(u); + KASSERT(u < _graph->total_n()); + return _graph->is_owned_node(u) ? 
_next_partition[u] : _p_graph->block(u); } void move_node(const NodeID u, const BlockID b) { - KASSERT(u < _p_graph->n()); + KASSERT(u < _graph->n()); _next_partition[u] = b; } @@ -477,7 +476,7 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagationn(); + return u < _graph->n(); } private: @@ -496,6 +495,8 @@ class LPRefinerImpl final : public ChunkRandomdLabelPropagation>(ctx, p_graph)), + _compressed_impl(std::make_unique>(ctx, p_graph) + ) {} + + void refine(DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx) { + p_graph.reified( + [&](const DistributedCSRGraph &csr_graph) { _csr_impl->refine(csr_graph, p_graph, p_ctx); }, + [&](const DistributedCompressedGraph &compressed_graph) { + _compressed_impl->refine(compressed_graph, p_graph, p_ctx); + } + ); + } + +private: + std::unique_ptr> _csr_impl; + std::unique_ptr> _compressed_impl; +}; + /* * Public interface */ @@ -523,7 +545,7 @@ LPRefinerFactory::create(DistributedPartitionedGraph &p_graph, const PartitionCo LPRefiner::LPRefiner( const Context &ctx, DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx ) - : _impl(std::make_unique(ctx, p_graph)), + : _impl(std::make_unique(ctx, p_graph)), _p_graph(p_graph), _p_ctx(p_ctx) {} diff --git a/kaminpar-dist/refinement/lp/lp_refiner.h b/kaminpar-dist/refinement/lp/lp_refiner.h index 880c2b65..a9c59995 100644 --- a/kaminpar-dist/refinement/lp/lp_refiner.h +++ b/kaminpar-dist/refinement/lp/lp_refiner.h @@ -42,13 +42,13 @@ class LPRefiner : public GlobalRefiner { LPRefiner(LPRefiner &&) noexcept = default; LPRefiner &operator=(LPRefiner &&) = delete; - ~LPRefiner(); + ~LPRefiner() override; void initialize() final; bool refine() final; private: - std::unique_ptr _impl; + std::unique_ptr _impl; DistributedPartitionedGraph &_p_graph; const PartitionContext &_p_ctx; From 365cffd71f7d35fcd10d92c12ca37756d203b7ff Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sun, 16 Jun 2024 14:23:54 +0200 Subject: [PATCH 03/54] 
feat(kaminpar-dist): reduce memory usage by sharing data structures between algorithm implementations --- .../clustering/hem/hem_clusterer.cc | 37 +++++- .../clustering/lp/global_lp_clusterer.cc | 70 ++++++++++-- .../clustering/lp/local_lp_clusterer.cc | 57 +++++++++- kaminpar-dist/distributed_label_propagation.h | 105 ++++++++++++------ kaminpar-dist/refinement/lp/lp_refiner.cc | 63 +++++++++-- 5 files changed, 273 insertions(+), 59 deletions(-) diff --git a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc index 830a5269..df330035 100644 --- a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc @@ -22,9 +22,29 @@ SET_DEBUG(true); // Implementation // +struct HEMClustererMemoryContext { + NoinitVector color_blacklist; + NoinitVector color_sizes; + NoinitVector color_sorted_nodes; +}; + template class HEMClustererImpl { public: - HEMClustererImpl(const Context &ctx) : _input_ctx(ctx), _ctx(ctx.coarsening.hem) {} + explicit HEMClustererImpl(const Context &ctx) : _input_ctx(ctx), _ctx(ctx.coarsening.hem) {} + + void setup(HEMClustererMemoryContext &memory_context) { + _color_blacklist = std::move(memory_context.color_blacklist); + _color_sizes = std::move(memory_context.color_sizes); + _color_sorted_nodes = std::move(memory_context.color_sorted_nodes); + } + + HEMClustererMemoryContext release() { + return { + std::move(_color_blacklist), + std::move(_color_sizes), + std::move(_color_sorted_nodes), + }; + } void set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) { _max_cluster_weight = max_cluster_weight; @@ -489,15 +509,26 @@ class HEMClustererImplWrapper { } void cluster(StaticArray &matching, const DistributedGraph &graph) { + const auto compute_cluster = [&](auto &impl, const auto &graph) { + impl.setup(_memory_context); + impl.cluster(matching, graph); + _memory_context = impl.release(); + }; + graph.reified( - 
[&](const DistributedCSRGraph &csr_graph) { _csr_impl->cluster(matching, csr_graph); }, + [&](const DistributedCSRGraph &csr_graph) { + HEMClustererImpl &impl = *_csr_impl; + compute_cluster(impl, csr_graph); + }, [&](const DistributedCompressedGraph &compressed_graph) { - _compressed_impl->cluster(matching, compressed_graph); + HEMClustererImpl &impl = *_compressed_impl; + compute_cluster(impl, compressed_graph); } ); } private: + HEMClustererMemoryContext _memory_context; std::unique_ptr> _csr_impl; std::unique_ptr> _compressed_impl; }; diff --git a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc index ea31a82f..82918c13 100644 --- a/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/lp/global_lp_clusterer.cc @@ -34,6 +34,16 @@ struct GlobalLPClusteringConfig : public LabelPropagationConfig { }; } // namespace +struct GlobalLPClusteringMemoryContext : public LabelPropagationMemoryContext< + GlobalLPClusteringConfig::RatingMap, + GlobalLPClusteringConfig::ClusterID> { + StaticArray changed_label; + StaticArray locked; + growt::GlobalNodeIDMap cluster_weights{0}; + StaticArray local_cluster_weights; + growt::GlobalNodeIDMap weight_deltas{0}; +}; + template class GlobalLPClusteringImpl final : public ChunkRandomdLabelPropagation< GlobalLPClusteringImpl, @@ -55,15 +65,42 @@ class GlobalLPClusteringImpl final : public ChunkRandomdLabelPropagation< explicit GlobalLPClusteringImpl(const Context &ctx) : _ctx(ctx), _c_ctx(ctx.coarsening), - _changed_label(ctx.partition.graph->n), - _cluster_weights(ctx.partition.graph->total_n - ctx.partition.graph->n), - _local_cluster_weights(ctx.partition.graph->n), _passive_high_degree_threshold(_c_ctx.global_lp.passive_high_degree_threshold) { set_max_num_iterations(_c_ctx.global_lp.num_iterations); Base::set_max_degree(_c_ctx.global_lp.active_high_degree_threshold); 
Base::set_max_num_neighbors(_c_ctx.global_lp.max_num_neighbors); } + void setup(GlobalLPClusteringMemoryContext &memory_context) { + Base::setup(memory_context); + _changed_label = std::move(memory_context.changed_label); + _locked = std::move(memory_context.locked); + _cluster_weights = std::move(memory_context.cluster_weights); + _local_cluster_weights = std::move(memory_context.local_cluster_weights); + _weight_deltas = std::move(memory_context.weight_deltas); + } + + GlobalLPClusteringMemoryContext release() { + _weight_delta_handles_ets.clear(); + _cluster_weights_handles_ets.clear(); + + auto [rating_map_ets, active, favored_clusters] = Base::release(); + return { + std::move(rating_map_ets), + std::move(active), + std::move(favored_clusters), + std::move(_changed_label), + std::move(_locked), + std::move(_cluster_weights), + std::move(_local_cluster_weights), + std::move(_weight_deltas), + }; + } + + void preinitialize(const NodeID num_nodes, const NodeID num_active_nodes) { + Base::preinitialize(num_nodes, num_active_nodes, num_nodes); + } + void initialize(const Graph &graph) { TIMER_BARRIER(graph.communicator()); SCOPED_TIMER("Label propagation"); @@ -329,14 +366,15 @@ class GlobalLPClusteringImpl final : public ChunkRandomdLabelPropagation< } void allocate(const Graph &graph) { - const NodeID allocated_num_active_nodes = _changed_label.size(); - - if (allocated_num_active_nodes < graph.n()) { + if (_changed_label.size() < graph.n()) { _changed_label.resize(graph.n()); + } + + if (_local_cluster_weights.size() < graph.n()) { _local_cluster_weights.resize(graph.n()); } - Base::allocate(graph.total_n(), graph.n(), graph.total_n()); + Base::allocate(); if (_c_ctx.global_lp.prevent_cyclic_moves) { _locked.resize(graph.n()); @@ -665,17 +703,31 @@ class GlobalLPClusteringImplWrapper { } void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) { + const auto compute_clustering = [&](auto &impl, const auto &graph) { + 
impl.setup(_memory_context); + impl.compute_clustering(clustering, graph); + _memory_context = impl.release(); + }; + + const NodeID num_nodes = graph.total_n(); + const NodeID num_active_nodes = graph.n(); + _csr_impl->preinitialize(num_nodes, num_active_nodes); + _compressed_impl->preinitialize(num_nodes, num_active_nodes); + graph.reified( [&](const DistributedCSRGraph &csr_graph) { - _csr_impl->compute_clustering(clustering, csr_graph); + GlobalLPClusteringImpl &impl = *_csr_impl; + compute_clustering(impl, csr_graph); }, [&](const DistributedCompressedGraph &compressed_graph) { - _compressed_impl->compute_clustering(clustering, compressed_graph); + GlobalLPClusteringImpl &impl = *_compressed_impl; + compute_clustering(impl, compressed_graph); } ); } private: + GlobalLPClusteringMemoryContext _memory_context; std::unique_ptr> _csr_impl; std::unique_ptr> _compressed_impl; }; diff --git a/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc b/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc index aca7beb4..562a8941 100644 --- a/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/lp/local_lp_clusterer.cc @@ -19,6 +19,12 @@ struct LocalLPClusteringConfig : public LabelPropagationConfig { static constexpr bool kUseTwoHopClustering = true; }; +struct LocalLPClusteringMemoryContext : public LabelPropagationMemoryContext< + LocalLPClusteringConfig::RatingMap, + LocalLPClusteringConfig::ClusterID> { + OwnedRelaxedClusterWeightVector::ClusterWeights cluster_weights; +}; + template class LocalLPClusteringImpl final : public ChunkRandomdLabelPropagation< LocalLPClusteringImpl, @@ -43,19 +49,38 @@ class LocalLPClusteringImpl final : public ChunkRandomdLabelPropagation< set_max_num_iterations(c_ctx.local_lp.num_iterations); Base::set_max_degree(c_ctx.local_lp.active_high_degree_threshold); Base::set_max_num_neighbors(c_ctx.local_lp.max_num_neighbors); - Base::allocate(max_n, max_n); - 
ClusterWeightBase::allocate_cluster_weights(max_n); } - void initialize(const DistributedGraph &graph) { + void setup(LocalLPClusteringMemoryContext &memory_context) { + Base::setup(memory_context); + ClusterWeightBase::setup_cluster_weights(std::move(memory_context.cluster_weights)); + } + + LocalLPClusteringMemoryContext release() { + auto [rating_map_ets, active, favored_clusters] = Base::release(); + return { + std::move(rating_map_ets), + std::move(active), + std::move(favored_clusters), + ClusterWeightBase::take_cluster_weights(), + }; + } + + void preinitialize(const NodeID num_nodes) { + Base::preinitialize(num_nodes, num_nodes); + } + + void initialize(const Graph &graph) { Base::initialize(&graph, graph.n()); + Base::allocate(); + ClusterWeightBase::allocate_cluster_weights(graph.n()); } void set_max_cluster_weight(const GlobalNodeWeight max_cluster_weight) { _max_cluster_weight = max_cluster_weight; } - void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) { + void compute_clustering(StaticArray &clustering, const Graph &graph) { init_clusters_ref(clustering); initialize(graph); @@ -179,9 +204,31 @@ class LocalLPClusteringImplWrapper { _compressed_impl->set_max_cluster_weight(weight); } - void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) {} + void compute_clustering(StaticArray &clustering, const DistributedGraph &graph) { + const auto compute_clustering = [&](auto &impl, const auto &graph) { + impl.setup(_memory_context); + impl.compute_clustering(clustering, graph); + _memory_context = impl.release(); + }; + + const NodeID num_nodes = graph.total_n(); + _csr_impl->preinitialize(num_nodes); + _compressed_impl->preinitialize(num_nodes); + + graph.reified( + [&](const DistributedCSRGraph &csr_graph) { + LocalLPClusteringImpl &impl = *_csr_impl; + compute_clustering(impl, csr_graph); + }, + [&](const DistributedCompressedGraph &compressed_graph) { + LocalLPClusteringImpl &impl = *_compressed_impl; + 
compute_clustering(impl, compressed_graph); + } + ); + } private: + LocalLPClusteringMemoryContext _memory_context; std::unique_ptr> _csr_impl; std::unique_ptr> _compressed_impl; }; diff --git a/kaminpar-dist/distributed_label_propagation.h b/kaminpar-dist/distributed_label_propagation.h index 9d233410..dd872e91 100644 --- a/kaminpar-dist/distributed_label_propagation.h +++ b/kaminpar-dist/distributed_label_propagation.h @@ -59,6 +59,12 @@ struct LabelPropagationConfig { static constexpr bool kUseLocalActiveSetStrategy = false; }; +template struct LabelPropagationMemoryContext { + tbb::enumerable_thread_specific rating_map_ets; + ScalableVector> active; + ScalableVector> favored_clusters; +}; + /*! * Generic implementation of parallel label propagation. To use, inherit from * this class and implement all mandatory template functions. @@ -107,51 +113,83 @@ template class LabelPropagat return _expected_total_gain; } + void setup(LabelPropagationMemoryContext &memory_context) { + _rating_map_ets = std::move(memory_context.rating_map_ets); + _active = std::move(memory_context.active); + _favored_clusters = std::move(memory_context.favored_clusters); + } + + LabelPropagationMemoryContext release() { + return { + std::move(_rating_map_ets), + std::move(_active), + std::move(_favored_clusters), + }; + } + protected: /*! - * (Re)allocates memory to run label propagation on a graph with \c num_nodes - * nodes. + * Selects the number of nodes \c num_nodes of the graph for which a clustering is to be + * computed and the number of clusters \c num_clusters. + * * @param num_nodes Number of nodes in the graph. + * @param num_clusters The number of clusters. */ - void allocate(const NodeID num_nodes, const ClusterID num_clusters) { - allocate(num_nodes, num_nodes, num_clusters); + void preinitialize(const NodeID num_nodes, const ClusterID num_clusters) { + preinitialize(num_nodes, num_nodes, num_clusters); } /*! 
- * (Re)allocates memory to run label propagation on a graph with \c num_nodes - * nodes in total, but a clustering is only computed for the first \c - * num_active_nodes nodes. + * Selects the number of nodes \c num_nodes of the graph for which a clustering is to be + * computed, but a clustering is only computed for the first \c num_active_nodes nodes, and the + * number of clusters \c num_clusters. * - * This is mostly useful for distributed graphs where ghost nodes are always - * inactive. + * This is mostly useful for distributed graphs where ghost nodes are always inactive. * - * @param num_nodes Total number of nodes in the graph, i.e., neighbors of - * active nodes have an ID less than this. - * @param num_active_nodes Number of nodes for which a cluster label is - * computed. + * @param num_nodes Number of nodes in the graph. + * @param num_active_nodes Number of nodes for which a cluster label is computed. + * @param num_clusters The number of clusters. + */ + void preinitialize( + const NodeID num_nodes, const NodeID num_active_nodes, const ClusterID num_clusters + ) { + _num_nodes = num_nodes; + _num_active_nodes = num_active_nodes; + _prev_num_clusters = _num_clusters; + _num_clusters = num_clusters; + } + + /*! + * (Re)allocates memory to run label propagation on. Must be called after \c preinitialize(). 
*/ - void allocate(const NodeID num_nodes, const NodeID num_active_nodes, const NodeID num_clusters) { - if (_num_nodes < num_nodes) { - if constexpr (Config::kUseLocalActiveSetStrategy) { - _active.resize(num_nodes); + void allocate() { + if constexpr (Config::kUseLocalActiveSetStrategy) { + if (_active.size() < _num_nodes) { + _active.resize(_num_nodes); } - _num_nodes = num_nodes; } - if (_num_active_nodes < num_active_nodes) { - if constexpr (Config::kUseActiveSetStrategy) { - _active.resize(num_active_nodes); + if constexpr (Config::kUseActiveSetStrategy) { + if (_active.size() < _num_active_nodes) { + _active.resize(_num_active_nodes); } - if constexpr (Config::kUseTwoHopClustering) { - _favored_clusters.resize(num_active_nodes); + } + + if constexpr (Config::kUseTwoHopClustering) { + if (_favored_clusters.size() < _num_active_nodes) { + _favored_clusters.resize(_num_active_nodes); } - _num_active_nodes = num_active_nodes; } - if (_num_clusters < num_clusters) { + + if (_rating_map_ets.empty()) { + _rating_map_ets = + tbb::enumerable_thread_specific([&_num_clusters = _num_clusters] { + return RatingMap(_num_clusters); + }); + } else if (_prev_num_clusters < _num_clusters) { for (auto &rating_map : _rating_map_ets) { - rating_map.change_max_size(num_clusters); + rating_map.change_max_size(_num_clusters); } - _num_clusters = num_clusters; } } @@ -815,9 +853,7 @@ template class LabelPropagat NodeID _max_num_neighbors = std::numeric_limits::max(); //! Thread-local map to compute gain values. - tbb::enumerable_thread_specific _rating_map_ets{[this] { - return RatingMap(_num_clusters); - }}; + tbb::enumerable_thread_specific _rating_map_ets; //! Flags nodes with at least one node in its neighborhood that changed //! clusters during the last iteration. Nodes without this flag set must not @@ -838,6 +874,7 @@ template class LabelPropagat NodeID _num_nodes = 0; NodeID _num_active_nodes = 0; ClusterID _num_clusters = 0; + ClusterID _prev_num_clusters = 0; }; /*! 
@@ -1190,12 +1227,18 @@ class ChunkRandomdLabelPropagation : public LabelPropagation class OwnedRelaxedClusterWeightVector { public: + using ClusterWeights = StaticArray; + void allocate_cluster_weights(const ClusterID num_clusters) { if (_cluster_weights.size() < num_clusters) { _cluster_weights.resize(num_clusters); } } + void setup_cluster_weights(ClusterWeights cluster_weights) { + _cluster_weights = std::move(cluster_weights); + } + auto &&take_cluster_weights() { return std::move(_cluster_weights); } @@ -1223,7 +1266,7 @@ template class OwnedRelaxedClusterW } private: - StaticArray _cluster_weights; + ClusterWeights _cluster_weights; }; template class NonatomicClusterVectorRef { diff --git a/kaminpar-dist/refinement/lp/lp_refiner.cc b/kaminpar-dist/refinement/lp/lp_refiner.cc index 8db12ad8..613d6e06 100644 --- a/kaminpar-dist/refinement/lp/lp_refiner.cc +++ b/kaminpar-dist/refinement/lp/lp_refiner.cc @@ -38,6 +38,13 @@ struct LPRefinerConfig : public LabelPropagationConfig { static constexpr bool kUseLocalActiveSetStrategy = true; }; +struct LPRefinerMemoryContext + : public LabelPropagationMemoryContext { + ScalableVector next_partition; + ScalableVector gains; + ScalableVector> block_weights; +}; + template class LPRefinerImpl final : public ChunkRandomdLabelPropagation, LPRefinerConfig, Graph> { @@ -121,12 +128,28 @@ class LPRefinerImpl final public: explicit LPRefinerImpl(const Context &ctx, const DistributedPartitionedGraph &p_graph) : _lp_ctx(ctx.refinement.lp), - _par_ctx(ctx.parallel), - _next_partition(p_graph.n()), - _gains(p_graph.n()), - _block_weights(p_graph.k()) { + _par_ctx(ctx.parallel) { Base::set_max_degree(_lp_ctx.active_high_degree_threshold); - Base::allocate(p_graph.total_n(), p_graph.n(), p_graph.k()); + Base::preinitialize(p_graph.total_n(), p_graph.n(), p_graph.k()); + } + + void setup(LPRefinerMemoryContext &memory_context) { + Base::setup(memory_context); + _next_partition = std::move(memory_context.next_partition); + _gains = 
std::move(memory_context.gains); + _block_weights = std::move(memory_context.block_weights); + } + + LPRefinerMemoryContext release() { + auto [rating_map_ets, active, favored_clusters] = Base::release(); + return { + std::move(rating_map_ets), + std::move(active), + std::move(favored_clusters), + std::move(_next_partition), + std::move(_gains), + std::move(_block_weights), + }; } void @@ -143,7 +166,10 @@ class LPRefinerImpl final if (_gains.size() < graph.n()) { _gains.resize(graph.n()); } - Base::allocate(graph.total_n(), graph.n(), _block_weights.size()); + if (_block_weights.size() < p_graph.k()) { + _block_weights.resize(p_graph.k()); + } + Base::allocate(); STOP_TIMER(); Base::initialize(&graph, _p_ctx->k); @@ -510,6 +536,10 @@ class LPRefinerImpl final Statistics _statistics; }; +// +// Private interface +// + class LPRefinerImplWrapper { public: LPRefinerImplWrapper(const Context &ctx, DistributedPartitionedGraph &p_graph) @@ -518,22 +548,33 @@ class LPRefinerImplWrapper { ) {} void refine(DistributedPartitionedGraph &p_graph, const PartitionContext &p_ctx) { + const auto refine = [&](auto &impl, const auto &graph) { + impl.setup(_memory_context); + impl.refine(graph, p_graph, p_ctx); + _memory_context = impl.release(); + }; + p_graph.reified( - [&](const DistributedCSRGraph &csr_graph) { _csr_impl->refine(csr_graph, p_graph, p_ctx); }, + [&](const DistributedCSRGraph &csr_graph) { + LPRefinerImpl &impl = *_csr_impl; + refine(impl, csr_graph); + }, [&](const DistributedCompressedGraph &compressed_graph) { - _compressed_impl->refine(compressed_graph, p_graph, p_ctx); + LPRefinerImpl &impl = *_compressed_impl; + refine(impl, compressed_graph); } ); } private: + LPRefinerMemoryContext _memory_context; std::unique_ptr> _csr_impl; std::unique_ptr> _compressed_impl; }; -/* - * Public interface - */ +// +// Public interface +// LPRefinerFactory::LPRefinerFactory(const Context &ctx) : _ctx(ctx) {} From 0d9ab340de4a13f77ef5a7bba94f57ad025ee08d Mon Sep 17 00:00:00 
2001 From: Daniel Salwasser Date: Sun, 16 Jun 2024 14:29:52 +0200 Subject: [PATCH 04/54] feat(kaminpar-dist): add heap profiling --- apps/dKaMinPar.cc | 71 +++++++- apps/io/dist_parhip_parser.cc | 146 +++++++++++++++++ apps/io/dist_parhip_parser.h | 13 +- kaminpar-common/heap_profiler.cc | 23 ++- kaminpar-common/heap_profiler.h | 13 +- kaminpar-dist/dkaminpar.cc | 77 ++++++--- kaminpar-dist/heap_profiler.cc | 151 ++++++++++++++++++ kaminpar-dist/heap_profiler.h | 26 +++ .../kaminpar_initial_partitioner.cc | 2 + kaminpar-dist/partitioning/deep_multilevel.cc | 6 + 10 files changed, 496 insertions(+), 32 deletions(-) create mode 100644 kaminpar-dist/heap_profiler.cc create mode 100644 kaminpar-dist/heap_profiler.h diff --git a/apps/dKaMinPar.cc b/apps/dKaMinPar.cc index 3e9c1735..02e7392b 100644 --- a/apps/dKaMinPar.cc +++ b/apps/dKaMinPar.cc @@ -15,6 +15,7 @@ #include #include "kaminpar-common/environment.h" +#include "kaminpar-common/heap_profiler.h" #include "apps/io/dist_io.h" #include "apps/io/dist_parhip_parser.h" @@ -32,6 +33,11 @@ struct ApplicationContext { int max_timer_depth = 3; + bool heap_profiler_detailed = false; + int heap_profiler_max_depth = 3; + bool heap_profiler_print_structs = false; + float heap_profiler_min_struct_size = 10; + BlockID k = 0; bool quiet = false; @@ -119,6 +125,41 @@ The output should be stored in a file and can be used by the -C,--config option. cli.add_flag("--no-huge-pages", app.no_huge_pages, "Do not use huge pages via TBBmalloc."); + // Heap profiler options + if constexpr (kHeapProfiling) { + auto *hp_group = cli.add_option_group("Heap Profiler"); + + hp_group + ->add_flag( + "-H,--hp-print-detailed", + app.heap_profiler_detailed, + "Show all levels and data structures in the result summary." + ) + ->default_val(app.heap_profiler_detailed); + hp_group + ->add_option( + "--hp-max-depth", + app.heap_profiler_max_depth, + "Set maximum heap profiler depth shown in the result summary." 
+ ) + ->default_val(app.heap_profiler_max_depth); + hp_group + ->add_option( + "--hp-print-structs", + app.heap_profiler_print_structs, + "Print data structure memory statistics in the result summary." + ) + ->default_val(app.heap_profiler_print_structs); + hp_group + ->add_option( + "--hp-min-struct-size", + app.heap_profiler_min_struct_size, + "Sets the minimum size of a data structure in MB to be included in the result summary." + ) + ->default_val(app.heap_profiler_min_struct_size) + ->check(CLI::NonNegativeNumber); + } + // Algorithmic options create_all_options(&cli, ctx); } @@ -176,6 +217,16 @@ NodeID load_kagen_graph(const ApplicationContext &app, dKaMinPar &partitioner) { return graph.vertex_range.second - graph.vertex_range.first; } +NodeID load_csr_graph(const ApplicationContext &app, dKaMinPar &partitioner) { + DistributedGraph graph(std::make_unique( + io::parhip::csr_read(app.graph_filename, false, MPI_COMM_WORLD) + )); + const NodeID n = graph.n(); + + partitioner.import_graph(std::move(graph)); + return n; +} + NodeID load_compressed_graph(const ApplicationContext &app, dKaMinPar &partitioner) { DistributedGraph graph(std::make_unique( io::parhip::compressed_read(app.graph_filename, false, MPI_COMM_WORLD) @@ -215,6 +266,8 @@ int main(int argc, char *argv[]) { // If available, use huge pages for large allocations scalable_allocation_mode(TBBMALLOC_USE_HUGE_PAGES, !app.no_huge_pages); + ENABLE_HEAP_PROFILER(); + dKaMinPar partitioner(MPI_COMM_WORLD, app.num_threads, ctx); dKaMinPar::reseed(app.seed); @@ -226,7 +279,18 @@ int main(int argc, char *argv[]) { partitioner.context().debug.graph_filename = app.graph_filename; partitioner.set_max_timer_depth(app.max_timer_depth); + if constexpr (kHeapProfiling) { + auto &global_heap_profiler = heap_profiler::HeapProfiler::global(); + if (app.heap_profiler_detailed) { + global_heap_profiler.set_detailed_summary_options(); + } else { + global_heap_profiler.set_max_depth(app.heap_profiler_max_depth); + 
global_heap_profiler.set_print_data_structs(app.heap_profiler_print_structs); + global_heap_profiler.set_min_data_struct_size(app.heap_profiler_min_struct_size); + } + } + + START_HEAP_PROFILER("Input Graph Allocation"); // Load the graph via KaGen or via our graph compressor. const NodeID n = [&] { if (ctx.compression.enabled) { @@ -236,13 +300,18 @@ int main(int argc, char *argv[]) { } }(); - // Compute the partition + // Allocate memory for the partition std::vector partition(n); + STOP_HEAP_PROFILER(); + + // Compute the partition partitioner.compute_partition(app.k, partition.data()); if (!app.partition_filename.empty()) { dist::io::partition::write(app.partition_filename, partition); } + DISABLE_HEAP_PROFILER(); + return MPI_Finalize(); } diff --git a/apps/io/dist_parhip_parser.cc b/apps/io/dist_parhip_parser.cc index 36bc3ab4..b02909bd 100644 --- a/apps/io/dist_parhip_parser.cc +++ b/apps/io/dist_parhip_parser.cc @@ -107,6 +107,8 @@ struct ParhipHeader { namespace kaminpar::dist::io::parhip { +namespace { + std::pair compute_edge_range(const EdgeID num_edges, const mpi::PEID size, const mpi::PEID rank) { const EdgeID chunk = num_edges / size; @@ -145,6 +147,150 @@ NodeID find_node_by_edge( return high.first; } +} // namespace + +DistributedCSRGraph csr_read(const std::string &filename, const bool sorted, const MPI_Comm comm) { + BinaryReader reader(filename); + + const auto version = reader.read(0); + const auto num_nodes = reader.read(sizeof(std::uint64_t)); + const auto num_edges = reader.read(sizeof(std::uint64_t) * 2); + const ParhipHeader header(version, num_nodes, num_edges); + + std::size_t position = ParhipHeader::kSize; + + const EdgeID *raw_nodes = reader.fetch(position); + position += (header.num_nodes + 1) * sizeof(EdgeID); + + const NodeID *raw_edges = reader.fetch(position); + position += header.num_edges * sizeof(NodeID); + + const NodeWeight *raw_node_weights = reader.fetch(position); + position += header.num_nodes * sizeof(NodeWeight); + + 
const EdgeWeight *raw_edge_weights = reader.fetch(position); + + // Since the offsets stored in the (raw) node array of the binary are relative byte addresses + // into the binary itself, these offsets must be mapped to the actual edge IDs. + const EdgeID nodes_offset_base = ParhipHeader::kSize + (header.num_nodes + 1) * sizeof(EdgeID); + const auto map_edge_offset = [&](const NodeID node) { + return (raw_nodes[node] - nodes_offset_base) / sizeof(NodeID); + }; + + const mpi::PEID size = mpi::get_comm_size(comm); + const mpi::PEID rank = mpi::get_comm_rank(comm); + + const auto [first_edge, last_edge] = compute_edge_range(num_edges, size, rank); + + const std::uint64_t first_node = + find_node_by_edge(num_nodes, num_edges, first_edge, map_edge_offset); + const std::uint64_t last_node = + find_node_by_edge(num_nodes, num_edges, last_edge, map_edge_offset); + + const NodeID num_local_nodes = last_node - first_node; + const EdgeID num_local_edges = map_edge_offset(last_node) - map_edge_offset(first_node); + + StaticArray node_distribution(size + 1); + node_distribution[rank + 1] = last_node; + MPI_Allgather( + MPI_IN_PLACE, + 0, + MPI_DATATYPE_NULL, + node_distribution.data() + 1, + 1, + mpi::type::get(), + comm + ); + + StaticArray edge_distribution(size + 1); + edge_distribution[rank] = num_local_edges; + MPI_Allgather( + MPI_IN_PLACE, + 1, + mpi::type::get(), + edge_distribution.data(), + 1, + mpi::type::get(), + comm + ); + std::exclusive_scan( + edge_distribution.begin(), + edge_distribution.end(), + edge_distribution.begin(), + static_cast(0) + ); + + graph::GhostNodeMapper mapper(rank, node_distribution); + StaticArray nodes(num_local_nodes + 1, static_array::noinit); + StaticArray edges(num_local_edges, static_array::noinit); + StaticArray edge_weights; + if (header.has_edge_weights) { + edge_weights.resize(num_local_edges, static_array::noinit); + } + + EdgeID edge = 0; + for (NodeID u = first_node; u < last_node; ++u) { + const NodeID node = u - first_node; + 
nodes[node] = edge; + + const EdgeID offset = map_edge_offset(u); + const EdgeID next_offset = map_edge_offset(u + 1); + + const auto degree = static_cast(next_offset - offset); + for (NodeID i = 0; i < degree; ++i) { + const EdgeID e = offset + i; + + NodeID adjacent_node = raw_edges[e]; + if (adjacent_node >= first_node && adjacent_node < last_node) { + edges[edge] = adjacent_node - first_node; + } else { + edges[edge] = mapper.new_ghost_node(adjacent_node); + } + + if (header.has_edge_weights) [[unlikely]] { + edge_weights[edge] = raw_edge_weights[e]; + } + + edge += 1; + } + } + nodes[num_local_nodes] = edge; + + StaticArray node_weights; + if (header.has_node_weights) { + node_weights.resize(num_local_nodes + mapper.next_ghost_node(), static_array::noinit); + + tbb::parallel_for(tbb::blocked_range(0, num_local_nodes), [&](const auto &r) { + for (NodeID u = r.begin(); u != r.end(); ++u) { + node_weights[u] = raw_node_weights[first_node + u]; + } + }); + } + + auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); + + DistributedCSRGraph graph( + std::move(node_distribution), + std::move(edge_distribution), + std::move(nodes), + std::move(edges), + std::move(node_weights), + std::move(edge_weights), + std::move(ghost_owner), + std::move(ghost_to_global), + std::move(global_to_ghost), + sorted, + comm + ); + + // Fill in ghost node weights + if (header.has_node_weights) { + graph::synchronize_ghost_node_weights(graph); + } + + return graph; +} + DistributedCompressedGraph compressed_read(const std::string &filename, const bool sorted, const MPI_Comm comm) { BinaryReader reader(filename); diff --git a/apps/io/dist_parhip_parser.h b/apps/io/dist_parhip_parser.h index 7b1994db..4c6e3363 100644 --- a/apps/io/dist_parhip_parser.h +++ b/apps/io/dist_parhip_parser.h @@ -10,15 +10,26 @@ #include #include "kaminpar-dist/datastructures/distributed_compressed_graph.h" +#include "kaminpar-dist/datastructures/distributed_csr_graph.h" namespace 
kaminpar::dist::io::parhip { +/*! + * Reads a distributed graph that is stored in a file with ParHiP format. + * + * @param filename The name of the file to read. + * @param sorted Whether the nodes of the graph to read are stored in degree-buckets order. + * @param comm The group of processes that read the distributed graph. + * @return The graph that is stored in the file. + */ +DistributedCSRGraph csr_read(const std::string &filename, const bool sorted, const MPI_Comm comm); + /*! * Reads and compresses a distributed graph that is stored in a file with ParHiP format. * * @param filename The name of the file to read. * @param sorted Whether the nodes of the graph to read are stored in degree-buckets order. - * @param comm The group of processed that reads and compress the distributed graph. + * @param comm The group of processes that read and compress the distributed graph. * @return The graph that is stored in the file. */ DistributedCompressedGraph diff --git a/kaminpar-common/heap_profiler.cc b/kaminpar-common/heap_profiler.cc index de86ae3c..a9a77932 100644 --- a/kaminpar-common/heap_profiler.cc +++ b/kaminpar-common/heap_profiler.cc @@ -154,6 +154,9 @@ void HeapProfiler::print_heap_profile(std::ostream &out) { out << kFreeTitle << std::string(stats.free_size - kFreeTitle.length() + 1, ' '); out << kAllocsTitle << std::string(stats.allocs - kAllocsTitle.length() + 1, ' '); out << kFreesTitle << std::string(stats.frees - kFreesTitle.length() + 1, ' '); + if (!_tree.annotation.empty()) { + out << " " << _tree.annotation; + } out << '\n'; print_heap_tree_node(out, root, stats, _max_depth, _print_data_structs, _min_data_struct_size); @@ -180,6 +183,10 @@ std::size_t HeapProfiler::get_frees() { return _tree.currentNode->frees; } +[[nodiscard]] HeapProfiler::HeapProfileTree &HeapProfiler::tree_root() { + return _tree; +} + void HeapProfiler::print_heap_tree_node( std::ostream &out, const HeapProfileTreeNode &node, @@ -232,6 +239,10 @@ void 
HeapProfiler::print_heap_tree_node( ); } } + + if (depth == 0) { + out << std::endl; + } } void HeapProfiler::print_indentation(std::ostream &out, std::size_t depth, bool last) { @@ -248,13 +259,13 @@ void HeapProfiler::print_percentage(std::ostream &out, const HeapProfileTreeNode out << "("; if (percentage >= 0.999995) { - out << "100.00"; + out << "100.0"; } else { if (percentage < 0.1) { out << "0"; } - out << percentage * 100; + out << std::fixed << std::setprecision(2) << percentage * 100; } out << "%) "; @@ -273,7 +284,13 @@ void HeapProfiler::print_statistics( out << free_size << std::string(stats.free_size - free_size.length() + 1, ' '); out << node.allocs << std::string(stats.allocs - std::to_string(node.allocs).length() + 1, ' ') - << node.frees << std::string(stats.frees - std::to_string(node.frees).length(), ' ') << '\n'; + << node.frees << std::string(stats.frees - std::to_string(node.frees).length(), ' '); + + if (!node.annotation.empty()) { + out << " " << node.annotation; + } + + out << '\n'; } void HeapProfiler::print_data_structures( diff --git a/kaminpar-common/heap_profiler.h b/kaminpar-common/heap_profiler.h index 3877e8f6..37a0acb6 100644 --- a/kaminpar-common/heap_profiler.h +++ b/kaminpar-common/heap_profiler.h @@ -313,7 +313,7 @@ class HeapProfiler { static constexpr char kPadding = '.'; static constexpr std::size_t kBranchLength = 3; - static constexpr std::size_t kPercentageLength = 10; + static constexpr std::size_t kPercentageLength = 9; static constexpr std::size_t kDataStructSizeThreshold = 1024; static std::string to_megabytes(std::size_t bytes) { @@ -322,9 +322,11 @@ class HeapProfiler { return stream.str(); } +public: struct HeapProfileTreeNode { std::string_view name; std::string description; + std::string annotation; HeapProfileTreeNode *parent; std::vector> children; @@ -363,10 +365,12 @@ class HeapProfiler { struct HeapProfileTree { HeapProfileTreeNode root; HeapProfileTreeNode *currentNode; + std::string annotation; 
HeapProfileTree(std::string_view name) : root(name, "", nullptr), currentNode(&root) {} }; +private: struct HeapProfileTreeStats { std::size_t len; std::size_t max_alloc_size; @@ -558,6 +562,13 @@ class HeapProfiler { */ std::size_t get_frees(); + /*! + * Returns the tree that stores the data of this heap profiler. + * + * @return The tree that stores the data of this heap profiler. + */ + [[nodiscard]] HeapProfileTree &tree_root(); + private: bool _enabled = false; std::mutex _mutex; diff --git a/kaminpar-dist/dkaminpar.cc b/kaminpar-dist/dkaminpar.cc index ef2a5e6b..7bfd6ceb 100644 --- a/kaminpar-dist/dkaminpar.cc +++ b/kaminpar-dist/dkaminpar.cc @@ -22,6 +22,7 @@ #include "kaminpar-dist/factories.h" #include "kaminpar-dist/graphutils/rearrangement.h" #include "kaminpar-dist/graphutils/synchronization.h" +#include "kaminpar-dist/heap_profiler.h" #include "kaminpar-dist/metrics.h" #include "kaminpar-dist/timer.h" @@ -29,6 +30,7 @@ #include "kaminpar-common/console_io.h" #include "kaminpar-common/environment.h" +#include "kaminpar-common/heap_profiler.h" #include "kaminpar-common/random.h" namespace kaminpar { @@ -42,51 +44,72 @@ void print_partition_summary( const bool parseable, const bool root ) { + MPI_Comm comm = p_graph.communicator(); + const auto edge_cut = metrics::edge_cut(p_graph); const auto imbalance = metrics::imbalance(p_graph); const auto feasible = metrics::is_feasible(p_graph, ctx.partition) && p_graph.k() == ctx.partition.k; #ifdef KAMINPAR_ENABLE_TIMERS - finalize_distributed_timer(Timer::global(), p_graph.communicator()); + finalize_distributed_timer(Timer::global(), comm); #endif // KAMINPAR_ENABLE_TIMERS - if (!root) { - // Non-root PEs are only needed to compute the partition metrics - return; + bool heap_profile_root; + if constexpr (kHeapProfiling) { + auto &heap_profiler = heap_profiler::HeapProfiler::global(); + const int heap_profile_root_rank = finalize_distributed_heap_profiler(heap_profiler, comm); + + const int rank = 
mpi::get_comm_rank(comm); + heap_profile_root = rank == heap_profile_root_rank; } - cio::print_delimiter("Result Summary"); + if (root) { + cio::print_delimiter("Result Summary"); - if (parseable) { - LOG << "RESULT cut=" << edge_cut << " imbalance=" << imbalance << " feasible=" << feasible - << " k=" << p_graph.k(); + if (parseable) { + LOG << "RESULT cut=" << edge_cut << " imbalance=" << imbalance << " feasible=" << feasible + << " k=" << p_graph.k(); #ifdef KAMINPAR_ENABLE_TIMERS - std::cout << "TIME "; - Timer::global().print_machine_readable(std::cout); + std::cout << "TIME "; + Timer::global().print_machine_readable(std::cout); #else // KAMINPAR_ENABLE_TIMERS - LOG << "TIME disabled"; + LOG << "TIME disabled"; #endif // KAMINPAR_ENABLE_TIMERS - } + } #ifdef KAMINPAR_ENABLE_TIMERS - Timer::global().print_human_readable(std::cout, max_timer_depth); + Timer::global().print_human_readable(std::cout, max_timer_depth); #else // KAMINPAR_ENABLE_TIMERS - LOG << "Global Timers: disabled"; + LOG << "Global Timers: disabled"; #endif // KAMINPAR_ENABLE_TIMERS - LOG; - LOG << "Partition summary:"; - if (p_graph.k() != ctx.partition.k) { - LOG << logger::RED << " Number of blocks: " << p_graph.k(); - } else { - LOG << " Number of blocks: " << p_graph.k(); + LOG; } - LOG << " Edge cut: " << edge_cut; - LOG << " Imbalance: " << imbalance; - if (feasible) { - LOG << " Feasible: yes"; - } else { - LOG << logger::RED << " Feasible: no"; + + if constexpr (kHeapProfiling) { + mpi::barrier(comm); + + if (heap_profile_root) { + PRINT_HEAP_PROFILE(std::cout); + } + + mpi::barrier(comm); + } + + if (root) { + LOG << "Partition summary:"; + if (p_graph.k() != ctx.partition.k) { + LOG << logger::RED << " Number of blocks: " << p_graph.k(); + } else { + LOG << " Number of blocks: " << p_graph.k(); + } + LOG << " Edge cut: " << edge_cut; + LOG << " Imbalance: " << imbalance; + if (feasible) { + LOG << " Feasible: yes"; + } else { + LOG << logger::RED << " Feasible: no"; + } } } @@ -293,6 
+316,7 @@ GlobalEdgeWeight dKaMinPar::compute_partition(const BlockID k, BlockID *partitio print_input_summary(_ctx, graph, _output_level == OutputLevel::EXPERIMENT, root); } + START_HEAP_PROFILER("Partitioning"); START_TIMER("Partitioning"); if (!_was_rearranged && _ctx.rearrange_by != GraphOrdering::NATURAL) { DistributedCSRGraph &csr_graph = @@ -304,6 +328,7 @@ GlobalEdgeWeight dKaMinPar::compute_partition(const BlockID k, BlockID *partitio } auto p_graph = factory::create_partitioner(_ctx, graph)->partition(); STOP_TIMER(); + STOP_HEAP_PROFILER(); KASSERT( dist::debug::validate_partition(p_graph), diff --git a/kaminpar-dist/heap_profiler.cc b/kaminpar-dist/heap_profiler.cc new file mode 100644 index 00000000..a10b4f4d --- /dev/null +++ b/kaminpar-dist/heap_profiler.cc @@ -0,0 +1,151 @@ +/******************************************************************************* + * Functions to annotate the heap profiler tree with aggregate information from + * all PEs. + * + * @file: heap_profiler.cc + * @author: Daniel Salwasser + * @date: 16.06.2024 + ******************************************************************************/ +#include "kaminpar-dist/heap_profiler.h" + +#include +#include +#include +#include + +#include "kaminpar-mpi/wrapper.h" + +#include "kaminpar-common/heap_profiler.h" + +namespace kaminpar::dist { + +namespace { +using HeapProfiler = heap_profiler::HeapProfiler; +using HeapProfilerTree = HeapProfiler::HeapProfileTree; +using HeapProfilerTreeNode = HeapProfiler::HeapProfileTreeNode; + +std::string to_megabytes(std::size_t bytes) { + std::stringstream stream; + stream << std::fixed << std::setprecision(2) << (bytes / (float)(1024 * 1024)); + return stream.str(); +} + +template +std::vector gather_trunc_string(const std::string_view str, MPI_Comm comm) { + std::array trunc; + const std::size_t len = std::min(kSize - 1, str.length()); + str.copy(trunc.data(), len); + trunc[len] = 0; + + const auto [size, rank] = mpi::get_comm_info(comm); + 
std::vector recv_buffer(size * kSize); + mpi::allgather(trunc.data(), kSize, recv_buffer.data(), kSize, comm); + + std::vector strings; + for (mpi::PEID pe = 0; pe < size; ++pe) { + strings.emplace_back(recv_buffer.data() + pe * kSize); + } + + return strings; +} + +void generate_statistics( + HeapProfilerTreeNode *node, + const std::size_t mem_str_width, + const std::size_t pe_str_width, + const int root, + MPI_Comm comm +) { + constexpr std::size_t kTruncSize = 1024; + + const auto names = gather_trunc_string(node->name, comm); + const bool diverged_node = std::any_of(names.begin(), names.end(), [&](const std::string &name) { + return name.substr(0, kTruncSize) != node->name.substr(0, kTruncSize); + }); + + if (diverged_node) { + return; + } + + const auto stats = mpi::gather(node->max_alloc_size, root, comm); + const auto num_children = mpi::allgather(node->children.size(), comm); + const bool is_root = mpi::get_comm_rank(comm) == root; + + if (is_root) { + const auto min_it = std::min_element(stats.begin(), stats.end()); + const mpi::PEID min_pe = std::distance(stats.begin(), min_it); + const std::size_t min = *min_it; + + const auto max_it = std::max_element(stats.begin(), stats.end()); + const mpi::PEID max_pe = std::distance(stats.begin(), max_it); + const std::size_t max = *max_it; + + const auto sum = static_cast(std::accumulate(stats.begin(), stats.end(), 0.0)); + const auto mean = sum / static_cast(stats.size()); + + const auto pad = [](auto value, const std::size_t width) { + std::string str; + if constexpr (std::is_same_v) { + str = std::move(value); + } else { + str = std::to_string(value); + } + + if (str.length() < width) { + str = std::string(width - str.length(), ' ') + str; + } + + return str; + }; + + std::stringstream stream; + stream << "[ " << min_pe << " : " << pad(to_megabytes(min), mem_str_width) << " mb | " + << pad(to_megabytes(mean), mem_str_width) << " mb | " << max_pe << " : " + << pad(to_megabytes(max), mem_str_width) << " mb ]"; + + 
node->annotation = stream.str(); + } + + const bool nondiverged_children = + std::all_of(num_children.begin(), num_children.end(), [&](const std::size_t num) { + return num == node->children.size(); + }); + if (nondiverged_children) { + for (HeapProfilerTreeNode *child : node->children) { + generate_statistics(child, mem_str_width, pe_str_width, root, comm); + } + } +} + +std::pair +gather_max_peak_memory(const HeapProfilerTreeNode *node, MPI_Comm comm) { + const auto stats = mpi::allgather(node->max_alloc_size, comm); + + const auto max_it = std::max_element(stats.begin(), stats.end()); + const mpi::PEID max_pe = std::distance(stats.begin(), max_it); + const std::size_t max = *max_it; + + return std::make_pair(max_pe, max); +} + +} // namespace + +int finalize_distributed_heap_profiler(heap_profiler::HeapProfiler &heap_profiler, MPI_Comm comm) { + HeapProfilerTree &tree = heap_profiler.tree_root(); + + const auto [root, max_peak_memory] = gather_max_peak_memory(&tree.root, comm); + const std::size_t mem_str_width = to_megabytes(max_peak_memory).length(); + const std::size_t pe_str_width = std::to_string(mpi::get_comm_size(comm)).length(); + + std::stringstream stream; + stream << "PE" << std::string(pe_str_width - 1, ' ') << " : " + << "min" << std::string(mem_str_width + 3, ' ') << "avg" + << std::string(mem_str_width + 2, ' ') << "PE" << std::string(pe_str_width - 1, ' ') + << " : max"; + + tree.annotation = stream.str(); + generate_statistics(&tree.root, mem_str_width, pe_str_width, root, comm); + return root; +} + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/heap_profiler.h b/kaminpar-dist/heap_profiler.h new file mode 100644 index 00000000..ae7031f6 --- /dev/null +++ b/kaminpar-dist/heap_profiler.h @@ -0,0 +1,26 @@ +/******************************************************************************* + * Functions to annotate the heap profiler tree with aggregate information from + * all PEs. 
+ * + * @file: heap_profiler.h + * @author: Daniel Salwasser + * @date: 16.06.2024 + ******************************************************************************/ +#pragma once + +#include + +#include "kaminpar-common/heap_profiler.h" + +namespace kaminpar::dist { + +/** + * Annotates a heap profiler tree with aggregate information from all PEs. + * + * @param heap_profiler The heap profiler to annotate. + * @param comm The group of processes whose information to aggregate. + * @return The rank of the process that stores the annotated heap profile. + */ +int finalize_distributed_heap_profiler(heap_profiler::HeapProfiler &heap_profiler, MPI_Comm comm); + +} // namespace kaminpar::dist diff --git a/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc b/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc index 61108c96..09755c2d 100644 --- a/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc +++ b/kaminpar-dist/initial_partitioning/kaminpar_initial_partitioner.cc @@ -30,10 +30,12 @@ shm::PartitionedGraph KaMinParInitialPartitioner::initial_partition( shm_ctx.setup(graph); DISABLE_TIMERS(); + START_HEAP_PROFILER("KaMinPar"); const bool was_quiet = Logger::is_quiet(); Logger::set_quiet_mode(true); auto p_graph = shm::factory::create_partitioner(graph, shm_ctx)->partition(); Logger::set_quiet_mode(was_quiet); + STOP_HEAP_PROFILER(); ENABLE_TIMERS(); return p_graph; diff --git a/kaminpar-dist/partitioning/deep_multilevel.cc b/kaminpar-dist/partitioning/deep_multilevel.cc index 721df776..fb051272 100644 --- a/kaminpar-dist/partitioning/deep_multilevel.cc +++ b/kaminpar-dist/partitioning/deep_multilevel.cc @@ -60,6 +60,7 @@ DistributedPartitionedGraph DeepMultilevelPartitioner::partition() { const PEID initial_size = mpi::get_comm_size(_input_graph.communicator()); PEID current_num_pes = initial_size; + START_HEAP_PROFILER("Coarsening"); while (!converged && graph->global_n() > desired_num_nodes) { SCOPED_TIMER("Coarsening"); @@ 
-102,12 +103,14 @@ DistributedPartitionedGraph DeepMultilevelPartitioner::partition() { graph = c_graph; } + STOP_HEAP_PROFILER(); TIMER_BARRIER(_input_graph.communicator()); /* * Initial Partitioning */ START_TIMER("Initial partitioning"); + START_HEAP_PROFILER("Initial partitioning"); auto initial_partitioner = TIMED_SCOPE("Allocation") { return factory::create_initial_partitioner(_input_ctx); }; @@ -144,6 +147,7 @@ DistributedPartitionedGraph DeepMultilevelPartitioner::partition() { assert::heavy ); print_initial_partitioning_result(dist_p_graph, ip_p_ctx); + STOP_HEAP_PROFILER(); STOP_TIMER(); TIMER_BARRIER(_input_graph.communicator()); @@ -157,6 +161,7 @@ DistributedPartitionedGraph DeepMultilevelPartitioner::partition() { * Uncoarsening and Refinement */ START_TIMER("Uncoarsening"); + START_HEAP_PROFILER("Uncoarsening"); auto refiner_factory = TIMED_SCOPE("Allocation") { return factory::create_refiner(_input_ctx); }; @@ -339,6 +344,7 @@ DistributedPartitionedGraph DeepMultilevelPartitioner::partition() { LOG << " Feasible: " << (feasible ? 
"yes" : "no"); STOP_TIMER(); } + STOP_HEAP_PROFILER(); STOP_TIMER(); TIMER_BARRIER(_input_graph.communicator()); From a50ed3018a3e2c7f55a4b14692d41b4337ec37c5 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 12:10:25 +0200 Subject: [PATCH 05/54] fix(kaminpar-dist): abort compressed graph neighbors operation correctly --- .../graph-compression/compressed_edges.h | 30 +++++++++++++++++++ .../distributed_compressed_graph.h | 4 ++- .../distributed_compressed_graph_test.cc | 24 +++++++++++++++ 3 files changed, 57 insertions(+), 1 deletion(-) diff --git a/kaminpar-common/graph-compression/compressed_edges.h b/kaminpar-common/graph-compression/compressed_edges.h index 50ce0058..3edfd278 100644 --- a/kaminpar-common/graph-compression/compressed_edges.h +++ b/kaminpar-common/graph-compression/compressed_edges.h @@ -147,6 +147,36 @@ template class CompressedEdges { return {first_edge, first_edge + degree}; } + template + void decode_neighborhood( + const NodeID node, + const NodeID max_num_neighbors, + const EdgeID edge_offset, + const EdgeID next_edge_offset, + Lambda &&l + ) const { + KASSERT(max_num_neighbors > 0); + constexpr bool non_stoppable = std::is_void_v>; + + NodeID num_neighbors_visited = 1; + decode_neighborhood( + node, + edge_offset, + next_edge_offset, + [&](const EdgeID incident_edge, const NodeID adjacent_node) { + bool abort = num_neighbors_visited++ >= max_num_neighbors; + + if constexpr (non_stoppable) { + l(incident_edge, adjacent_node); + } else { + abort |= l(incident_edge, adjacent_node); + } + + return abort; + } + ); + } + template void decode_neighborhood( const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset, Lambda &&l diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index 75d18a2a..797aca05 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ 
b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -313,7 +313,9 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { template inline void neighbors(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { - _compressed_edges.decode_neighborhood(u, _nodes[u], _nodes[u + 1], std::forward(l)); + _compressed_edges.decode_neighborhood( + u, max_num_neighbors, _nodes[u], _nodes[u + 1], std::forward(l) + ); } // diff --git a/tests/dist/datastructures/distributed_compressed_graph_test.cc b/tests/dist/datastructures/distributed_compressed_graph_test.cc index be4d782b..6a034b69 100644 --- a/tests/dist/datastructures/distributed_compressed_graph_test.cc +++ b/tests/dist/datastructures/distributed_compressed_graph_test.cc @@ -207,4 +207,28 @@ TEST(DistributedCompressedGraphTest, compressed_graph_neighbors_operation) { TEST_ON_ALL_GRAPHS(test_compressed_graph_neighbors_operation); } +static void test_compressed_graph_neighbors_limit_operation(const DistributedCSRGraph &graph) { + const auto compressed_graph = DistributedCompressedGraphBuilder::compress(graph); + + for (const NodeID u : graph.nodes()) { + const NodeID max_neighbor_count = std::max(1, graph.degree(u) / 2); + + NodeID graph_num_neighbors_visited = 0; + graph.neighbors(u, max_neighbor_count, [&](const EdgeID e, const NodeID v) { + graph_num_neighbors_visited += 1; + }); + + NodeID compressed_graph_num_neighbors_visited = 0; + compressed_graph.neighbors(u, max_neighbor_count, [&](const EdgeID e, const NodeID v) { + compressed_graph_num_neighbors_visited += 1; + }); + + EXPECT_EQ(graph_num_neighbors_visited, compressed_graph_num_neighbors_visited); + } +} + +TEST(CompressedGraphTest, compressed_graph_neighbors_limit_operation) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_neighbors_limit_operation); +} + } // namespace kaminpar::dist From 1dd64f74b5456e98868d3eac302766cfafafac0c Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 12:12:20 +0200 Subject: 
[PATCH 06/54] feat(kaminpar-dist): add option to compress graphs stored with METIS format --- apps/CMakeLists.txt | 2 + apps/dKaMinPar.cc | 42 +++-- apps/io/dist_metis_parser.cc | 315 +++++++++++++++++++++++++++++++++++ apps/io/dist_metis_parser.h | 28 ++++ apps/io/file_tokener.h | 13 +- apps/io/metis_parser.cc | 1 + apps/io/shm_io.cc | 1 + 7 files changed, 389 insertions(+), 13 deletions(-) create mode 100644 apps/io/dist_metis_parser.cc create mode 100644 apps/io/dist_metis_parser.h diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index b3be4645..0b9bbe5d 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -40,6 +40,8 @@ if (TARGET kaminpar_dist) add_dist_app(dKaMinPar dKaMinPar.cc) target_sources(dKaMinPar PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_io.cc + ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_metis_parser.h + ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_metis_parser.cc ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_parhip_parser.h ${CMAKE_CURRENT_SOURCE_DIR}/io/dist_parhip_parser.cc) target_link_libraries(dKaMinPar PRIVATE KaGen::KaGen) diff --git a/apps/dKaMinPar.cc b/apps/dKaMinPar.cc index 02e7392b..80744cd2 100644 --- a/apps/dKaMinPar.cc +++ b/apps/dKaMinPar.cc @@ -18,6 +18,7 @@ #include "kaminpar-common/heap_profiler.h" #include "apps/io/dist_io.h" +#include "apps/io/dist_metis_parser.h" #include "apps/io/dist_parhip_parser.h" using namespace kaminpar; @@ -164,6 +165,14 @@ The output should be stored in a file and can be used by the -C,--config option. 
create_all_options(&cli, ctx); } +template [[noreturn]] void root_run_and_exit(Lambda &&l) { + const int rank = mpi::get_comm_rank(MPI_COMM_WORLD); + if (rank == 0) { + l(); + } + std::exit(MPI_Finalize()); +} + NodeID load_kagen_graph(const ApplicationContext &app, dKaMinPar &partitioner) { using namespace kagen; @@ -228,9 +237,20 @@ NodeID load_csr_graph(const ApplicationContext &app, dKaMinPar &partitioner) { } NodeID load_compressed_graph(const ApplicationContext &app, dKaMinPar &partitioner) { - DistributedGraph graph(std::make_unique( - io::parhip::compressed_read(app.graph_filename, false, MPI_COMM_WORLD) - )); + const auto read_graph = [&] { + switch (app.io_format) { + case kagen::FileFormat::METIS: + return io::metis::compress_read(app.graph_filename, false, MPI_COMM_WORLD); + case kagen::FileFormat::PARHIP: + return io::parhip::compressed_read(app.graph_filename, false, MPI_COMM_WORLD); + default: + root_run_and_exit([&] { + LOG_ERROR << "Only graphs stored in files with METIS or ParHIP format can be compressed!"; + }); + } + }; + + DistributedGraph graph(std::make_unique(read_graph())); const NodeID n = graph.n(); partitioner.import_graph(std::move(graph)); @@ -251,16 +271,16 @@ int main(int argc, char *argv[]) { setup_context(cli, app, ctx); CLI11_PARSE(cli, argc, argv); - if (rank == 0 && app.dump_config) { - CLI::App dump; - create_all_options(&dump, ctx); - std::cout << dump.config_to_str(true, true); - std::exit(1); + if (app.dump_config) { + root_run_and_exit([&] { + CLI::App dump; + create_all_options(&dump, ctx); + std::cout << dump.config_to_str(true, true); + }); } - if (rank == 0 && app.show_version) { - std::cout << Environment::GIT_SHA1 << std::endl; - std::exit(0); + if (app.show_version) { + root_run_and_exit([&] { std::cout << Environment::GIT_SHA1 << std::endl; }); } // If available, use huge pages for large allocations diff --git a/apps/io/dist_metis_parser.cc b/apps/io/dist_metis_parser.cc new file mode 100644 index 00000000..7f5e7552 
--- /dev/null +++ b/apps/io/dist_metis_parser.cc @@ -0,0 +1,315 @@ +/******************************************************************************* + * Sequential METIS parser for distributed graphs. + * + * @file: dist_metis_parser.h + * @author: Daniel Salwasser + * @date: 22.06.2024 + ******************************************************************************/ +#include "apps/io/dist_metis_parser.h" + +#include + +#include "kaminpar-mpi/datatype.h" +#include "kaminpar-mpi/utils.h" + +#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" +#include "kaminpar-dist/datastructures/ghost_node_mapper.h" +#include "kaminpar-dist/dkaminpar.h" +#include "kaminpar-dist/graphutils/synchronization.h" + +#include "apps/io/file_tokener.h" + +namespace kaminpar::dist::io::metis { +using namespace kaminpar::io; + +namespace { + +struct MetisHeader { + std::uint64_t num_nodes = 0; + std::uint64_t num_edges = 0; + bool has_node_weights = false; + bool has_edge_weights = false; +}; + +MetisHeader parse_header(MappedFileToker &toker) { + toker.skip_spaces(); + while (toker.current() == '%') { + toker.skip_line(); + toker.skip_spaces(); + } + + const std::uint64_t num_nodes = toker.scan_uint(); + const std::uint64_t num_edges = toker.scan_uint() * 2; + const std::uint64_t format = (toker.current() != '\n') ? 
toker.scan_uint() : 0; + toker.consume_char('\n'); + + if (format != 0 && format != 1 && format != 10 && format != 11 && format && format != 100 && + format != 110 && format != 101 && format != 111) { + LOG_WARNING << "invalid or unsupported graph format"; + } + + [[maybe_unused]] const bool has_node_sizes = format / 100; // == 1xx + const bool has_node_weights = (format % 100) / 10; // == x1x + const bool has_edge_weights = format % 10; // == xx1 + + if (has_node_sizes) { + LOG_WARNING << "ignoring node sizes"; + } + + KASSERT( + num_nodes <= static_cast(std::numeric_limits::max()), + "number of nodes is too large for the node ID type" + ); + KASSERT( + num_edges <= static_cast(std::numeric_limits::max()), + "number of edges is too large for the edge ID type" + ); + KASSERT( + num_edges <= (num_nodes * (num_nodes - 1)) / 2, + "specified number of edges is impossibly large" + ); + + return { + .num_nodes = num_nodes, + .num_edges = num_edges, + .has_node_weights = has_node_weights, + .has_edge_weights = has_edge_weights, + }; +} + +template +void parse_graph( + MappedFileToker &toker, + const MetisHeader header, + NextNodeCB &&next_node_cb, + NextEdgeCB &&next_edge_cb +) { + static_assert(std::is_invocable_v); + static_assert(std::is_invocable_v); + constexpr bool stoppable = std::is_invocable_r_v; + + for (std::uint64_t u = 0; u < header.num_nodes; ++u) { + toker.skip_spaces(); + while (toker.current() == '%') { + toker.skip_line(); + toker.skip_spaces(); + } + + std::uint64_t node_weight = 1; + if (header.has_node_weights) { + node_weight = toker.scan_uint(); + } + + if constexpr (stoppable) { + if (next_node_cb(node_weight)) { + return; + } + } else { + next_node_cb(node_weight); + } + + while (std::isdigit(toker.current())) { + const std::uint64_t v = toker.scan_uint() - 1; + + std::uint64_t edge_weight = 1; + if (header.has_edge_weights) { + edge_weight = toker.scan_uint(); + } + + next_edge_cb(edge_weight, v); + } + + if (toker.valid_position()) { + 
toker.consume_char('\n'); + } + } +} + +} // namespace + +namespace { + +std::pair +compute_edge_range(const EdgeID num_edges, const mpi::PEID size, const mpi::PEID rank) { + const EdgeID chunk = num_edges / size; + const EdgeID rem = num_edges % size; + const EdgeID from = rank * chunk + std::min(rank, rem); + const EdgeID to = + std::min(from + ((static_cast(rank) < rem) ? chunk + 1 : chunk), num_edges); + return std::make_pair(from, to); +} + +std::tuple find_node_by_edge( + MappedFileToker &toker, + const MetisHeader header, + const EdgeID first_edge, + const EdgeID last_edge +) { + NodeID a = 0; + NodeID first_node = 0; + NodeID last_node = 0; + EdgeID actual_first_edge = 0; + std::size_t start_pos; + + EdgeID current_edge = 0; + parse_graph( + toker, + header, + [&](const auto) { + if (current_edge < first_edge) { + first_node += 1; + return false; + } + + if (current_edge < last_edge) { + if (last_node == 0) { + start_pos = toker.position(); + actual_first_edge = current_edge; + } + + last_node += 1; + return false; + } + + return true; + }, + [&](const auto, const auto) { current_edge += 1; } + ); + + const EdgeID num_edges = current_edge - actual_first_edge; + return std::make_tuple(first_node, first_node + last_node, num_edges, start_pos); +} + +} // namespace + +DistributedCompressedGraph +compress_read(const std::string &filename, const bool sorted, const MPI_Comm comm) { + MappedFileToker toker(filename); + MetisHeader header = parse_header(toker); + + const mpi::PEID size = mpi::get_comm_size(comm); + const mpi::PEID rank = mpi::get_comm_rank(comm); + + const auto [first_edge, last_edge] = compute_edge_range(header.num_edges, size, rank); + const auto [first_node, last_node, num_local_edges, start_pos] = + find_node_by_edge(toker, header, first_edge, last_edge); + const NodeID num_local_nodes = last_node - first_node; + + StaticArray node_distribution(size + 1); + node_distribution[rank + 1] = last_node; + MPI_Allgather( + MPI_IN_PLACE, + 0, + 
MPI_DATATYPE_NULL, + node_distribution.data() + 1, + 1, + mpi::type::get(), + comm + ); + + StaticArray edge_distribution(size + 1); + edge_distribution[rank] = num_local_edges; + MPI_Allgather( + MPI_IN_PLACE, + 1, + mpi::type::get(), + edge_distribution.data(), + 1, + mpi::type::get(), + comm + ); + std::exclusive_scan( + edge_distribution.begin(), + edge_distribution.end(), + edge_distribution.begin(), + static_cast(0) + ); + + graph::GhostNodeMapper mapper(rank, node_distribution); + DistributedCompressedGraphBuilder builder( + num_local_nodes, num_local_edges, header.has_node_weights, header.has_edge_weights, sorted + ); + + StaticArray node_weights; + if (header.has_node_weights) { + node_weights.resize(header.num_nodes, static_array::noinit); + } + + toker.seek(start_pos); + header.num_nodes = num_local_nodes; + + std::vector> neighbourhood; + NodeID node = 0; + EdgeID edge = 0; + parse_graph( + toker, + header, + [&](const auto weight) { + if (node > 0) { + builder.add_node(node - 1, neighbourhood); + neighbourhood.clear(); + } + + if (header.has_node_weights) { + node_weights[node] = static_cast(weight); + } + + node += 1; + }, + [&, first_node = first_node, last_node = last_node](const auto weight, const auto v) { + NodeID adjacent_node = static_cast(v); + if (adjacent_node >= first_node && adjacent_node < last_node) { + adjacent_node = adjacent_node - first_node; + } else { + adjacent_node = mapper.new_ghost_node(adjacent_node); + } + + neighbourhood.emplace_back(adjacent_node, static_cast(weight)); + edge += 1; + } + ); + + builder.add_node(node - 1, neighbourhood); + neighbourhood.clear(); + neighbourhood.shrink_to_fit(); + + if (header.has_node_weights && mapper.next_ghost_node() > 0) { + StaticArray actual_node_weights( + num_local_nodes + mapper.next_ghost_node(), static_array::noinit + ); + + tbb::parallel_for(tbb::blocked_range(0, num_local_nodes), [&](const auto &r) { + for (NodeID u = r.begin(); u != r.end(); ++u) { + actual_node_weights[u] = 
node_weights[u]; + } + }); + + node_weights = std::move(actual_node_weights); + } + + auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); + auto [nodes, edges, edge_weights] = builder.build(); + + DistributedCompressedGraph graph( + std::move(node_distribution), + std::move(edge_distribution), + std::move(nodes), + std::move(edges), + std::move(node_weights), + std::move(edge_weights), + std::move(ghost_owner), + std::move(ghost_to_global), + std::move(global_to_ghost), + sorted, + comm + ); + + // Fill in ghost node weights + if (header.has_node_weights) { + graph::synchronize_ghost_node_weights(graph); + } + + return graph; +} + +} // namespace kaminpar::dist::io::metis diff --git a/apps/io/dist_metis_parser.h b/apps/io/dist_metis_parser.h new file mode 100644 index 00000000..e40d6cc5 --- /dev/null +++ b/apps/io/dist_metis_parser.h @@ -0,0 +1,28 @@ +/******************************************************************************* + * Sequential METIS parser for distributed graphs. + * + * @file: dist_metis_parser.h + * @author: Daniel Salwasser + * @date: 22.06.2024 + ******************************************************************************/ +#pragma once + +#include + +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" + +namespace kaminpar::dist::io::metis { + +/*! + * Reads and compresses a graph that is stored in a file with METIS format. + * + * @param filename The name of the file to read. + * @param sorted Whether the nodes of the graph to read are stored in degree-buckets order. + * @param may_dismiss Whether to abort the compression when it is determined that the compressed + * graph uses more memory than the uncompressed graph. + * @return The graph that is stored in the file, or nothing if the graph was dismissed. 
+ */ +DistributedCompressedGraph +compress_read(const std::string &filename, const bool sorted, const MPI_Comm comm); + +} // namespace kaminpar::dist::io::metis diff --git a/apps/io/file_tokener.h b/apps/io/file_tokener.h index 162e536a..e4a1b140 100644 --- a/apps/io/file_tokener.h +++ b/apps/io/file_tokener.h @@ -8,6 +8,7 @@ #pragma once #include +#include #include #include @@ -17,7 +18,7 @@ #include #include -namespace kaminpar::shm::io { +namespace kaminpar::io { class TokerException : public std::exception { public: @@ -60,6 +61,14 @@ class MappedFileToker { close(_fd); } + void reset() { + _position = 0; + } + + void seek(const std::size_t position) { + _position = position; + } + inline void skip_spaces() { while (valid_position() && current() == ' ') { advance(); @@ -167,4 +176,4 @@ class MappedFileToker { char *_contents; }; -} // namespace kaminpar::shm::io +} // namespace kaminpar::io diff --git a/apps/io/metis_parser.cc b/apps/io/metis_parser.cc index f6de5b4a..9ffe2945 100644 --- a/apps/io/metis_parser.cc +++ b/apps/io/metis_parser.cc @@ -18,6 +18,7 @@ #include "apps/io/file_tokener.h" namespace kaminpar::shm::io::metis { +using namespace kaminpar::io; namespace { diff --git a/apps/io/shm_io.cc b/apps/io/shm_io.cc index 7a3be04b..00a47b0e 100644 --- a/apps/io/shm_io.cc +++ b/apps/io/shm_io.cc @@ -88,6 +88,7 @@ void write(const std::string &filename, const std::vector &partition) { } std::vector read(const std::string &filename) { + using namespace kaminpar::io; MappedFileToker toker(filename); std::vector partition; From 92ff486d7fc62e3c840a28eaa5cecc4cf04a2df4 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 14:54:19 +0200 Subject: [PATCH 07/54] fix(kaminpar-dist): compilation error --- kaminpar-dist/heap_profiler.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/kaminpar-dist/heap_profiler.cc b/kaminpar-dist/heap_profiler.cc index a10b4f4d..e73239f3 100644 --- a/kaminpar-dist/heap_profiler.cc +++ 
b/kaminpar-dist/heap_profiler.cc @@ -9,6 +9,7 @@ #include "kaminpar-dist/heap_profiler.h" #include +#include #include #include #include From 097268d538101082ff983b5327bbe85b8c8cded4 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 14:56:18 +0200 Subject: [PATCH 08/54] fix(kaminpar-dist): cover edge case where a process has no local nodes --- apps/io/dist_metis_parser.cc | 68 +++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 33 deletions(-) diff --git a/apps/io/dist_metis_parser.cc b/apps/io/dist_metis_parser.cc index 7f5e7552..84fec3c1 100644 --- a/apps/io/dist_metis_parser.cc +++ b/apps/io/dist_metis_parser.cc @@ -176,7 +176,7 @@ std::tuple find_node_by_edge( [&](const auto, const auto) { current_edge += 1; } ); - const EdgeID num_edges = current_edge - actual_first_edge; + const EdgeID num_edges = (last_node == 0) ? 0 : current_edge - actual_first_edge; return std::make_tuple(first_node, first_node + last_node, num_edges, start_pos); } @@ -235,43 +235,45 @@ compress_read(const std::string &filename, const bool sorted, const MPI_Comm com node_weights.resize(header.num_nodes, static_array::noinit); } - toker.seek(start_pos); - header.num_nodes = num_local_nodes; + if (num_local_nodes > 0) { + toker.seek(start_pos); + header.num_nodes = num_local_nodes; + + std::vector> neighbourhood; + NodeID node = 0; + EdgeID edge = 0; + parse_graph( + toker, + header, + [&](const auto weight) { + if (node > 0) { + builder.add_node(node - 1, neighbourhood); + neighbourhood.clear(); + } - std::vector> neighbourhood; - NodeID node = 0; - EdgeID edge = 0; - parse_graph( - toker, - header, - [&](const auto weight) { - if (node > 0) { - builder.add_node(node - 1, neighbourhood); - neighbourhood.clear(); - } + if (header.has_node_weights) { + node_weights[node] = static_cast(weight); + } - if (header.has_node_weights) { - node_weights[node] = static_cast(weight); - } + node += 1; + }, + [&, first_node = first_node, last_node = last_node](const 
auto weight, const auto v) { + NodeID adjacent_node = static_cast(v); + if (adjacent_node >= first_node && adjacent_node < last_node) { + adjacent_node = adjacent_node - first_node; + } else { + adjacent_node = mapper.new_ghost_node(adjacent_node); + } - node += 1; - }, - [&, first_node = first_node, last_node = last_node](const auto weight, const auto v) { - NodeID adjacent_node = static_cast(v); - if (adjacent_node >= first_node && adjacent_node < last_node) { - adjacent_node = adjacent_node - first_node; - } else { - adjacent_node = mapper.new_ghost_node(adjacent_node); + neighbourhood.emplace_back(adjacent_node, static_cast(weight)); + edge += 1; } + ); - neighbourhood.emplace_back(adjacent_node, static_cast(weight)); - edge += 1; - } - ); - - builder.add_node(node - 1, neighbourhood); - neighbourhood.clear(); - neighbourhood.shrink_to_fit(); + builder.add_node(node - 1, neighbourhood); + neighbourhood.clear(); + neighbourhood.shrink_to_fit(); + } if (header.has_node_weights && mapper.next_ghost_node() > 0) { StaticArray actual_node_weights( From 7b584eb4b5f176b4b03a9876b04a88eefb562095 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 16:01:16 +0200 Subject: [PATCH 09/54] fix(kaminpar-dist): replicate compressed graph if required --- .../distributed_compressed_graph.h | 12 +++++ kaminpar-dist/graphutils/replicator.cc | 47 +++++++++++-------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index 797aca05..ac5a5d55 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -504,6 +504,18 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { return _color_sizes; } + // + // Functions to access raw members of this graph + // + + [[nodiscard]] const auto &raw_node_weights() const { + return _node_weights; + } 
+ + [[nodiscard]] const auto &raw_edge_weights() const { + return _edge_weights; + } + private: void init_degree_buckets(); void init_total_weights(); diff --git a/kaminpar-dist/graphutils/replicator.cc b/kaminpar-dist/graphutils/replicator.cc index 640b6da3..5dd2f0f4 100644 --- a/kaminpar-dist/graphutils/replicator.cc +++ b/kaminpar-dist/graphutils/replicator.cc @@ -29,6 +29,26 @@ namespace kaminpar::dist { SET_DEBUG(false); +namespace { + +template StaticArray copy_raw_nodes(const Graph &graph) { + constexpr bool kIsCompressedGraph = std::is_same_v; + + // Copy node array with (uncompressed) edge IDs or simply forward the raw nodes if the graph is + // uncompresed + if constexpr (kIsCompressedGraph) { + StaticArray raw_nodes(graph.n() + 1); + for (NodeID u : graph.nodes()) { + raw_nodes[u + 1] = raw_nodes[u] + graph.degree(u); + } + return raw_nodes; + } else { + return StaticArray(graph.n() + 1, graph.raw_nodes().data()); + } +} + +} // namespace + std::unique_ptr allgather_graph(const DistributedGraph &graph) { return std::make_unique(replicate_graph_everywhere(graph)); } @@ -68,7 +88,7 @@ allgather_graph(const DistributedPartitionedGraph &p_graph) { return {std::move(shm_graph), std::move(shm_p_graph)}; } -shm::Graph replicate_graph_everywhere(const DistributedCSRGraph &graph) { +template shm::Graph replicate_graph_everywhere(const Graph &graph) { KASSERT( graph.global_n() < std::numeric_limits::max(), "number of nodes exceeds int size", @@ -107,7 +127,7 @@ shm::Graph replicate_graph_everywhere(const DistributedCSRGraph &graph) { auto edges_displs = mpi::build_distribution_displs(graph.edge_distribution()); mpi::allgatherv( - graph.raw_nodes().data(), + copy_raw_nodes(graph).data(), asserting_cast(graph.n()), nodes.data(), nodes_recvcounts.data(), @@ -194,17 +214,11 @@ shm::Graph replicate_graph_everywhere(const DistributedCSRGraph &graph) { } shm::Graph replicate_graph_everywhere(const DistributedGraph &graph) { - const AbstractDistributedGraph 
*underlying_graph = graph.underlying_graph(); - - if (const auto *csr_graph = dynamic_cast(graph.underlying_graph()); - csr_graph != nullptr) { - return replicate_graph_everywhere(*csr_graph); - } - - __builtin_unreachable(); + return graph.reified([&](const auto &graph) { return replicate_graph_everywhere(graph); }); } -DistributedGraph replicate_graph(const DistributedCSRGraph &graph, const int num_replications) { +template +DistributedGraph replicate_graph(const Graph &graph, const int num_replications) { const PEID size = mpi::get_comm_size(graph.communicator()); const PEID rank = mpi::get_comm_rank(graph.communicator()); @@ -277,7 +291,7 @@ DistributedGraph replicate_graph(const DistributedCSRGraph &graph, const int num // Exchange data -- except for node weights (need the number of ghost nodes // to allocate the vector) mpi::allgatherv( - graph.raw_nodes().data(), + copy_raw_nodes(graph).data(), asserting_cast(graph.n()), nodes.data(), nodes_counts.data(), @@ -459,14 +473,7 @@ DistributedGraph replicate_graph(const DistributedCSRGraph &graph, const int num } DistributedGraph replicate_graph(const DistributedGraph &graph, const int num_replications) { - const AbstractDistributedGraph *underlying_graph = graph.underlying_graph(); - - if (const auto *csr_graph = dynamic_cast(graph.underlying_graph()); - csr_graph != nullptr) { - return replicate_graph(*csr_graph, num_replications); - } - - __builtin_unreachable(); + return graph.reified([&](const auto &graph) { return replicate_graph(graph, num_replications); }); } DistributedPartitionedGraph From ed5252db41c13e663156942332b71ae0cb9d72bc Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 16:41:40 +0200 Subject: [PATCH 10/54] fix(kaminpar-dist): compilation error --- .../contraction/global_cluster_contraction.cc | 20 ++++++++----------- kaminpar-dist/graphutils/replicator.cc | 4 ++-- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git 
a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc index dd98b99f..ef5841c8 100644 --- a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc +++ b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc @@ -150,9 +150,7 @@ class GlobalCoarseGraphImpl : public CoarseGraph { _f_graph.reified([&](const auto &graph) { mpi::graph::sparse_alltoall_interface_to_pe( graph, - [&](const NodeID lnode) -> GhostNodeLabel { - return {lnode, f_partition[lnode]}; - }, + [&](const NodeID lnode) -> GhostNodeLabel { return {lnode, f_partition[lnode]}; }, [&](const auto buffer, const PEID pe) { tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { const auto &[sender_lnode, block] = buffer[i]; @@ -230,7 +228,8 @@ find_nonlocal_nodes(const Graph &graph, const StaticArray &lnode_t const GlobalNodeID gcluster = lnode_to_gcluster[lnode]; if (!graph.is_owned_global_node(gcluster)) { nonlocal_nodes[node_position_buffer[lnode]] = { - .u = gcluster, .weight = graph.node_weight(lnode)}; + .u = gcluster, .weight = graph.node_weight(lnode) + }; } }); @@ -351,9 +350,7 @@ template void update_ghost_node_weights(Graph &graph) { mpi::graph::sparse_alltoall_interface_to_pe( graph, - [&](const NodeID u) -> Message { - return {u, graph.node_weight(u)}; - }, + [&](const NodeID u) -> Message { return {u, graph.node_weight(u)}; }, [&](const auto buffer, const PEID pe) { tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { const auto &[local_node_on_other_pe, weight] = buffer[i]; @@ -550,7 +547,8 @@ MigrationResult migrate_elements( .sendcounts = std::move(sendcounts), .sdispls = std::move(sdispls), .recvcounts = std::move(recvcounts), - .rdispls = std::move(rdispls)}; + .rdispls = std::move(rdispls) + }; } template @@ -946,9 +944,7 @@ void rebalance_cluster_placement( }; mpi::graph::sparse_alltoall_interface_to_pe( graph, - [&](const NodeID lnode) -> Message { - return 
{lnode, lnode_to_gcluster[lnode]}; - }, + [&](const NodeID lnode) -> Message { return {lnode, lnode_to_gcluster[lnode]}; }, [&](const auto buffer, const PEID pe) { tbb::parallel_for(0, buffer.size(), [&](const std::size_t i) { const auto &[their_lnode, new_gcluster] = buffer[i]; @@ -1021,7 +1017,7 @@ std::unique_ptr contract_clustering( START_TIMER("Contract clustering"); KASSERT( - debug::validate_clustering(graph, lnode_to_gcluster), + debug::validate_clustering(fine_graph, lnode_to_gcluster), "input clustering is invalid", assert::heavy ); diff --git a/kaminpar-dist/graphutils/replicator.cc b/kaminpar-dist/graphutils/replicator.cc index 5dd2f0f4..28eb87e5 100644 --- a/kaminpar-dist/graphutils/replicator.cc +++ b/kaminpar-dist/graphutils/replicator.cc @@ -31,7 +31,7 @@ SET_DEBUG(false); namespace { -template StaticArray copy_raw_nodes(const Graph &graph) { +template decltype(auto) copy_raw_nodes(const Graph &graph) { constexpr bool kIsCompressedGraph = std::is_same_v; // Copy node array with (uncompressed) edge IDs or simply forward the raw nodes if the graph is @@ -43,7 +43,7 @@ template StaticArray copy_raw_nodes(const Graph &graph) } return raw_nodes; } else { - return StaticArray(graph.n() + 1, graph.raw_nodes().data()); + return graph.raw_nodes(); } } From ebb6975d3c27c3958abccc3b620948706dfd38c0 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 22 Jun 2024 21:21:16 +0200 Subject: [PATCH 11/54] feat(kaminpar-dist): print statistics about graph compression --- .../graph-compression/compressed_edges.h | 4 ++ kaminpar-dist/context.cc | 14 ++++++- kaminpar-dist/context_io.cc | 11 ++++++ kaminpar-dist/context_io.h | 1 + .../distributed_compressed_graph.h | 21 +++++++++++ .../datastructures/distributed_graph.h | 37 +++++++++++-------- kaminpar-dist/dkaminpar.cc | 3 ++ kaminpar-dist/dkaminpar.h | 14 ++++++- 8 files changed, 87 insertions(+), 18 deletions(-) diff --git a/kaminpar-common/graph-compression/compressed_edges.h 
b/kaminpar-common/graph-compression/compressed_edges.h index 3edfd278..988de239 100644 --- a/kaminpar-common/graph-compression/compressed_edges.h +++ b/kaminpar-common/graph-compression/compressed_edges.h @@ -115,6 +115,10 @@ template class CompressedEdges { return _num_edges; } + [[nodiscard]] std::size_t size() const { + return _compressed_edges.size(); + } + [[nodiscard]] NodeID degree(const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset) const { const std::uint8_t *data = _compressed_edges.data(); diff --git a/kaminpar-dist/context.cc b/kaminpar-dist/context.cc index 011b83d9..7db2f804 100644 --- a/kaminpar-dist/context.cc +++ b/kaminpar-dist/context.cc @@ -8,12 +8,13 @@ #include "kaminpar-dist/context.h" #include -#include #include #include "kaminpar-mpi/wrapper.h" +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" + namespace kaminpar::dist { using namespace std::string_literals; PartitionContext::PartitionContext(const BlockID k, const BlockID K, const double epsilon) @@ -112,4 +113,15 @@ bool LabelPropagationCoarseningContext::should_merge_nonadjacent_clusters( bool RefinementContext::includes_algorithm(const RefinementAlgorithm algorithm) const { return std::find(algorithms.begin(), algorithms.end(), algorithm) != algorithms.end(); } + +void GraphCompressionContext::setup(const DistributedCompressedGraph &graph) { + const MPI_Comm comm = graph.communicator(); + const double compression_ratio = graph.compression_ratio(); + auto compression_ratios = mpi::allgather(compression_ratio, comm); + + const auto size = static_cast(compression_ratios.size()); + avg_compression_ratio = std::reduce(compression_ratios.begin(), compression_ratios.end()) / size; + min_compression_ratio = *std::min_element(compression_ratios.begin(), compression_ratios.end()); + max_compression_ratio = *std::max_element(compression_ratios.begin(), compression_ratios.end()); +} } // namespace kaminpar::dist diff --git a/kaminpar-dist/context_io.cc 
b/kaminpar-dist/context_io.cc index 315ac48e..980431fe 100644 --- a/kaminpar-dist/context_io.cc +++ b/kaminpar-dist/context_io.cc @@ -286,6 +286,8 @@ void print(const Context &ctx, const bool root, std::ostream &out, MPI_Comm comm out << " Partition extension factor: " << ctx.partition.K << "\n"; out << " Simulate seq. hybrid exe.: " << (ctx.simulate_singlethread ? "yes" : "no") << "\n"; } + cio::print_delimiter("Graph Compression", '-'); + print(ctx.compression, ctx.parallel, out); cio::print_delimiter("Coarsening", '-'); print(ctx.coarsening, ctx.parallel, out); cio::print_delimiter("Initial Partitioning", '-'); @@ -348,6 +350,15 @@ void print(const ChunksContext &ctx, const ParallelContext ¶llel, std::ostre } } +void print(const GraphCompressionContext &ctx, const ParallelContext ¶llel, std::ostream &out) { + out << "Enabled: " << (ctx.enabled ? "yes" : "no") << "\n"; + if (ctx.enabled) { + out << " Compression ratio: [Min=" << ctx.min_compression_ratio + << " | Mean=" << ctx.avg_compression_ratio << " | Max=" << ctx.max_compression_ratio << "]" + << "\n"; + } +} + void print(const CoarseningContext &ctx, const ParallelContext ¶llel, std::ostream &out) { out << "Contraction limit: " << ctx.contraction_limit << "\n"; if (ctx.max_global_clustering_levels > 0 && ctx.max_local_clustering_levels > 0) { diff --git a/kaminpar-dist/context_io.h b/kaminpar-dist/context_io.h index f1747fcc..14a1952a 100644 --- a/kaminpar-dist/context_io.h +++ b/kaminpar-dist/context_io.h @@ -40,6 +40,7 @@ std::string get_balancing_algorithms_description(); void print(const Context &ctx, bool root, std::ostream &out, MPI_Comm comm); void print(const PartitionContext &ctx, bool root, std::ostream &out, MPI_Comm comm); void print(const ChunksContext &ctx, const ParallelContext ¶llel, std::ostream &out); +void print(const GraphCompressionContext &ctx, const ParallelContext ¶llel, std::ostream &out); void print(const CoarseningContext &ctx, const ParallelContext ¶llel, std::ostream &out); void 
print(const InitialPartitioningContext &ctx, std::ostream &out); void print(const RefinementContext &ctx, const ParallelContext &parallel, std::ostream &out); diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index ac5a5d55..f2fbb811 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -504,6 +504,27 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { return _color_sizes; } + // + // Statistics about graph compression + // + + [[nodiscard]] double compression_ratio() const { + std::size_t uncompressed_size = (n() + 1) * sizeof(EdgeID) + m() * sizeof(NodeID); + std::size_t compressed_size = (n() + 1) * sizeof(EdgeID) + _compressed_edges.size(); + + if (is_node_weighted()) { + uncompressed_size += n() * sizeof(NodeWeight); + compressed_size += n() * sizeof(NodeWeight); + } + + if (is_edge_weighted()) { + uncompressed_size += m() * sizeof(EdgeWeight); + compressed_size += m() * sizeof(EdgeWeight); + } + + return uncompressed_size / static_cast(compressed_size); + } + // // Functions to access raw members of this graph // diff --git a/kaminpar-dist/datastructures/distributed_graph.h b/kaminpar-dist/datastructures/distributed_graph.h index 60006d96..0d107530 100644 --- a/kaminpar-dist/datastructures/distributed_graph.h +++ b/kaminpar-dist/datastructures/distributed_graph.h @@ -54,22 +54,6 @@ class DistributedGraph : public AbstractDistributedGraph { ~DistributedGraph() override = default; - // - // Underlying graph - // - - [[nodiscard]] AbstractDistributedGraph *underlying_graph() { - return _underlying_graph.get(); - } - - [[nodiscard]] const AbstractDistributedGraph *underlying_graph() const { - return _underlying_graph.get(); - } - - [[nodiscard]] AbstractDistributedGraph *take_underlying_graph() { - return _underlying_graph.release(); - } - // // Size of the graph // @@ -426,6 
+410,27 @@ class DistributedGraph : public AbstractDistributedGraph { return _underlying_graph->get_color_sizes(); } + // + // Access to underlying graph + // + + [[nodiscard]] AbstractDistributedGraph *underlying_graph() { + return _underlying_graph.get(); + } + + [[nodiscard]] const AbstractDistributedGraph *underlying_graph() const { + return _underlying_graph.get(); + } + + [[nodiscard]] AbstractDistributedGraph *take_underlying_graph() { + return _underlying_graph.release(); + } + + [[nodiscard]] const DistributedCompressedGraph &compressed_graph() const { + const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); + return *dynamic_cast(abstract_graph); + } + template decltype(auto) reified(Lambda1 &&l1, Lambda2 &&l2) const { const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); diff --git a/kaminpar-dist/dkaminpar.cc b/kaminpar-dist/dkaminpar.cc index 7bfd6ceb..e925f185 100644 --- a/kaminpar-dist/dkaminpar.cc +++ b/kaminpar-dist/dkaminpar.cc @@ -309,6 +309,9 @@ GlobalEdgeWeight dKaMinPar::compute_partition(const BlockID k, BlockID *partitio _ctx.initial_partitioning.kaminpar.parallel.num_threads = _ctx.parallel.num_threads; _ctx.partition.k = k; _ctx.partition.graph = std::make_unique(graph, _ctx.partition); + if (_ctx.compression.enabled) { + _ctx.compression.setup(_graph_ptr->compressed_graph()); + } // Initialize console output Logger::set_quiet_mode(_output_level == OutputLevel::QUIET); diff --git a/kaminpar-dist/dkaminpar.h b/kaminpar-dist/dkaminpar.h index a47e9643..67f51827 100644 --- a/kaminpar-dist/dkaminpar.h +++ b/kaminpar-dist/dkaminpar.h @@ -306,6 +306,18 @@ struct RefinementContext { struct GraphCompressionContext { bool enabled; + + // Graph compression statistics + double avg_compression_ratio; + double min_compression_ratio; + double max_compression_ratio; + + /*! + * Setups the graph compression statistics of this context. + * + * @param graph The compressed graph of this process. 
+ */ + void setup(const class DistributedCompressedGraph &graph); }; struct PartitionContext { @@ -331,7 +343,6 @@ struct DebugContext { struct Context { GraphOrdering rearrange_by; - GraphCompressionContext compression; PartitioningMode mode; @@ -340,6 +351,7 @@ struct Context { PartitionContext partition; ParallelContext parallel; + GraphCompressionContext compression; CoarseningContext coarsening; InitialPartitioningContext initial_partitioning; RefinementContext refinement; From 58b9435a44cc2b8d87f9275eef9cd8dfe82e66c1 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sun, 23 Jun 2024 12:42:24 +0200 Subject: [PATCH 12/54] feat(kaminpar-dist): print more statistics about graph compression --- kaminpar-dist/context.cc | 12 ++++- kaminpar-dist/context_io.cc | 44 ++++++++++++++++++- .../distributed_compressed_graph.h | 14 ++++++ kaminpar-dist/dkaminpar.h | 2 + kaminpar-dist/heap_profiler.cc | 7 +-- 5 files changed, 72 insertions(+), 7 deletions(-) diff --git a/kaminpar-dist/context.cc b/kaminpar-dist/context.cc index 7db2f804..02de59c1 100644 --- a/kaminpar-dist/context.cc +++ b/kaminpar-dist/context.cc @@ -116,12 +116,20 @@ bool RefinementContext::includes_algorithm(const RefinementAlgorithm algorithm) void GraphCompressionContext::setup(const DistributedCompressedGraph &graph) { const MPI_Comm comm = graph.communicator(); - const double compression_ratio = graph.compression_ratio(); - auto compression_ratios = mpi::allgather(compression_ratio, comm); + const auto compression_ratios = mpi::allgather(graph.compression_ratio(), comm); const auto size = static_cast(compression_ratios.size()); avg_compression_ratio = std::reduce(compression_ratios.begin(), compression_ratios.end()) / size; min_compression_ratio = *std::min_element(compression_ratios.begin(), compression_ratios.end()); max_compression_ratio = *std::max_element(compression_ratios.begin(), compression_ratios.end()); + + const auto graph_sizes = mpi::allgather(graph.memory_space(), comm); + const auto 
largest_compressed_graph_it = std::max_element(graph_sizes.begin(), graph_sizes.end()); + largest_compressed_graph = *largest_compressed_graph_it; + + const auto largest_compressed_graph_rank = + std::distance(graph_sizes.begin(), largest_compressed_graph_it); + largest_compressed_graph_prev_size = + largest_compressed_graph * compression_ratios[largest_compressed_graph_rank]; } } // namespace kaminpar::dist diff --git a/kaminpar-dist/context_io.cc b/kaminpar-dist/context_io.cc index 980431fe..3c1b7088 100644 --- a/kaminpar-dist/context_io.cc +++ b/kaminpar-dist/context_io.cc @@ -15,6 +15,7 @@ #include "kaminpar-mpi/wrapper.h" #include "kaminpar-dist/context.h" +#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" #include "kaminpar-common/console_io.h" #include "kaminpar-common/random.h" @@ -351,11 +352,50 @@ void print(const ChunksContext &ctx, const ParallelContext &parallel, std::ostre } void print(const GraphCompressionContext &ctx, const ParallelContext &parallel, std::ostream &out) { + using Compression = DistributedCompressedGraph::CompressedEdges; + + const auto round = [](const auto value) { + return std::ceil(value * 1000.0) / 1000.0; + }; + const auto to_gib = [&round](const std::size_t num_bytes) { + return round(num_bytes / static_cast(1024 * 1024 * 1024)); + }; + const auto yeyornay = [](const bool value) { + return value ? "yes" : "no"; + }; + out << "Enabled: " << (ctx.enabled ? 
"yes" : "no") << "\n"; if (ctx.enabled) { - out << " Compression ratio: [Min=" << ctx.min_compression_ratio - << " | Mean=" << ctx.avg_compression_ratio << " | Max=" << ctx.max_compression_ratio << "]" + out << "Compression Scheme: Gap Encoding + "; + if constexpr (Compression::kStreamEncoding) { + out << "VarInt Stream Encoding\n"; + } else if constexpr (Compression::kRunLengthEncoding) { + out << "VarInt Run-Length Encoding\n"; + } else { + out << "VarInt Encoding\n"; + } + + out << " High Degree Encoding: " << yeyornay(Compression::kHighDegreeEncoding) << "\n"; + if constexpr (Compression::kHighDegreeEncoding) { + out << " Threshold: " << Compression::kHighDegreeThreshold << "\n"; + out << " Part Length: " << Compression::kHighDegreePartLength << "\n"; + } + + out << " Interval Encoding: " << yeyornay(Compression::kIntervalEncoding) << "\n"; + if constexpr (Compression::kIntervalLengthTreshold) { + out << " Length Threshold: " << Compression::kIntervalLengthTreshold << "\n"; + } + + out << " Isolated Nodes Separation: " << yeyornay(Compression::kIsolatedNodesSeparation) << "\n"; + + out << "Compression ratio: [Min=" << round(ctx.min_compression_ratio) + << " | Mean=" << round(ctx.avg_compression_ratio) + << " | Max=" << round(ctx.max_compression_ratio) << "]" + << "\n"; + + out << "Largest compressed graph: " << to_gib(ctx.largest_compressed_graph_prev_size) + << " GiB -> " << to_gib(ctx.largest_compressed_graph) << " GiB\n"; } } diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index f2fbb811..0c19cf40 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -525,6 +525,20 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { return uncompressed_size / static_cast(compressed_size); } + [[nodiscard]] std::size_t memory_space() const { + std::size_t memory_space = (n() + 1) * 
sizeof(EdgeID) + _compressed_edges.size(); + + if (is_node_weighted()) { + memory_space += n() * sizeof(NodeWeight); + } + + if (is_edge_weighted()) { + memory_space += m() * sizeof(EdgeWeight); + } + + return memory_space; + } + // // Functions to access raw members of this graph // diff --git a/kaminpar-dist/dkaminpar.h b/kaminpar-dist/dkaminpar.h index 67f51827..fb8a34ac 100644 --- a/kaminpar-dist/dkaminpar.h +++ b/kaminpar-dist/dkaminpar.h @@ -311,6 +311,8 @@ struct GraphCompressionContext { double avg_compression_ratio; double min_compression_ratio; double max_compression_ratio; + std::size_t largest_compressed_graph; + std::size_t largest_compressed_graph_prev_size; /*! * Setups the graph compression statistics of this context. diff --git a/kaminpar-dist/heap_profiler.cc b/kaminpar-dist/heap_profiler.cc index e73239f3..1aa55f2d 100644 --- a/kaminpar-dist/heap_profiler.cc +++ b/kaminpar-dist/heap_profiler.cc @@ -100,9 +100,10 @@ void generate_statistics( }; std::stringstream stream; - stream << "[ " << min_pe << " : " << pad(to_megabytes(min), mem_str_width) << " mb | " - << pad(to_megabytes(mean), mem_str_width) << " mb | " << max_pe << " : " - << pad(to_megabytes(max), mem_str_width) << " mb ]"; + stream << "[ " << pad(min_pe, pe_str_width) << " : " << pad(to_megabytes(min), mem_str_width) + << " mb | " << pad(to_megabytes(mean), mem_str_width) << " mb | " + << pad(max_pe, pe_str_width) << " : " << pad(to_megabytes(max), mem_str_width) + << " mb ]"; node->annotation = stream.str(); } From 9b29d6c03c85ce256c88a8f946c5d5d5898f5e13 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sun, 23 Jun 2024 12:43:49 +0200 Subject: [PATCH 13/54] fix(kaminpar-dist): ensure correct output ordering when the heap profile is printed --- kaminpar-common/heap_profiler.cc | 4 --- kaminpar-dist/dkaminpar.cc | 15 ++++----- kaminpar-dist/logger.h | 58 ++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 13 deletions(-) diff --git a/kaminpar-common/heap_profiler.cc 
b/kaminpar-common/heap_profiler.cc index a9a77932..40b5af23 100644 --- a/kaminpar-common/heap_profiler.cc +++ b/kaminpar-common/heap_profiler.cc @@ -239,10 +239,6 @@ void HeapProfiler::print_heap_tree_node( ); } } - - if (depth == 0) { - out << std::endl; - } } void HeapProfiler::print_indentation(std::ostream &out, std::size_t depth, bool last) { diff --git a/kaminpar-dist/dkaminpar.cc b/kaminpar-dist/dkaminpar.cc index e925f185..78d64f31 100644 --- a/kaminpar-dist/dkaminpar.cc +++ b/kaminpar-dist/dkaminpar.cc @@ -23,6 +23,7 @@ #include "kaminpar-dist/graphutils/rearrangement.h" #include "kaminpar-dist/graphutils/synchronization.h" #include "kaminpar-dist/heap_profiler.h" +#include "kaminpar-dist/logger.h" #include "kaminpar-dist/metrics.h" #include "kaminpar-dist/timer.h" @@ -55,13 +56,10 @@ void print_partition_summary( finalize_distributed_timer(Timer::global(), comm); #endif // KAMINPAR_ENABLE_TIMERS - bool heap_profile_root; + int heap_profile_root_rank; if constexpr (kHeapProfiling) { auto &heap_profiler = heap_profiler::HeapProfiler::global(); - const int heap_profile_root_rank = finalize_distributed_heap_profiler(heap_profiler, comm); - - const int rank = mpi::get_comm_rank(comm); - heap_profile_root = rank == heap_profile_root_rank; + heap_profile_root_rank = finalize_distributed_heap_profiler(heap_profiler, comm); } if (root) { @@ -87,13 +85,12 @@ void print_partition_summary( } if constexpr (kHeapProfiling) { - mpi::barrier(comm); + SingleSynchronizedLogger logger(heap_profile_root_rank); + const bool heap_profile_root = heap_profile_root_rank == mpi::get_comm_rank(comm); if (heap_profile_root) { - PRINT_HEAP_PROFILE(std::cout); + PRINT_HEAP_PROFILE(logger.output()); } - - mpi::barrier(comm); } if (root) { diff --git a/kaminpar-dist/logger.h b/kaminpar-dist/logger.h index 213dc0c1..4b9df899 100644 --- a/kaminpar-dist/logger.h +++ b/kaminpar-dist/logger.h @@ -124,4 +124,62 @@ class SynchronizedLogger { int _root; MPI_Comm _comm; }; + +class 
SingleSynchronizedLogger { +public: + explicit SingleSynchronizedLogger( + const int sender_rank, const int root = 0, MPI_Comm comm = MPI_COMM_WORLD + ) + : _buf{}, + _logger{_buf, ""}, + _sender_rank{sender_rank}, + _root{root}, + _comm{comm} {} + + ~SingleSynchronizedLogger() { + int size, rank; + MPI_Comm_size(_comm, &size); + MPI_Comm_rank(_comm, &rank); + + if (rank == _root) { + if (_sender_rank == _root) { + _logger.flush(); + LLOG << _buf.str(); + return; + } + + MPI_Status status; + MPI_Probe(_sender_rank, 0, MPI_COMM_WORLD, &status); + + int cnt; + MPI_Get_count(&status, MPI_CHAR, &cnt); + + auto str = std::make_unique(cnt); + MPI_Recv(str.get(), cnt, MPI_CHAR, _sender_rank, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + LLOG << std::string(str.get(), cnt); + } else if (rank == _sender_rank) { + _logger.flush(); + + std::string str = _buf.str(); + MPI_Send(str.data(), static_cast(str.length()), MPI_CHAR, _root, 0, MPI_COMM_WORLD); + } + } + + template SingleSynchronizedLogger &operator<<(Arg &&arg) { + _logger << std::forward(arg); + return *this; + } + + [[nodiscard]] std::ostringstream &output() { + return _buf; + } + +private: + std::ostringstream _buf; + Logger _logger; + int _sender_rank; + int _root; + MPI_Comm _comm; +}; } // namespace kaminpar::dist From acf201ee44869669d1f46a6d68a19e082a0834f0 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Tue, 25 Jun 2024 08:19:18 +0200 Subject: [PATCH 14/54] fix(kaminpar-dist): only print the basename of the input file for statistics --- apps/dKaMinPar.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/apps/dKaMinPar.cc b/apps/dKaMinPar.cc index 80744cd2..7a386365 100644 --- a/apps/dKaMinPar.cc +++ b/apps/dKaMinPar.cc @@ -16,6 +16,7 @@ #include "kaminpar-common/environment.h" #include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/strutils.h" #include "apps/io/dist_io.h" #include "apps/io/dist_metis_parser.h" @@ -297,7 +298,7 @@ int main(int argc, char *argv[]) { 
partitioner.set_output_level(OutputLevel::EXPERIMENT); } - partitioner.context().debug.graph_filename = app.graph_filename; + partitioner.context().debug.graph_filename = str::extract_basename(app.graph_filename); partitioner.set_max_timer_depth(app.max_timer_depth); if constexpr (kHeapProfiling) { auto &global_heap_profiler = heap_profiler::HeapProfiler::global(); From 0dd287a0496a8e8bbfd1477ad5f9338cf9120921 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Tue, 25 Jun 2024 12:33:13 +0200 Subject: [PATCH 15/54] feat(compressed-graph): compress edge weights --- .../shm_compressed_graph_benchmark.cc | 90 ++++-- apps/io/metis_parser.cc | 4 +- apps/io/shm_compressed_graph_binary.cc | 19 +- .../buffered_cluster_contraction.cc | 4 +- .../legacy_buffered_cluster_contraction.cc | 4 +- .../naive_unbuffered_cluster_contraction.cc | 8 +- .../unbuffered_cluster_contraction.cc | 4 +- kaminpar-shm/datastructures/abstract_graph.h | 1 - .../datastructures/compressed_graph.cc | 12 +- .../datastructures/compressed_graph.h | 260 +++++++++++++----- .../compressed_graph_builder.cc | 66 ++--- .../datastructures/compressed_graph_builder.h | 58 +--- kaminpar-shm/datastructures/csr_graph.cc | 4 +- kaminpar-shm/datastructures/csr_graph.h | 179 +++++++++--- kaminpar-shm/datastructures/graph.cc | 6 +- kaminpar-shm/datastructures/graph.h | 9 +- kaminpar-shm/datastructures/graph_delegate.h | 4 - kaminpar-shm/graphutils/subgraph_extractor.cc | 12 +- .../initial_fm_refiner.cc | 7 +- kaminpar-shm/label_propagation.h | 51 ++-- kaminpar-shm/metrics.h | 8 +- kaminpar-shm/partitioning/debug.cc | 8 +- .../refinement/balancer/greedy_balancer.cc | 6 +- kaminpar-shm/refinement/fm/fm_batch_stats.cc | 8 +- .../refinement/gains/dense_gain_cache.h | 31 +-- .../refinement/gains/hybrid_gain_cache.h | 20 +- .../refinement/gains/on_the_fly_gain_cache.h | 26 +- .../refinement/gains/sparse_gain_cache.h | 20 +- kaminpar-shm/refinement/jet/jet_refiner.cc | 6 +- .../datastructures/compressed_graph_test.cc | 186 
++++++++----- tests/shm/datastructures/graph_test.cc | 2 +- tests/shm/matchers.h | 13 +- 32 files changed, 672 insertions(+), 464 deletions(-) diff --git a/apps/benchmarks/shm_compressed_graph_benchmark.cc b/apps/benchmarks/shm_compressed_graph_benchmark.cc index 78e360b5..a338a230 100644 --- a/apps/benchmarks/shm_compressed_graph_benchmark.cc +++ b/apps/benchmarks/shm_compressed_graph_benchmark.cc @@ -1,5 +1,5 @@ /******************************************************************************* - * Graph compression benchmark for the shared-memory algorithm. + * Compressed graph benchmark for the shared-memory algorithm. * * @file: shm_compressed_graph_benchmark.cc * @author: Daniel Salwasser @@ -23,11 +23,7 @@ using namespace kaminpar; using namespace kaminpar::shm; using namespace kaminpar::shm::io; -static std::string to_megabytes(std::size_t bytes) { - std::stringstream stream; - stream << std::fixed << std::setprecision(2) << (bytes / (float)(1024 * 1024)); - return stream.str(); -} +namespace { template static bool operator!=(const IotaRange &a, const IotaRange &b) { if (a.begin() == a.end()) { @@ -38,11 +34,11 @@ template static bool operator!=(const IotaRange &a, const IotaRa }; // See https://github.com/google/benchmark/blob/main/include/benchmark/benchmark.h -template static inline void do_not_optimize(T value) { +template void do_not_optimize(T value) { asm volatile("" : "+m"(value) : : "memory"); } -template static void benchmark_degree(const Graph &graph) { +template void benchmark_degree(const Graph &graph) { SCOPED_TIMER("Degree"); for (const auto node : graph.nodes()) { @@ -50,7 +46,7 @@ template static void benchmark_degree(const Graph &graph) { } } -template static void benchmark_incident_edges(const Graph &graph) { +template void benchmark_incident_edges(const Graph &graph) { SCOPED_TIMER("Incident Edges"); for (const auto node : graph.nodes()) { @@ -60,7 +56,7 @@ template static void benchmark_incident_edges(const Graph &grap } } -template static 
void benchmark_adjacent_nodes(const Graph &graph) { +template void benchmark_adjacent_nodes(const Graph &graph) { SCOPED_TIMER("Adjacent Nodes"); for (const auto node : graph.nodes()) { @@ -68,7 +64,18 @@ template static void benchmark_adjacent_nodes(const Graph &grap } } -template static void benchmark_neighbors(const Graph &graph) { +template void benchmark_weighted_adjacent_nodes(const Graph &graph) { + SCOPED_TIMER("Adjacent Nodes with Edge Weights"); + + for (const auto node : graph.nodes()) { + graph.adjacent_nodes(node, [&](const auto adjacent_node, const auto edge_weight) { + do_not_optimize(adjacent_node); + do_not_optimize(edge_weight); + }); + } +} + +template void benchmark_neighbors(const Graph &graph) { SCOPED_TIMER("Neighbors"); for (const auto node : graph.nodes()) { @@ -79,7 +86,22 @@ template static void benchmark_neighbors(const Graph &graph) { } } -template static void benchmark_neighbors_limit(const Graph &graph) { +template void benchmark_weighted_neighbors(const Graph &graph) { + SCOPED_TIMER("Neighbors with Edge Weights"); + + for (const auto node : graph.nodes()) { + graph.neighbors( + node, + [](const auto incident_edge, const auto adjacent_node, const auto edge_weight) { + do_not_optimize(incident_edge); + do_not_optimize(adjacent_node); + do_not_optimize(edge_weight); + } + ); + } +} + +template void benchmark_neighbors_limit(const Graph &graph) { SCOPED_TIMER("Neighbors (with limit)"); for (const auto node : graph.nodes()) { @@ -94,7 +116,23 @@ template static void benchmark_neighbors_limit(const Graph &gra } } -template static void benchmark_pfor_neighbors(const Graph &graph) { +template void benchmark_weighted_neighbors_limit(const Graph &graph) { + SCOPED_TIMER("Neighbors with Edge Weights (with limit)"); + + for (const auto node : graph.nodes()) { + graph.neighbors( + node, + std::numeric_limits::max(), + [](const auto incident_edge, const auto adjacent_node, const auto edge_weight) { + do_not_optimize(incident_edge); + 
do_not_optimize(adjacent_node); + do_not_optimize(edge_weight); + } + ); + } +} + +template void benchmark_pfor_neighbors(const Graph &graph) { SCOPED_TIMER("Parallel For Neighbors"); for (const auto node : graph.nodes()) { @@ -102,23 +140,25 @@ template static void benchmark_pfor_neighbors(const Graph &grap node, std::numeric_limits::max(), 1000, - [](const auto incident_edge, const auto adjacent_node) { + [](const auto incident_edge, const auto adjacent_node, const auto edge_weight) { do_not_optimize(incident_edge); do_not_optimize(adjacent_node); + do_not_optimize(edge_weight); } ); } } -static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { - LOG << "Running the benchmarks..."; - +void run_benchmark(const CSRGraph &graph, const CompressedGraph &compressed_graph) { TIMED_SCOPE("Uncompressed graph operations") { benchmark_degree(graph); benchmark_incident_edges(graph); benchmark_adjacent_nodes(graph); + benchmark_weighted_adjacent_nodes(graph); benchmark_neighbors(graph); + benchmark_weighted_neighbors(graph); benchmark_neighbors_limit(graph); + benchmark_weighted_neighbors_limit(graph); benchmark_pfor_neighbors(graph); }; @@ -126,19 +166,23 @@ static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { benchmark_degree(compressed_graph); benchmark_incident_edges(compressed_graph); benchmark_adjacent_nodes(compressed_graph); + benchmark_weighted_adjacent_nodes(compressed_graph); benchmark_neighbors(compressed_graph); + benchmark_weighted_neighbors(compressed_graph); benchmark_neighbors_limit(compressed_graph); + benchmark_weighted_neighbors_limit(compressed_graph); benchmark_pfor_neighbors(compressed_graph); }; } +} // namespace + int main(int argc, char *argv[]) { // Parse CLI arguments std::string graph_filename; GraphFileFormat graph_file_format = io::GraphFileFormat::METIS; int num_threads = 1; bool enable_benchmarks = true; - bool enable_checks = false; CLI::App app("Shared-memory graph compression benchmark"); 
app.add_option("-G,--graph", graph_filename, "Graph file")->required(); @@ -156,7 +200,6 @@ int main(int argc, char *argv[]) { // Read input graph LOG << "Reading the input graph..."; - CSRGraph graph = [&] { switch (graph_file_format) { case GraphFileFormat::METIS: @@ -168,12 +211,13 @@ int main(int argc, char *argv[]) { } }(); - CompressedGraph compressed_graph = CompressedGraphBuilder::compress(graph); + LOG << "Compressing the input graph..."; + CompressedGraph compressed_graph = ParallelCompressedGraphBuilder::compress(graph); // Run benchmarks - + LOG << "Running the benchmarks..."; GLOBAL_TIMER.reset(); - run_benchmark(std::move(graph), std::move(compressed_graph)); + run_benchmark(graph, compressed_graph); STOP_TIMER(); // Print the result summary @@ -188,5 +232,5 @@ int main(int argc, char *argv[]) { Timer::global().print_human_readable(std::cout); - return 0; + return EXIT_SUCCESS; } diff --git a/apps/io/metis_parser.cc b/apps/io/metis_parser.cc index f6de5b4a..9d9d1247 100644 --- a/apps/io/metis_parser.cc +++ b/apps/io/metis_parser.cc @@ -322,11 +322,11 @@ void write(const std::string &filename, const Graph &graph) { out << graph.node_weight(node) << ' '; } - graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { + graph.neighbors(node, [&](const NodeID adjacent_node, const EdgeWeight weight) { out << (adjacent_node + 1) << ' '; if (graph.is_edge_weighted()) { - out << graph.edge_weight(incident_edge) << ' '; + out << weight << ' '; } }); diff --git a/apps/io/shm_compressed_graph_binary.cc b/apps/io/shm_compressed_graph_binary.cc index cbfbf602..886b7f06 100644 --- a/apps/io/shm_compressed_graph_binary.cc +++ b/apps/io/shm_compressed_graph_binary.cc @@ -39,6 +39,7 @@ struct CompressedBinaryHeader { std::uint64_t num_nodes; std::uint64_t num_edges; + std::int64_t total_edge_weight; std::uint64_t max_degree; std::uint64_t num_high_degree_nodes; @@ -72,12 +73,14 @@ CompressedBinaryHeader create_header(const CompressedGraph 
&graph) { graph.n(), graph.m(), + graph.total_edge_weight(), graph.max_degree(), graph.num_high_degree_nodes(), graph.num_high_degree_parts(), graph.num_interval_nodes(), - graph.num_intervals()}; + graph.num_intervals() + }; } template static void write_int(std::ofstream &out, const T id) { @@ -100,6 +103,7 @@ static void write_header(std::ofstream &out, const CompressedBinaryHeader header write_int(out, header.num_nodes); write_int(out, header.num_edges); + write_int(out, header.total_edge_weight); write_int(out, header.max_degree); write_int(out, header.num_high_degree_nodes); @@ -134,10 +138,6 @@ void write(const std::string &filename, const CompressedGraph &graph) { if (graph.is_node_weighted()) { write_static_array(out, graph.raw_node_weights()); } - - if (graph.is_edge_weighted()) { - write_static_array(out, graph.raw_edge_weights()); - } } template static T read_int(std::ifstream &in) { @@ -154,7 +154,7 @@ CompressedBinaryHeader read_header(std::ifstream &in) { (boolean_values & 64) != 0, (boolean_values & 128) != 0, (boolean_values & 256) != 0, (boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0, read_int(in), read_int(in), read_int(in), - read_int(in), read_int(in), read_int(in), + read_int(in), read_int(in), read_int(in), read_int(in), read_int(in), read_int(in), read_int(in), }; @@ -301,14 +301,14 @@ template static StaticArray read_static_array(std::ifstream &in) const auto size = read_int(in); StaticArray array(size, static_array::noinit); in.read(reinterpret_cast(array.data()), sizeof(T) * size); - return std::move(array); + return array; } CompressedGraph read(const std::string &filename) { std::ifstream in(filename, std::ios::binary); if (kMagicNumber != read_int(in)) { LOG_ERROR << "The magic number of the file is not correct!"; - std::exit(1); + std::exit(EXIT_FAILURE); } CompressedBinaryHeader header = read_header(in); @@ -326,8 +326,9 @@ CompressedGraph read(const std::string &filename) { std::move(nodes), 
std::move(compressed_edges), std::move(node_weights), - std::move(edge_weights), header.num_edges, + header.total_edge_weight, + header.has_edge_weights, header.max_degree, header.use_degree_bucket_order, header.num_high_degree_nodes, diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc index 5d9f63fd..ac126f7a 100644 --- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc @@ -143,10 +143,10 @@ std::unique_ptr contract_clustering_buffered( c_u_weight += graph.node_weight(u); // coarse node weight // collect coarse edges - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const NodeID c_v = mapping[v]; if (c_u != c_v) { - map[c_v] += graph.edge_weight(e); + map[c_v] += w; } }); } diff --git a/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc index 4327410e..caac97bb 100644 --- a/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc @@ -83,10 +83,10 @@ std::unique_ptr contract_clustering_buffered_legacy( c_u_weight += graph.node_weight(u); // coarse node weight // collect coarse edges - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const NodeID c_v = mapping[v]; if (c_u != c_v) { - map[c_v] += graph.edge_weight(e); + map[c_v] += w; } }); } diff --git a/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc index ec7c3db9..5b6a110a 100644 --- a/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc +++ 
b/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc @@ -70,10 +70,10 @@ std::unique_ptr contract_clustering_unbuffered_naive( c_u_weight += graph.node_weight(u); // Collect coarse edges - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const NodeID c_v = mapping[v]; if (c_u != c_v) { - map[c_v] += graph.edge_weight(e); + map[c_v] += w; } }); } @@ -140,10 +140,10 @@ std::unique_ptr contract_clustering_unbuffered_naive( KASSERT(mapping[u] == c_u); // Collect coarse edges - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const NodeID c_v = mapping[v]; if (c_u != c_v) { - map[c_v] += graph.edge_weight(e); + map[c_v] += w; } }); } diff --git a/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc index bcee6023..665966d5 100644 --- a/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.cc @@ -195,10 +195,10 @@ std::unique_ptr contract_clustering_unbuffered( c_u_weight += graph.node_weight(u); - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const NodeID c_v = mapping[v]; if (c_u != c_v) { - map[c_v] += graph.edge_weight(e); + map[c_v] += w; } }); } diff --git a/kaminpar-shm/datastructures/abstract_graph.h b/kaminpar-shm/datastructures/abstract_graph.h index 60f4347d..9c71fe3d 100644 --- a/kaminpar-shm/datastructures/abstract_graph.h +++ b/kaminpar-shm/datastructures/abstract_graph.h @@ -42,7 +42,6 @@ class AbstractGraph { [[nodiscard]] virtual NodeWeight total_node_weight() const = 0; [[nodiscard]] virtual bool is_edge_weighted() const = 0; - [[nodiscard]] virtual EdgeWeight edge_weight(EdgeID e) const = 0; [[nodiscard]] virtual EdgeWeight 
total_edge_weight() const = 0; // Low-level access to the graph structure diff --git a/kaminpar-shm/datastructures/compressed_graph.cc b/kaminpar-shm/datastructures/compressed_graph.cc index da97ea86..ccf86a8f 100644 --- a/kaminpar-shm/datastructures/compressed_graph.cc +++ b/kaminpar-shm/datastructures/compressed_graph.cc @@ -20,8 +20,9 @@ CompressedGraph::CompressedGraph( CompactStaticArray nodes, StaticArray compressed_edges, StaticArray node_weights, - StaticArray edge_weights, EdgeID edge_count, + EdgeWeight total_edge_weight, + bool has_edge_weights, NodeID max_degree, bool sorted, std::size_t num_high_degree_nodes, @@ -32,8 +33,9 @@ CompressedGraph::CompressedGraph( : _nodes(std::move(nodes)), _compressed_edges(std::move(compressed_edges)), _node_weights(std::move(node_weights)), - _edge_weights(std::move(edge_weights)), _edge_count(edge_count), + _total_edge_weight(total_edge_weight), + _has_edge_weights(has_edge_weights), _max_degree(max_degree), _sorted(sorted), _num_high_degree_nodes(num_high_degree_nodes), @@ -53,12 +55,6 @@ CompressedGraph::CompressedGraph( _max_node_weight = parallel::max_element(_node_weights); } - if (_edge_weights.empty()) { - _total_edge_weight = static_cast(m()); - } else { - _total_edge_weight = parallel::accumulate(_edge_weights, static_cast(0)); - } - init_degree_buckets(); }; diff --git a/kaminpar-shm/datastructures/compressed_graph.h b/kaminpar-shm/datastructures/compressed_graph.h index 6640ed14..ac818c43 100644 --- a/kaminpar-shm/datastructures/compressed_graph.h +++ b/kaminpar-shm/datastructures/compressed_graph.h @@ -130,8 +130,6 @@ class CompressedGraph : public AbstractGraph { * format. * @param node_weights The array of node weights in which the weights of each node in the * respective entry are stored. - * @param edge_weights The array of edge weights in which the weights of each edge in the - * respective entry are stored. * @param edge_count The number of edges stored in the compressed edge array. 
* @param max_degree The maximum degree of the graph. * @param sorted Whether the nodes are stored by deg-buckets order. @@ -146,8 +144,9 @@ class CompressedGraph : public AbstractGraph { CompactStaticArray nodes, StaticArray compressed_edges, StaticArray node_weights, - StaticArray edge_weights, EdgeID edge_count, + EdgeWeight total_edge_weight, + bool has_edge_weights, NodeID max_degree, bool sorted, std::size_t num_high_degree_nodes, @@ -195,10 +194,6 @@ class CompressedGraph : public AbstractGraph { return _compressed_edges; } - [[nodiscard]] const StaticArray &raw_edge_weights() const { - return _edge_weights; - } - // Size of the graph [[nodiscard]] NodeID n() const final { return static_cast(_nodes.size() - 1); @@ -226,11 +221,7 @@ class CompressedGraph : public AbstractGraph { } [[nodiscard]] inline bool is_edge_weighted() const final { - return static_cast(m()) != total_edge_weight(); - } - - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { - return is_edge_weighted() ? 
_edge_weights[e] : 1; + return _has_edge_weights; } [[nodiscard]] inline EdgeWeight total_edge_weight() const final { @@ -291,41 +282,124 @@ class CompressedGraph : public AbstractGraph { return {first_edge, first_edge + degree}; } - template void adjacent_nodes(const NodeID node, Lambda &&l) const { - decode_neighborhood(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - return l(adjacent_node); - }); + template void adjacent_nodes(const NodeID u, Lambda &&l) const { + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + const auto invoke_caller = [&](const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(v, w); + } else { + return l(v); + } + }; + + if (is_edge_weighted()) { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + return invoke_caller(v, w); + }); + } else { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { + return invoke_caller(v, 1); + }); + } } - template void neighbors(const NodeID node, Lambda &&l) const { - decode_neighborhood(node, std::forward(l)); + template void neighbors(const NodeID u, Lambda &&l) const { + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + const auto invoke_caller = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(e, v, w); + } else { + return l(e, v); + } + }; + + if (is_edge_weighted()) { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + return invoke_caller(e, v, w); + }); + } else { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { + return invoke_caller(e, v, 1); + }); + } } template - void neighbors(const 
NodeID node, const NodeID max_neighbor_count, Lambda &&l) const { + void neighbors(const NodeID u, const NodeID max_neighbor_count, Lambda &&l) const { + KASSERT(u < n()); KASSERT(max_neighbor_count > 0); - constexpr bool non_stoppable = std::is_void_v>; + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto invoke_caller = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(e, v, w); + } else { + return l(e, v); + } + }; NodeID num_neighbors_visited = 1; - decode_neighborhood(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { + const auto check_abort_condition = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { bool abort = num_neighbors_visited++ >= max_neighbor_count; - if constexpr (non_stoppable) { - l(incident_edge, adjacent_node); + if constexpr (kNonStoppable) { + invoke_caller(e, v, w); } else { - abort |= l(incident_edge, adjacent_node); + abort |= invoke_caller(e, v, w); } return abort; - }); + }; + + if (is_edge_weighted()) { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + return check_abort_condition(e, v, w); + }); + } else { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { + return check_abort_condition(e, v, 1); + }); + } } template void pfor_neighbors( - const NodeID node, const NodeID max_neighbor_count, const NodeID grainsize, Lambda &&l + const NodeID u, const NodeID max_neighbor_count, const NodeID grainsize, Lambda &&l ) const { - constexpr bool kParallelDecoding = true; - decode_neighborhood(node, std::forward(l)); + if (is_edge_weighted()) { + decode_neighborhood(u, std::forward(l)); + } else { + 
constexpr bool kInvokeDirectly = std::is_invocable_v; + + if constexpr (kInvokeDirectly) { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { + return l(e, v, 1); + }); + } else { + decode_neighborhood(u, [&](auto &&l2) { + l([&](auto &&l3) { l2([&](const EdgeID e, const NodeID v) { return l3(e, v, 1); }); }); + }); + } + } } // Graph permutation @@ -426,7 +500,6 @@ class CompressedGraph : public AbstractGraph { if (is_edge_weighted()) { uncompressed_size += m() * sizeof(EdgeWeight); - compressed_size += m() * sizeof(EdgeWeight); } return uncompressed_size / static_cast(compressed_size); @@ -448,7 +521,6 @@ class CompressedGraph : public AbstractGraph { if (is_edge_weighted()) { uncompressed_size += m() * sizeof(EdgeWeight); - compressed_size += m() * sizeof(EdgeWeight); } return uncompressed_size - compressed_size; @@ -461,16 +533,16 @@ class CompressedGraph : public AbstractGraph { */ [[nodiscard]] std::size_t used_memory() const { return _nodes.allocated_size() + _compressed_edges.size() + - _node_weights.size() * sizeof(NodeWeight) + _edge_weights.size() * sizeof(EdgeWeight); + _node_weights.size() * sizeof(NodeWeight); } private: CompactStaticArray _nodes; StaticArray _compressed_edges; StaticArray _node_weights; - StaticArray _edge_weights; EdgeID _edge_count; + bool _has_edge_weights; NodeID _max_degree; bool _sorted; @@ -517,8 +589,16 @@ class CompressedGraph : public AbstractGraph { } } - template + template void decode_neighborhood(const NodeID node, Lambda &&l) const { + constexpr bool kInvokeDirectly = []() { + if constexpr (kHasEdgeWeights) { + return std::is_invocable_v; + } else { + return std::is_invocable_v; + } + }(); + const std::uint8_t *data = _compressed_edges.data(); const std::uint8_t *node_data = data + _nodes[node]; @@ -539,22 +619,21 @@ class CompressedGraph : public AbstractGraph { if constexpr (kHighDegreeEncoding) { if (degree >= kHighDegreeThreshold) { - decode_parts(node_data, node, edge, degree, std::forward(l)); + 
decode_parts( + node_data, node, edge, degree, std::forward(l) + ); return; } } - invoke_indirect>( - std::forward(l), - [&](auto &&l2) { - decode_edges( - node_data, node, edge, degree, uses_intervals, std::forward(l2) - ); - } - ); + invoke_indirect(std::forward(l), [&](auto &&l2) { + decode_edges( + node_data, node, edge, degree, uses_intervals, std::forward(l2) + ); + }); } - template + template void decode_parts( const std::uint8_t *data, const NodeID node, @@ -562,6 +641,14 @@ class CompressedGraph : public AbstractGraph { const NodeID degree, Lambda &&l ) const { + constexpr bool kInvokeDirectly = []() { + if constexpr (kHasEdgeWeights) { + return std::is_invocable_v; + } else { + return std::is_invocable_v; + } + }(); + const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); const auto iterate_part = [&](const NodeID part) { @@ -575,14 +662,11 @@ class CompressedGraph : public AbstractGraph { const NodeID part_degree = last_part ? (degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; - return invoke_indirect2, bool>( - std::forward(l), - [&](auto &&l2) { - return decode_edges( - part_data, node, part_edge, part_degree, true, std::forward(l2) - ); - } - ); + return invoke_indirect2(std::forward(l), [&](auto &&l2) { + return decode_edges( + part_data, node, part_edge, part_degree, true, std::forward(l2) + ); + }); }; if constexpr (kParallelDecoding) { @@ -597,7 +681,7 @@ class CompressedGraph : public AbstractGraph { } } - template + template bool decode_edges( const std::uint8_t *data, const NodeID node, @@ -610,7 +694,7 @@ class CompressedGraph : public AbstractGraph { if constexpr (kIntervalEncoding) { if (uses_intervals) { - const bool stop = decode_intervals(data, edge, std::forward(l)); + const bool stop = decode_intervals(data, edge, std::forward(l)); if (stop) { return true; } @@ -621,12 +705,27 @@ class CompressedGraph : public AbstractGraph { } } - return decode_gaps(data, node, edge, max_edge, std::forward(l)); 
+ return decode_gaps(data, node, edge, max_edge, std::forward(l)); } - template + template bool decode_intervals(const std::uint8_t *&data, EdgeID &edge, Lambda &&l) const { - constexpr bool non_stoppable = std::is_void_v>; + using LambdaReturnType = std::conditional_t< + kHasEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto invoke_caller = [&](const NodeID adjacent_node) { + if constexpr (kHasEdgeWeights) { + const auto [edge_weight, length] = signed_varint_decode(data); + data += length; + + return l(edge, adjacent_node, edge_weight); + } else { + return l(edge, adjacent_node); + } + }; const NodeID interval_count = *((NodeID *)data); data += sizeof(NodeID); @@ -644,10 +743,10 @@ class CompressedGraph : public AbstractGraph { previous_right_extreme = cur_left_extreme + cur_interval_len - 1; for (NodeID j = 0; j < cur_interval_len; ++j) { - if constexpr (non_stoppable) { - l(edge, cur_left_extreme + j); + if constexpr (kNonStoppable) { + invoke_caller(cur_left_extreme + j); } else { - const bool stop = l(edge, cur_left_extreme + j); + const bool stop = invoke_caller(cur_left_extreme + j); if (stop) { return true; } @@ -660,11 +759,26 @@ class CompressedGraph : public AbstractGraph { return false; } - template + template bool decode_gaps( const std::uint8_t *data, NodeID node, EdgeID &edge, const EdgeID max_edge, Lambda &&l ) const { - constexpr bool non_stoppable = std::is_void_v>; + using LambdaReturnType = std::conditional_t< + kHasEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto invoke_caller = [&](const NodeID adjacent_node) { + if constexpr (kHasEdgeWeights) { + const auto [edge_weight, length] = signed_varint_decode(data); + data += length; + + return l(edge, adjacent_node, edge_weight); + } else { + return l(edge, adjacent_node); + } + }; const auto [first_gap, first_gap_len] = 
signed_varint_decode(data); data += first_gap_len; @@ -672,33 +786,35 @@ class CompressedGraph : public AbstractGraph { const NodeID first_adjacent_node = static_cast(first_gap + node); NodeID prev_adjacent_node = first_adjacent_node; - if constexpr (non_stoppable) { - l(edge, first_adjacent_node); + if constexpr (kNonStoppable) { + invoke_caller(first_adjacent_node); } else { - const bool stop = l(edge, first_adjacent_node); + const bool stop = invoke_caller(first_adjacent_node); if (stop) { return true; } } edge += 1; + /* const auto handle_gap = [&](const NodeID gap) { const NodeID adjacent_node = gap + prev_adjacent_node + 1; prev_adjacent_node = adjacent_node; - if constexpr (non_stoppable) { + if constexpr (kNonStoppable) { l(edge++, adjacent_node); } else { return l(edge++, adjacent_node); } }; + */ if constexpr (kRunLengthEncoding) { - VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); - rl_decoder.decode(std::forward(handle_gap)); + // VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); + // rl_decoder.decode(std::forward(handle_gap)); } else if constexpr (kStreamEncoding) { - VarIntStreamDecoder sv_encoder(data, max_edge - edge); - sv_encoder.decode(std::forward(handle_gap)); + // VarIntStreamDecoder sv_encoder(data, max_edge - edge); + // sv_encoder.decode(std::forward(handle_gap)); } else { while (edge != max_edge) { const auto [gap, gap_len] = varint_decode(data); @@ -707,10 +823,10 @@ class CompressedGraph : public AbstractGraph { const NodeID adjacent_node = gap + prev_adjacent_node + 1; prev_adjacent_node = adjacent_node; - if constexpr (non_stoppable) { - l(edge, adjacent_node); + if constexpr (kNonStoppable) { + invoke_caller(adjacent_node); } else { - const bool stop = l(edge, adjacent_node); + const bool stop = invoke_caller(adjacent_node); if (stop) { return true; } diff --git a/kaminpar-shm/datastructures/compressed_graph_builder.cc b/kaminpar-shm/datastructures/compressed_graph_builder.cc index 5ceeca4c..e58ae71a 100644 --- 
a/kaminpar-shm/datastructures/compressed_graph_builder.cc +++ b/kaminpar-shm/datastructures/compressed_graph_builder.cc @@ -8,7 +8,6 @@ #include "kaminpar-shm/datastructures/compressed_graph_builder.h" #include -#include #include #include @@ -17,7 +16,6 @@ #include "kaminpar-shm/kaminpar.h" -#include "kaminpar-common/datastructures/concurrent_circular_vector.h" #include "kaminpar-common/heap_profiler.h" namespace kaminpar::shm { @@ -56,13 +54,9 @@ compressed_edge_array_max_size(const NodeID num_nodes, const EdgeID num_edges) { } // namespace CompressedEdgesBuilder::CompressedEdgesBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - bool has_edge_weights, - StaticArray &edge_weights + const NodeID num_nodes, const EdgeID num_edges, bool has_edge_weights ) - : _has_edge_weights(has_edge_weights), - _edge_weights(edge_weights) { + : _has_edge_weights(has_edge_weights) { const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); _compressed_data_start = heap_profiler::overcommit_memory(max_size); _compressed_data = _compressed_data_start.get(); @@ -70,14 +64,9 @@ CompressedEdgesBuilder::CompressedEdgesBuilder( } CompressedEdgesBuilder::CompressedEdgesBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const NodeID max_degree, - bool has_edge_weights, - StaticArray &edge_weights + const NodeID num_nodes, const EdgeID num_edges, const NodeID max_degree, bool has_edge_weights ) - : _has_edge_weights(has_edge_weights), - _edge_weights(edge_weights) { + : _has_edge_weights(has_edge_weights) { const std::size_t max_size = compressed_edge_array_max_size(num_nodes, max_degree); _compressed_data_start = heap_profiler::overcommit_memory(max_size); _compressed_data = _compressed_data_start.get(); @@ -162,14 +151,14 @@ CompressedGraph CompressedGraphBuilder::compress(const CSRGraph &graph) { std::vector> neighbourhood; neighbourhood.reserve(graph.max_degree()); - for (const NodeID node : graph.nodes()) { - for (const auto 
[incident_edge, adjacent_node] : graph.neighbors(node)) { - neighbourhood.emplace_back(adjacent_node, graph.edge_weight(incident_edge)); - } + for (const NodeID u : graph.nodes()) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + neighbourhood.emplace_back(v, w); + }); - builder.add_node(node, neighbourhood); + builder.add_node(u, neighbourhood); if (store_node_weights) { - builder.add_node_weight(node, graph.node_weight(node)); + builder.add_node_weight(u, graph.node_weight(u)); } neighbourhood.clear(); @@ -185,7 +174,8 @@ CompressedGraphBuilder::CompressedGraphBuilder( const bool has_edge_weights, const bool sorted ) - : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights, _edge_weights) { + : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights), + _store_edge_weights(has_edge_weights) { KASSERT(num_nodes < std::numeric_limits::max() - 1); const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); @@ -199,10 +189,6 @@ CompressedGraphBuilder::CompressedGraphBuilder( _node_weights.resize(num_nodes); } - if (has_edge_weights) { - _edge_weights.resize(num_edges); - } - _store_node_weights = has_node_weights; _total_node_weight = 0; } @@ -265,18 +251,13 @@ CompressedGraph CompressedGraphBuilder::build() { _node_weights.free(); } - const bool unit_edge_weights = - static_cast(_compressed_edges_builder.total_edge_weight()) == _num_edges; - if (unit_edge_weights) { - _edge_weights.free(); - } - return CompressedGraph( std::move(_nodes), std::move(compressed_edges), std::move(_node_weights), - std::move(_edge_weights), _num_edges, + _compressed_edges_builder.total_edge_weight(), + _store_edge_weights, _compressed_edges_builder.max_degree(), _sorted, _compressed_edges_builder.num_high_degree_nodes(), @@ -288,7 +269,7 @@ CompressedGraph CompressedGraphBuilder::build() { std::size_t CompressedGraphBuilder::currently_used_memory() const { return _nodes.allocated_size() + _compressed_edges_builder.size() + 
- _node_weights.size() * sizeof(NodeWeight) + _edge_weights.size() * sizeof(EdgeWeight); + _node_weights.size() * sizeof(NodeWeight); } std::int64_t CompressedGraphBuilder::total_node_weight() const { @@ -331,15 +312,12 @@ ParallelCompressedGraphBuilder::ParallelCompressedGraphBuilder( _compressed_edges = heap_profiler::overcommit_memory(max_size); _compressed_edges_size = 0; _num_edges = num_edges; + _has_edge_weights = has_edge_weights; if (has_node_weights) { _node_weights.resize(num_nodes, static_array::noinit); } - if (has_edge_weights) { - _edge_weights.resize(num_edges, static_array::noinit); - } - _max_degree = 0; _total_node_weight = 0; _total_edge_weight = 0; @@ -394,10 +372,6 @@ void ParallelCompressedGraphBuilder::record_local_statistics( __atomic_fetch_add(&_num_intervals, num_intervals, __ATOMIC_RELAXED); } -StaticArray &ParallelCompressedGraphBuilder::edge_weights() { - return _edge_weights; -} - CompressedGraph ParallelCompressedGraphBuilder::build() { // Store in the last entry of the node array the offset one after the last byte belonging to the // last node. @@ -434,17 +408,13 @@ CompressedGraph ParallelCompressedGraphBuilder::build() { _node_weights.free(); } - const bool unit_edge_weights = static_cast(_total_edge_weight) == _num_edges; - if (unit_edge_weights) { - _edge_weights.free(); - } - return CompressedGraph( std::move(_nodes), std::move(compressed_edges), std::move(_node_weights), - std::move(_edge_weights), _num_edges, + _total_edge_weight, + _has_edge_weights, _max_degree, _sorted, _num_high_degree_nodes, diff --git a/kaminpar-shm/datastructures/compressed_graph_builder.h b/kaminpar-shm/datastructures/compressed_graph_builder.h index 02e3eff6..6f5bc84e 100644 --- a/kaminpar-shm/datastructures/compressed_graph_builder.h +++ b/kaminpar-shm/datastructures/compressed_graph_builder.h @@ -33,14 +33,8 @@ class CompressedEdgesBuilder { * @param num_nodes The number of nodes of the graph to compress. 
* @param num_edges The number of edges of the graph to compress. * @param has_edge_weights Whether the graph to compress has edge weights. - * @param edge_weights A reference to the edge weights of the compressed graph. */ - CompressedEdgesBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - bool has_edge_weights, - StaticArray &edge_weights - ); + CompressedEdgesBuilder(const NodeID num_nodes, const EdgeID num_edges, bool has_edge_weights); /*! * Constructs a new CompressedEdgesBuilder where the maxmimum degree specifies the number of edges @@ -50,15 +44,9 @@ class CompressedEdgesBuilder { * @param num_edges The number of edges of the graph to compress. * @param max_degree The maximum degree of the graph to compress. * @param has_edge_weights Whether the graph to compress has edge weights. - * @param edge_weights A reference to the edge weights of the compressed graph. - * @param edge_weights A reference to the edge weights of the compressed graph. */ CompressedEdgesBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const NodeID max_degree, - bool has_edge_weights, - StaticArray &edge_weights + const NodeID num_nodes, const EdgeID num_edges, const NodeID max_degree, bool has_edge_weights ); ~CompressedEdgesBuilder(); @@ -67,6 +55,7 @@ class CompressedEdgesBuilder { CompressedEdgesBuilder &operator=(const CompressedEdgesBuilder &) = delete; CompressedEdgesBuilder(CompressedEdgesBuilder &&) noexcept = default; + CompressedEdgesBuilder &operator=(CompressedEdgesBuilder &&) noexcept = delete; /*! * Initializes/resets the builder. 
@@ -130,7 +119,6 @@ class CompressedEdgesBuilder { std::size_t _compressed_data_max_size; bool _has_edge_weights; - StaticArray &_edge_weights; EdgeID _edge; NodeID _max_degree; @@ -169,11 +157,7 @@ class CompressedEdgesBuilder { _compressed_data += varint_encode(first_edge, _compressed_data); } - // Only increment the edge if edge weights are not stored as otherwise the edge is - // incremented with each edge weight being added. - if (!_has_edge_weights) { - _edge += degree; - } + _edge += degree; // If high-degree encoding is used then split the neighborhood if the degree crosses a // threshold. The neighborhood is split into equally sized parts (except possible the last part) @@ -221,11 +205,6 @@ class CompressedEdgesBuilder { using Neighbour = std::remove_reference_t::value_type; constexpr bool kHasEdgeWeights = std::is_same_v>; - const auto store_edge_weight = [&](const EdgeWeight edge_weight) { - _edge_weights[_edge++] = edge_weight; - _total_edge_weight += edge_weight; - }; - const auto fetch_adjacent_node = [&](const NodeID i) { if constexpr (kHasEdgeWeights) { return neighbourhood[i].first; @@ -293,7 +272,8 @@ class CompressedEdgesBuilder { if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight edge_weight = neighbourhood[k].second; - store_edge_weight(edge_weight); + _compressed_data += signed_varint_encode(edge_weight, _compressed_data); + _total_edge_weight += edge_weight; } } } @@ -358,7 +338,8 @@ class CompressedEdgesBuilder { if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight first_edge_weight = neighbourhood[i].second; - store_edge_weight(first_edge_weight); + _compressed_data += signed_varint_encode(first_edge_weight, _compressed_data); + _total_edge_weight += first_edge_weight; } } @@ -391,7 +372,8 @@ class CompressedEdgesBuilder { if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight edge_weight = neighbourhood[i].second; - store_edge_weight(edge_weight); + _compressed_data += 
signed_varint_encode(edge_weight, _compressed_data); + _total_edge_weight += edge_weight; } } @@ -489,19 +471,16 @@ class CompressedGraphBuilder { [[nodiscard]] std::int64_t total_edge_weight() const; private: - // The arrays that store information about the compressed graph CompactStaticArray _nodes; bool _sorted; // Whether the nodes of the graph are stored in degree-bucket order CompressedEdgesBuilder _compressed_edges_builder; EdgeID _num_edges; + bool _store_edge_weights; - StaticArray _node_weights; - StaticArray _edge_weights; - - // Statistics about the graph bool _store_node_weights; std::int64_t _total_node_weight; + StaticArray _node_weights; }; class ParallelCompressedGraphBuilder { @@ -600,13 +579,6 @@ class ParallelCompressedGraphBuilder { */ void add_node_weight(const NodeID node, const NodeWeight weight); - /*! - * Returns a reference to the edge weights of the compressed graph. - * - * @return A reference to the edge weights of the compressed graph. - */ - [[nodiscard]] StaticArray &edge_weights(); - /*! * Adds (cummulative) statistics about nodes of the compressed graph. 
*/ @@ -636,9 +608,9 @@ class ParallelCompressedGraphBuilder { heap_profiler::unique_ptr _compressed_edges; EdgeID _compressed_edges_size; EdgeID _num_edges; + bool _has_edge_weights; StaticArray _node_weights; - StaticArray _edge_weights; NodeID _max_degree; NodeWeight _total_node_weight; @@ -820,9 +792,7 @@ CompressedGraph compute_compressed_graph( }); tbb::enumerable_thread_specific neighbourhood_builder_ets([&] { - return CompressedEdgesBuilder( - num_nodes, num_edges, max_degree, kHasEdgeWeights, builder.edge_weights() - ); + return CompressedEdgesBuilder(num_nodes, num_edges, max_degree, kHasEdgeWeights); }); const std::size_t num_threads = tbb::this_task_arena::max_concurrency(); diff --git a/kaminpar-shm/datastructures/csr_graph.cc b/kaminpar-shm/datastructures/csr_graph.cc index 3a065dcc..eeeeec5c 100644 --- a/kaminpar-shm/datastructures/csr_graph.cc +++ b/kaminpar-shm/datastructures/csr_graph.cc @@ -27,9 +27,9 @@ AbstractCSRGraph::AbstractCSRGraph(const Graph &gra parallel::prefix_sum(_nodes.begin(), _nodes.end(), _nodes.begin()); graph.pfor_nodes([&](const NodeID u) { - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { _edges[e] = v; - _edge_weights[e] = graph.edge_weight(e); + _edge_weights[e] = w; }); }); diff --git a/kaminpar-shm/datastructures/csr_graph.h b/kaminpar-shm/datastructures/csr_graph.h index 59e9531d..bb9b34a9 100644 --- a/kaminpar-shm/datastructures/csr_graph.h +++ b/kaminpar-shm/datastructures/csr_graph.h @@ -204,7 +204,7 @@ class AbstractCSRGraph : public AbstractGraph { return static_cast(m()) != total_edge_weight(); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { + [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const { KASSERT(!is_edge_weighted() || e < _edge_weights.size()); return is_edge_weighted() ? 
_edge_weights[e] : 1; } @@ -269,12 +269,45 @@ class AbstractCSRGraph : public AbstractGraph { } template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { - KASSERT(u + 1 < _nodes.size()); + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto decode_adjacent_nodes = [&](auto &&decode_edge_weight) { + const auto invoke_caller = [&](const EdgeID edge) { + if constexpr (kDecodeEdgeWeights) { + return l(_edges[edge], decode_edge_weight(edge)); + } else { + return l(_edges[edge]); + } + }; + + const EdgeID from = _nodes[u]; + const EdgeID to = _nodes[u + 1]; + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (kNonStoppable) { + invoke_caller(edge); + } else { + const bool stop = invoke_caller(edge); + if (stop) { + return; + } + } + } + }; - const EdgeID from = _nodes[u]; - const EdgeID to = _nodes[u + 1]; - for (EdgeID edge = from; edge < to; ++edge) { - l(_edges[edge]); + if (is_edge_weighted()) { + decode_adjacent_nodes([&](const EdgeID edge) { return _edge_weights[edge]; }); + } else { + decode_adjacent_nodes([](const EdgeID) { return 1; }); } } @@ -286,32 +319,90 @@ class AbstractCSRGraph : public AbstractGraph { } template inline void neighbors(const NodeID u, Lambda &&l) const { - KASSERT(u + 1 < _nodes.size()); + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto 
decode_neighbors = [&](auto &&decode_edge_weight) { + const auto invoke_caller = [&](const EdgeID edge) { + if constexpr (kDecodeEdgeWeights) { + return l(edge, _edges[edge], decode_edge_weight(edge)); + } else { + return l(edge, _edges[edge]); + } + }; + + const EdgeID from = _nodes[u]; + const EdgeID to = _nodes[u + 1]; + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (kNonStoppable) { + invoke_caller(edge); + } else { + const bool stop = invoke_caller(edge); + if (stop) { + return; + } + } + } + }; - const EdgeID from = _nodes[u]; - const EdgeID to = _nodes[u + 1]; - for (EdgeID edge = from; edge < to; ++edge) { - l(edge, _edges[edge]); + if (is_edge_weighted()) { + decode_neighbors([&](const EdgeID edge) { return _edge_weights[edge]; }); + } else { + decode_neighbors([](const EdgeID) { return 1; }); } } template inline void neighbors(const NodeID u, const NodeID max_neighbor_count, Lambda &&l) const { - KASSERT(u + 1 < _nodes.size()); - constexpr bool non_stoppable = - std::is_void>::value; - - const EdgeID from = _nodes[u]; - const EdgeID to = from + std::min(degree(u), max_neighbor_count); - - for (EdgeID edge = from; edge < to; ++edge) { - if constexpr (non_stoppable) { - l(edge, _edges[edge]); - } else { - if (l(edge, _edges[edge])) { - return; + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto decode_neighbors = [&](auto &&decode_edge_weight) { + const auto invoke_caller = [&](const EdgeID edge) { + if constexpr (kDecodeEdgeWeights) { + return l(edge, _edges[edge], decode_edge_weight(edge)); + } else { + return l(edge, _edges[edge]); + } + }; + + const EdgeID from = _nodes[u]; + const NodeID degree = 
static_cast(_nodes[u + 1] - from); + const EdgeID to = from + std::min(degree, max_neighbor_count); + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (kNonStoppable) { + invoke_caller(edge); + } else { + const bool stop = invoke_caller(edge); + if (stop) { + return; + } } } + }; + + if (is_edge_weighted()) { + decode_neighbors([&](const EdgeID edge) { return _edge_weights[edge]; }); + } else { + decode_neighbors([](const EdgeID) { return 1; }); } } @@ -319,26 +410,30 @@ class AbstractCSRGraph : public AbstractGraph { inline void pfor_neighbors( const NodeID u, const NodeID max_neighbor_count, const NodeID grainsize, Lambda &&l ) const { - KASSERT(u + 1 < _nodes.size()); + KASSERT(u < n()); + constexpr bool kInvokeDirectly = std::is_invocable_v; const EdgeID from = _nodes[u]; - const EdgeID to = from + std::min(degree(u), max_neighbor_count); - - tbb::parallel_for( - tbb::blocked_range(from, to, grainsize), - [&](const tbb::blocked_range range) { - const auto end = range.end(); - - invoke_indirect>( - std::forward(l), - [&](auto &&l2) { - for (EdgeID e = range.begin(); e < end; ++e) { - l2(e, _edges[e]); - } - } - ); - } - ); + const NodeID degree = static_cast(_nodes[u + 1] - from); + const EdgeID to = from + std::min(degree, max_neighbor_count); + + const auto visit_neighbors = [&](auto &&l3) { + tbb::parallel_for(tbb::blocked_range(from, to, grainsize), [&](const auto &range) { + const auto end = range.end(); + + invoke_indirect(std::forward(l), [&](auto &&l2) { + for (EdgeID e = range.begin(); e < end; ++e) { + l2(e, _edges[e], l3(e)); + } + }); + }); + }; + + if (is_edge_weighted()) { + visit_neighbors([&](const EdgeID e) { return _edge_weights[e]; }); + } else { + visit_neighbors([](const EdgeID) { return 1; }); + } } // Graph permutation diff --git a/kaminpar-shm/datastructures/graph.cc b/kaminpar-shm/datastructures/graph.cc index a19e184e..c184d4a3 100644 --- a/kaminpar-shm/datastructures/graph.cc +++ b/kaminpar-shm/datastructures/graph.cc @@ 
-26,9 +26,9 @@ namespace debug { void print_graph(const Graph &graph) { for (const NodeID u : graph.nodes()) { LLOG << "L" << u << " NW" << graph.node_weight(u) << " | "; - for (const auto [e, v] : graph.neighbors(u)) { - LLOG << "EW" << graph.edge_weight(e) << " L" << v << " NW" << graph.node_weight(v) << " "; - } + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + LLOG << "EW" << w << " L" << v << " NW" << graph.node_weight(v) << " "; + }); LOG; } } diff --git a/kaminpar-shm/datastructures/graph.h b/kaminpar-shm/datastructures/graph.h index 21ea6fca..44573889 100644 --- a/kaminpar-shm/datastructures/graph.h +++ b/kaminpar-shm/datastructures/graph.h @@ -72,6 +72,11 @@ class Graph : public AbstractGraph { return _underlying_graph.get(); } + [[nodiscard]] CSRGraph &csr_graph() { + AbstractGraph *abstract_graph = _underlying_graph.get(); + return *dynamic_cast(abstract_graph); + } + template decltype(auto) reified(Lambda &&l) const { return graph::reified(underlying_graph(), std::forward(l)); } @@ -106,10 +111,6 @@ class Graph : public AbstractGraph { return _underlying_graph->is_edge_weighted(); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { - return _underlying_graph->edge_weight(e); - } - [[nodiscard]] inline EdgeWeight total_edge_weight() const final { return _underlying_graph->total_edge_weight(); } diff --git a/kaminpar-shm/datastructures/graph_delegate.h b/kaminpar-shm/datastructures/graph_delegate.h index 0619c7e5..d34cb6b1 100644 --- a/kaminpar-shm/datastructures/graph_delegate.h +++ b/kaminpar-shm/datastructures/graph_delegate.h @@ -63,10 +63,6 @@ template class GraphDelegate { return _graph->total_edge_weight(); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const { - return _graph->edge_weight(e); - } - // // Graph properties // diff --git a/kaminpar-shm/graphutils/subgraph_extractor.cc b/kaminpar-shm/graphutils/subgraph_extractor.cc index b03325bc..983d0a72 100644 --- 
a/kaminpar-shm/graphutils/subgraph_extractor.cc +++ b/kaminpar-shm/graphutils/subgraph_extractor.cc @@ -79,11 +79,11 @@ SequentialSubgraphExtractionResult extract_subgraphs_sequential_generic_graph( const NodeID n0 = b * n1; const EdgeID m0 = b * m1; // either 0 or s_m[0] - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (p_graph.block(v) == b) { edges[m0 + next_edge_id[b]] = mapping[v]; if (is_edge_weighted) { - edge_weights[m0 + next_edge_id[b]] = graph.edge_weight(e); + edge_weights[m0 + next_edge_id[b]] = w; } ++next_edge_id[b]; } @@ -269,12 +269,12 @@ SubgraphExtractionResult extract_subgraphs_generic_graph( const EdgeID e0 = start_positions[b].edges_start_pos; - graph.neighbors( + graph.adjacent_nodes( u_prime, - [&](const EdgeID e_prime, const NodeID v_prime) { // e_prime, v_prime = in graph - if (p_graph.block(v_prime) == b) { // only keep internal edges + [&](const NodeID v_prime, const EdgeWeight w_prime) { // v_prime, w_prime = in graph + if (p_graph.block(v_prime) == b) { // only keep internal edges if (is_edge_weighted) { - subgraph_memory.edge_weights[e0 + e] = graph.edge_weight(e_prime); + subgraph_memory.edge_weights[e0 + e] = w_prime; } subgraph_memory.edges[e0 + e] = mapping[v_prime]; ++e; diff --git a/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc b/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc index a4aa40e6..fc273e16 100644 --- a/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc +++ b/kaminpar-shm/initial_partitioning/initial_fm_refiner.cc @@ -384,9 +384,9 @@ EdgeWeight InitialFMRefiner; } // namespace kaminpar::shm - diff --git a/kaminpar-shm/label_propagation.h b/kaminpar-shm/label_propagation.h index 8080ea15..bdbe15a9 100644 --- a/kaminpar-shm/label_propagation.h +++ b/kaminpar-shm/label_propagation.h @@ -459,12 +459,10 @@ template class LabelPropagat }; bool is_interface_node = false; - _graph->neighbors(u, _max_num_neighbors, [&](const 
EdgeID e, const NodeID v) { + _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID, const NodeID v, const EdgeWeight w) { if (derived_accept_neighbor(u, v)) { const ClusterID v_cluster = derived_cluster(v); - const EdgeWeight rating = _graph->edge_weight(e); - - map[v_cluster] += rating; + map[v_cluster] += w; if constexpr (Config::kUseLocalActiveSetStrategy) { is_interface_node |= v >= _num_active_nodes; @@ -533,12 +531,10 @@ template class LabelPropagat bool is_interface_node = false; bool is_second_phase_node = false; - _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID e, const NodeID v) { + _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID, const NodeID v, const EdgeWeight w) { if (derived_accept_neighbor(u, v)) { const ClusterID v_cluster = derived_cluster(v); - const EdgeWeight rating = _graph->edge_weight(e); - - map[v_cluster] += rating; + map[v_cluster] += w; if (use_frm_selection && map.size() >= Config::kRatingMapThreshold) { if (aggregate_during_second_phase) { @@ -616,23 +612,26 @@ template class LabelPropagat bool is_interface_node = false; switch (_second_phase_aggregation_strategy) { case SecondPhaseAggregationStrategy::DIRECT: { - _graph->pfor_neighbors(u, _max_num_neighbors, 2000, [&](const EdgeID e, const NodeID v) { - if (derived_accept_neighbor(u, v)) { - const ClusterID v_cluster = derived_cluster(v); - const EdgeWeight rating = _graph->edge_weight(e); - - const EdgeWeight prev_rating = - __atomic_fetch_add(&map[v_cluster], rating, __ATOMIC_RELAXED); - - if (prev_rating == 0) { - map.local_used_entries().push_back(v_cluster); - } + _graph->pfor_neighbors( + u, + _max_num_neighbors, + 2000, + [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + if (derived_accept_neighbor(u, v)) { + const ClusterID v_cluster = derived_cluster(v); + const EdgeWeight prev_rating = + __atomic_fetch_add(&map[v_cluster], w, __ATOMIC_RELAXED); + + if (prev_rating == 0) { + map.local_used_entries().push_back(v_cluster); + } - if constexpr 
(Config::kUseLocalActiveSetStrategy) { - is_interface_node |= v >= _num_active_nodes; + if constexpr (Config::kUseLocalActiveSetStrategy) { + is_interface_node |= v >= _num_active_nodes; + } + } } - } - }); + ); break; } case SecondPhaseAggregationStrategy::BUFFERED: { @@ -652,12 +651,10 @@ template class LabelPropagat _graph->pfor_neighbors(u, _max_num_neighbors, 2000, [&](auto &&local_pfor_neighbors) { auto &local_rating_map = _rating_map_ets.local().small_map(); - local_pfor_neighbors([&](const EdgeID e, const NodeID v) { + local_pfor_neighbors([&](const EdgeID e, const NodeID v, const EdgeWeight w) { if (derived_accept_neighbor(u, v)) { const ClusterID v_cluster = derived_cluster(v); - const EdgeWeight rating = _graph->edge_weight(e); - - local_rating_map[v_cluster] += rating; + local_rating_map[v_cluster] += w; if (local_rating_map.size() >= Config::kRatingMapThreshold) { flush_local_rating_map(local_rating_map); diff --git a/kaminpar-shm/metrics.h b/kaminpar-shm/metrics.h index db54744a..d324d2d6 100644 --- a/kaminpar-shm/metrics.h +++ b/kaminpar-shm/metrics.h @@ -26,8 +26,8 @@ EdgeWeight edge_cut(const PartitionedGraph &p_graph, const Graph &graph) { tbb::parallel_for(tbb::blocked_range(0, graph.n()), [&](const auto &r) { auto &cut = cut_ets.local(); for (NodeID u = r.begin(); u < r.end(); ++u) { - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { - cut += (p_graph.block(u) != p_graph.block(v)) ? graph.edge_weight(e) : 0; + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + cut += (p_graph.block(u) != p_graph.block(v)) ? w : 0; }); } }); @@ -47,8 +47,8 @@ EdgeWeight edge_cut_seq(const PartitionedGraph &p_graph, const Graph &graph) { std::int64_t cut = 0; for (const NodeID u : graph.nodes()) { - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { - cut += (p_graph.block(u) != p_graph.block(v)) ? 
graph.edge_weight(e) : 0; + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + cut += (p_graph.block(u) != p_graph.block(v)) ? w : 0; }); } diff --git a/kaminpar-shm/partitioning/debug.cc b/kaminpar-shm/partitioning/debug.cc index b2a5e1b5..50435c72 100644 --- a/kaminpar-shm/partitioning/debug.cc +++ b/kaminpar-shm/partitioning/debug.cc @@ -78,12 +78,14 @@ void dump_graph(const Graph &graph, const std::string &filename) { if (graph.is_node_weighted()) { out << graph.node_weight(u) << " "; } - for (const auto &[e, v] : graph.neighbors(u)) { + + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { out << v + 1 << " "; if (graph.is_edge_weighted()) { - out << graph.edge_weight(e) << " "; + out << w << " "; } - } + }); + out << "\n"; } } diff --git a/kaminpar-shm/refinement/balancer/greedy_balancer.cc b/kaminpar-shm/refinement/balancer/greedy_balancer.cc index 776cf32b..1f7898cf 100644 --- a/kaminpar-shm/refinement/balancer/greedy_balancer.cc +++ b/kaminpar-shm/refinement/balancer/greedy_balancer.cc @@ -254,13 +254,13 @@ GreedyBalancer::compute_gain(const NodeID u, const BlockID u_block) const { auto action = [&](auto &map) { // compute external degree to each adjacent block that can take u without // becoming overloaded - _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph->adjacent_nodes(u, [&](const NodeID v, const EdgeID w) { const BlockID v_block = _p_graph->block(v); if (u_block != v_block && _p_graph->block_weight(v_block) + u_weight <= _p_ctx->block_weights.max(v_block)) { - map[v_block] += _p_graph->edge_weight(e); + map[v_block] += w; } else if (u_block == v_block) { - internal_degree += _p_graph->edge_weight(e); + internal_degree += w; } }); diff --git a/kaminpar-shm/refinement/fm/fm_batch_stats.cc b/kaminpar-shm/refinement/fm/fm_batch_stats.cc index 1a7ac4b7..132a52e5 100644 --- a/kaminpar-shm/refinement/fm/fm_batch_stats.cc +++ b/kaminpar-shm/refinement/fm/fm_batch_stats.cc @@ -165,13 +165,13 @@ auto 
BatchStatsComputator::compute_single_batch_stats_in_sequence( // Compute the gain of the move EdgeWeight int_degree = 0; EdgeWeight ext_degree = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { if (p_graph.block(v) == p_graph.block(u)) { - int_degree += p_graph.edge_weight(e); + int_degree += weight; } else if (p_graph.block(v) == block) { - ext_degree += p_graph.edge_weight(e); + ext_degree += weight; } - } + }); KASSERT(i < distances.size()); cur_distance = std::max(cur_distance, distances[i]); diff --git a/kaminpar-shm/refinement/gains/dense_gain_cache.h b/kaminpar-shm/refinement/gains/dense_gain_cache.h index 8e8f8c0d..348720c9 100644 --- a/kaminpar-shm/refinement/gains/dense_gain_cache.h +++ b/kaminpar-shm/refinement/gains/dense_gain_cache.h @@ -259,9 +259,7 @@ class DenseGainCache { ) { IFSTATS(++_stats_ets.local().num_moves); - for (const auto &[e, v] : p_graph.neighbors(node)) { - const EdgeWeight weight = p_graph.edge_weight(e); - + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight weight) { if (in_sparse_part(v)) { __atomic_fetch_sub(&_gain_cache[index_sparse(v, block_from)], weight, __ATOMIC_RELAXED); __atomic_fetch_add(&_gain_cache[index_sparse(v, block_to)], weight, __ATOMIC_RELAXED); @@ -279,7 +277,7 @@ class DenseGainCache { IFSTATS(_stats_ets.local().num_dense_deletions += (was_deleted ? 1 : 0)); IFSTATS(_stats_ets.local().num_dense_insertions += (was_inserted ? 
1 : 0)); } - } + }); } [[nodiscard]] KAMINPAR_INLINE bool @@ -490,20 +488,18 @@ class DenseGainCache { _weighted_degrees[u] = 0; if (in_sparse_part(u)) { - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const BlockID block_v = p_graph.block(v); - const EdgeWeight weight = p_graph.edge_weight(e); _weighted_degrees[u] += static_cast(weight); _gain_cache[index_sparse(u, block_v)] += static_cast(weight); - } + }); } else { auto ht = create_dense_wrapper(u); - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const BlockID block_v = p_graph.block(v); - const EdgeWeight weight = p_graph.edge_weight(e); _weighted_degrees[u] += static_cast(weight); ht.increase_by(block_v, static_cast(weight)); - } + }); } } @@ -513,13 +509,12 @@ class DenseGainCache { std::vector actual_external_degrees(_k, 0); EdgeWeight actual_weighted_degree = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const BlockID block_v = p_graph.block(v); - const EdgeWeight weight = p_graph.edge_weight(e); actual_weighted_degree += weight; actual_external_degrees[block_v] += weight; - } + }); for (BlockID b = 0; b < _k; ++b) { if (actual_external_degrees[b] != weighted_degree_to(u, b)) { @@ -609,11 +604,10 @@ template class DenseDelta const BlockID block_from, const BlockID block_to ) { - for (const auto &[e, v] : d_graph.neighbors(u)) { - const EdgeWeight weight = d_graph.edge_weight(e); + d_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { _gain_cache_delta[index(v, block_from)] -= weight; _gain_cache_delta[index(v, block_to)] += weight; - } + }); } KAMINPAR_INLINE void clear() { @@ -697,8 +691,7 @@ template class LargeKDens const BlockID block_from, const BlockID block_to ) { - for (const auto &[e, v] : d_graph.neighbors(u)) { - const EdgeWeight weight = 
d_graph.edge_weight(e); + d_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { _gain_cache_delta[index(v, block_from)] -= weight; if (_gain_cache.conn(v, block_to) == 0 && conn_delta(v, block_to) == 0) { @@ -711,7 +704,7 @@ template class LargeKDens } _gain_cache_delta[index(v, block_to)] += weight; - } + }); } KAMINPAR_INLINE void clear() { diff --git a/kaminpar-shm/refinement/gains/hybrid_gain_cache.h b/kaminpar-shm/refinement/gains/hybrid_gain_cache.h index 2c0e77b1..14d8af34 100644 --- a/kaminpar-shm/refinement/gains/hybrid_gain_cache.h +++ b/kaminpar-shm/refinement/gains/hybrid_gain_cache.h @@ -160,13 +160,12 @@ class HybridGainCache { void move(const PartitionedGraph &p_graph, const NodeID node, const BlockID from, const BlockID to) { - for (const auto &[e, v] : p_graph.neighbors(node)) { + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight w_e) { if (is_high_degree_node(v)) { - const EdgeWeight w_e = p_graph.edge_weight(e); __atomic_fetch_sub(&_gain_cache[gc_index(v, from)], w_e, __ATOMIC_RELAXED); __atomic_fetch_add(&_gain_cache[gc_index(v, to)], w_e, __ATOMIC_RELAXED); } - } + }); } [[nodiscard]] bool is_border_node(const NodeID node, const BlockID block) const { @@ -255,11 +254,10 @@ class HybridGainCache { const BlockID b_u = p_graph.block(u); wd(u) = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { - const EdgeWeight w_e = p_graph.edge_weight(e); + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w_e) { gc(u, p_graph.block(v)) += w_e; wd(u) += w_e; - } + }); } [[nodiscard]] bool @@ -269,13 +267,12 @@ class HybridGainCache { std::vector actual_external_degrees(_k, 0); EdgeWeight actual_weighted_degree = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const BlockID block_v = p_graph.block(v); - const EdgeWeight weight = p_graph.edge_weight(e); actual_weighted_degree += weight; actual_external_degrees[block_v] += weight; 
- } + }); for (BlockID b = 0; b < _k; ++b) { if (actual_external_degrees[b] != conn(u, b)) { @@ -370,13 +367,12 @@ template class HybridDelt const BlockID block_from, const BlockID block_to ) { - for (const auto &[e, v] : d_graph.neighbors(u)) { + d_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { if (_gain_cache.is_high_degree_node(v)) { - const EdgeWeight weight = d_graph.edge_weight(e); _gain_cache_delta[_gain_cache.gc_index(v, block_from)] -= weight; _gain_cache_delta[_gain_cache.gc_index(v, block_to)] += weight; } - } + }); } void clear() { diff --git a/kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h b/kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h index 7b823688..d8beb1d5 100644 --- a/kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h +++ b/kaminpar-shm/refinement/gains/on_the_fly_gain_cache.h @@ -89,13 +89,13 @@ class OnTheFlyGainCache { EdgeWeight conn_from = 0; EdgeWeight conn_to = 0; - for (const auto [e, v] : p_graph.neighbors(node)) { + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight weight) { if (p_graph.block(v) == from) { - conn_from += p_graph.edge_weight(e); + conn_from += weight; } else if (p_graph.block(v) == to) { - conn_to += p_graph.edge_weight(e); + conn_to += weight; } - } + }); return conn_to - conn_from; } @@ -110,9 +110,9 @@ class OnTheFlyGainCache { EdgeWeight conn_from = 0; std::pair conns_to = {0, 0}; - for (const auto [e, v] : p_graph.neighbors(node)) { + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight w_e) { const BlockID b_v = p_graph.block(v); - const EdgeWeight w_e = p_graph.edge_weight(e); + if (b_v == b_node) { conn_from += w_e; } else if (b_v == targets.first) { @@ -120,7 +120,7 @@ class OnTheFlyGainCache { } else if (b_v == targets.second) { conns_to.second += w_e; } - } + }); return {conns_to.first - conn_from, conns_to.second - conn_from}; } @@ -130,11 +130,11 @@ class OnTheFlyGainCache { conn_impl(const PartitionedGraphType &p_graph, const NodeID 
node, const BlockID block) const { EdgeWeight conn = 0; - for (const auto [e, v] : p_graph.neighbors(node)) { + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight weight) { if (p_graph.block(v) == block) { - conn += p_graph.edge_weight(e); + conn += weight; } - } + }); return conn; } @@ -157,9 +157,9 @@ class OnTheFlyGainCache { const PartitionedGraphType &p_graph, const NodeID node, const BlockID from, Lambda &&lambda ) const { auto action = [&](auto &map) { - for (const auto [e, v] : p_graph.neighbors(node)) { - map[p_graph.block(v)] += p_graph.edge_weight(e); - } + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight weight) { + map[p_graph.block(v)] += weight; + }); const EdgeWeight conn_from = kIteratesExactGains ? map[from] : 0; if constexpr (kIteratesNonadjacentBlocks) { diff --git a/kaminpar-shm/refinement/gains/sparse_gain_cache.h b/kaminpar-shm/refinement/gains/sparse_gain_cache.h index 4d62bb55..5b97d896 100644 --- a/kaminpar-shm/refinement/gains/sparse_gain_cache.h +++ b/kaminpar-shm/refinement/gains/sparse_gain_cache.h @@ -117,11 +117,10 @@ class SparseGainCache { const BlockID block_from, const BlockID block_to ) { - for (const auto &[e, v] : p_graph.neighbors(node)) { - const EdgeWeight weight = p_graph.edge_weight(e); + p_graph.adjacent_nodes(node, [&](const NodeID v, const EdgeWeight weight) { __atomic_fetch_sub(&_gain_cache[index(v, block_from)], weight, __ATOMIC_RELAXED); __atomic_fetch_add(&_gain_cache[index(v, block_to)], weight, __ATOMIC_RELAXED); - } + }); } [[nodiscard]] bool is_border_node(const NodeID node, const BlockID block) const { @@ -175,13 +174,12 @@ class SparseGainCache { const BlockID block_u = p_graph.block(u); _weighted_degrees[u] = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const BlockID block_v = p_graph.block(v); - const EdgeWeight weight = p_graph.edge_weight(e); _gain_cache[index(u, block_v)] += weight; 
_weighted_degrees[u] += weight; - } + }); } [[nodiscard]] bool @@ -190,13 +188,12 @@ class SparseGainCache { std::vector actual_external_degrees(_k, 0); EdgeWeight actual_weighted_degree = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const BlockID block_v = p_graph.block(v); - const EdgeWeight weight = p_graph.edge_weight(e); actual_weighted_degree += weight; actual_external_degrees[block_v] += weight; - } + }); for (BlockID b = 0; b < _k; ++b) { if (actual_external_degrees[b] != weighted_degree_to(u, b)) { @@ -263,11 +260,10 @@ template class SparseDelt const BlockID block_from, const BlockID block_to ) { - for (const auto &[e, v] : d_graph.neighbors(u)) { - const EdgeWeight weight = d_graph.edge_weight(e); + d_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { _gain_cache_delta[_gain_cache.index(v, block_from)] -= weight; _gain_cache_delta[_gain_cache.index(v, block_to)] += weight; - } + }); } void clear() { diff --git a/kaminpar-shm/refinement/jet/jet_refiner.cc b/kaminpar-shm/refinement/jet/jet_refiner.cc index 22b34309..6274c6ee 100644 --- a/kaminpar-shm/refinement/jet/jet_refiner.cc +++ b/kaminpar-shm/refinement/jet/jet_refiner.cc @@ -128,9 +128,7 @@ bool JetRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx const EdgeWeight gain_u = gain_cache.gain(u, from, to); EdgeWeight gain = 0; - for (const auto &[e, v] : p_graph.neighbors(u)) { - const EdgeWeight weight = p_graph.edge_weight(e); - + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) { const bool v_before_u = [&, v = v] { const BlockID from_v = p_graph.block(v); const BlockID to_v = next_partition[v]; @@ -147,7 +145,7 @@ bool JetRefiner::refine(PartitionedGraph &p_graph, const PartitionContext &p_ctx } else if (from == block_v) { gain -= weight; } - } + }); if (gain > 0) { lock[u] = 1; diff --git a/tests/shm/datastructures/compressed_graph_test.cc 
b/tests/shm/datastructures/compressed_graph_test.cc index f72ce1bc..1930d617 100644 --- a/tests/shm/datastructures/compressed_graph_test.cc +++ b/tests/shm/datastructures/compressed_graph_test.cc @@ -1,5 +1,3 @@ -#include - #include #include "tests/shm/graph_factories.h" @@ -136,6 +134,45 @@ TEST(CompressedGraphTest, compressed_graph_adjacent_nodes_operation) { TEST_ON_ALL_GRAPHS(test_compressed_graph_adjacent_nodes_operation); } +template +static void test_compressed_graph_weighted_adjacent_nodes_operation(Graph graph) { + auto &csr_graph = *dynamic_cast(graph.underlying_graph()); + const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); + + if constexpr (kRearrange) { + graph::reorder_edges_by_compression(csr_graph); + } + + std::vector> graph_neighbours; + std::vector> compressed_graph_neighbours; + for (const NodeID u : graph.nodes()) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + graph_neighbours.emplace_back(v, w); + }); + + compressed_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + compressed_graph_neighbours.emplace_back(v, w); + }); + + EXPECT_EQ(graph_neighbours.size(), compressed_graph_neighbours.size()); + + if constexpr (!kRearrange) { + std::sort(graph_neighbours.begin(), graph_neighbours.end()); + std::sort(compressed_graph_neighbours.begin(), compressed_graph_neighbours.end()); + } + + EXPECT_TRUE(graph_neighbours == compressed_graph_neighbours); + + graph_neighbours.clear(); + compressed_graph_neighbours.clear(); + } +} + +TEST(CompressedGraphTest, compressed_graph_weighted_adjacent_nodes_operation) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_weighted_adjacent_nodes_operation); + TEST_ON_ALL_GRAPHS(test_compressed_graph_weighted_adjacent_nodes_operation); +} + template static void test_compressed_graph_neighbors_operation(Graph graph) { auto &csr_graph = *dynamic_cast(graph.underlying_graph()); const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); @@ -183,6 
+220,54 @@ TEST(CompressedGraphTest, compressed_graph_neighbors_operation) { TEST_ON_ALL_GRAPHS(test_compressed_graph_neighbors_operation); } +template +static void test_compressed_graph_weighted_neighbors_operation(Graph graph) { + auto &csr_graph = *dynamic_cast(graph.underlying_graph()); + const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); + + if constexpr (rearrange) { + graph::reorder_edges_by_compression(csr_graph); + } + + std::vector graph_incident_edges; + std::vector> graph_adjacent_node; + std::vector compressed_graph_incident_edges; + std::vector> compressed_graph_adjacent_node; + for (const NodeID u : graph.nodes()) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + graph_incident_edges.push_back(e); + graph_adjacent_node.emplace_back(v, w); + }); + + compressed_graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + compressed_graph_incident_edges.push_back(e); + compressed_graph_adjacent_node.emplace_back(v, w); + }); + + EXPECT_EQ(graph_incident_edges.size(), compressed_graph_incident_edges.size()); + + if constexpr (!rearrange) { + std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); + std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); + std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); + std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); + } + + EXPECT_TRUE(graph_incident_edges == compressed_graph_incident_edges); + EXPECT_TRUE(graph_adjacent_node == compressed_graph_adjacent_node); + + graph_incident_edges.clear(); + graph_adjacent_node.clear(); + compressed_graph_incident_edges.clear(); + compressed_graph_adjacent_node.clear(); + } +} + +TEST(CompressedGraphTest, compressed_graph_weighted_neighbors_operation) { + TEST_ON_ALL_GRAPHS(test_compressed_graph_weighted_neighbors_operation); + TEST_ON_ALL_GRAPHS(test_compressed_graph_weighted_neighbors_operation); +} 
+ static void test_compressed_graph_neighbors_limit_operation(Graph graph) { auto &csr_graph = *dynamic_cast(graph.underlying_graph()); const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); @@ -233,29 +318,43 @@ static void test_compressed_graph_pfor_neighbors_operation(const Graph &graph) { const auto &csr_graph = *dynamic_cast(graph.underlying_graph()); const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); - tbb::concurrent_vector graph_adjacent_node; - tbb::concurrent_vector compressed_graph_adjacent_node; - for (const NodeID node : graph.nodes()) { + tbb::concurrent_vector graph_incident_edges; + tbb::concurrent_vector compressed_graph_incident_edges; + tbb::concurrent_vector> graph_adjacent_node; + tbb::concurrent_vector> compressed_graph_adjacent_node; + for (const NodeID u : graph.nodes()) { graph.pfor_neighbors( - node, + u, std::numeric_limits::max(), - std::numeric_limits::max(), - [&](const EdgeID e, const NodeID v) { graph_adjacent_node.push_back(v); } + 1, + [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + graph_incident_edges.push_back(e); + graph_adjacent_node.emplace_back(v, w); + } ); compressed_graph.pfor_neighbors( - node, + u, std::numeric_limits::max(), - std::numeric_limits::max(), - [&](const EdgeID e, const NodeID v) { compressed_graph_adjacent_node.push_back(v); } + 1, + [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + compressed_graph_incident_edges.push_back(e); + compressed_graph_adjacent_node.emplace_back(v, w); + } ); - EXPECT_EQ(graph_adjacent_node.size(), compressed_graph_adjacent_node.size()); + EXPECT_EQ(graph_incident_edges.size(), compressed_graph_incident_edges.size()); + + std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); + std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); + EXPECT_TRUE(graph_incident_edges == compressed_graph_incident_edges); std::sort(graph_adjacent_node.begin(), 
graph_adjacent_node.end()); std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); EXPECT_TRUE(graph_adjacent_node == compressed_graph_adjacent_node); + graph_incident_edges.clear(); + compressed_graph_incident_edges.clear(); graph_adjacent_node.clear(); compressed_graph_adjacent_node.clear(); } @@ -265,67 +364,4 @@ TEST(CompressedGraphTest, compressed_graph_pfor_neighbors_operation) { TEST_ON_ALL_GRAPHS(test_compressed_graph_pfor_neighbors_operation); } -static void test_compressed_graph_edge_weights(const Graph &graph) { - const auto &csr_graph = *dynamic_cast(graph.underlying_graph()); - const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); - - std::unordered_map csr_graph_edge_weights_map; - std::unordered_map compressed_graph_edge_weights_map; - - for (const NodeID node : graph.nodes()) { - csr_graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - csr_graph_edge_weights_map[adjacent_node] = csr_graph.edge_weight(incident_edge); - }); - - compressed_graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - compressed_graph_edge_weights_map[adjacent_node] = - compressed_graph.edge_weight(incident_edge); - }); - - EXPECT_EQ(csr_graph_edge_weights_map.size(), compressed_graph_edge_weights_map.size()); - - for (const NodeID adjacent_node : csr_graph.adjacent_nodes(node)) { - EXPECT_TRUE( - csr_graph_edge_weights_map.find(adjacent_node) != csr_graph_edge_weights_map.end() - ); - - EXPECT_TRUE( - compressed_graph_edge_weights_map.find(adjacent_node) != - compressed_graph_edge_weights_map.end() - ); - - EXPECT_TRUE( - csr_graph_edge_weights_map[adjacent_node] == - compressed_graph_edge_weights_map[adjacent_node] - ); - } - - csr_graph_edge_weights_map.clear(); - compressed_graph_edge_weights_map.clear(); - } -} - -TEST(CompressedGraphTest, compressed_graph_edge_weights) { - TEST_ON_WEIGHTED_GRAPHS(test_compressed_graph_edge_weights); -} - -static void 
test_rearrange_compressed_edge_weights(Graph graph) { - auto &csr_graph = *dynamic_cast(graph.underlying_graph()); - const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); - - graph::reorder_edges_by_compression(csr_graph); - - for (const NodeID node : graph.nodes()) { - graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - EXPECT_TRUE( - csr_graph.edge_weight(incident_edge) == compressed_graph.edge_weight(incident_edge) - ); - }); - } -} - -TEST(CompressedGraphTest, rearrange_compressed_edge_weights) { - TEST_ON_WEIGHTED_GRAPHS(test_rearrange_compressed_edge_weights); -} - } // namespace kaminpar::shm::testing diff --git a/tests/shm/datastructures/graph_test.cc b/tests/shm/datastructures/graph_test.cc index 8ed0988d..c260e175 100644 --- a/tests/shm/datastructures/graph_test.cc +++ b/tests/shm/datastructures/graph_test.cc @@ -30,7 +30,7 @@ TEST_F(AWeightedGridGraph, InitialNodeWeightingWorks) { TEST_F(AWeightedGridGraph, InitialEdgeWeightingWorks) { for (const EdgeID e : graph.edges()) { - EXPECT_EQ(graph.edge_weight(e), 1); + EXPECT_EQ(graph.csr_graph().edge_weight(e), 1); } } diff --git a/tests/shm/matchers.h b/tests/shm/matchers.h index 16d38906..5bc079a2 100644 --- a/tests/shm/matchers.h +++ b/tests/shm/matchers.h @@ -63,11 +63,14 @@ class HasWeightedEdgeWithWeightedEndpointsMatcher : public MatcherInterface Date: Fri, 28 Jun 2024 11:05:42 +0200 Subject: [PATCH 16/54] feat(kaminpar-dist): add option to print detailed compression ratios --- kaminpar-cli/dkaminpar_arguments.cc | 3 ++ kaminpar-dist/context.cc | 52 +++++++++++++------ kaminpar-dist/context_io.cc | 23 +++++++- kaminpar-dist/context_io.h | 7 ++- .../distributed_compressed_graph.h | 14 +++++ kaminpar-dist/dkaminpar.h | 10 ++++ kaminpar-dist/presets.cc | 1 + 7 files changed, 92 insertions(+), 18 deletions(-) diff --git a/kaminpar-cli/dkaminpar_arguments.cc b/kaminpar-cli/dkaminpar_arguments.cc index 30022f9a..4c1f2b9b 100644 --- 
a/kaminpar-cli/dkaminpar_arguments.cc +++ b/kaminpar-cli/dkaminpar_arguments.cc @@ -107,6 +107,9 @@ CLI::Option_group *create_debug_options(CLI::App *app, Context &ctx) { debug->add_flag("--d-save-coarsest-partition", ctx.debug.save_coarsest_partition) ->configurable(false) ->capture_default_str(); + debug->add_flag("--d-print-compression-details", ctx.debug.print_compression_details) + ->configurable(false) + ->capture_default_str(); return debug; } diff --git a/kaminpar-dist/context.cc b/kaminpar-dist/context.cc index 02de59c1..7dda0206 100644 --- a/kaminpar-dist/context.cc +++ b/kaminpar-dist/context.cc @@ -115,21 +115,43 @@ bool RefinementContext::includes_algorithm(const RefinementAlgorithm algorithm) } void GraphCompressionContext::setup(const DistributedCompressedGraph &graph) { + constexpr int kRoot = 0; const MPI_Comm comm = graph.communicator(); - - const auto compression_ratios = mpi::allgather(graph.compression_ratio(), comm); - const auto size = static_cast(compression_ratios.size()); - avg_compression_ratio = std::reduce(compression_ratios.begin(), compression_ratios.end()) / size; - min_compression_ratio = *std::min_element(compression_ratios.begin(), compression_ratios.end()); - max_compression_ratio = *std::max_element(compression_ratios.begin(), compression_ratios.end()); - - const auto graph_sizes = mpi::allgather(graph.memory_space(), comm); - const auto largest_compressed_graph_it = std::max_element(graph_sizes.begin(), graph_sizes.end()); - largest_compressed_graph = *largest_compressed_graph_it; - - const auto largest_compressed_graph_rank = - std::distance(graph_sizes.begin(), largest_compressed_graph_it); - largest_compressed_graph_prev_size = - largest_compressed_graph * compression_ratios[largest_compressed_graph_rank]; + const int rank = mpi::get_comm_rank(comm); + + compressed_graph_sizes = + mpi::gather>(graph.memory_space(), kRoot, comm); + uncompressed_graph_sizes = mpi::gather>( + graph.uncompressed_memory_space(), kRoot, comm + ); 
+ num_nodes = mpi::gather>(graph.n(), kRoot, comm); + num_edges = mpi::gather>(graph.m(), kRoot, comm); + + const auto compression_ratios = mpi::gather(graph.compression_ratio(), kRoot, comm); + if (rank == kRoot) { + const auto size = static_cast(compression_ratios.size()); + avg_compression_ratio = + std::reduce(compression_ratios.begin(), compression_ratios.end()) / size; + min_compression_ratio = *std::min_element(compression_ratios.begin(), compression_ratios.end()); + max_compression_ratio = *std::max_element(compression_ratios.begin(), compression_ratios.end()); + + const auto largest_compressed_graph_it = + std::max_element(compressed_graph_sizes.begin(), compressed_graph_sizes.end()); + largest_compressed_graph = *largest_compressed_graph_it; + + const auto largest_compressed_graph_rank = + std::distance(compressed_graph_sizes.begin(), largest_compressed_graph_it); + largest_compressed_graph_prev_size = + largest_compressed_graph * compression_ratios[largest_compressed_graph_rank]; + + const auto largest_uncompressed_graph_it = + std::max_element(uncompressed_graph_sizes.begin(), uncompressed_graph_sizes.end()); + largest_uncompressed_graph = *largest_uncompressed_graph_it; + + const auto largest_uncompressed_graph_rank = + std::distance(uncompressed_graph_sizes.begin(), largest_uncompressed_graph_it); + largest_uncompressed_graph_after_size = + largest_uncompressed_graph / compression_ratios[largest_uncompressed_graph_rank]; + } } } // namespace kaminpar::dist diff --git a/kaminpar-dist/context_io.cc b/kaminpar-dist/context_io.cc index 3c1b7088..8443233b 100644 --- a/kaminpar-dist/context_io.cc +++ b/kaminpar-dist/context_io.cc @@ -288,7 +288,7 @@ void print(const Context &ctx, const bool root, std::ostream &out, MPI_Comm comm out << " Simulate seq. hybrid exe.: " << (ctx.simulate_singlethread ?
"yes" : "no") << "\n"; } cio::print_delimiter("Graph Compression", '-'); - print(ctx.compression, ctx.parallel, out); + print(ctx.compression, ctx.parallel, ctx.debug.print_compression_details, out); cio::print_delimiter("Coarsening", '-'); print(ctx.coarsening, ctx.parallel, out); cio::print_delimiter("Initial Partitioning", '-'); @@ -351,7 +351,12 @@ void print(const ChunksContext &ctx, const ParallelContext &parallel, std::ostre } } -void print(const GraphCompressionContext &ctx, const ParallelContext &parallel, std::ostream &out) { +void print( + const GraphCompressionContext &ctx, + const ParallelContext &parallel, + const bool print_compression_details, + std::ostream &out +) { using Compression = DistributedCompressedGraph::CompressedEdges; const auto round = [](const auto value) { @@ -396,6 +401,20 @@ void print(const GraphCompressionContext &ctx, const ParallelContext &parallel, out << "Largest compressed graph: " << to_gib(ctx.largest_compressed_graph_prev_size) << " GiB -> " << to_gib(ctx.largest_compressed_graph) << " GiB\n"; + + out << "Largest uncompressed graph: " << to_gib(ctx.largest_uncompressed_graph) << " GiB -> " + << to_gib(ctx.largest_uncompressed_graph_after_size) << " GiB\n"; + + if (print_compression_details) { + out << "Local graph size reductions:\n"; + const std::size_t num_processes = ctx.compressed_graph_sizes.size(); + for (std::size_t num_process = 0; num_process < num_processes; ++num_process) { + out << " PE" << num_process << ": " << to_gib(ctx.uncompressed_graph_sizes[num_process]) + << " GiB -> " << to_gib(ctx.compressed_graph_sizes[num_process]) + << " GiB [n=" << ctx.num_nodes[num_process] << ", m=" << ctx.num_edges[num_process] + << "]\n"; + } + } } } diff --git a/kaminpar-dist/context_io.h b/kaminpar-dist/context_io.h index 14a1952a..354b547c 100644 --- a/kaminpar-dist/context_io.h +++ b/kaminpar-dist/context_io.h @@ -40,7 +40,12 @@ std::string get_balancing_algorithms_description(); void print(const Context &ctx, bool root, std::ostream
&out, MPI_Comm comm); void print(const PartitionContext &ctx, bool root, std::ostream &out, MPI_Comm comm); void print(const ChunksContext &ctx, const ParallelContext &parallel, std::ostream &out); -void print(const GraphCompressionContext &ctx, const ParallelContext &parallel, std::ostream &out); +void print( + const GraphCompressionContext &ctx, + const ParallelContext &parallel, + const bool print_compression_details, + std::ostream &out +); void print(const CoarseningContext &ctx, const ParallelContext &parallel, std::ostream &out); void print(const InitialPartitioningContext &ctx, std::ostream &out); void print(const RefinementContext &ctx, const ParallelContext &parallel, std::ostream &out); diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index 0c19cf40..ad986bc7 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -539,6 +539,20 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { return memory_space; } + [[nodiscard]] std::size_t uncompressed_memory_space() const { + std::size_t memory_space = (n() + 1) * sizeof(EdgeID) + m() * sizeof(NodeID); + + if (is_node_weighted()) { + memory_space += n() * sizeof(NodeWeight); + } + + if (is_edge_weighted()) { + memory_space += m() * sizeof(EdgeWeight); + } + + return memory_space; + } + // // Functions to access raw members of this graph // diff --git a/kaminpar-dist/dkaminpar.h b/kaminpar-dist/dkaminpar.h index fb8a34ac..fbc62546 100644 --- a/kaminpar-dist/dkaminpar.h +++ b/kaminpar-dist/dkaminpar.h @@ -311,9 +311,18 @@ struct GraphCompressionContext { double avg_compression_ratio; double min_compression_ratio; double max_compression_ratio; + std::size_t largest_compressed_graph; std::size_t largest_compressed_graph_prev_size; + std::size_t largest_uncompressed_graph; + std::size_t largest_uncompressed_graph_after_size; + + std::vector
compressed_graph_sizes; + std::vector uncompressed_graph_sizes; + std::vector num_nodes; + std::vector num_edges; + /*! * Setups the graph compression statistics of this context. * @@ -341,6 +350,7 @@ struct DebugContext { std::string graph_filename; bool save_coarsest_graph; bool save_coarsest_partition; + bool print_compression_details; }; struct Context { diff --git a/kaminpar-dist/presets.cc b/kaminpar-dist/presets.cc index 703df218..a3cea6bc 100644 --- a/kaminpar-dist/presets.cc +++ b/kaminpar-dist/presets.cc @@ -222,6 +222,7 @@ Context create_default_context() { { .save_coarsest_graph = false, .save_coarsest_partition = false, + .print_compression_details = false, } }; } From a2fc7aa792c9f6b134fe3fb4346176f6b74f7f49 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sun, 30 Jun 2024 18:09:20 +0200 Subject: [PATCH 17/54] feat(compressed-graph): apply gap encoding to edge weights --- .../datastructures/compressed_graph.h | 28 +++++++++++++++---- .../datastructures/compressed_graph_builder.h | 16 +++++++++-- .../datastructures/compressed_graph_test.cc | 4 +-- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/kaminpar-shm/datastructures/compressed_graph.h b/kaminpar-shm/datastructures/compressed_graph.h index ac818c43..7f5e92cc 100644 --- a/kaminpar-shm/datastructures/compressed_graph.h +++ b/kaminpar-shm/datastructures/compressed_graph.h @@ -691,10 +691,13 @@ class CompressedGraph : public AbstractGraph { Lambda &&l ) const { const EdgeID max_edge = edge + degree; + EdgeWeight prev_edge_weight = 0; if constexpr (kIntervalEncoding) { if (uses_intervals) { - const bool stop = decode_intervals(data, edge, std::forward(l)); + const bool stop = decode_intervals( + data, edge, prev_edge_weight, std::forward(l) + ); if (stop) { return true; } @@ -705,11 +708,15 @@ class CompressedGraph : public AbstractGraph { } } - return decode_gaps(data, node, edge, max_edge, std::forward(l)); + return decode_gaps( + data, node, edge, prev_edge_weight, max_edge, 
std::forward(l) + ); } template - bool decode_intervals(const std::uint8_t *&data, EdgeID &edge, Lambda &&l) const { + bool decode_intervals( + const std::uint8_t *&data, EdgeID &edge, EdgeWeight &prev_edge_weight, Lambda &&l + ) const { using LambdaReturnType = std::conditional_t< kHasEdgeWeights, std::invoke_result, @@ -718,9 +725,11 @@ class CompressedGraph : public AbstractGraph { const auto invoke_caller = [&](const NodeID adjacent_node) { if constexpr (kHasEdgeWeights) { - const auto [edge_weight, length] = signed_varint_decode(data); + const auto [edge_weight_gap, length] = signed_varint_decode(data); data += length; + const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; + prev_edge_weight = edge_weight; return l(edge, adjacent_node, edge_weight); } else { return l(edge, adjacent_node); @@ -761,7 +770,12 @@ class CompressedGraph : public AbstractGraph { template bool decode_gaps( - const std::uint8_t *data, NodeID node, EdgeID &edge, const EdgeID max_edge, Lambda &&l + const std::uint8_t *data, + NodeID node, + EdgeID &edge, + EdgeWeight &prev_edge_weight, + const EdgeID max_edge, + Lambda &&l ) const { using LambdaReturnType = std::conditional_t< kHasEdgeWeights, @@ -771,9 +785,11 @@ class CompressedGraph : public AbstractGraph { const auto invoke_caller = [&](const NodeID adjacent_node) { if constexpr (kHasEdgeWeights) { - const auto [edge_weight, length] = signed_varint_decode(data); + const auto [edge_weight_gap, length] = signed_varint_decode(data); data += length; + const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; + prev_edge_weight = edge_weight; return l(edge, adjacent_node, edge_weight); } else { return l(edge, adjacent_node); diff --git a/kaminpar-shm/datastructures/compressed_graph_builder.h b/kaminpar-shm/datastructures/compressed_graph_builder.h index 6f5bc84e..94bddac0 100644 --- a/kaminpar-shm/datastructures/compressed_graph_builder.h +++ b/kaminpar-shm/datastructures/compressed_graph_builder.h @@ -222,6 +222,7 
@@ class CompressedEdgesBuilder { }; NodeID local_degree = neighbourhood.size(); + EdgeWeight prev_edge_weight = 0; // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at // least kIntervalLengthTreshold. Instead of storing all nodes, only encode the left extreme i @@ -272,7 +273,10 @@ class CompressedEdgesBuilder { if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight edge_weight = neighbourhood[k].second; - _compressed_data += signed_varint_encode(edge_weight, _compressed_data); + const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; + _compressed_data += signed_varint_encode(edge_weight_gap, _compressed_data); + + prev_edge_weight = edge_weight; _total_edge_weight += edge_weight; } } @@ -338,7 +342,10 @@ class CompressedEdgesBuilder { if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight first_edge_weight = neighbourhood[i].second; - _compressed_data += signed_varint_encode(first_edge_weight, _compressed_data); + const EdgeWeight first_edge_weight_gap = first_edge_weight - prev_edge_weight; + _compressed_data += signed_varint_encode(first_edge_weight_gap, _compressed_data); + + prev_edge_weight = first_edge_weight; _total_edge_weight += first_edge_weight; } } @@ -372,7 +379,10 @@ class CompressedEdgesBuilder { if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight edge_weight = neighbourhood[i].second; - _compressed_data += signed_varint_encode(edge_weight, _compressed_data); + const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; + _compressed_data += signed_varint_encode(edge_weight_gap, _compressed_data); + + prev_edge_weight = edge_weight; _total_edge_weight += edge_weight; } } diff --git a/tests/shm/datastructures/compressed_graph_test.cc b/tests/shm/datastructures/compressed_graph_test.cc index 1930d617..e5ebd2e0 100644 --- a/tests/shm/datastructures/compressed_graph_test.cc +++ b/tests/shm/datastructures/compressed_graph_test.cc @@ 
-17,9 +17,7 @@ test_function(make_complete_bipartite_graph(100, 100)); \ test_function(make_complete_graph(100)); \ test_function(make_matching_graph(100)); \ - test_function(make_star_graph(HIGH_DEGREE_NUM)); - -#define TEST_ON_WEIGHTED_GRAPHS(test_function) \ + test_function(make_star_graph(HIGH_DEGREE_NUM)); \ test_function(make_complete_graph(100, [](const NodeID u, const NodeID v) { \ return static_cast(u + v); \ })); \ From 0518975a66b178360adc469cd97b86c0c0229fba Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 6 Jul 2024 12:35:39 +0200 Subject: [PATCH 18/54] feat(compressed-graph): print memory space for adjacent nodes and edge weights separately when debugging --- .../compressed_graph_builder.cc | 30 +++++-- .../datastructures/compressed_graph_builder.h | 86 +++++++++++++++---- 2 files changed, 95 insertions(+), 21 deletions(-) diff --git a/kaminpar-shm/datastructures/compressed_graph_builder.cc b/kaminpar-shm/datastructures/compressed_graph_builder.cc index e58ae71a..74b1bf46 100644 --- a/kaminpar-shm/datastructures/compressed_graph_builder.cc +++ b/kaminpar-shm/datastructures/compressed_graph_builder.cc @@ -17,14 +17,16 @@ #include "kaminpar-shm/kaminpar.h" #include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/varint_codec.h" namespace kaminpar::shm { namespace { template -[[nodiscard]] std::size_t -compressed_edge_array_max_size(const NodeID num_nodes, const EdgeID num_edges) { +[[nodiscard]] std::size_t compressed_edge_array_max_size( + const NodeID num_nodes, const EdgeID num_edges, const bool has_edge_weights +) { std::size_t edge_id_width; if constexpr (kActualNumEdges) { if constexpr (CompressedGraph::kIntervalEncoding) { @@ -48,6 +50,10 @@ compressed_edge_array_max_size(const NodeID num_nodes, const EdgeID num_edges) { max_size += (num_edges / CompressedGraph::kHighDegreePartLength) * varint_max_length(); } + if (has_edge_weights) { + max_size += num_edges * varint_max_length(); + } + return max_size; } @@ -57,7 +63,8 @@ 
CompressedEdgesBuilder::CompressedEdgesBuilder( const NodeID num_nodes, const EdgeID num_edges, bool has_edge_weights ) : _has_edge_weights(has_edge_weights) { - const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); + const std::size_t max_size = + compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); _compressed_data_start = heap_profiler::overcommit_memory(max_size); _compressed_data = _compressed_data_start.get(); _compressed_data_max_size = 0; @@ -67,7 +74,8 @@ CompressedEdgesBuilder::CompressedEdgesBuilder( const NodeID num_nodes, const EdgeID num_edges, const NodeID max_degree, bool has_edge_weights ) : _has_edge_weights(has_edge_weights) { - const std::size_t max_size = compressed_edge_array_max_size(num_nodes, max_degree); + const std::size_t max_size = + compressed_edge_array_max_size(num_nodes, max_degree, has_edge_weights); _compressed_data_start = heap_profiler::overcommit_memory(max_size); _compressed_data = _compressed_data_start.get(); _compressed_data_max_size = 0; @@ -140,6 +148,14 @@ std::size_t CompressedEdgesBuilder::num_intervals() const { return _num_intervals; } +std::size_t CompressedEdgesBuilder::num_adjacent_node_bytes() const { + return _num_adjacent_node_bytes; +} + +std::size_t CompressedEdgesBuilder::num_edge_weights_bytes() const { + return _num_edge_weights_bytes; +} + CompressedGraph CompressedGraphBuilder::compress(const CSRGraph &graph) { const bool store_node_weights = graph.is_node_weighted(); const bool store_edge_weights = graph.is_edge_weighted(); @@ -177,7 +193,8 @@ CompressedGraphBuilder::CompressedGraphBuilder( : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights), _store_edge_weights(has_edge_weights) { KASSERT(num_nodes < std::numeric_limits::max() - 1); - const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); + const std::size_t max_size = + compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); 
_nodes.resize(math::byte_width(max_size), num_nodes + 1); _sorted = sorted; @@ -304,7 +321,8 @@ ParallelCompressedGraphBuilder::ParallelCompressedGraphBuilder( const bool sorted ) { KASSERT(num_nodes != std::numeric_limits::max() - 1); - const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); + const std::size_t max_size = + compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); _nodes.resize(math::byte_width(max_size), num_nodes + 1); _sorted = sorted; diff --git a/kaminpar-shm/datastructures/compressed_graph_builder.h b/kaminpar-shm/datastructures/compressed_graph_builder.h index 94bddac0..d34f183c 100644 --- a/kaminpar-shm/datastructures/compressed_graph_builder.h +++ b/kaminpar-shm/datastructures/compressed_graph_builder.h @@ -113,6 +113,9 @@ class CompressedEdgesBuilder { [[nodiscard]] std::size_t num_interval_nodes() const; [[nodiscard]] std::size_t num_intervals() const; + [[nodiscard]] std::size_t num_adjacent_node_bytes() const; + [[nodiscard]] std::size_t num_edge_weights_bytes() const; + private: heap_profiler::unique_ptr _compressed_data_start; std::uint8_t *_compressed_data; @@ -130,6 +133,10 @@ class CompressedEdgesBuilder { std::size_t _num_interval_nodes; std::size_t _num_intervals; + // Debug graph compression statistics + std::size_t _num_adjacent_node_bytes; + std::size_t _num_edge_weights_bytes; + template EdgeID add_node(const NodeID node, Container &neighbourhood) { // The offset into the compressed edge array to the start of the neighbourhood. 
const auto offset = static_cast(_compressed_data - _compressed_data_start.get()); @@ -260,8 +267,15 @@ class CompressedEdgesBuilder { const NodeID interval_length_gap = interval_len - CompressedGraph::kIntervalLengthTreshold; - _compressed_data += varint_encode(left_extreme_gap, _compressed_data); - _compressed_data += varint_encode(interval_length_gap, _compressed_data); + const std::size_t left_extreme_gap_len = + varint_encode(left_extreme_gap, _compressed_data); + _compressed_data += left_extreme_gap_len; + IF_DBG _num_adjacent_node_bytes += left_extreme_gap_len; + + const std::size_t interval_length_gap_len = + varint_encode(interval_length_gap, _compressed_data); + _compressed_data += interval_length_gap_len; + IF_DBG _num_adjacent_node_bytes += interval_length_gap_len; for (NodeID j = 0; j < interval_len; ++j) { const NodeID k = i + 1 + j - interval_len; @@ -274,7 +288,11 @@ class CompressedEdgesBuilder { if (_has_edge_weights) { const EdgeWeight edge_weight = neighbourhood[k].second; const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; - _compressed_data += signed_varint_encode(edge_weight_gap, _compressed_data); + + const std::size_t edge_weight_gap_len = + signed_varint_encode(edge_weight_gap, _compressed_data); + _compressed_data += edge_weight_gap_len; + IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; prev_edge_weight = edge_weight; _total_edge_weight += edge_weight; @@ -302,9 +320,11 @@ class CompressedEdgesBuilder { // intervals have been encoded. 
if (marked_byte == nullptr) { *((NodeID *)interval_count_ptr) = interval_count; + _num_adjacent_node_bytes += sizeof(NodeID); } else if (interval_count > 0) { *((NodeID *)interval_count_ptr) = interval_count; *marked_byte |= 0b01000000; + _num_adjacent_node_bytes += sizeof(NodeID); } else { _compressed_data -= sizeof(NodeID); } @@ -337,13 +357,20 @@ class CompressedEdgesBuilder { const NodeID first_adjacent_node = fetch_adjacent_node(i); const SignedID first_gap = first_adjacent_node - static_cast(node); - _compressed_data += signed_varint_encode(first_gap, _compressed_data); + + const std::size_t first_gap_len = signed_varint_encode(first_gap, _compressed_data); + _compressed_data += first_gap_len; + IF_DBG _num_adjacent_node_bytes += first_gap_len; if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight first_edge_weight = neighbourhood[i].second; const EdgeWeight first_edge_weight_gap = first_edge_weight - prev_edge_weight; - _compressed_data += signed_varint_encode(first_edge_weight_gap, _compressed_data); + + const std::size_t first_edge_weight_gap_len = + signed_varint_encode(first_edge_weight_gap, _compressed_data); + _compressed_data += first_edge_weight_gap_len; + IF_DBG _num_edge_weights_bytes += first_edge_weight_gap_len; prev_edge_weight = first_edge_weight; _total_edge_weight += first_edge_weight; @@ -369,18 +396,28 @@ class CompressedEdgesBuilder { const NodeID gap = adjacent_node - prev_adjacent_node - 1; if constexpr (CompressedGraph::kRunLengthEncoding) { - _compressed_data += rl_encoder.add(gap); + const std::size_t gap_len = rl_encoder.add(gap); + _compressed_data += gap_len; + IF_DBG _num_adjacent_node_bytes += gap_len; } else if constexpr (CompressedGraph::kStreamEncoding) { - _compressed_data += sv_encoder.add(gap); + const std::size_t gap_len = sv_encoder.add(gap); + _compressed_data += gap_len; + IF_DBG _num_adjacent_node_bytes += gap_len; } else { - _compressed_data += varint_encode(gap, _compressed_data); + const 
std::size_t gap_len = varint_encode(gap, _compressed_data); + _compressed_data += gap_len; + IF_DBG _num_adjacent_node_bytes += gap_len; } if constexpr (kHasEdgeWeights) { if (_has_edge_weights) { const EdgeWeight edge_weight = neighbourhood[i].second; const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; - _compressed_data += signed_varint_encode(edge_weight_gap, _compressed_data); + + const std::size_t edge_weight_gap_len = + signed_varint_encode(edge_weight_gap, _compressed_data); + _compressed_data += edge_weight_gap_len; + IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; prev_edge_weight = edge_weight; _total_edge_weight += edge_weight; @@ -667,19 +704,19 @@ template decltype(auto) scoped_time(auto &elapsed, Lambda &&l) } } -void print_stats(const auto &stats_ets) { +void print_graph_compression_stats(const auto &stats_ets) { DBG << "Chunk distribution:"; std::size_t cur_thread = 0; for (const auto &stats : stats_ets) { - DBG << "t" << ++cur_thread << ": " << stats.num_chunks; + DBG << " t" << ++cur_thread << ": " << stats.num_chunks; } DBG << "Edge distribution:"; cur_thread = 0; for (const auto &stats : stats_ets) { - DBG << "t" << ++cur_thread << ": " << stats.num_edges; + DBG << " t" << ++cur_thread << ": " << stats.num_edges; } DBG << "Time distribution: (compression, sync, copy) [s]"; @@ -698,14 +735,32 @@ void print_stats(const auto &stats_ets) { total_time_sync += stats.sync_time; total_time_copy += stats.copy_time; - DBG << "t" << ++cur_thread << ": " << to_sec(stats.compression_time) << ' ' + DBG << " t" << ++cur_thread << ": " << to_sec(stats.compression_time) << ' ' << to_sec(stats.sync_time) << ' ' << to_sec(stats.copy_time); } - DBG << "sum: " << to_sec(total_time_compression) << ' ' << to_sec(total_time_sync) << ' ' + DBG << " sum: " << to_sec(total_time_compression) << ' ' << to_sec(total_time_sync) << ' ' << to_sec(total_time_copy); } +void print_compressed_graph_stats(const auto &stats_ets) { + std::size_t 
_total_adjacent_nodes_num_bytes = 0; + std::size_t _total_edge_weights_num_bytes = 0; + + for (const auto &neighbourhood_builder : stats_ets) { + _total_adjacent_nodes_num_bytes += neighbourhood_builder.num_adjacent_node_bytes(); + _total_edge_weights_num_bytes += neighbourhood_builder.num_edge_weights_bytes(); + } + + const auto to_mb = [](const auto num_bytes) { + return num_bytes / static_cast(1024 * 1024); + }; + + DBG << "Compressed adjacent nodes memory space: " << to_mb(_total_adjacent_nodes_num_bytes) + << " MiB"; + DBG << "Compressed edge weights memory space: " << to_mb(_total_edge_weights_num_bytes) << " MiB"; +} + } // namespace debug namespace { @@ -892,7 +947,8 @@ CompressedGraph compute_compressed_graph( }); }); - IF_DBG debug::print_stats(dbg_ets); + IF_DBG debug::print_graph_compression_stats(dbg_ets); + IF_DBG debug::print_compressed_graph_stats(neighbourhood_builder_ets); return builder.build(); } From 7073c9f5f15055a847f110d880af5c6314ed8ede Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sat, 6 Jul 2024 12:56:10 +0200 Subject: [PATCH 19/54] feat(kaminpar-shm): add tool to assign weights to graphs --- apps/io/metis_parser.cc | 2 +- apps/io/parhip_parser.cc | 103 +++++++- apps/io/parhip_parser.h | 8 + apps/tools/CMakeLists.txt | 1 + apps/tools/shm_graph_attach_weights_tool.cc | 265 ++++++++++++++++++++ 5 files changed, 367 insertions(+), 12 deletions(-) create mode 100644 apps/tools/shm_graph_attach_weights_tool.cc diff --git a/apps/io/metis_parser.cc b/apps/io/metis_parser.cc index 9d9d1247..55a3dc91 100644 --- a/apps/io/metis_parser.cc +++ b/apps/io/metis_parser.cc @@ -322,7 +322,7 @@ void write(const std::string &filename, const Graph &graph) { out << graph.node_weight(node) << ' '; } - graph.neighbors(node, [&](const NodeID adjacent_node, const EdgeWeight weight) { + graph.adjacent_nodes(node, [&](const NodeID adjacent_node, const EdgeWeight weight) { out << (adjacent_node + 1) << ' '; if (graph.is_edge_weighted()) { diff --git 
a/apps/io/parhip_parser.cc b/apps/io/parhip_parser.cc index 9159d38a..8980c458 100644 --- a/apps/io/parhip_parser.cc +++ b/apps/io/parhip_parser.cc @@ -23,10 +23,8 @@ #include "kaminpar-shm/graphutils/permutator.h" #include "kaminpar-shm/kaminpar.h" -#include "kaminpar-common/datastructures/concurrent_circular_vector.h" #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/logger.h" -#include "kaminpar-common/parallel/loops.h" #include "kaminpar-common/timer.h" namespace { @@ -74,8 +72,8 @@ class BinaryReader { return *reinterpret_cast(_data + position); } - template [[nodiscard]] T *fetch(std::size_t position) const { - return reinterpret_cast(_data + position); + template [[nodiscard]] const T *fetch(std::size_t position) const { + return reinterpret_cast(_data + position); } private: @@ -84,6 +82,28 @@ class BinaryReader { std::uint8_t *_data; }; +class BinaryWriter { +public: + BinaryWriter(const std::string &filename) : _out(filename, std::ios::binary) {} + + void write(const char *data, const std::size_t size) { + _out.write(data, size); + } + + template void write_int(const T value) { + _out.write(reinterpret_cast(&value), sizeof(T)); + } + + template void write_static_array(const kaminpar::StaticArray &static_array) { + const char *data = reinterpret_cast(static_array.data()); + const std::size_t size = static_array.size() * sizeof(T); + write(data, size); + } + +private: + std::ofstream _out; +}; + class ParhipHeader { using CompressedGraph = kaminpar::shm::CompressedGraph; using NodeID = CompressedGraph::NodeID; @@ -94,6 +114,25 @@ class ParhipHeader { public: static constexpr std::uint64_t kSize = 3 * sizeof(std::uint64_t); + [[nodiscard]] static std::uint64_t version( + const bool has_edge_weights, + const bool has_node_weights, + const bool has_64_bit_edge_id = sizeof(EdgeID) == 8, + const bool has_64_bit_node_id = sizeof(NodeID) == 8, + const bool has_64_bit_node_weight = sizeof(NodeWeight) == 8, + const bool 
has_64_bit_edge_weight = sizeof(EdgeWeight) == 8 + ) { + const auto make_flag = [&](const bool flag, const std::uint64_t shift) { + return static_cast(flag ? 0 : 1) << shift; + }; + + const std::uint64_t version = + make_flag(has_64_bit_edge_weight, 5) | make_flag(has_64_bit_node_weight, 4) | + make_flag(has_64_bit_node_id, 3) | make_flag(has_64_bit_edge_id, 2) | + make_flag(has_node_weights, 1) | make_flag(has_edge_weights, 0); + return version; + } + bool has_edge_weights; bool has_node_weights; bool has_64_bit_edge_id; @@ -136,12 +175,12 @@ class ParhipHeader { if (has_64_bit_node_weight) { if (sizeof(NodeWeight) != 8) { - LOG_ERROR << "The stored graph uses 64-Bit node weights but this build uses 32-Bit node" + LOG_ERROR << "The stored graph uses 64-Bit node weights but this build uses 32-Bit node " "weights."; std::exit(1); } } else if (sizeof(NodeWeight) != 4) { - LOG_ERROR << "The stored graph uses 32-Bit node weights but this build uses 64-Bit node" + LOG_ERROR << "The stored graph uses 32-Bit node weights but this build uses 64-Bit node " "weights."; std::exit(1); } @@ -153,7 +192,7 @@ class ParhipHeader { std::exit(1); } } else if (sizeof(EdgeWeight) != 4) { - LOG_ERROR << "The stored graph uses 32-Bit edge weights but this build uses 64-Bit edge" + LOG_ERROR << "The stored graph uses 32-Bit edge weights but this build uses 64-Bit edge " "weights."; std::exit(1); } @@ -227,10 +266,12 @@ CompressedGraph compressed_read(const std::string &filename, const bool sorted) position += (header.num_nodes + 1) * sizeof(EdgeID); const NodeID *edges = reader.fetch(position); - position += header.num_edges + sizeof(NodeID); + position += header.num_edges * sizeof(NodeID); const NodeWeight *node_weights = reader.fetch(position); - position += header.num_nodes + sizeof(NodeWeight); + if (header.has_node_weights) { + position += header.num_nodes * sizeof(NodeWeight); + } const EdgeWeight *edge_weights = reader.fetch(position); @@ -291,10 +332,12 @@ CompressedGraph 
compressed_read_parallel(const std::string &filename, const Node position += (header.num_nodes + 1) * sizeof(EdgeID); const NodeID *edges = reader.fetch(position); - position += header.num_edges + sizeof(NodeID); + position += header.num_edges * sizeof(NodeID); const NodeWeight *node_weights = reader.fetch(position); - position += header.num_nodes + sizeof(NodeWeight); + if (header.has_node_weights) { + position += header.num_nodes * sizeof(NodeWeight); + } const EdgeWeight *edge_weights = reader.fetch(position); @@ -355,4 +398,42 @@ CompressedGraph compressed_read_parallel(const std::string &filename, const Node } } +void write(const std::string &filename, const CSRGraph &graph) { + BinaryWriter writer(filename); + + const bool has_node_weights = graph.is_node_weighted(); + const bool has_edge_weights = graph.is_edge_weighted(); + + const std::uint64_t version = ParhipHeader::version(has_edge_weights, has_node_weights); + writer.write_int(version); + + const std::uint64_t num_nodes = graph.n(); + writer.write_int(num_nodes); + + const std::uint64_t num_edges = graph.m(); + writer.write_int(num_edges); + + const NodeID num_total_nodes = num_nodes + 1; + const EdgeID nodes_offset_base = ParhipHeader::kSize + num_total_nodes * sizeof(EdgeID); + const StaticArray &nodes = graph.raw_nodes(); + + StaticArray raw_nodes(num_total_nodes, static_array::noinit); + tbb::parallel_for(tbb::blocked_range(0, num_total_nodes), [&](const auto &r) { + for (NodeID u = r.begin(); u != r.end(); ++u) { + raw_nodes[u] = nodes_offset_base + nodes[u] * sizeof(NodeID); + } + }); + + writer.write_static_array(raw_nodes); + writer.write_static_array(graph.raw_edges()); + + if (has_node_weights) { + writer.write_static_array(graph.raw_node_weights()); + } + + if (has_edge_weights) { + writer.write_static_array(graph.raw_edge_weights()); + } +} + } // namespace kaminpar::shm::io::parhip diff --git a/apps/io/parhip_parser.h b/apps/io/parhip_parser.h index 79ddbb96..aa1ccca9 100644 --- 
a/apps/io/parhip_parser.h +++ b/apps/io/parhip_parser.h @@ -42,4 +42,12 @@ CompressedGraph compressed_read(const std::string &filename, const bool sorted); */ CompressedGraph compressed_read_parallel(const std::string &filename, const NodeOrdering ordering); +/*! + * Writes a graph to a file in ParHIP format. + * + * @param filename The name of the file in which to store the graph. + * @param graph The graph to store. + */ +void write(const std::string &filename, const CSRGraph &graph); + } // namespace kaminpar::shm::io::parhip diff --git a/apps/tools/CMakeLists.txt b/apps/tools/CMakeLists.txt index 08f0c5ab..3143359e 100644 --- a/apps/tools/CMakeLists.txt +++ b/apps/tools/CMakeLists.txt @@ -5,6 +5,7 @@ function(add_shm_tool target) endfunction() # Shared-memory tools +add_shm_tool(shm_graph_attach_weights_tool shm_graph_attach_weights_tool.cc) add_shm_tool(shm_graph_compression_tool shm_graph_compression_tool.cc) add_shm_tool(shm_graph_properties_tool shm_graph_properties_tool.cc) add_shm_tool(shm_graph_rearrangement_tool shm_graph_rearrangement_tool.cc) diff --git a/apps/tools/shm_graph_attach_weights_tool.cc b/apps/tools/shm_graph_attach_weights_tool.cc new file mode 100644 index 00000000..768dad8f --- /dev/null +++ b/apps/tools/shm_graph_attach_weights_tool.cc @@ -0,0 +1,265 @@ +/******************************************************************************* + * Tool for assigning random weights based on different distributions to graphs + * for the shared-memory algorithm. 
+ * + * @file: shm_graph_attach_weights_tool.cc + * @author: Daniel Salwasser + * @date: 30.06.2024 + ******************************************************************************/ +// clang-format off +#include +// clang-format on + +#include +#include + +#include +#include + +#include "kaminpar-shm/datastructures/graph.h" +#include "kaminpar-shm/kaminpar.h" + +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/logger.h" +#include "kaminpar-common/parallel/loops.h" + +#include "apps/io/metis_parser.h" +#include "apps/io/parhip_parser.h" +#include "apps/io/shm_io.h" + +using namespace kaminpar; +using namespace kaminpar::shm; +using namespace kaminpar::shm::io; + +namespace { + +enum class WeightDistribution { + UNIFORM, + ALTERNATING +}; + +[[nodiscard]] std::unordered_map get_weight_distributions() { + return { + {"uniform", WeightDistribution::UNIFORM}, + {"alternating", WeightDistribution::ALTERNATING}, + }; +} + +struct EdgeHasher { + using Edge = std::pair; + + [[nodiscard]] std::size_t operator()(const Edge &edge) const noexcept { + return edge.first ^ (edge.second << 1); + } + + [[nodiscard]] std::size_t hash(const Edge &edge) const noexcept { + return edge.first ^ (edge.second << 1); + } + + [[nodiscard]] bool equal(const Edge &a, const Edge &b) const noexcept { + return a == b; + } +}; + +template +[[nodiscard]] StaticArray +generate_edge_weights(const CSRGraph &graph, Lambda &&edge_weight_generator_factory) { + StaticArray edge_weights(graph.m(), static_array::noinit); + + using Edge = std::pair; + using ConcurrentHashMap = tbb::concurrent_hash_map; + ConcurrentHashMap edge_weights_map(graph.m() / 2); + + parallel::deterministic_for( + 0, + graph.n(), + [&](const NodeID from, const NodeID to, const int cpu) { + edge_weight_generator_factory(cpu, [&](auto &&edge_weight_generator) { + for (NodeID u = from; u < to; ++u) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + if (u <= v) { + const EdgeWeight w = 
edge_weight_generator(e, u, v); + edge_weights[e] = w; + + typename ConcurrentHashMap::accessor entry; + edge_weights_map.insert(entry, std::make_pair(u, v)); + entry->second = w; + } + }); + } + }); + } + ); + + tbb::parallel_for(tbb::blocked_range(0, graph.n()), [&](const auto &r) { + for (NodeID u = r.begin(); u != r.end(); ++u) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + if (u > v) { + typename ConcurrentHashMap::const_accessor entry; + edge_weights_map.find(entry, std::make_pair(v, u)); + + const EdgeWeight w = entry->second; + edge_weights[e] = w; + } + }); + } + }); + + return edge_weights; +} + +[[nodiscard]] StaticArray generate_uniform_edge_weights( + const CSRGraph &graph, const int seed, const EdgeWeight min, const EdgeWeight max +) { + return generate_edge_weights(graph, [&](const int cpu, auto &&edge_weight_fetcher) { + const int local_seed = seed + cpu; + std::mt19937 gen(local_seed); + std::uniform_int_distribution dist(min, max); + + edge_weight_fetcher([&](const EdgeID, const NodeID, const NodeID) { + const EdgeWeight weight = dist(gen); + return weight; + }); + }); +} + +[[nodiscard]] StaticArray generate_alternating_edge_weights( + const CSRGraph &graph, + const int seed, + const EdgeWeight min_small_weights, + const EdgeWeight max_small_weights, + const EdgeWeight min_large_weights, + const EdgeWeight max_large_weights +) { + return generate_edge_weights(graph, [&](const int cpu, auto &&edge_weight_fetcher) { + const int local_seed = seed + cpu; + std::mt19937 gen(local_seed); + std::uniform_int_distribution small_dist(min_small_weights, max_small_weights); + std::uniform_int_distribution large_dist(min_large_weights, max_large_weights); + + edge_weight_fetcher([&](const EdgeID e, const NodeID, const NodeID) { + const bool is_small_weight = (e % 2) == 0; + + if (is_small_weight) { + const EdgeWeight weight = small_dist(gen); + return weight; + } else { + const EdgeWeight weight = large_dist(gen); + return weight; + } + }); + 
}); +} + +}; // namespace + +int main(int argc, char *argv[]) { + CLI::App app("Shared-memory graph attach-weights tool"); + + std::string graph_filename; + GraphFileFormat graph_file_format = io::GraphFileFormat::METIS; + app.add_option("-G,--graph", graph_filename, "Input graph in METIS/ParHIP format")->required(); + app.add_option("-f,--graph-file-format", graph_file_format) + ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description("")) + ->description(R"(Graph file format of the input graph: + - metis + - parhip)") + ->capture_default_str(); + + std::string weighted_graph_filename; + GraphFileFormat weighted_graph_file_format = io::GraphFileFormat::METIS; + app.add_option("--out", weighted_graph_filename, "Output file for storing the weighted graph") + ->required(); + app.add_option("--out-f,--out-graph-file-format", weighted_graph_file_format) + ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description("")) + ->description(R"(Graph file format used for storing the weighted graph: + - metis + - parhip)"); + + int seed = 1; + int num_threads = 1; + app.add_option("-s,--seed", seed, "Seed for random number generation.")->capture_default_str(); + app.add_option("-t,--threads", num_threads, "Number of threads")->capture_default_str(); + + WeightDistribution distribution; + app.add_option("-d,--distribution", distribution) + ->transform(CLI::CheckedTransformer(get_weight_distributions()).description("")) + ->description(R"(Distribution used for generating edge weights: + - uniform + - alternating)") + ->required() + ->capture_default_str(); + + EdgeWeight uniform_min_weight = 1; + EdgeWeight uniform_max_weight = 32768; + auto *uniform_group = app.add_option_group("Uniform Distribution"); + uniform_group->add_option("--u-min", uniform_min_weight, "Minimum weight value.") + ->capture_default_str(); + uniform_group->add_option("--u-max", uniform_max_weight, "Maximum weight value.") + ->capture_default_str(); + + EdgeWeight 
alt_min_small_weights = 1; + EdgeWeight alt_max_small_weights = 128; + EdgeWeight alt_min_large_weights = 32768; + EdgeWeight alt_max_large_weights = 8388608; + auto *alt_group = app.add_option_group("Alternating Distribution"); + alt_group + ->add_option("--a-min-small", alt_min_small_weights, "Minimum weight value of small weights.") + ->capture_default_str(); + alt_group + ->add_option("--a-max-small", alt_max_small_weights, "Maximum weight value of small weights.") + ->capture_default_str(); + alt_group + ->add_option("--a-min-large", alt_min_large_weights, "Minimum weight value of large weights.") + ->capture_default_str(); + alt_group + ->add_option("--a-max-large", alt_max_large_weights, "Maximum weight value of large weights.") + ->capture_default_str(); + + CLI11_PARSE(app, argc, argv); + + tbb::global_control gc(tbb::global_control::max_allowed_parallelism, num_threads); + + LOG << "Reading input graph..."; + Graph graph = io::read(graph_filename, graph_file_format, false, false, NodeOrdering::NATURAL); + CSRGraph &csr_graph = graph.csr_graph(); + + LOG << "Generating edge weights..."; + StaticArray edge_weights = [&] { + switch (distribution) { + case WeightDistribution::UNIFORM: + return generate_uniform_edge_weights(csr_graph, seed, uniform_min_weight, uniform_max_weight); + case WeightDistribution::ALTERNATING: + return generate_alternating_edge_weights( + csr_graph, + seed, + alt_min_small_weights, + alt_max_small_weights, + alt_min_large_weights, + alt_max_large_weights + ); + default: + __builtin_unreachable(); + } + }(); + + Graph weighted_graph(std::make_unique( + csr_graph.take_raw_nodes(), + csr_graph.take_raw_edges(), + csr_graph.take_raw_node_weights(), + std::move(edge_weights) + )); + + LOG << "Writing weighted graph..."; + switch (weighted_graph_file_format) { + case GraphFileFormat::METIS: + io::metis::write(weighted_graph_filename, weighted_graph); + break; + case GraphFileFormat::PARHIP: + io::parhip::write(weighted_graph_filename, 
weighted_graph.csr_graph()); + break; + } + + LOG << "Finished!"; + return EXIT_SUCCESS; +} From ace878685c90f9fc2b139f18ec92015cab55ea1b Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Sun, 7 Jul 2024 15:35:04 +0200 Subject: [PATCH 20/54] feat(compressed-graph): unify shared-memory and distributed graph compression --- apps/CMakeLists.txt | 1 + .../shm_compressed_graph_benchmark.cc | 4 +- apps/benchmarks/shm_input_benchmark.cc | 7 +- apps/io/binary_util.h | 100 ++ apps/io/dist_metis_parser.cc | 16 +- apps/io/dist_parhip_parser.cc | 141 ++- apps/io/metis_parser.cc | 2 +- apps/io/parhip_parser.cc | 99 +- apps/io/shm_compressed_graph_binary.cc | 20 +- .../graph-compression/compressed_edges.h | 425 ------- .../compressed_edges_builder.h | 458 +++++--- .../compressed_neighborhoods.h | 721 ++++++++++++ .../compressed_neighborhoods_builder.h | 286 +++++ .../clustering/hem/hem_clusterer.cc | 13 +- .../contraction/global_cluster_contraction.cc | 8 +- .../contraction/local_cluster_contraction.cc | 4 +- kaminpar-dist/context.cc | 1 + kaminpar-dist/context_io.cc | 2 +- .../abstract_distributed_graph.h | 2 - .../distributed_compressed_graph.cc | 11 +- .../distributed_compressed_graph.h | 102 +- .../distributed_compressed_graph_builder.cc | 157 --- .../distributed_compressed_graph_builder.h | 70 -- .../datastructures/distributed_csr_graph.h | 151 ++- .../datastructures/distributed_graph.cc | 8 +- .../datastructures/distributed_graph.h | 45 +- .../distributed_partitioned_graph.h | 1 - kaminpar-dist/debug.cc | 4 +- kaminpar-dist/distributed_label_propagation.h | 6 +- kaminpar-dist/graphutils/bfs_extractor.cc | 29 +- kaminpar-dist/graphutils/communication.h | 20 +- kaminpar-dist/graphutils/replicator.cc | 24 +- .../graphutils/subgraph_extractor.cc | 4 +- .../mtkahypar_initial_partitioner.cc | 4 +- kaminpar-dist/metrics.cc | 4 +- .../refinement/adapters/mtkahypar_refiner.cc | 5 +- kaminpar-dist/refinement/balancer/clusters.cc | 36 +- 
kaminpar-dist/refinement/balancer/clusters.h | 4 +- kaminpar-dist/refinement/gain_calculator.h | 6 +- kaminpar-dist/refinement/jet/jet_refiner.cc | 25 +- kaminpar-dist/refinement/lp/clp_refiner.cc | 7 +- .../clustering/legacy_lp_clusterer.cc | 8 +- .../coarsening/clustering/lp_clusterer.cc | 63 +- .../buffered_cluster_contraction.cc | 2 +- .../cluster_contraction_preprocessing.cc | 1 + .../legacy_buffered_cluster_contraction.cc | 2 +- .../naive_unbuffered_cluster_contraction.cc | 3 +- kaminpar-shm/datastructures/abstract_graph.h | 26 +- .../datastructures/compressed_graph.cc | 45 +- .../datastructures/compressed_graph.h | 703 +++--------- .../compressed_graph_builder.cc | 445 -------- .../datastructures/compressed_graph_builder.h | 1011 ----------------- kaminpar-shm/datastructures/csr_graph.cc | 182 ++- kaminpar-shm/datastructures/csr_graph.h | 423 +++---- kaminpar-shm/datastructures/graph.cc | 8 +- kaminpar-shm/datastructures/graph.h | 209 ++-- kaminpar-shm/datastructures/graph_delegate.h | 110 +- .../graphutils/compressed_graph_builder.cc | 92 ++ .../graphutils/compressed_graph_builder.h | 112 ++ .../parallel_compressed_graph_builder.cc | 28 + .../parallel_compressed_graph_builder.h | 366 ++++++ kaminpar-shm/graphutils/permutator.cc | 19 +- .../initial_bfs_bipartitioner.cc | 7 +- .../initial_partitioning/initial_coarsener.cc | 12 +- .../initial_fm_refiner.cc | 18 +- .../initial_ggg_bipartitioner.cc | 16 +- .../initial_partitioning/seed_node_utils.cc | 10 +- kaminpar-shm/kaminpar.cc | 4 +- kaminpar-shm/legacy_label_propagation.h | 4 +- kaminpar-shm/metrics.h | 1 + .../partitioning/deep/deep_multilevel.cc | 5 - .../partitioning/kway/kway_multilevel.cc | 5 - kaminpar-shm/refinement/fm/fm_batch_stats.cc | 4 +- kaminpar-shm/refinement/fm/fm_refiner.cc | 4 +- .../refinement/gains/on_the_fly_gain_cache.h | 10 +- .../refinement/lp/legacy_lp_refiner.cc | 4 +- kaminpar-shm/refinement/lp/lp_refiner.cc | 55 +- .../coarsening/cluster_contraction_test.cc | 12 +- 
.../distributed_compressed_graph_test.cc | 95 +- .../datastructures/compressed_graph_test.cc | 34 +- tests/shm/matchers.h | 16 +- 81 files changed, 3291 insertions(+), 3916 deletions(-) create mode 100644 apps/io/binary_util.h delete mode 100644 kaminpar-common/graph-compression/compressed_edges.h create mode 100644 kaminpar-common/graph-compression/compressed_neighborhoods.h create mode 100644 kaminpar-common/graph-compression/compressed_neighborhoods_builder.h delete mode 100644 kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc delete mode 100644 kaminpar-dist/datastructures/distributed_compressed_graph_builder.h delete mode 100644 kaminpar-shm/datastructures/compressed_graph_builder.cc delete mode 100644 kaminpar-shm/datastructures/compressed_graph_builder.h create mode 100644 kaminpar-shm/graphutils/compressed_graph_builder.cc create mode 100644 kaminpar-shm/graphutils/compressed_graph_builder.h create mode 100644 kaminpar-shm/graphutils/parallel_compressed_graph_builder.cc create mode 100644 kaminpar-shm/graphutils/parallel_compressed_graph_builder.h diff --git a/apps/CMakeLists.txt b/apps/CMakeLists.txt index 0b9bbe5d..d07030e0 100644 --- a/apps/CMakeLists.txt +++ b/apps/CMakeLists.txt @@ -1,5 +1,6 @@ set(KAMINPAR_IO_SOURCE_FILES io/file_tokener.h + io/binary_util.h io/metis_parser.h io/metis_parser.cc io/parhip_parser.h diff --git a/apps/benchmarks/shm_compressed_graph_benchmark.cc b/apps/benchmarks/shm_compressed_graph_benchmark.cc index a338a230..80bd5fdf 100644 --- a/apps/benchmarks/shm_compressed_graph_benchmark.cc +++ b/apps/benchmarks/shm_compressed_graph_benchmark.cc @@ -9,7 +9,7 @@ #include "kaminpar-cli/CLI11.h" -#include "kaminpar-shm/datastructures/compressed_graph_builder.h" +#include "kaminpar-shm/graphutils/parallel_compressed_graph_builder.h" #include "kaminpar-common/console_io.h" #include "kaminpar-common/logger.h" @@ -212,7 +212,7 @@ int main(int argc, char *argv[]) { }(); LOG << "Compressing the input graph..."; - 
CompressedGraph compressed_graph = ParallelCompressedGraphBuilder::compress(graph); + CompressedGraph compressed_graph = parallel_compress(graph); // Run benchmarks LOG << "Running the benchmarks..."; diff --git a/apps/benchmarks/shm_input_benchmark.cc b/apps/benchmarks/shm_input_benchmark.cc index 70cd2fbf..8bb04e26 100644 --- a/apps/benchmarks/shm_input_benchmark.cc +++ b/apps/benchmarks/shm_input_benchmark.cc @@ -12,7 +12,8 @@ #include #include "kaminpar-shm/context_io.h" -#include "kaminpar-shm/datastructures/compressed_graph_builder.h" +#include "kaminpar-shm/graphutils/compressed_graph_builder.h" +#include "kaminpar-shm/graphutils/parallel_compressed_graph_builder.h" #include "kaminpar-common/console_io.h" #include "kaminpar-common/logger.h" @@ -89,9 +90,7 @@ int main(int argc, char *argv[]) { Graph(std::make_unique(CompressedGraphBuilder::compress(csr_graph))); ctx.setup(graph); } else { - Graph graph = Graph( - std::make_unique(ParallelCompressedGraphBuilder::compress(csr_graph)) - ); + Graph graph = Graph(std::make_unique(parallel_compress(csr_graph))); ctx.setup(graph); } } else { diff --git a/apps/io/binary_util.h b/apps/io/binary_util.h new file mode 100644 index 00000000..8cc10167 --- /dev/null +++ b/apps/io/binary_util.h @@ -0,0 +1,100 @@ +/******************************************************************************* + * Reader and writer for binary files. 
+ * + * @file: binary_util.h + * @author: Daniel Salwasser + * @date: 07.07.2024 + ******************************************************************************/ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "kaminpar-common/datastructures/static_array.h" + +namespace kaminpar::io { + +class BinaryReaderException : public std::exception { +public: + BinaryReaderException(std::string msg) : _msg(std::move(msg)) {} + + [[nodiscard]] const char *what() const noexcept override { + return _msg.c_str(); + } + +private: + std::string _msg; +}; + +class BinaryReader { +public: + BinaryReader(const std::string &filename) { + _file = open(filename.c_str(), O_RDONLY); + if (_file == -1) { + throw BinaryReaderException("Cannot read the file that stores the graph"); + } + + struct stat file_info; + if (fstat(_file, &file_info) == -1) { + close(_file); + throw BinaryReaderException("Cannot determine the size of the file that stores the graph"); + } + + _length = static_cast(file_info.st_size); + _data = static_cast(mmap(nullptr, _length, PROT_READ, MAP_PRIVATE, _file, 0)); + if (_data == MAP_FAILED) { + close(_file); + throw BinaryReaderException("Cannot map the file that stores the graph"); + } + } + + ~BinaryReader() { + munmap(_data, _length); + close(_file); + } + + template [[nodiscard]] T read(const std::size_t position) const { + return *reinterpret_cast(_data + position); + } + + template [[nodiscard]] const T *fetch(const std::size_t position) const { + return reinterpret_cast(_data + position); + } + +private: + int _file; + std::size_t _length; + std::uint8_t *_data; +}; + +class BinaryWriter { +public: + BinaryWriter(const std::string &filename) : _out(filename, std::ios::binary) {} + + void write(const char *data, const std::size_t size) { + _out.write(data, size); + } + + template void write_int(const T value) { + _out.write(reinterpret_cast(&value), sizeof(T)); + } + + template void 
write_raw_static_array(const StaticArray &static_array) { + const char *data = reinterpret_cast(static_array.data()); + const std::size_t size = static_array.size() * sizeof(T); + write(data, size); + } + +private: + std::ofstream _out; +}; + +} // namespace kaminpar::io diff --git a/apps/io/dist_metis_parser.cc b/apps/io/dist_metis_parser.cc index 84fec3c1..a742fac9 100644 --- a/apps/io/dist_metis_parser.cc +++ b/apps/io/dist_metis_parser.cc @@ -12,11 +12,12 @@ #include "kaminpar-mpi/datatype.h" #include "kaminpar-mpi/utils.h" -#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" #include "kaminpar-dist/datastructures/ghost_node_mapper.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-dist/graphutils/synchronization.h" +#include "kaminpar-common/graph-compression/compressed_neighborhoods_builder.h" + #include "apps/io/file_tokener.h" namespace kaminpar::dist::io::metis { @@ -226,8 +227,8 @@ compress_read(const std::string &filename, const bool sorted, const MPI_Comm com ); graph::GhostNodeMapper mapper(rank, node_distribution); - DistributedCompressedGraphBuilder builder( - num_local_nodes, num_local_edges, header.has_node_weights, header.has_edge_weights, sorted + CompressedNeighborhoodsBuilder builder( + num_local_nodes, num_local_edges, header.has_edge_weights ); StaticArray node_weights; @@ -247,7 +248,7 @@ compress_read(const std::string &filename, const bool sorted, const MPI_Comm com header, [&](const auto weight) { if (node > 0) { - builder.add_node(node - 1, neighbourhood); + builder.add(node - 1, neighbourhood); neighbourhood.clear(); } @@ -270,7 +271,7 @@ compress_read(const std::string &filename, const bool sorted, const MPI_Comm com } ); - builder.add_node(node - 1, neighbourhood); + builder.add(node - 1, neighbourhood); neighbourhood.clear(); neighbourhood.shrink_to_fit(); } @@ -290,15 +291,12 @@ compress_read(const std::string &filename, const bool sorted, const MPI_Comm com } auto [global_to_ghost, 
ghost_to_global, ghost_owner] = mapper.finalize(); - auto [nodes, edges, edge_weights] = builder.build(); DistributedCompressedGraph graph( std::move(node_distribution), std::move(edge_distribution), - std::move(nodes), - std::move(edges), + builder.build(), std::move(node_weights), - std::move(edge_weights), std::move(ghost_owner), std::move(ghost_to_global), std::move(global_to_ghost), diff --git a/apps/io/dist_parhip_parser.cc b/apps/io/dist_parhip_parser.cc index b02909bd..40bc0d78 100644 --- a/apps/io/dist_parhip_parser.cc +++ b/apps/io/dist_parhip_parser.cc @@ -7,80 +7,29 @@ ******************************************************************************/ #include "apps/io/dist_parhip_parser.h" -#include #include -#include -#include -#include -#include - #include "kaminpar-mpi/datatype.h" #include "kaminpar-mpi/utils.h" -#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" #include "kaminpar-dist/datastructures/ghost_node_mapper.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-dist/graphutils/synchronization.h" -#include "kaminpar-common/logger.h" +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/graph-compression/compressed_neighborhoods_builder.h" -namespace { +#include "apps/io/binary_util.h" -class BinaryReaderException : public std::exception { -public: - BinaryReaderException(std::string msg) : _msg(std::move(msg)) {} +namespace { - [[nodiscard]] const char *what() const noexcept override { - return _msg.c_str(); - } +class ParhipHeader { + using NodeID = kaminpar::dist::NodeID; + using EdgeID = kaminpar::dist::EdgeID; + using NodeWeight = kaminpar::dist::NodeWeight; + using EdgeWeight = kaminpar::dist::EdgeWeight; -private: - std::string _msg; -}; - -class BinaryReader { public: - BinaryReader(const std::string &filename) { - _file = open(filename.c_str(), O_RDONLY); - if (_file == -1) { - throw BinaryReaderException("Cannot read the file that stores the graph"); - } - - struct stat 
file_info; - if (fstat(_file, &file_info) == -1) { - close(_file); - throw BinaryReaderException("Cannot determine the size of the file that stores the graph"); - } - - _length = static_cast(file_info.st_size); - _data = static_cast(mmap(nullptr, _length, PROT_READ, MAP_PRIVATE, _file, 0)); - if (_data == MAP_FAILED) { - close(_file); - throw BinaryReaderException("Cannot map the file that stores the graph"); - } - } - - ~BinaryReader() { - munmap(_data, _length); - close(_file); - } - - template [[nodiscard]] T read(std::size_t position) const { - return *reinterpret_cast(_data + position); - } - - template [[nodiscard]] T *fetch(std::size_t position) const { - return reinterpret_cast(_data + position); - } - -private: - int _file; - std::size_t _length; - std::uint8_t *_data; -}; - -struct ParhipHeader { static constexpr std::uint64_t kSize = 3 * sizeof(std::uint64_t); bool has_edge_weights; @@ -101,11 +50,58 @@ struct ParhipHeader { has_64_bit_edge_weight((version & 32) == 0), num_nodes(num_nodes), num_edges(num_edges) {} + + void validate() const { + if (has_64_bit_node_id) { + if (sizeof(NodeID) != 8) { + LOG_ERROR << "The stored graph uses 64-Bit node IDs but this build uses 32-Bit node IDs."; + std::exit(1); + } + } else if (sizeof(NodeID) != 4) { + LOG_ERROR << "The stored graph uses 32-Bit node IDs but this build uses 64-Bit node IDs."; + std::exit(1); + } + + if (has_64_bit_edge_id) { + if (sizeof(EdgeID) != 8) { + LOG_ERROR << "The stored graph uses 64-Bit edge IDs but this build uses 32-Bit edge IDs."; + std::exit(1); + } + } else if (sizeof(EdgeID) != 4) { + LOG_ERROR << "The stored graph uses 32-Bit edge IDs but this build uses 64-Bit edge IDs."; + std::exit(1); + } + + if (has_64_bit_node_weight) { + if (sizeof(NodeWeight) != 8) { + LOG_ERROR << "The stored graph uses 64-Bit node weights but this build uses 32-Bit node " + "weights."; + std::exit(1); + } + } else if (sizeof(NodeWeight) != 4) { + LOG_ERROR << "The stored graph uses 32-Bit node weights 
but this build uses 64-Bit node " + "weights."; + std::exit(1); + } + + if (has_64_bit_edge_weight) { + if (sizeof(EdgeWeight) != 8) { + LOG_ERROR << "The stored graph uses 64-Bit edge weights but this build uses 32-Bit edge " + "weights."; + std::exit(1); + } + } else if (sizeof(EdgeWeight) != 4) { + LOG_ERROR << "The stored graph uses 32-Bit edge weights but this build uses 64-Bit edge " + "weights."; + std::exit(1); + } + } }; } // namespace namespace kaminpar::dist::io::parhip { +using namespace kaminpar::io; namespace { @@ -156,6 +152,7 @@ DistributedCSRGraph csr_read(const std::string &filename, const bool sorted, con const auto num_nodes = reader.read(sizeof(std::uint64_t)); const auto num_edges = reader.read(sizeof(std::uint64_t) * 2); const ParhipHeader header(version, num_nodes, num_edges); + header.validate(); std::size_t position = ParhipHeader::kSize; @@ -163,10 +160,12 @@ DistributedCSRGraph csr_read(const std::string &filename, const bool sorted, con position += (header.num_nodes + 1) * sizeof(EdgeID); const NodeID *raw_edges = reader.fetch(position); - position += header.num_edges + sizeof(NodeID); + position += header.num_edges * sizeof(NodeID); const NodeWeight *raw_node_weights = reader.fetch(position); - position += header.num_nodes + sizeof(NodeWeight); + if (header.has_node_weights) { + position += header.num_nodes * sizeof(NodeWeight); + } const EdgeWeight *raw_edge_weights = reader.fetch(position); @@ -299,6 +298,7 @@ compressed_read(const std::string &filename, const bool sorted, const MPI_Comm c const auto num_nodes = reader.read(sizeof(std::uint64_t)); const auto num_edges = reader.read(sizeof(std::uint64_t) * 2); const ParhipHeader header(version, num_nodes, num_edges); + header.validate(); std::size_t position = ParhipHeader::kSize; @@ -306,10 +306,12 @@ compressed_read(const std::string &filename, const bool sorted, const MPI_Comm c position += (header.num_nodes + 1) * sizeof(EdgeID); const NodeID *raw_edges = reader.fetch(position); - 
position += header.num_edges + sizeof(NodeID); + position += header.num_edges * sizeof(NodeID); const NodeWeight *raw_node_weights = reader.fetch(position); - position += header.num_nodes + sizeof(NodeWeight); + if (header.has_node_weights) { + position += header.num_nodes * sizeof(NodeWeight); + } const EdgeWeight *raw_edge_weights = reader.fetch(position); @@ -364,8 +366,8 @@ compressed_read(const std::string &filename, const bool sorted, const MPI_Comm c ); graph::GhostNodeMapper mapper(rank, node_distribution); - DistributedCompressedGraphBuilder builder( - num_local_nodes, num_local_edges, header.has_node_weights, header.has_edge_weights, sorted + CompressedNeighborhoodsBuilder builder( + num_local_nodes, num_local_edges, header.has_edge_weights ); std::vector> neighbourhood; @@ -394,7 +396,7 @@ compressed_read(const std::string &filename, const bool sorted, const MPI_Comm c neighbourhood.emplace_back(adjacent_node, edge_weight); } - builder.add_node(u - first_node, neighbourhood); + builder.add(u - first_node, neighbourhood); neighbourhood.clear(); } @@ -410,15 +412,12 @@ compressed_read(const std::string &filename, const bool sorted, const MPI_Comm c } auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); - auto [nodes, edges, edge_weights] = builder.build(); DistributedCompressedGraph graph( std::move(node_distribution), std::move(edge_distribution), - std::move(nodes), - std::move(edges), + builder.build(), std::move(node_weights), - std::move(edge_weights), std::move(ghost_owner), std::move(ghost_to_global), std::move(global_to_ghost), diff --git a/apps/io/metis_parser.cc b/apps/io/metis_parser.cc index 72d9eada..0c7a1770 100644 --- a/apps/io/metis_parser.cc +++ b/apps/io/metis_parser.cc @@ -9,7 +9,7 @@ #include -#include "kaminpar-shm/datastructures/compressed_graph_builder.h" +#include "kaminpar-shm/graphutils/compressed_graph_builder.h" #include "kaminpar-common/assert.h" #include "kaminpar-common/datastructures/static_array.h" diff 
--git a/apps/io/parhip_parser.cc b/apps/io/parhip_parser.cc index 8980c458..3538fbcc 100644 --- a/apps/io/parhip_parser.cc +++ b/apps/io/parhip_parser.cc @@ -11,15 +11,10 @@ #include #include -#include -#include -#include -#include #include -#include -#include -#include "kaminpar-shm/datastructures/compressed_graph_builder.h" +#include "kaminpar-shm/graphutils/compressed_graph_builder.h" +#include "kaminpar-shm/graphutils/parallel_compressed_graph_builder.h" #include "kaminpar-shm/graphutils/permutator.h" #include "kaminpar-shm/kaminpar.h" @@ -27,82 +22,9 @@ #include "kaminpar-common/logger.h" #include "kaminpar-common/timer.h" -namespace { - -class BinaryReaderException : public std::exception { -public: - BinaryReaderException(std::string msg) : _msg(std::move(msg)) {} - - [[nodiscard]] const char *what() const noexcept override { - return _msg.c_str(); - } - -private: - std::string _msg; -}; - -class BinaryReader { -public: - BinaryReader(const std::string &filename) { - _file = open(filename.c_str(), O_RDONLY); - if (_file == -1) { - throw BinaryReaderException("Cannot read the file that stores the graph"); - } - - struct stat file_info; - if (fstat(_file, &file_info) == -1) { - close(_file); - throw BinaryReaderException("Cannot determine the size of the file that stores the graph"); - } - - _length = static_cast(file_info.st_size); - _data = static_cast(mmap(nullptr, _length, PROT_READ, MAP_PRIVATE, _file, 0)); - if (_data == MAP_FAILED) { - close(_file); - throw BinaryReaderException("Cannot map the file that stores the graph"); - } - } - - ~BinaryReader() { - munmap(_data, _length); - close(_file); - } +#include "apps/io/binary_util.h" - template [[nodiscard]] T read(std::size_t position) const { - return *reinterpret_cast(_data + position); - } - - template [[nodiscard]] const T *fetch(std::size_t position) const { - return reinterpret_cast(_data + position); - } - -private: - int _file; - std::size_t _length; - std::uint8_t *_data; -}; - -class 
BinaryWriter { -public: - BinaryWriter(const std::string &filename) : _out(filename, std::ios::binary) {} - - void write(const char *data, const std::size_t size) { - _out.write(data, size); - } - - template void write_int(const T value) { - _out.write(reinterpret_cast(&value), sizeof(T)); - } - - template void write_static_array(const kaminpar::StaticArray &static_array) { - const char *data = reinterpret_cast(static_array.data()); - const std::size_t size = static_array.size() * sizeof(T); - write(data, size); - } - -private: - std::ofstream _out; -}; +namespace { class ParhipHeader { using CompressedGraph = kaminpar::shm::CompressedGraph; @@ -202,6 +124,7 @@ class ParhipHeader { } // namespace namespace kaminpar::shm::io::parhip { +using namespace kaminpar::io; CSRGraph csr_read(const std::string &filename, const bool sorted) { std::ifstream in(filename, std::ios::binary); @@ -364,7 +287,7 @@ CompressedGraph compressed_read_parallel(const std::string &filename, const Node const auto [perm, inv_perm] = graph::sort_by_degree_buckets(num_nodes, [&](const NodeID u) { return degrees[u]; }); - return ParallelCompressedGraphBuilder::compress( + return parallel_compress( num_nodes, num_edges, header.has_node_weights, @@ -378,7 +301,7 @@ CompressedGraph compressed_read_parallel(const std::string &filename, const Node [&](const EdgeID e) { return edge_weights[e]; } ); } else { - return ParallelCompressedGraphBuilder::compress( + return parallel_compress( num_nodes, num_edges, header.has_node_weights, @@ -424,15 +347,15 @@ void write(const std::string &filename, const CSRGraph &graph) { } }); - writer.write_static_array(raw_nodes); - writer.write_static_array(graph.raw_edges()); + writer.write_raw_static_array(raw_nodes); + writer.write_raw_static_array(graph.raw_edges()); if (has_node_weights) { - writer.write_static_array(graph.raw_node_weights()); + writer.write_raw_static_array(graph.raw_node_weights()); } if (has_edge_weights) { - 
writer.write_static_array(graph.raw_edge_weights()); + writer.write_raw_static_array(graph.raw_edge_weights()); } } diff --git a/apps/io/shm_compressed_graph_binary.cc b/apps/io/shm_compressed_graph_binary.cc index 886b7f06..895e3e4f 100644 --- a/apps/io/shm_compressed_graph_binary.cc +++ b/apps/io/shm_compressed_graph_binary.cc @@ -317,25 +317,27 @@ CompressedGraph read(const std::string &filename) { CompactStaticArray nodes = read_compact_static_array(in); StaticArray compressed_edges = read_static_array(in); - StaticArray node_weights = - header.has_node_weights ? read_static_array(in) : StaticArray(); - StaticArray edge_weights = - header.has_edge_weights ? read_static_array(in) : StaticArray(); + StaticArray node_weights; + if (header.has_node_weights) { + node_weights = read_static_array(in); + } - return CompressedGraph( + CompressedNeighborhoods compressed_neighborhoods( std::move(nodes), std::move(compressed_edges), - std::move(node_weights), + header.max_degree, header.num_edges, - header.total_edge_weight, header.has_edge_weights, - header.max_degree, - header.use_degree_bucket_order, + header.total_edge_weight, header.num_high_degree_nodes, header.num_high_degree_parts, header.num_interval_nodes, header.num_intervals ); + + return CompressedGraph( + std::move(compressed_neighborhoods), std::move(node_weights), header.use_degree_bucket_order + ); } bool is_compressed(const std::string &filename) { diff --git a/kaminpar-common/graph-compression/compressed_edges.h b/kaminpar-common/graph-compression/compressed_edges.h deleted file mode 100644 index 988de239..00000000 --- a/kaminpar-common/graph-compression/compressed_edges.h +++ /dev/null @@ -1,425 +0,0 @@ -#pragma once - -#include "kaminpar-common/constexpr_utils.h" -#include "kaminpar-common/datastructures/static_array.h" -#include "kaminpar-common/math.h" -#include "kaminpar-common/ranges.h" -#include "kaminpar-common/varint_codec.h" -#include "kaminpar-common/varint_run_length_codec.h" -#include 
"kaminpar-common/varint_stream_codec.h" - -namespace kaminpar { - -template class CompressedEdges { - static_assert(std::numeric_limits::is_integer); - static_assert(std::numeric_limits::is_integer); - -public: - using SignedID = std::int64_t; - -#ifdef KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING - /*! - * Whether high degree encoding is used. - */ - static constexpr bool kHighDegreeEncoding = true; -#else - /*! - * Whether high degree encoding is used. - */ - static constexpr bool kHighDegreeEncoding = false; -#endif - - /*! - * The minimum degree of a node to be considered high degree. - */ - static constexpr NodeID kHighDegreeThreshold = 10000; - - /*! - * The length of a part when splitting the neighbourhood of a high degree - * node. - */ - static constexpr NodeID kHighDegreePartLength = 1000; - -#ifdef KAMINPAR_COMPRESSION_INTERVAL_ENCODING - /*! - * Whether interval encoding is used. - */ - static constexpr bool kIntervalEncoding = true; -#else - /*! - * Whether interval encoding is used. - */ - static constexpr bool kIntervalEncoding = false; -#endif - - /*! - * The minimum length of an interval to encode if interval encoding is used. - */ - static constexpr NodeID kIntervalLengthTreshold = 3; - -#ifdef KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING - /*! - * Whether run-length encoding is used. - */ - static constexpr bool kRunLengthEncoding = true; -#else - /*! - * Whether run-length encoding is used. - */ - static constexpr bool kRunLengthEncoding = false; -#endif - -#ifdef KAMINPAR_COMPRESSION_STREAM_ENCODING - /*! - * Whether stream encoding is used. - */ - static constexpr bool kStreamEncoding = true; -#else - /*! - * Whether stream encoding is used. - */ - static constexpr bool kStreamEncoding = false; -#endif - - static_assert( - !kRunLengthEncoding || !kStreamEncoding, - "Either run-length or stream encoding can be used for varints " - "but not both." - ); - -#ifdef KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION - /*! 
- * Whether the isolated nodes of the compressed graph are continuously stored - * at the end of the nodes array. - */ - static constexpr bool kIsolatedNodesSeparation = true; -#else - /*! - * Whether the isolated nodes of the compressed graph are continuously stored - * at the end of the nodes array. - */ - static constexpr bool kIsolatedNodesSeparation = false; -#endif - - CompressedEdges(const EdgeID num_edges, StaticArray compressed_edges) - : _num_edges(num_edges), - _compressed_edges(std::move(compressed_edges)) {} - - CompressedEdges(const CompressedEdges &) = delete; - CompressedEdges &operator=(const CompressedEdges &) = delete; - - CompressedEdges(CompressedEdges &&) noexcept = default; - CompressedEdges &operator=(CompressedEdges &&) noexcept = default; - - [[nodiscard]] EdgeID num_edges() const { - return _num_edges; - } - - [[nodiscard]] std::size_t size() const { - return _compressed_edges.size(); - } - - [[nodiscard]] NodeID - degree(const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset) const { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + edge_offset; - const std::uint8_t *next_node_data = data + next_edge_offset; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) { - return 0; - } - - const auto header = decode_header(node, node_data, next_node_data); - return std::get<1>(header); - } - - [[nodiscard]] IotaRange - incident_edges(const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset) const { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + edge_offset; - const std::uint8_t *next_node_data = data + next_edge_offset; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) { - return {0, 0}; - } - - const auto [first_edge, degree, _, __] = decode_header(node, node_data, next_node_data); - return {first_edge, first_edge + degree}; - } - - 
template - void decode_neighborhood( - const NodeID node, - const NodeID max_num_neighbors, - const EdgeID edge_offset, - const EdgeID next_edge_offset, - Lambda &&l - ) const { - KASSERT(max_num_neighbors > 0); - constexpr bool non_stoppable = std::is_void_v>; - - NodeID num_neighbors_visited = 1; - decode_neighborhood( - node, - edge_offset, - next_edge_offset, - [&](const EdgeID incident_edge, const NodeID adjacent_node) { - bool abort = num_neighbors_visited++ >= max_num_neighbors; - - if constexpr (non_stoppable) { - l(incident_edge, adjacent_node); - } else { - abort |= l(incident_edge, adjacent_node); - } - - return abort; - } - ); - } - - template - void decode_neighborhood( - const NodeID node, const EdgeID edge_offset, const EdgeID next_edge_offset, Lambda &&l - ) const { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + edge_offset; - const std::uint8_t *next_node_data = data + next_edge_offset; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) { - return; - } - - const auto header = decode_header(node, node_data, next_node_data); - const auto &edge = std::get<0>(header); - const auto &degree = std::get<1>(header); - const auto &uses_intervals = std::get<2>(header); - const auto &len = std::get<3>(header); - - node_data += len; - - if constexpr (kHighDegreeEncoding) { - if (degree >= kHighDegreeThreshold) { - decode_parts(node_data, node, edge, degree, std::forward(l)); - return; - } - } - - invoke_indirect>( - std::forward(l), - [&](auto &&l2) { - decode_edges( - node_data, node, edge, degree, uses_intervals, std::forward(l2) - ); - } - ); - } - -private: - EdgeID _num_edges; - StaticArray _compressed_edges; - -private: - inline std::tuple decode_header( - const NodeID node, const std::uint8_t *node_data, const std::uint8_t *next_node_data - ) const { - const auto [first_edge, next_first_edge, uses_intervals, len] = [&] { - if constexpr (kIntervalEncoding) { - auto
[first_edge, uses_intervals, len] = marked_varint_decode(node_data); - auto [next_first_edge, _, __] = marked_varint_decode(next_node_data); - - return std::make_tuple(first_edge, next_first_edge, uses_intervals, len); - } else { - auto [first_edge, len] = varint_decode(node_data); - auto [next_first_edge, _] = varint_decode(next_node_data); - - return std::make_tuple(first_edge, next_first_edge, false, len); - } - }(); - - if constexpr (kIsolatedNodesSeparation) { - const EdgeID ungapped_first_edge = first_edge + node; - const NodeID degree = static_cast(1 + next_first_edge - first_edge); - return std::make_tuple(ungapped_first_edge, degree, uses_intervals, len); - } else { - const NodeID degree = static_cast(next_first_edge - first_edge); - return std::make_tuple(first_edge, degree, uses_intervals, len); - } - } - - template - void decode_parts( - const std::uint8_t *data, - const NodeID node, - const EdgeID edge, - const NodeID degree, - Lambda &&l - ) const { - const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); - - const auto iterate_part = [&](const NodeID part) { - const NodeID part_offset = *((NodeID *)(data + sizeof(NodeID) * part)); - const std::uint8_t *part_data = data + part_offset; - - const NodeID part_count_m1 = part_count - 1; - const bool last_part = part == part_count_m1; - - const EdgeID part_edge = edge + kHighDegreePartLength * part; - const NodeID part_degree = - last_part ? 
(degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; - - return invoke_indirect2, bool>( - std::forward(l), - [&](auto &&l2) { - return decode_edges( - part_data, node, part_edge, part_degree, true, std::forward(l2) - ); - } - ); - }; - - if constexpr (parallel) { - tbb::parallel_for(0, part_count, std::forward(iterate_part)); - } else { - for (NodeID part = 0; part < part_count; ++part) { - const bool stop = iterate_part(part); - if (stop) { - return; - } - } - } - } - - template - bool decode_edges( - const std::uint8_t *data, - const NodeID node, - EdgeID edge, - const NodeID degree, - bool uses_intervals, - Lambda &&l - ) const { - const EdgeID max_edge = edge + degree; - - if constexpr (kIntervalEncoding) { - if (uses_intervals) { - const bool stop = decode_intervals(data, edge, std::forward(l)); - if (stop) { - return true; - } - - if (edge == max_edge) { - return false; - } - } - } - - return decode_gaps(data, node, edge, max_edge, std::forward(l)); - } - - template - bool decode_intervals(const std::uint8_t *&data, EdgeID &edge, Lambda &&l) const { - constexpr bool non_stoppable = std::is_void_v>; - - const NodeID interval_count = *((NodeID *)data); - data += sizeof(NodeID); - - NodeID previous_right_extreme = 2; - for (NodeID i = 0; i < interval_count; ++i) { - const auto [left_extreme_gap, left_extreme_gap_len] = varint_decode(data); - data += left_extreme_gap_len; - - const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); - data += interval_length_gap_len; - - const NodeID cur_left_extreme = left_extreme_gap + previous_right_extreme - 2; - const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; - previous_right_extreme = cur_left_extreme + cur_interval_len - 1; - - for (NodeID j = 0; j < cur_interval_len; ++j) { - if constexpr (non_stoppable) { - l(edge, cur_left_extreme + j); - } else { - const bool stop = l(edge, cur_left_extreme + j); - if (stop) { - return true; - } - } - - edge += 
1; - } - } - - return false; - } - - template - bool decode_gaps( - const std::uint8_t *data, NodeID node, EdgeID &edge, const EdgeID max_edge, Lambda &&l - ) const { - constexpr bool non_stoppable = std::is_void_v>; - - const auto [first_gap, first_gap_len] = signed_varint_decode(data); - data += first_gap_len; - - const NodeID first_adjacent_node = static_cast(first_gap + node); - NodeID prev_adjacent_node = first_adjacent_node; - - if constexpr (non_stoppable) { - l(edge, first_adjacent_node); - } else { - const bool stop = l(edge, first_adjacent_node); - if (stop) { - return true; - } - } - edge += 1; - - const auto handle_gap = [&](const NodeID gap) { - const NodeID adjacent_node = gap + prev_adjacent_node + 1; - prev_adjacent_node = adjacent_node; - - if constexpr (non_stoppable) { - l(edge++, adjacent_node); - } else { - return l(edge++, adjacent_node); - } - }; - - if constexpr (kRunLengthEncoding) { - VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); - rl_decoder.decode(std::forward(handle_gap)); - } else if constexpr (kStreamEncoding) { - VarIntStreamDecoder sv_encoder(data, max_edge - edge); - sv_encoder.decode(std::forward(handle_gap)); - } else { - while (edge != max_edge) { - const auto [gap, gap_len] = varint_decode(data); - data += gap_len; - - const NodeID adjacent_node = gap + prev_adjacent_node + 1; - prev_adjacent_node = adjacent_node; - - if constexpr (non_stoppable) { - l(edge, adjacent_node); - } else { - const bool stop = l(edge, adjacent_node); - if (stop) { - return true; - } - } - - edge += 1; - } - } - - return false; - } -}; - -} // namespace kaminpar diff --git a/kaminpar-common/graph-compression/compressed_edges_builder.h b/kaminpar-common/graph-compression/compressed_edges_builder.h index a31ac8ad..bbbc4d1d 100644 --- a/kaminpar-common/graph-compression/compressed_edges_builder.h +++ b/kaminpar-common/graph-compression/compressed_edges_builder.h @@ -1,32 +1,52 @@ 
+/******************************************************************************* + * Compressed edges builder. + * + * @file: compressed_edges_builder.h + * @author: Daniel Salwasser + * @date: 09.07.2024 + ******************************************************************************/ #pragma once #include #include #include -#include -#include "kaminpar-common/datastructures/static_array.h" -#include "kaminpar-common/graph-compression/compressed_edges.h" +#include "kaminpar-common/graph-compression/compressed_neighborhoods.h" #include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/logger.h" namespace kaminpar { +SET_DEBUG(false); template class CompressedEdgesBuilder { - using CompressedEdges = kaminpar::CompressedEdges; - using SignedID = CompressedEdges::SignedID; - - static constexpr bool kHighDegreeEncoding = CompressedEdges::kHighDegreeEncoding; - static constexpr NodeID kHighDegreeThreshold = CompressedEdges::kHighDegreeThreshold; - static constexpr NodeID kHighDegreePartLength = CompressedEdges::kHighDegreePartLength; - static constexpr NodeID kIntervalEncoding = CompressedEdges::kIntervalEncoding; - static constexpr NodeID kIntervalLengthTreshold = CompressedEdges::kIntervalLengthTreshold; - static constexpr bool kRunLengthEncoding = CompressedEdges::kRunLengthEncoding; - static constexpr bool kStreamEncoding = CompressedEdges::kStreamEncoding; - static constexpr bool kIsolatedNodesSeparation = CompressedEdges::kIsolatedNodesSeparation; + using CompressedNeighborhoods = kaminpar::CompressedNeighborhoods; + using SignedID = CompressedNeighborhoods::SignedID; + + static constexpr bool kHighDegreeEncoding = CompressedNeighborhoods::kHighDegreeEncoding; + static constexpr NodeID kHighDegreeThreshold = CompressedNeighborhoods::kHighDegreeThreshold; + static constexpr NodeID kHighDegreePartLength = CompressedNeighborhoods::kHighDegreePartLength; + static constexpr NodeID kIntervalEncoding = CompressedNeighborhoods::kIntervalEncoding; + static 
constexpr NodeID kIntervalLengthTreshold = + CompressedNeighborhoods::kIntervalLengthTreshold; + static constexpr bool kRunLengthEncoding = CompressedNeighborhoods::kRunLengthEncoding; + static constexpr bool kStreamEncoding = CompressedNeighborhoods::kStreamEncoding; + static constexpr bool kIsolatedNodesSeparation = + CompressedNeighborhoods::kIsolatedNodesSeparation; +public: + /*! + * Returns the maximum size in bytes of the compressed edge array. + * + * @tparam kActualNumEdges Whether the number of edges given are of the whole graph instead of a + * true subgraph. + * @param num_nodes The number of nodes. + * @param num_edges The number of edges. + * @param has_edge_weights Whether edge weights are stored. + */ template - [[nodiscard]] static std::size_t - compressed_edge_array_max_size(const NodeID num_nodes, const EdgeID num_edges) { + [[nodiscard]] static std::size_t compressed_edge_array_max_size( + const NodeID num_nodes, const EdgeID num_edges, const bool has_edge_weights + ) { std::size_t edge_id_width; if constexpr (kActualNumEdges) { if constexpr (kIntervalEncoding) { @@ -50,58 +70,73 @@ template class Compresse max_size += (num_edges / kHighDegreePartLength) * varint_max_length(); } + if (has_edge_weights) { + max_size += num_edges * varint_max_length(); + } + return max_size; } -public: /*! * Constructs a new CompressedEdgesBuilder. * * @param num_nodes The number of nodes of the graph to compress. * @param num_edges The number of edges of the graph to compress. * @param has_edge_weights Whether the graph to compress has edge weights. - * @param edge_weights A reference to the edge weights of the compressed graph.
*/ - CompressedEdgesBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - bool has_edge_weights, - StaticArray &edge_weights - ) - : _has_edge_weights(has_edge_weights), - _edge_weights(edge_weights) { - const std::size_t max_size = compressed_edge_array_max_size(num_nodes, num_edges); + CompressedEdgesBuilder(const NodeID num_nodes, const EdgeID num_edges, bool has_edge_weights) + : _has_edge_weights(has_edge_weights) { + const std::size_t max_size = + compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); _compressed_data_start = heap_profiler::overcommit_memory(max_size); + _compressed_data = _compressed_data_start.get(); + _compressed_data_max_size = 0; } /*! - * Constructs a new CompressedEdgesBuilder where the maxmimum degree specifies the number of edges - * that are compressed at once. + * Constructs a new CompressedEdgesBuilder where the maximum degree specifies the number + * of edges that are compressed at once. * * @param num_nodes The number of nodes of the graph to compress. * @param num_edges The number of edges of the graph to compress. * @param max_degree The maximum number of edges that are compressed at once. * @param has_edge_weights Whether the graph to compress has edge weights. - * @param edge_weights A reference to the edge weights of the compressed graph. - * @param edge_weights A reference to the edge weights of the compressed graph.
*/ CompressedEdgesBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const NodeID max_degree, - bool has_edge_weights, - StaticArray &edge_weights + const NodeID num_nodes, const EdgeID num_edges, const NodeID max_degree, bool has_edge_weights ) - : _has_edge_weights(has_edge_weights), - _edge_weights(edge_weights) { - const std::size_t max_size = compressed_edge_array_max_size(num_nodes, max_degree); + : _has_edge_weights(has_edge_weights) { + const std::size_t max_size = + compressed_edge_array_max_size(num_nodes, max_degree, has_edge_weights); _compressed_data_start = heap_profiler::overcommit_memory(max_size); + _compressed_data = _compressed_data_start.get(); + _compressed_data_max_size = 0; + } + + /*! + * Destructs the CompressedEdgesBuilder and records the memory space of the compressed + * edge array to the heap profiler if the data has not been taken. + */ + ~CompressedEdgesBuilder() { + if constexpr (kHeapProfiling) { + if (_compressed_data_start) { + const auto prev_compressed_data_size = + static_cast(_compressed_data - _compressed_data_start.get()); + const std::size_t compressed_data_size = + std::max(_compressed_data_max_size, prev_compressed_data_size); + + heap_profiler::HeapProfiler::global().record_alloc( + _compressed_data_start.get(), compressed_data_size + ); + } + } } CompressedEdgesBuilder(const CompressedEdgesBuilder &) = delete; CompressedEdgesBuilder &operator=(const CompressedEdgesBuilder &) = delete; CompressedEdgesBuilder(CompressedEdgesBuilder &&) noexcept = default; + CompressedEdgesBuilder &operator=(CompressedEdgesBuilder &&) noexcept = delete; /*! * Initializes/resets the builder. @@ -109,6 +144,9 @@ template class Compresse * @param first_edge The first edge ID of the first node to be added. 
*/ void init(const EdgeID first_edge) { + const auto prev_compressed_data_size = + static_cast(_compressed_data - _compressed_data_start.get()); + _compressed_data_max_size = std::max(_compressed_data_max_size, prev_compressed_data_size); _compressed_data = _compressed_data_start.get(); _edge = first_edge; @@ -122,88 +160,23 @@ template class Compresse } /*! - * Adds the neighborhood of a node. Note that the neighbourhood vector is modified. + * Adds the (possibly weighted) neighborhood of a node. Note that the neighbourhood vector is + * modified. * * @param node The node whose neighborhood to add. * @param neighbourhood The neighbourhood of the node to add. * @return The offset into the compressed edge array of the node. */ - EdgeID add(const NodeID node, std::vector> &neighbourhood) { - // The offset into the compressed edge array of the start of the neighbourhood. - const auto offset = static_cast(_compressed_data - _compressed_data_start.get()); - - const NodeID degree = neighbourhood.size(); - if (degree == 0) { - return offset; - } - - _max_degree = std::max(_max_degree, degree); - - // Store a pointer to the first byte of the first edge of this neighborhood. This byte encodes - // in one of its bits whether interval encoding is used for this node, i.e., whether the nodes - // has intervals in its neighbourhood. - std::uint8_t *marked_byte = _compressed_data; - - // Store only the first edge for the source node. The degree can be obtained by determining the - // difference between the first edge ids of a node and the next node. Additionally, store the - // first edge as a gap when the isolated nodes are continuously stored at the end of the nodes - // array. 
- const EdgeID first_edge = _edge; - if constexpr (kIntervalEncoding) { - _compressed_data += marked_varint_encode(first_edge, false, _compressed_data); + template EdgeID add(const NodeID node, Container &neighbourhood) { + if constexpr (std::is_same_v>) { + std::sort(neighbourhood.begin(), neighbourhood.end(), [](const auto &a, const auto &b) { + return a.first < b.first; + }); } else { - _compressed_data += varint_encode(first_edge, _compressed_data); - } - - // Only increment the edge if edge weights are not stored as otherwise the edge is - // incremented with each edge weight being added. - if (!_has_edge_weights) { - _edge += degree; - } - - // Sort the adjacent nodes in ascending order. - std::sort(neighbourhood.begin(), neighbourhood.end(), [](const auto &a, const auto &b) { - return a.first < b.first; - }); - - // If high-degree encoding is used then split the neighborhood if the degree crosses a - // threshold. The neighborhood is split into equally sized parts (except possible the last part) - // and each part is encoded independently. Furthermore, the offset at which the part is encoded - // is also stored. - if constexpr (kHighDegreeEncoding) { - const bool split_neighbourhood = degree >= kHighDegreeThreshold; - - if (split_neighbourhood) { - const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); - const NodeID last_part_length = ((degree % kHighDegreePartLength) == 0) - ? kHighDegreePartLength - : (degree % kHighDegreePartLength); - - uint8_t *part_ptr = _compressed_data; - _compressed_data += sizeof(NodeID) * part_count; - - for (NodeID i = 0; i < part_count; ++i) { - const bool last_part = (i + 1) == part_count; - const NodeID part_length = last_part ? 
last_part_length : kHighDegreePartLength; - - auto part_begin = neighbourhood.begin() + i * kHighDegreePartLength; - auto part_end = part_begin + part_length; - - std::uint8_t *cur_part_ptr = part_ptr + sizeof(NodeID) * i; - *((NodeID *)cur_part_ptr) = static_cast(_compressed_data - part_ptr); - - std::span> part_neighbourhood(part_begin, part_end); - add_edges(node, nullptr, part_neighbourhood); - } - - _num_high_degree_nodes += 1; - _num_high_degree_parts += part_count; - return offset; - } + std::sort(neighbourhood.begin(), neighbourhood.end()); } - add_edges(node, marked_byte, std::forward(neighbourhood)); - return offset; + return add_node(node, neighbourhood); } /*! @@ -233,26 +206,56 @@ template class Compresse return std::move(_compressed_data_start); } + /*! + * Returns the maximum degree. + * + * @return The maximum degree. + */ [[nodiscard]] std::size_t max_degree() const { return _max_degree; } + /*! + * Returns the total edge weight. + * + * @return The total edge weight. + */ [[nodiscard]] std::int64_t total_edge_weight() const { return _total_edge_weight; } + /*! + * Returns the number of nodes that have high degree. + * + * @returns The number of nodes that have high degree. + */ [[nodiscard]] std::size_t num_high_degree_nodes() const { return _num_high_degree_nodes; } + /*! + * Returns the total number of parts that result from splitting high degree neighborhoods. + * + * @returns The total number of parts that result from splitting high degree neighborhoods. + */ [[nodiscard]] std::size_t num_high_degree_parts() const { return _num_high_degree_parts; } + /*! + * Returns the number of nodes that have at least one interval. + * + * @returns The number of nodes that have at least one interval. + */ [[nodiscard]] std::size_t num_interval_nodes() const { return _num_interval_nodes; } + /*! + * Returns the total number of intervals. + * + * @returns The total number of intervals. 
+ */ [[nodiscard]] std::size_t num_intervals() const { return _num_intervals; } @@ -260,13 +263,13 @@ template class Compresse private: heap_profiler::unique_ptr _compressed_data_start; std::uint8_t *_compressed_data; + std::size_t _compressed_data_max_size; bool _has_edge_weights; - StaticArray &_edge_weights; + EdgeWeight _total_edge_weight; EdgeID _edge; NodeID _max_degree; - EdgeWeight _total_edge_weight; // Graph compression statistics std::size_t _num_high_degree_nodes; @@ -274,23 +277,111 @@ template class Compresse std::size_t _num_interval_nodes; std::size_t _num_intervals; + // Debug graph compression statistics + std::size_t _num_adjacent_node_bytes; + std::size_t _num_edge_weights_bytes; + private: + template EdgeID add_node(const NodeID node, Container &neighbourhood) { + // The offset into the compressed edge array to the start of the neighbourhood. + const auto offset = static_cast(_compressed_data - _compressed_data_start.get()); + + const NodeID degree = neighbourhood.size(); + if (degree == 0) { + return offset; + } + + _max_degree = std::max(_max_degree, degree); + + // Store a pointer to the first byte of the first edge of this neighborhood. This byte encodes + // in one of its bits whether interval encoding is used for this node, i.e., whether the nodes + // has intervals in its neighbourhood. + std::uint8_t *marked_byte = _compressed_data; + + // Store only the first edge for the source node. The degree can be obtained by determining the + // difference between the first edge ids of a node and the next node. Additionally, store the + // first edge as a gap when the isolated nodes are continuously stored at the end of the nodes + // array. 
+ const EdgeID first_edge = _edge; + if constexpr (kIntervalEncoding) { + _compressed_data += marked_varint_encode(first_edge, false, _compressed_data); + } else { + _compressed_data += varint_encode(first_edge, _compressed_data); + } + + _edge += degree; + + // If high-degree encoding is used then split the neighborhood if the degree crosses a + // threshold. The neighborhood is split into equally sized parts (except possible the last part) + // and each part is encoded independently. Furthermore, the offset at which the part is encoded + // is also stored. + if constexpr (kHighDegreeEncoding) { + const bool split_neighbourhood = degree >= kHighDegreeThreshold; + + if (split_neighbourhood) { + const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); + const NodeID last_part_length = ((degree % kHighDegreePartLength) == 0) + ? kHighDegreePartLength + : (degree % kHighDegreePartLength); + + uint8_t *part_ptr = _compressed_data; + _compressed_data += sizeof(NodeID) * part_count; + + for (NodeID i = 0; i < part_count; ++i) { + const bool last_part = (i + 1) == part_count; + const NodeID part_length = last_part ? 
last_part_length : kHighDegreePartLength; + + auto part_begin = neighbourhood.begin() + i * kHighDegreePartLength; + auto part_end = part_begin + part_length; + + std::uint8_t *cur_part_ptr = part_ptr + sizeof(NodeID) * i; + *((NodeID *)cur_part_ptr) = static_cast(_compressed_data - part_ptr); + + using Neighbour = typename Container::value_type; + add_edges(node, nullptr, std::span(part_begin, part_end)); + } + + _num_high_degree_nodes += 1; + _num_high_degree_parts += part_count; + return offset; + } + } + + add_edges(node, marked_byte, std::forward(neighbourhood)); + return offset; + } + template void add_edges(const NodeID node, std::uint8_t *marked_byte, Container &&neighbourhood) { - const auto store_edge_weight = [&](const EdgeWeight edge_weight) { - _edge_weights[_edge++] = edge_weight; - _total_edge_weight += edge_weight; + using Neighbour = std::remove_reference_t::value_type; + constexpr bool kHasEdgeWeights = std::is_same_v>; + + const auto fetch_adjacent_node = [&](const NodeID i) { + if constexpr (kHasEdgeWeights) { + return neighbourhood[i].first; + } else { + return neighbourhood[i]; + } + }; + + const auto set_adjacent_node = [&](const NodeID i, const NodeID value) { + if constexpr (kHasEdgeWeights) { + neighbourhood[i].first = value; + } else { + neighbourhood[i] = value; + } }; NodeID local_degree = neighbourhood.size(); + EdgeWeight prev_edge_weight = 0; // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at // least kIntervalLengthTreshold. Instead of storing all nodes, only encode the left extreme i - // and the length j - i + 1. Left extremes are stored static constexpr bool - // kHighDegreeEncoding = the differences between each left extreme and the previous right - // extreme minus 2 (because there must be at least one integer between the end of an interval - // and the beginning of the next one), except the first left extreme, which is stored directly. 
- // The lengths are decremented by kIntervalLengthTreshold, the minimum length of an interval. + // and the length j - i + 1. Left extremes are stored using the differences between each left + // extreme and the previous right extreme minus 2 (because there must be at least one integer + // between the end of an interval and the beginning of the next one), except the first left + // extreme, which is stored directly. The lengths are decremented by kIntervalLengthTreshold, + // the minimum length of an interval. if constexpr (kIntervalEncoding) { NodeID interval_count = 0; @@ -303,34 +394,52 @@ template class Compresse if (local_degree >= kIntervalLengthTreshold) { NodeID interval_len = 1; NodeID previous_right_extreme = 2; - NodeID prev_adjacent_node = (*neighbourhood.begin()).first; + NodeID prev_adjacent_node = fetch_adjacent_node(0); - for (auto iter = neighbourhood.begin() + 1; iter != neighbourhood.end(); ++iter) { - const NodeID adjacent_node = (*iter).first; + for (NodeID i = 1; i < neighbourhood.size(); ++i) { + const NodeID adjacent_node = fetch_adjacent_node(i); if (prev_adjacent_node + 1 == adjacent_node) { - interval_len++; + ++interval_len; // The interval ends if there are no more nodes or the next node is not the increment of // the current node. 
- if (iter + 1 == neighbourhood.end() || (*(iter + 1)).first != adjacent_node + 1) { + if (i + 1 == neighbourhood.size() || fetch_adjacent_node(i + 1) != adjacent_node + 1) { if (interval_len >= kIntervalLengthTreshold) { const NodeID left_extreme = adjacent_node + 1 - interval_len; const NodeID left_extreme_gap = left_extreme + 2 - previous_right_extreme; const NodeID interval_length_gap = interval_len - kIntervalLengthTreshold; - _compressed_data += varint_encode(left_extreme_gap, _compressed_data); - _compressed_data += varint_encode(interval_length_gap, _compressed_data); + const std::size_t left_extreme_gap_len = + varint_encode(left_extreme_gap, _compressed_data); + _compressed_data += left_extreme_gap_len; + IF_DBG _num_adjacent_node_bytes += left_extreme_gap_len; - for (NodeID i = 0; i < interval_len; ++i) { - std::pair &incident_edge = *(iter + 1 + i - interval_len); + const std::size_t interval_length_gap_len = + varint_encode(interval_length_gap, _compressed_data); + _compressed_data += interval_length_gap_len; + IF_DBG _num_adjacent_node_bytes += interval_length_gap_len; + + for (NodeID j = 0; j < interval_len; ++j) { + const NodeID k = i + 1 + j - interval_len; // Set the adjacent node to a special value, which indicates for the gap encoder // that the node has been encoded through an interval. 
- incident_edge.first = std::numeric_limits::max(); + set_adjacent_node(k, std::numeric_limits::max()); + + if constexpr (kHasEdgeWeights) { + if (_has_edge_weights) { + const EdgeWeight edge_weight = neighbourhood[k].second; + const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; - if (_has_edge_weights) { - store_edge_weight(incident_edge.second); + const std::size_t edge_weight_gap_len = + signed_varint_encode(edge_weight_gap, _compressed_data); + _compressed_data += edge_weight_gap_len; + IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; + + prev_edge_weight = edge_weight; + _total_edge_weight += edge_weight; + } } } @@ -354,9 +463,11 @@ template class Compresse // intervals have been encoded. if (marked_byte == nullptr) { *((NodeID *)interval_count_ptr) = interval_count; + _num_adjacent_node_bytes += sizeof(NodeID); } else if (interval_count > 0) { *((NodeID *)interval_count_ptr) = interval_count; *marked_byte |= 0b01000000; + _num_adjacent_node_bytes += sizeof(NodeID); } else { _compressed_data -= sizeof(NodeID); } @@ -366,63 +477,98 @@ template class Compresse _num_intervals += interval_count; } - // If all incident edges have been compressed static constexpr bool kHighDegreeEncoding = - // intervals then gap encoding cannot be applied. + // If all incident edges have been compressed using intervals then gap encoding cannot be + // applied. if (local_degree == 0) { return; } } - // Store the remaining adjacent nodes static constexpr bool kHighDegreeEncoding = gap - // encoding. That is instead of directly storing the nodes v_1, v_2, ..., v_{k - 1}, v_k, store - // the gaps v_1 - u, v_2 - v_1 - 1, ..., v_k - v_{k - 1} - 1 between the nodes, where u is the - // source node. Note that all gaps except the first one have to be positive as we sorted the - // nodes in ascending order. Thus, only for the first gap the sign is additionally stored. - auto iter = neighbourhood.begin(); + // Store the remaining adjacent nodes using gap encoding. 
That is instead of directly storing + // the nodes v_1, v_2, ..., v_{k - 1}, v_k, store the gaps v_1 - u, v_2 - v_1 - 1, ..., v_k - + // v_{k - 1} - 1 between the nodes, where u is the source node. Note that all gaps except the + // first one have to be positive as we sorted the nodes in ascending order. Thus, only for the + // first gap the sign is additionally stored. + NodeID i = 0; // Go to the first adjacent node that has not been encoded through an interval. if constexpr (kIntervalEncoding) { - while ((*iter).first == std::numeric_limits::max()) { - ++iter; + while (fetch_adjacent_node(i) == std::numeric_limits::max()) { + i += 1; } } - const auto [first_adjacent_node, first_edge_weight] = *iter++; + const NodeID first_adjacent_node = fetch_adjacent_node(i); const SignedID first_gap = first_adjacent_node - static_cast(node); - _compressed_data += signed_varint_encode(first_gap, _compressed_data); - if (_has_edge_weights) { - store_edge_weight(first_edge_weight); + const std::size_t first_gap_len = signed_varint_encode(first_gap, _compressed_data); + _compressed_data += first_gap_len; + IF_DBG _num_adjacent_node_bytes += first_gap_len; + + if constexpr (kHasEdgeWeights) { + if (_has_edge_weights) { + const EdgeWeight first_edge_weight = neighbourhood[i].second; + const EdgeWeight first_edge_weight_gap = first_edge_weight - prev_edge_weight; + + const std::size_t first_edge_weight_gap_len = + signed_varint_encode(first_edge_weight_gap, _compressed_data); + _compressed_data += first_edge_weight_gap_len; + IF_DBG _num_edge_weights_bytes += first_edge_weight_gap_len; + + prev_edge_weight = first_edge_weight; + _total_edge_weight += first_edge_weight; + } } + i += 1; + VarIntRunLengthEncoder rl_encoder(_compressed_data); VarIntStreamEncoder sv_encoder(_compressed_data, local_degree - 1); NodeID prev_adjacent_node = first_adjacent_node; - while (iter != neighbourhood.end()) { - const auto [adjacent_node, edge_weight] = *iter++; + while (i < neighbourhood.size()) { + 
const NodeID adjacent_node = fetch_adjacent_node(i); // Skip the adjacent node since it has been encoded through an interval. if constexpr (kIntervalEncoding) { if (adjacent_node == std::numeric_limits::max()) { + i += 1; continue; } } const NodeID gap = adjacent_node - prev_adjacent_node - 1; if constexpr (kRunLengthEncoding) { - _compressed_data += rl_encoder.add(gap); + const std::size_t gap_len = rl_encoder.add(gap); + _compressed_data += gap_len; + IF_DBG _num_adjacent_node_bytes += gap_len; } else if constexpr (kStreamEncoding) { - _compressed_data += sv_encoder.add(gap); + const std::size_t gap_len = sv_encoder.add(gap); + _compressed_data += gap_len; + IF_DBG _num_adjacent_node_bytes += gap_len; } else { - _compressed_data += varint_encode(gap, _compressed_data); + const std::size_t gap_len = varint_encode(gap, _compressed_data); + _compressed_data += gap_len; + IF_DBG _num_adjacent_node_bytes += gap_len; } - if (_has_edge_weights) { - store_edge_weight(edge_weight); + if constexpr (kHasEdgeWeights) { + if (_has_edge_weights) { + const EdgeWeight edge_weight = neighbourhood[i].second; + const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; + + const std::size_t edge_weight_gap_len = + signed_varint_encode(edge_weight_gap, _compressed_data); + _compressed_data += edge_weight_gap_len; + IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; + + prev_edge_weight = edge_weight; + _total_edge_weight += edge_weight; + } } prev_adjacent_node = adjacent_node; + i += 1; } if constexpr (kRunLengthEncoding) { diff --git a/kaminpar-common/graph-compression/compressed_neighborhoods.h b/kaminpar-common/graph-compression/compressed_neighborhoods.h new file mode 100644 index 00000000..d16e025b --- /dev/null +++ b/kaminpar-common/graph-compression/compressed_neighborhoods.h @@ -0,0 +1,721 @@ +/******************************************************************************* + * Compressed neighborhoods of a static graph. 
+ * + * @file: compressed_neighborhoods.h + * @author: Daniel Salwasser + * @date: 08.07.2024 + ******************************************************************************/ +#pragma once + +#include "kaminpar-common/constexpr_utils.h" +#include "kaminpar-common/datastructures/compact_static_array.h" +#include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/math.h" +#include "kaminpar-common/ranges.h" +#include "kaminpar-common/varint_codec.h" +#include "kaminpar-common/varint_run_length_codec.h" +#include "kaminpar-common/varint_stream_codec.h" + +namespace kaminpar { + +template class CompressedNeighborhoods { + static_assert(std::numeric_limits::is_integer); + static_assert(std::numeric_limits::is_integer); + static_assert(std::numeric_limits::is_integer); + + struct NeighborhoodHeader { + EdgeID first_edge; + NodeID degree; + bool uses_intervals; + std::size_t length; + }; + +public: + using SignedID = std::int64_t; + + /*! + * Whether high degree encoding is used. + */ +#ifdef KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING + static constexpr bool kHighDegreeEncoding = true; +#else + static constexpr bool kHighDegreeEncoding = false; +#endif + + /*! + * The minimum degree of a node to be considered high degree. + */ + static constexpr NodeID kHighDegreeThreshold = 10000; + + /*! + * The length of a part when splitting the neighbourhood of a high degree + * node. + */ + static constexpr NodeID kHighDegreePartLength = 1000; + + /*! + * Whether interval encoding is used. + */ +#ifdef KAMINPAR_COMPRESSION_INTERVAL_ENCODING + static constexpr bool kIntervalEncoding = true; +#else + static constexpr bool kIntervalEncoding = false; +#endif + + /*! + * The minimum length of an interval to encode if interval encoding is used. + */ + static constexpr NodeID kIntervalLengthTreshold = 3; + + /*! + * Whether run-length encoding is used. 
+ */ +#ifdef KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING + static constexpr bool kRunLengthEncoding = true; +#else + static constexpr bool kRunLengthEncoding = false; +#endif + + /*! + * Whether stream encoding is used. + */ +#ifdef KAMINPAR_COMPRESSION_STREAM_ENCODING + static constexpr bool kStreamEncoding = true; +#else + static constexpr bool kStreamEncoding = false; +#endif + + static_assert( + !kRunLengthEncoding || !kStreamEncoding, + "Either run-length or stream encoding can be used for varints " + "but not both." + ); + + /*! + * Whether the isolated nodes of the compressed graph are continuously stored + * at the end of the nodes array. + */ +#ifdef KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION + static constexpr bool kIsolatedNodesSeparation = true; +#else + static constexpr bool kIsolatedNodesSeparation = false; +#endif + + /** + * Constructs a new CompressedNeighborhoods. + * + * @param nodes The nodes of the compressed neighborhoods. + * @param compressed_edges The edges and edge weights of the compressed neighborhoods. + * @param max_degree The maximum degree of the nodes. + * @param num_edges The number of edges. + * @param has_edge_weights Whether edge weights are stored + * @param total_edge_weight The total edge weight. + * @param num_high_degree_nodes The number of nodes that have high degree. + * @param num_high_degree_parts The total number of parts that result from splitting high degree + * neighborhoods. + * @param num_interval_nodes The number of nodes that have at least one interval. + * @param num_intervals The total number of intervals. 
+ */ + CompressedNeighborhoods( + CompactStaticArray nodes, + StaticArray compressed_edges, + const NodeID max_degree, + const EdgeID num_edges, + const bool has_edge_weights, + const EdgeWeight total_edge_weight, + std::size_t num_high_degree_nodes, + std::size_t num_high_degree_parts, + std::size_t num_interval_nodes, + std::size_t num_intervals + ) + // The initializers are listed in member declaration order (_num_edges before _max_degree), + // which is the order in which the members are actually initialized (avoids -Wreorder). + : _nodes(std::move(nodes)), + _compressed_edges(std::move(compressed_edges)), + _num_edges(num_edges), + _max_degree(max_degree), + _has_edge_weights(has_edge_weights), + _total_edge_weight(total_edge_weight), + _num_high_degree_nodes(num_high_degree_nodes), + _num_high_degree_parts(num_high_degree_parts), + _num_interval_nodes(num_interval_nodes), + _num_intervals(num_intervals) { + // The statistics of a disabled encoding must be zero. + KASSERT(kHighDegreeEncoding || _num_high_degree_nodes == 0); + KASSERT(kHighDegreeEncoding || _num_high_degree_parts == 0); + KASSERT(kIntervalEncoding || _num_interval_nodes == 0); + KASSERT(kIntervalEncoding || _num_intervals == 0); + } + + CompressedNeighborhoods(const CompressedNeighborhoods &) = delete; + CompressedNeighborhoods &operator=(const CompressedNeighborhoods &) = delete; + + CompressedNeighborhoods(CompressedNeighborhoods &&) noexcept = default; + CompressedNeighborhoods &operator=(CompressedNeighborhoods &&) noexcept = default; + + /** + * Returns the maximum degree of the nodes. + * + * @return The maximum degree of the nodes. + */ + [[nodiscard]] NodeID max_degree() const { + return _max_degree; + } + + /** + * Returns the degree of a node. + * + * @param node The node whose degree is to be returned. + * @return The degree of the node.
+ */ + [[nodiscard]] NodeID degree(const NodeID node) const { + const std::uint8_t *data = _compressed_edges.data(); + + const std::uint8_t *node_data = data + _nodes[node]; + const std::uint8_t *next_node_data = data + _nodes[node + 1]; + + // Equal offsets mean that no bytes are stored for this node, which is treated as degree zero. + const bool is_isolated_node = node_data == next_node_data; + if (is_isolated_node) [[unlikely]] { + return 0; + } + + const auto header = decode_header(node, node_data, next_node_data); + return header.degree; + } + + /** + * Returns the incident edges of a node. Isolated nodes yield the empty range {0, 0}. + * + * @param node The node whose incident edges are to be returned. + * @return The incident edges of the node. + */ + [[nodiscard]] IotaRange incident_edges(const NodeID node) const { + const std::uint8_t *data = _compressed_edges.data(); + + const std::uint8_t *node_data = data + _nodes[node]; + const std::uint8_t *next_node_data = data + _nodes[node + 1]; + + // Equal offsets mean that no bytes are stored for this node, so there is no header to decode. + const bool is_isolated_node = node_data == next_node_data; + if (is_isolated_node) [[unlikely]] { + return {0, 0}; + } + + const auto header = decode_header(node, node_data, next_node_data); + return {header.first_edge, header.first_edge + header.degree}; + } + + /** + * Decodes a neighborhood and invokes a caller with each adjacent node and corresponding edge + * weight. + * + * @tparam kParallelDecoding Whether to decode the neighborhood in parallel. + * @tparam Lambda The type of the caller to invoke. + * @param u The node whose neighborhood is to be decoded. + * @param l The caller to invoke.
+ */ + template + void decode(const NodeID u, Lambda &&l) const { + KASSERT(u < num_nodes()); + constexpr bool kInvokeDirectly = std::is_invocable_v; + + if (_has_edge_weights) [[unlikely]] { + decode_neighborhood(u, std::forward(l)); + } else { + if constexpr (kInvokeDirectly) { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { + return l(e, v, 1); + }); + } else { + decode_neighborhood(u, [&](auto &&l2) { + l([&](auto &&l3) { l2([&](const EdgeID e, const NodeID v) { return l3(e, v, 1); }); }); + }); + } + } + } + + /** + * Decodes the leading edges of a neighborhood and invokes a caller with each adjacent node and + * corresponding edge weight. + * + * @tparam Lambda The type of the caller to invoke. + * @param u The node whose neighborhood is to be decoded. + * @param max_num_neighbors The number of neighbors to decode. + * @param l The caller to invoke. + */ + template + void decode(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { + KASSERT(u < num_nodes()); + KASSERT(max_num_neighbors > 0); + + static_assert(std::is_invocable_v); + constexpr bool kNonStoppable = + std::is_void_v>; + + NodeID num_neighbors_visited = 1; + const auto invoke_and_check = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + bool abort = num_neighbors_visited++ >= max_num_neighbors; + + if constexpr (kNonStoppable) { + l(e, v, w); + } else { + abort |= l(e, v, w); + } + + return abort; + }; + + if (_has_edge_weights) [[unlikely]] { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + return invoke_and_check(e, v, w); + }); + } else { + decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { + return invoke_and_check(e, v, 1); + }); + } + } + + /** + * Restricts the node array to a specific number of nodes. + * + * @param new_n The new number of nodes. + */ + void restrict_nodes(const NodeID new_n) { + _nodes.restrict(new_n); + } + + /** + * Unrestricts the node array. 
+ */ + void unrestrict_nodes() { + _nodes.unrestrict(); + } + + /** + * Returns the number of nodes. + * + * @return The number of nodes. + */ + [[nodiscard]] EdgeID num_nodes() const { + return _nodes.size() - 1; + } + + /** + * Returns the number of edges. + * + * @return The number of edges. + */ + [[nodiscard]] EdgeID num_edges() const { + return _num_edges; + } + + /** + * Returns whether the edges are weighted. + * + * @return Whether the edges are weighted. + */ + [[nodiscard]] bool has_edge_weights() const { + return _has_edge_weights; + } + + /** + * Returns the total edge weight. + * + * @return The total edge weight that was passed to the constructor. + */ + [[nodiscard]] EdgeWeight total_edge_weight() const { + return _total_edge_weight; + } + + /*! + * Returns the number of nodes that have high degree. + * + * @returns The number of nodes that have high degree. + */ + [[nodiscard]] std::size_t num_high_degree_nodes() const { + return _num_high_degree_nodes; + } + + /*! + * Returns the total number of parts that result from splitting high degree neighborhoods. + * + * @returns The total number of parts that result from splitting high degree neighborhoods. + */ + [[nodiscard]] std::size_t num_high_degree_parts() const { + return _num_high_degree_parts; + } + + /*! + * Returns the number of nodes that have at least one interval. + * + * @returns The number of nodes that have at least one interval. + */ + [[nodiscard]] std::size_t num_interval_nodes() const { + return _num_interval_nodes; + } + + /*! + * Returns the total number of intervals. + * + * @returns The total number of intervals. + */ + [[nodiscard]] std::size_t num_intervals() const { + return _num_intervals; + } + + /** + * Returns the used memory space in bytes. + * + * @return The used memory space in bytes. + */ + [[nodiscard]] std::size_t memory_space() const { + return _nodes.allocated_size() + _compressed_edges.size(); + } + + /** + * Returns ownership of the raw node array. + * + * @return Ownership of the raw node array.
+ */ + [[nodiscard]] CompactStaticArray &&take_raw_nodes() { + return std::move(_nodes); + } + + /** + * Returns a reference to the raw node array. + * + * @return A reference to the raw node array. + */ + [[nodiscard]] CompactStaticArray &raw_nodes() { + return _nodes; + } + + /** + * Returns a reference to the raw node array. + * + * @return A reference to the raw node array. + */ + [[nodiscard]] const CompactStaticArray &raw_nodes() const { + return _nodes; + } + + /** + * Returns a reference to the raw compressed edges. + * + * @return A reference to the raw compressed edges. + */ + [[nodiscard]] const StaticArray &raw_compressed_edges() const { + return _compressed_edges; + } + +private: + CompactStaticArray _nodes; + StaticArray _compressed_edges; + + EdgeID _num_edges; + NodeID _max_degree; + + bool _has_edge_weights; + EdgeWeight _total_edge_weight; + + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; + +private: + template + void decode_neighborhood(const NodeID node, Lambda &&l) const { + constexpr bool kInvokeDirectly = []() { + if constexpr (kHasEdgeWeights) { + return std::is_invocable_v; + } else { + return std::is_invocable_v; + } + }(); + + const std::uint8_t *data = _compressed_edges.data(); + + const std::uint8_t *node_data = data + _nodes[node]; + const std::uint8_t *next_node_data = data + _nodes[node + 1]; + + const bool is_isolated_node = node_data == next_node_data; + if (is_isolated_node) [[unlikely]] { + return; + } + + const auto header = decode_header(node, node_data, next_node_data); + node_data += header.length; + + if constexpr (kHighDegreeEncoding) { + if (header.degree >= kHighDegreeThreshold) { + decode_parts( + node_data, node, header.degree, header.first_edge, std::forward(l) + ); + return; + } + } + + invoke_indirect(std::forward(l), [&](auto &&l2) { + decode_edges( + node_data, + node, + header.degree, + header.first_edge, + 
header.uses_intervals, + std::forward(l2) + ); + }); + } + + [[nodiscard]] NeighborhoodHeader decode_header( + const NodeID node, + const std::uint8_t *const node_data, + const std::uint8_t *const next_node_data + ) const { + const auto [first_edge, next_first_edge, uses_intervals, len] = [&] { + if constexpr (kIntervalEncoding) { + const auto [first_edge, uses_intervals, len] = marked_varint_decode(node_data); + const auto [next_first_edge, _, __] = marked_varint_decode(next_node_data); + + return std::make_tuple(first_edge, next_first_edge, uses_intervals, len); + } else { + const auto [first_edge, len] = varint_decode(node_data); + const auto [next_first_edge, _] = varint_decode(next_node_data); + + return std::make_tuple(first_edge, next_first_edge, false, len); + } + }(); + + if constexpr (kIsolatedNodesSeparation) { + const EdgeID ungapped_first_edge = first_edge + node; + const NodeID degree = static_cast(1 + next_first_edge - first_edge); + return {ungapped_first_edge, degree, uses_intervals, len}; + } else { + const NodeID degree = static_cast(next_first_edge - first_edge); + return {first_edge, degree, uses_intervals, len}; + } + } + + template + void decode_parts( + const std::uint8_t *data, + const NodeID node, + const NodeID degree, + const EdgeID edge, + Lambda &&l + ) const { + constexpr bool kInvokeDirectly = []() { + if constexpr (kHasEdgeWeights) { + return std::is_invocable_v; + } else { + return std::is_invocable_v; + } + }(); + + const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); + + const auto iterate_part = [&](const NodeID part) { + const NodeID part_offset = *((NodeID *)(data + sizeof(NodeID) * part)); + const std::uint8_t *part_data = data + part_offset; + + const NodeID part_count_m1 = part_count - 1; + const bool last_part = part == part_count_m1; + + const EdgeID part_edge = edge + kHighDegreePartLength * part; + const NodeID part_degree = + last_part ? 
(degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; + + return invoke_indirect2(std::forward(l), [&](auto &&l2) { + return decode_edges( + part_data, node, part_degree, part_edge, true, std::forward(l2) + ); + }); + }; + + if constexpr (kParallelDecoding) { + tbb::parallel_for(0, part_count, std::forward(iterate_part)); + } else { + for (NodeID part = 0; part < part_count; ++part) { + const bool stop = iterate_part(part); + if (stop) { + return; + } + } + } + } + + template + bool decode_edges( + const std::uint8_t *data, + const NodeID node, + const NodeID degree, + EdgeID edge, + bool uses_intervals, + Lambda &&l + ) const { + const EdgeID max_edge = edge + degree; + EdgeWeight prev_edge_weight = 0; + + if constexpr (kIntervalEncoding) { + if (uses_intervals) { + const bool stop = decode_intervals( + data, edge, prev_edge_weight, std::forward(l) + ); + if (stop) { + return true; + } + + if (edge == max_edge) { + return false; + } + } + } + + return decode_gaps( + data, node, edge, prev_edge_weight, max_edge, std::forward(l) + ); + } + + template + bool decode_intervals( + const std::uint8_t *&data, EdgeID &edge, EdgeWeight &prev_edge_weight, Lambda &&l + ) const { + using LambdaReturnType = std::conditional_t< + kHasEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto invoke_caller = [&](const NodeID adjacent_node) { + if constexpr (kHasEdgeWeights) { + const auto [edge_weight_gap, length] = signed_varint_decode(data); + data += length; + + const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; + prev_edge_weight = edge_weight; + + return l(edge, adjacent_node, edge_weight); + } else { + return l(edge, adjacent_node); + } + }; + + const NodeID interval_count = *((NodeID *)data); + data += sizeof(NodeID); + + NodeID previous_right_extreme = 2; + for (NodeID i = 0; i < interval_count; ++i) { + const auto [left_extreme_gap, left_extreme_gap_len] = 
varint_decode(data); + data += left_extreme_gap_len; + + const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); + data += interval_length_gap_len; + + const NodeID cur_left_extreme = left_extreme_gap + previous_right_extreme - 2; + const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; + previous_right_extreme = cur_left_extreme + cur_interval_len - 1; + + for (NodeID j = 0; j < cur_interval_len; ++j) { + if constexpr (kNonStoppable) { + invoke_caller(cur_left_extreme + j); + } else { + const bool stop = invoke_caller(cur_left_extreme + j); + if (stop) { + return true; + } + } + + edge += 1; + } + } + + return false; + } + + template + bool decode_gaps( + const std::uint8_t *data, + NodeID node, + EdgeID &edge, + EdgeWeight &prev_edge_weight, + const EdgeID max_edge, + Lambda &&l + ) const { + using LambdaReturnType = std::conditional_t< + kHasEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto invoke_caller = [&](const NodeID adjacent_node) { + if constexpr (kHasEdgeWeights) { + const auto [edge_weight_gap, length] = signed_varint_decode(data); + data += length; + + const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; + prev_edge_weight = edge_weight; + return l(edge, adjacent_node, edge_weight); + } else { + return l(edge, adjacent_node); + } + }; + + const auto [first_gap, first_gap_len] = signed_varint_decode(data); + data += first_gap_len; + + const NodeID first_adjacent_node = static_cast(first_gap + node); + NodeID prev_adjacent_node = first_adjacent_node; + + if constexpr (kNonStoppable) { + invoke_caller(first_adjacent_node); + } else { + const bool stop = invoke_caller(first_adjacent_node); + if (stop) { + return true; + } + } + edge += 1; + + /* + const auto handle_gap = [&](const NodeID gap) { + const NodeID adjacent_node = gap + prev_adjacent_node + 1; + prev_adjacent_node = adjacent_node; + + if constexpr 
(kNonStoppable) { + l(edge++, adjacent_node); + } else { + return l(edge++, adjacent_node); + } + }; + */ + + if constexpr (kRunLengthEncoding) { + // VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); + // rl_decoder.decode(std::forward(handle_gap)); + } else if constexpr (kStreamEncoding) { + // VarIntStreamDecoder sv_encoder(data, max_edge - edge); + // sv_encoder.decode(std::forward(handle_gap)); + } else { + while (edge != max_edge) { + const auto [gap, gap_len] = varint_decode(data); + data += gap_len; + + const NodeID adjacent_node = gap + prev_adjacent_node + 1; + prev_adjacent_node = adjacent_node; + + if constexpr (kNonStoppable) { + invoke_caller(adjacent_node); + } else { + const bool stop = invoke_caller(adjacent_node); + if (stop) { + return true; + } + } + + edge += 1; + } + } + + return false; + } +}; + +} // namespace kaminpar diff --git a/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h b/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h new file mode 100644 index 00000000..2d7e79fb --- /dev/null +++ b/kaminpar-common/graph-compression/compressed_neighborhoods_builder.h @@ -0,0 +1,286 @@ +/******************************************************************************* + * Compressed neighborhoods builder. + * + * @file: compressed_neighborhoods_builder.h + * @author: Daniel Salwasser + * @date: 09.07.2024 + ******************************************************************************/ +#pragma once + +#include "kaminpar-common/datastructures/compact_static_array.h" +#include "kaminpar-common/graph-compression/compressed_edges_builder.h" +#include "kaminpar-common/graph-compression/compressed_neighborhoods.h" + +namespace kaminpar { + +template +class CompressedNeighborhoodsBuilder { + using CompressedEdgesBuilder = kaminpar::CompressedEdgesBuilder; + using CompressedNeighborhoods = kaminpar::CompressedNeighborhoods; + +public: + /*! + * Constructs a new CompressedNeighborhoodsBuilder. 
+ * + * @param num_nodes The number of nodes of the graph to compress. + * @param num_edges The number of edges of the graph to compress. + * @param has_edge_weights Whether edge weights are stored. + */ + CompressedNeighborhoodsBuilder( + const NodeID num_nodes, const EdgeID num_edges, const bool has_edge_weights + ) + : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights), + _num_edges(num_edges), + _has_edge_weights(has_edge_weights) { + + const std::size_t max_size = CompressedEdgesBuilder::compressed_edge_array_max_size( + num_nodes, num_edges, has_edge_weights + ); + _nodes.resize(math::byte_width(max_size), num_nodes + 1); + _compressed_edges_builder.init(0); + } + + /*! + * Adds the (possibly weighted) neighborhood of a node. Note that the neighbourhood vector is + * modified. + * + * @param node The node whose neighborhood to add. + * @param neighbourhood The neighbourhood of the node to add. + */ + template void add(const NodeID node, Container &neighbourhood) { + KASSERT(node + 1 < _nodes.size()); + + const EdgeID offset = _compressed_edges_builder.add(node, neighbourhood); + _nodes.write(node, offset); + } + + /*! + * Builds the compressed neighborhoods. The builder must then be reinitialized in order to + * compress further neighborhoods. + * + * @return The compressed neighborhoods that have been built. + */ + CompressedNeighborhoods build() { + std::size_t compressed_edges_size = _compressed_edges_builder.size(); + auto compressed_edges = _compressed_edges_builder.take_compressed_data(); + + // Store in the last entry of the node array the offset one after the last byte belonging to the + // last node. + _nodes.write(_nodes.size() - 1, static_cast(compressed_edges_size)); + + // Store at the end of the compressed edge array the (gap of the) id of the last edge. This + // ensures that the degree of the last node can be computed from the difference between the + // last two first edge ids.
+ const EdgeID last_edge = _num_edges; + std::uint8_t *compressed_edges_end = compressed_edges.get() + compressed_edges_size; + if constexpr (CompressedNeighborhoods::kIntervalEncoding) { + compressed_edges_size += marked_varint_encode(last_edge, false, compressed_edges_end); + } else { + compressed_edges_size += varint_encode(last_edge, compressed_edges_end); + } + + // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to + // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. + if constexpr (CompressedNeighborhoods::kStreamEncoding) { + compressed_edges_size += 15; + } + + if constexpr (kHeapProfiling) { + heap_profiler::HeapProfiler::global().record_alloc( + compressed_edges.get(), compressed_edges_size + ); + } + + return CompressedNeighborhoods( + std::move(_nodes), + StaticArray(compressed_edges_size, std::move(compressed_edges)), + _compressed_edges_builder.max_degree(), + _num_edges, + _has_edge_weights, + _compressed_edges_builder.total_edge_weight(), + _compressed_edges_builder.num_high_degree_nodes(), + _compressed_edges_builder.num_high_degree_parts(), + _compressed_edges_builder.num_interval_nodes(), + _compressed_edges_builder.num_intervals() + ); + } + + /*! + * Returns the used memory of the compressed neighborhoods. + * + * @return The used memory of the compressed neighborhoods. + */ + [[nodiscard]] std::size_t currently_used_memory() const { + return _nodes.allocated_size() + _compressed_edges_builder.size(); + } + + /*! + * Returns the total edge weight. + * + * @return The total edge weight.
+ */ + [[nodiscard]] std::int64_t total_edge_weight() const { + return _compressed_edges_builder.total_edge_weight(); + } + +private: + CompactStaticArray _nodes; + CompressedEdgesBuilder _compressed_edges_builder; + EdgeID _num_edges; + bool _has_edge_weights; +}; + +template +class ParallelCompressedNeighborhoodsBuilder { + using CompressedEdgesBuilder = kaminpar::CompressedEdgesBuilder; + using CompressedNeighborhoods = kaminpar::CompressedNeighborhoods; + +public: + /*! + * Constructs a new ParallelCompressedNeighborhoodsBuilder. + * + * @param num_nodes The number of nodes of the graph to compress. + * @param num_edges The number of edges of the graph to compress. + * @param has_edge_weights Whether edge weights are stored. + */ + ParallelCompressedNeighborhoodsBuilder( + const NodeID num_nodes, const EdgeID num_edges, const bool has_edge_weights + ) + : _num_edges(num_edges), + _max_degree(0), + _has_edge_weights(has_edge_weights), + _total_edge_weight(0), + _num_high_degree_nodes(0), + _num_high_degree_parts(0), + _num_interval_nodes(0), + _num_intervals(0) { + const std::size_t max_size = CompressedEdgesBuilder::compressed_edge_array_max_size( + num_nodes, num_edges, has_edge_weights + ); + _nodes.resize(math::byte_width(max_size), num_nodes + 1); + _compressed_edges = heap_profiler::overcommit_memory(max_size); + _compressed_edges_size = 0; + } + + /*! + * Adds a node to the compressed neighborhoods. + * + * @param node The node to add. + * @param offset The offset into the compressed edge array at which the compressed neighborhood + * of the node is stored. + */ + void add_node(const NodeID node, const EdgeID offset) { + _nodes.write(node, offset); + } + + /** + * Adds compressed neighborhoods of possible multiple consecutive nodes to the compressed graph. + * + * @param offset The offset into the compressed edge array at which the compressed neighborhoods + * are stored. + * @param length The length in bytes of the compressed neighborhoods to store. 
+ * @param data A pointer to the start of the compressed neighborhoods to copy. + */ + void add_compressed_edges(const EdgeID offset, const EdgeID length, const std::uint8_t *data) { + __atomic_fetch_add(&_compressed_edges_size, length, __ATOMIC_RELAXED); + std::memcpy(_compressed_edges.get() + offset, data, length); + } + + /*! + * Adds (cumulative) statistics about nodes of the compressed graph. The maximum degree is merged + * with a compare-and-swap loop; all other values are added with relaxed atomic fetch-adds. + */ + void record_local_statistics( + NodeID max_degree, + EdgeWeight edge_weight, + std::size_t num_high_degree_nodes, + std::size_t num_high_degree_parts, + std::size_t num_interval_nodes, + std::size_t num_intervals + ) { + NodeID global_max_degree = __atomic_load_n(&_max_degree, __ATOMIC_RELAXED); + while (max_degree > global_max_degree) { + const bool success = __atomic_compare_exchange_n( + &_max_degree, &global_max_degree, max_degree, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + ); + + if (success) { + break; + } + } + + __atomic_fetch_add(&_total_edge_weight, edge_weight, __ATOMIC_RELAXED); + + __atomic_fetch_add(&_num_high_degree_nodes, num_high_degree_nodes, __ATOMIC_RELAXED); + __atomic_fetch_add(&_num_high_degree_parts, num_high_degree_parts, __ATOMIC_RELAXED); + __atomic_fetch_add(&_num_interval_nodes, num_interval_nodes, __ATOMIC_RELAXED); + __atomic_fetch_add(&_num_intervals, num_intervals, __ATOMIC_RELAXED); + } + + /*! + * Finalizes the compressed neighborhoods. Note that all nodes and compressed neighborhoods have + * to be added at this point. The builder must then be reinitialized in order to compress further + * neighborhoods. + * + * @return The compressed neighborhoods that have been built. + */ + [[nodiscard]] CompressedNeighborhoods build() { + // Store in the last entry of the node array the offset one after the last byte belonging to the + // last node. + _nodes.write(_nodes.size() - 1, _compressed_edges_size); + + // Store at the end of the compressed edge array the (gap of the) id of the last edge.
This + // ensures that the degree of the last node can be computed from the difference between the + // last two first edge ids. + std::uint8_t *_compressed_edges_end = _compressed_edges.get() + _compressed_edges_size; + const EdgeID last_edge = _num_edges; + if constexpr (CompressedNeighborhoods::kIntervalEncoding) { + _compressed_edges_size += marked_varint_encode(last_edge, false, _compressed_edges_end); + } else { + _compressed_edges_size += varint_encode(last_edge, _compressed_edges_end); + } + + // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to + // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. + if constexpr (CompressedNeighborhoods::kStreamEncoding) { + _compressed_edges_size += 15; + } + + if constexpr (kHeapProfiling) { + heap_profiler::HeapProfiler::global().record_alloc( + _compressed_edges.get(), _compressed_edges_size + ); + } + + return CompressedNeighborhoods( + std::move(_nodes), + StaticArray(_compressed_edges_size, std::move(_compressed_edges)), + _max_degree, + _num_edges, + _has_edge_weights, + _total_edge_weight, + _num_high_degree_nodes, + _num_high_degree_parts, + _num_interval_nodes, + _num_intervals + ); + } + +private: + CompactStaticArray _nodes; + heap_profiler::unique_ptr _compressed_edges; + EdgeID _compressed_edges_size; + + EdgeID _num_edges; + NodeID _max_degree; + + bool _has_edge_weights; + EdgeWeight _total_edge_weight; + + // Statistics about graph compression + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; +}; + +} // namespace kaminpar diff --git a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc index df330035..3afff313 100644 --- a/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc +++ b/kaminpar-dist/coarsening/clustering/hem/hem_clusterer.cc @@ -216,7 +216,7 @@ template class
HEMClustererImpl { NodeID best_neighbor = 0; EdgeWeight best_weight = 0; - _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight e_weight) { // v already matched? if (_matching[v] != kInvalidGlobalNodeID) { return; @@ -229,7 +229,6 @@ template class HEMClustererImpl { } // Already found a better neighbor? - const EdgeWeight e_weight = _graph->edge_weight(e); if (e_weight < best_weight) { return; } @@ -276,13 +275,13 @@ template class HEMClustererImpl { seq_from, seq_to, [&](const NodeID seq_u) { return _color_sorted_nodes[seq_u]; }, - [&](const NodeID u, EdgeID, const NodeID v) { + [&](const NodeID u, EdgeID, const NodeID v, EdgeWeight) { return _matching[u] == _graph->local_to_global_node(v); }, - [&](const NodeID u, const EdgeID e, const NodeID v, const PEID pe) -> MatchRequest { + [&](const NodeID u, const EdgeID e, const NodeID v, const EdgeWeight w, const PEID pe) { const GlobalNodeID v_global = _graph->local_to_global_node(v); const NodeID their_v = static_cast(v_global - _graph->offset_n(pe)); - return {u, their_v, _graph->edge_weight(e)}; + return MatchRequest(u, their_v, w); } ); @@ -451,10 +450,10 @@ template class HEMClustererImpl { }; mpi::graph::sparse_alltoall_interface_to_ghost( *_graph, - [&](const NodeID u, EdgeID, const NodeID v) -> bool { + [&](const NodeID u, EdgeID, const NodeID v, EdgeWeight) -> bool { return _matching[u] == _graph->local_to_global_node(v); }, - [&](const NodeID u, EdgeID, NodeID) -> MatchedEdge { + [&](const NodeID u, EdgeID, NodeID, EdgeWeight) -> MatchedEdge { return {_graph->local_to_global_node(u), _matching[u]}; }, [&](const auto &r, const PEID pe) { diff --git a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc index ef5841c8..757137c5 100644 --- a/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc +++ 
b/kaminpar-dist/coarsening/contraction/global_cluster_contraction.cc @@ -270,13 +270,13 @@ find_nonlocal_edges(const Graph &graph, const StaticArray &lnode_t if (!graph.is_owned_global_node(gcluster_u)) { NodeID pos = edge_position_buffer[lnode_u]; - graph.neighbors(lnode_u, [&](const EdgeID e, const NodeID lnode_v) { + graph.adjacent_nodes(lnode_u, [&](const NodeID lnode_v, const EdgeWeight w) { const GlobalNodeID gcluster_v = lnode_to_gcluster[lnode_v]; if (gcluster_u != gcluster_v) { nonlocal_edges[pos] = { .u = gcluster_u, .v = gcluster_v, - .weight = graph.edge_weight(e), + .weight = w, }; ++pos; } @@ -1387,8 +1387,8 @@ std::unique_ptr contract_clustering( if (u < graph.n()) { c_u_weight += graph.node_weight(u); - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { - handle_edge_to_lnode(graph.edge_weight(e), v); + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { + handle_edge_to_lnode(w, v); }); } else { // Fix node weight later diff --git a/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc b/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc index 4fe53a28..4b5b2702 100644 --- a/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc +++ b/kaminpar-dist/coarsening/contraction/local_cluster_contraction.cc @@ -199,10 +199,10 @@ std::unique_ptr contract_local_clustering( KASSERT(mapping[u] == c_u); // collect coarse edges - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const NodeID c_v = mapping[v]; if (c_u != c_v) { - map[c_v] += graph.edge_weight(e); + map[c_v] += w; } }); } diff --git a/kaminpar-dist/context.cc b/kaminpar-dist/context.cc index 7dda0206..67bb9d14 100644 --- a/kaminpar-dist/context.cc +++ b/kaminpar-dist/context.cc @@ -8,6 +8,7 @@ #include "kaminpar-dist/context.h" #include +#include #include diff --git a/kaminpar-dist/context_io.cc b/kaminpar-dist/context_io.cc index 8443233b..d9122b97 100644 --- 
a/kaminpar-dist/context_io.cc +++ b/kaminpar-dist/context_io.cc @@ -357,7 +357,7 @@ void print( const bool print_compression_details, std::ostream &out ) { - using Compression = DistributedCompressedGraph::CompressedEdges; + using Compression = DistributedCompressedGraph::CompressedNeighborhoods; const auto round = [](const auto value) { return std::ceil(value * 1000.0) / 1000.0; diff --git a/kaminpar-dist/datastructures/abstract_distributed_graph.h b/kaminpar-dist/datastructures/abstract_distributed_graph.h index aa8de8e9..9682e900 100644 --- a/kaminpar-dist/datastructures/abstract_distributed_graph.h +++ b/kaminpar-dist/datastructures/abstract_distributed_graph.h @@ -63,7 +63,6 @@ class AbstractDistributedGraph { [[nodiscard]] virtual GlobalNodeWeight global_total_node_weight() const = 0; [[nodiscard]] virtual bool is_edge_weighted() const = 0; - [[nodiscard]] virtual EdgeWeight edge_weight(const EdgeID e) const = 0; [[nodiscard]] virtual EdgeWeight total_edge_weight() const = 0; [[nodiscard]] virtual GlobalEdgeWeight global_total_edge_weight() const = 0; @@ -94,7 +93,6 @@ class AbstractDistributedGraph { [[nodiscard]] virtual NodeID degree(const NodeID u) const = 0; [[nodiscard]] virtual const StaticArray &node_weights() const = 0; - [[nodiscard]] virtual const StaticArray &edge_weights() const = 0; virtual void set_ghost_node_weight(const NodeID ghost_node, const NodeWeight weight) = 0; diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.cc b/kaminpar-dist/datastructures/distributed_compressed_graph.cc index 1c542f52..52190f63 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.cc +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.cc @@ -85,18 +85,13 @@ void DistributedCompressedGraph::init_total_weights() { _max_node_weight = 1; } - if (is_edge_weighted()) { - _total_edge_weight = parallel::accumulate(_edge_weights.begin(), _edge_weights.end(), 0); - } else { - _total_edge_weight = m(); - } - 
_global_total_node_weight = mpi::allreduce(_total_node_weight, MPI_SUM, communicator()); _global_max_node_weight = mpi::allreduce(_max_node_weight, MPI_MAX, communicator()); - _global_total_edge_weight = - mpi::allreduce(_total_edge_weight, MPI_SUM, communicator()); + _global_total_edge_weight = mpi::allreduce( + _compressed_neighborhoods.total_edge_weight(), MPI_SUM, communicator() + ); } void DistributedCompressedGraph::init_communication_metrics() { diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph.h b/kaminpar-dist/datastructures/distributed_compressed_graph.h index ad986bc7..8be1feb6 100644 --- a/kaminpar-dist/datastructures/distributed_compressed_graph.h +++ b/kaminpar-dist/datastructures/distributed_compressed_graph.h @@ -17,11 +17,12 @@ #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/degree_buckets.h" -#include "kaminpar-common/graph-compression/compressed_edges.h" +#include "kaminpar-common/graph-compression/compressed_neighborhoods.h" namespace kaminpar::dist { class DistributedCompressedGraph : public AbstractDistributedGraph { + public: // Data types used for this graph using AbstractDistributedGraph::EdgeID; @@ -33,13 +34,12 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { using AbstractDistributedGraph::NodeID; using AbstractDistributedGraph::NodeWeight; - using CompressedEdges = kaminpar::CompressedEdges; + using CompressedNeighborhoods = kaminpar::CompressedNeighborhoods; DistributedCompressedGraph( StaticArray node_distribution, StaticArray edge_distribution, - StaticArray nodes, - CompressedEdges compressed_edges, + CompressedNeighborhoods compressed_neighborhoods, StaticArray ghost_owner, StaticArray ghost_to_global, growt::StaticGhostNodeMapping global_to_ghost, @@ -49,9 +49,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { : DistributedCompressedGraph( std::move(node_distribution), std::move(edge_distribution), - std::move(nodes), - 
std::move(compressed_edges), - {}, + std::move(compressed_neighborhoods), {}, std::move(ghost_owner), std::move(ghost_to_global), @@ -63,10 +61,8 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { DistributedCompressedGraph( StaticArray node_distribution, StaticArray edge_distribution, - StaticArray nodes, - CompressedEdges compressed_edges, + CompressedNeighborhoods compressed_neighborhoods, StaticArray node_weights, - StaticArray edge_weights, StaticArray ghost_owner, StaticArray ghost_to_global, growt::StaticGhostNodeMapping global_to_ghost, @@ -75,10 +71,8 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { ) : _node_distribution(std::move(node_distribution)), _edge_distribution(std::move(edge_distribution)), - _nodes(std::move(nodes)), - _compressed_edges(std::move(compressed_edges)), + _compressed_neighborhoods(std::move(compressed_neighborhoods)), _node_weights(std::move(node_weights)), - _edge_weights(std::move(edge_weights)), _ghost_owner(std::move(ghost_owner)), _ghost_to_global(std::move(ghost_to_global)), _global_to_ghost(std::move(global_to_ghost)), @@ -86,8 +80,8 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { _communicator(comm) { const PEID rank = mpi::get_comm_rank(communicator()); - _n = _nodes.size() - 1; - _m = _compressed_edges.num_edges(); + _n = _compressed_neighborhoods.num_nodes(); + _m = _compressed_neighborhoods.num_edges(); _ghost_n = _ghost_to_global.size(); _offset_n = _node_distribution[rank]; _offset_m = _edge_distribution[rank]; @@ -190,15 +184,11 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { } [[nodiscard]] inline bool is_edge_weighted() const final { - return !_edge_weights.empty(); - } - - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { - return is_edge_weighted() ?
_edge_weights[e] : 1; + return _compressed_neighborhoods.has_edge_weights(); } [[nodiscard]] inline EdgeWeight total_edge_weight() const final { - return _total_edge_weight; + return _compressed_neighborhoods.total_edge_weight(); } [[nodiscard]] inline GlobalEdgeWeight global_total_edge_weight() const final { @@ -291,7 +281,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { } [[nodiscard]] inline IotaRange incident_edges(const NodeID u) const final { - return _compressed_edges.incident_edges(u, _nodes[u], _nodes[u + 1]); + return _compressed_neighborhoods.incident_edges(u); } // @@ -299,23 +289,47 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { // template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { - _compressed_edges.decode_neighborhood( - u, - _nodes[u], - _nodes[u + 1], - [&](const EdgeID incident_edge, const NodeID adjacent_node) { return l(adjacent_node); } - ); + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + _compressed_neighborhoods.decode(u, [&](const EdgeID, const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(v, w); + } else { + return l(v); + } + }); } template inline void neighbors(const NodeID u, Lambda &&l) const { - _compressed_edges.decode_neighborhood(u, _nodes[u], _nodes[u + 1], std::forward(l)); + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + _compressed_neighborhoods.decode(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(e, v, w); + } else { + return l(e, v); + } + }); } template inline void neighbors(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { - _compressed_edges.decode_neighborhood( 
- u, max_num_neighbors, _nodes[u], _nodes[u + 1], std::forward(l) - ); + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + _compressed_neighborhoods + .decode(u, max_num_neighbors, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(e, v, w); + } else { + return l(e, v); + } + }); } // @@ -362,17 +376,13 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { [[nodiscard]] inline NodeID degree(const NodeID u) const final { KASSERT(is_owned_node(u)); - return _compressed_edges.degree(u, _nodes[u], _nodes[u + 1]); + return _compressed_neighborhoods.degree(u); } [[nodiscard]] inline const StaticArray &node_weights() const final { return _node_weights; } - [[nodiscard]] inline const StaticArray &edge_weights() const final { - return _edge_weights; - } - inline void set_ghost_node_weight(const NodeID ghost_node, const NodeWeight weight) final { KASSERT(is_ghost_node(ghost_node)); KASSERT(is_node_weighted()); @@ -510,7 +520,7 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { [[nodiscard]] double compression_ratio() const { std::size_t uncompressed_size = (n() + 1) * sizeof(EdgeID) + m() * sizeof(NodeID); - std::size_t compressed_size = (n() + 1) * sizeof(EdgeID) + _compressed_edges.size(); + std::size_t compressed_size = _compressed_neighborhoods.memory_space(); if (is_node_weighted()) { uncompressed_size += n() * sizeof(NodeWeight); @@ -519,23 +529,18 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { if (is_edge_weighted()) { uncompressed_size += m() * sizeof(EdgeWeight); - compressed_size += m() * sizeof(EdgeWeight); } return uncompressed_size / static_cast(compressed_size); } [[nodiscard]] std::size_t memory_space() const { - std::size_t memory_space = (n() + 1) * sizeof(EdgeID) + _compressed_edges.size(); + std::size_t 
memory_space = _compressed_neighborhoods.memory_space(); if (is_node_weighted()) { memory_space += n() * sizeof(NodeWeight); } - if (is_edge_weighted()) { - memory_space += m() * sizeof(EdgeWeight); - } - return memory_space; } @@ -561,10 +566,6 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { return _node_weights; } - [[nodiscard]] const auto &raw_edge_weights() const { - return _edge_weights; - } - private: void init_degree_buckets(); void init_total_weights(); @@ -583,16 +584,13 @@ class DistributedCompressedGraph : public AbstractDistributedGraph { NodeWeight _max_node_weight{}; NodeWeight _global_max_node_weight{}; - EdgeWeight _total_edge_weight{}; GlobalEdgeWeight _global_total_edge_weight{}; StaticArray _node_distribution{}; StaticArray _edge_distribution{}; - StaticArray _nodes{}; - CompressedEdges _compressed_edges; + CompressedNeighborhoods _compressed_neighborhoods; StaticArray _node_weights{}; - StaticArray _edge_weights{}; StaticArray _ghost_owner{}; StaticArray _ghost_to_global{}; diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc b/kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc deleted file mode 100644 index d818ed11..00000000 --- a/kaminpar-dist/datastructures/distributed_compressed_graph_builder.cc +++ /dev/null @@ -1,157 +0,0 @@ -/******************************************************************************* - * Sequential builder for distributed compressed graphs. 
- * - * @file: distributed_compressed_graph_builder.h - * @author: Daniel Salwasser - * @date: 07.06.2024 - ******************************************************************************/ -#include "kaminpar-dist/datastructures/distributed_compressed_graph_builder.h" - -#include "kaminpar-dist/datastructures/ghost_node_mapper.h" -#include "kaminpar-dist/graphutils/synchronization.h" - -#include "kaminpar-common/assert.h" - -namespace kaminpar::dist { - -DistributedCompressedGraph -DistributedCompressedGraphBuilder::compress(const DistributedCSRGraph &graph) { - const mpi::PEID size = mpi::get_comm_size(graph.communicator()); - const mpi::PEID rank = mpi::get_comm_rank(graph.communicator()); - - StaticArray node_distribution( - graph.node_distribution().begin(), graph.node_distribution().end() - ); - StaticArray edge_distribution( - graph.edge_distribution().begin(), graph.edge_distribution().end() - ); - - graph::GhostNodeMapper mapper(rank, node_distribution); - DistributedCompressedGraphBuilder builder( - graph.n(), graph.m(), graph.is_node_weighted(), graph.is_edge_weighted(), graph.sorted() - ); - - const NodeID first_node = node_distribution[rank]; - const NodeID last_node = node_distribution[rank + 1]; - - const auto &raw_nodes = graph.raw_nodes(); - const auto &raw_edges = graph.raw_nodes(); - const auto &raw_node_weights = graph.raw_nodes(); - - std::vector> neighbourhood; - for (const NodeID u : graph.nodes()) { - graph.neighbors(u, [&](const EdgeID e, const NodeID adjacent_node) { - const EdgeWeight edge_weight = graph.is_edge_weighted() ? 
graph.edge_weight(e) : 1; - - if (graph.is_owned_node(adjacent_node)) { - neighbourhood.emplace_back(adjacent_node, edge_weight); - } else { - const NodeID original_adjacent_node = graph.local_to_global_node(adjacent_node); - neighbourhood.emplace_back(mapper.new_ghost_node(original_adjacent_node), edge_weight); - } - }); - - builder.add_node(u, neighbourhood); - neighbourhood.clear(); - } - - StaticArray node_weights; - if (graph.is_node_weighted()) { - node_weights.resize(graph.n() + mapper.next_ghost_node(), static_array::noinit); - - tbb::parallel_for(tbb::blocked_range(0, graph.n()), [&](const auto &r) { - for (NodeID u = r.begin(); u != r.end(); ++u) { - node_weights[u] = raw_node_weights[first_node + u]; - } - }); - } - - auto [global_to_ghost, ghost_to_global, ghost_owner] = mapper.finalize(); - auto [nodes, edges, edge_weights] = builder.build(); - - DistributedCompressedGraph compressed_graph( - std::move(node_distribution), - std::move(edge_distribution), - std::move(nodes), - std::move(edges), - std::move(node_weights), - std::move(edge_weights), - std::move(ghost_owner), - std::move(ghost_to_global), - std::move(global_to_ghost), - graph.sorted(), - graph.communicator() - ); - return compressed_graph; -} - -DistributedCompressedGraphBuilder::DistributedCompressedGraphBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted -) - : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights, _edge_weights) { - _sorted = sorted; - _nodes.resize(num_nodes + 1, static_array::noinit); - - _num_edges = num_edges; - _compressed_edges_builder.init(0); - - if (has_edge_weights) { - _edge_weights.resize(num_edges, static_array::noinit); - } -} - -void DistributedCompressedGraphBuilder::add_node( - const NodeID node, std::vector> &neighbourhood -) { - KASSERT(node + 1 < _nodes.size()); - - const EdgeID offset = _compressed_edges_builder.add(node, neighbourhood); - 
_nodes[node] = offset; -} - -std::tuple, CompressedEdges, StaticArray> -DistributedCompressedGraphBuilder::build() { - std::size_t compressed_edges_size = _compressed_edges_builder.size(); - heap_profiler::unique_ptr wrapped_compressed_edges = - _compressed_edges_builder.take_compressed_data(); - - // Store in the last entry of the node array the offset one after the last byte belonging to the - // last node. - _nodes[_nodes.size() - 1] = static_cast(compressed_edges_size); - - // Store at the end of the compressed edge array the (gap of the) id of the last edge. This - // ensures that the the degree of the last node can be computed from the difference between - // the last two first edge ids. - const EdgeID last_edge = _num_edges; - std::uint8_t *compressed_edges_end = wrapped_compressed_edges.get() + compressed_edges_size; - if constexpr (CompressedEdges::kIntervalEncoding) { - compressed_edges_size += marked_varint_encode(last_edge, false, compressed_edges_end); - } else { - compressed_edges_size += varint_encode(last_edge, compressed_edges_end); - } - - // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to - // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. 
- if constexpr (CompressedEdges::kStreamEncoding) { - compressed_edges_size += 15; - } - - if constexpr (kHeapProfiling) { - heap_profiler::HeapProfiler::global().record_alloc( - wrapped_compressed_edges.get(), compressed_edges_size - ); - } - - StaticArray raw_compressed_edges( - compressed_edges_size, std::move(wrapped_compressed_edges) - ); - CompressedEdges compressed_edges(_num_edges, std::move(raw_compressed_edges)); - - return std::make_tuple(std::move(_nodes), std::move(compressed_edges), std::move(_edge_weights)); -} - -} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_compressed_graph_builder.h b/kaminpar-dist/datastructures/distributed_compressed_graph_builder.h deleted file mode 100644 index 80ea25ce..00000000 --- a/kaminpar-dist/datastructures/distributed_compressed_graph_builder.h +++ /dev/null @@ -1,70 +0,0 @@ -/******************************************************************************* - * Sequential builder for distributed compressed graphs. - * - * @file: distributed_compressed_graph_builder.h - * @author: Daniel Salwasser - * @date: 07.06.2024 - ******************************************************************************/ -#pragma once - -#include - -#include "kaminpar-dist/datastructures/distributed_compressed_graph.h" -#include "kaminpar-dist/datastructures/distributed_csr_graph.h" -#include "kaminpar-dist/dkaminpar.h" - -#include "kaminpar-common/datastructures/static_array.h" -#include "kaminpar-common/graph-compression/compressed_edges_builder.h" - -namespace kaminpar::dist { - -/*! - * A sequential builder that constructs compressed graphs. - */ -class DistributedCompressedGraphBuilder { -public: - [[nodiscard]] static DistributedCompressedGraph compress(const DistributedCSRGraph &graph); - - /*! - * Constructs a new DistributedCompressedGraphBuilder. - * - * @param num_nodes The number of nodes of the graph to compress. - * @param num_edges The number of edges of the graph to compress. 
- * @param has_node_weights Whether node weights are stored. - * @param has_edge_weights Whether edge weights are stored. - * @param sorted Whether the nodes to add are stored in degree-bucket order. - */ - DistributedCompressedGraphBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted - ); - - /*! - * Adds a node to the compressed graph. Note that the neighbourhood vector is modified. - * - * @param node The node to add. - * @param neighbourhood The neighbourhood of the node to add. - */ - void add_node(const NodeID node, std::vector> &neighbourhood); - - /*! - * Builds the compressed graph. The builder must then be reinitialized in order to compress - * another graph. - * - * @return The components of the compressed graph that has been build. - */ - std::tuple, CompressedEdges, StaticArray> build(); - -private: - bool _sorted; // Whether the nodes of the graph are stored in degree-bucket order - StaticArray _nodes; - - EdgeID _num_edges; - CompressedEdgesBuilder _compressed_edges_builder; - StaticArray _edge_weights; -}; - -} // namespace kaminpar::dist diff --git a/kaminpar-dist/datastructures/distributed_csr_graph.h b/kaminpar-dist/datastructures/distributed_csr_graph.h index ae305672..7d0ec777 100644 --- a/kaminpar-dist/datastructures/distributed_csr_graph.h +++ b/kaminpar-dist/datastructures/distributed_csr_graph.h @@ -194,7 +194,7 @@ class DistributedCSRGraph : public AbstractDistributedGraph { return !_edge_weights.empty(); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { + [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const { return is_edge_weighted() ? 
_edge_weights[e] : 1; } @@ -295,56 +295,133 @@ class DistributedCSRGraph : public AbstractDistributedGraph { // template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { - constexpr bool non_stoppable = std::is_invocable_r_v; - static_assert(non_stoppable || std::is_invocable_r_v); - - const EdgeID from = _nodes[u]; - const EdgeID to = _nodes[u + 1]; - for (EdgeID edge = from; edge < to; ++edge) { - if constexpr (non_stoppable) { - l(_edges[edge]); - } else { - if (l(_edges[edge])) { - return; + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto decode_adjacent_nodes = [&](auto &&decode_edge_weight) { + const auto invoke_caller = [&](const EdgeID edge) { + if constexpr (kDecodeEdgeWeights) { + return l(_edges[edge], decode_edge_weight(edge)); + } else { + return l(_edges[edge]); + } + }; + + const EdgeID from = _nodes[u]; + const EdgeID to = _nodes[u + 1]; + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (kNonStoppable) { + invoke_caller(edge); + } else { + const bool stop = invoke_caller(edge); + if (stop) { + return; + } } } + }; + + if (is_edge_weighted()) { + decode_adjacent_nodes([&](const EdgeID edge) { return _edge_weights[edge]; }); + } else { + decode_adjacent_nodes([](const EdgeID) { return 1; }); } } template inline void neighbors(const NodeID u, Lambda &&l) const { - constexpr bool non_stoppable = std::is_invocable_r_v; - static_assert(non_stoppable || std::is_invocable_r_v); - - const EdgeID from = _nodes[u]; - const EdgeID to = _nodes[u + 1]; - for (EdgeID edge = from; edge < to; ++edge) { - if constexpr (non_stoppable) { - l(edge, _edges[edge]); - } else { - if (l(edge, _edges[edge])) { - 
return; + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const auto decode_neighbors = [&](auto &&decode_edge_weight) { + const auto invoke_caller = [&](const EdgeID edge) { + if constexpr (kDecodeEdgeWeights) { + return l(edge, _edges[edge], decode_edge_weight(edge)); + } else { + return l(edge, _edges[edge]); + } + }; + + const EdgeID from = _nodes[u]; + const EdgeID to = _nodes[u + 1]; + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (kNonStoppable) { + invoke_caller(edge); + } else { + const bool stop = invoke_caller(edge); + if (stop) { + return; + } } } + }; + + if (is_edge_weighted()) { + decode_neighbors([&](const EdgeID edge) { return _edge_weights[edge]; }); + } else { + decode_neighbors([](const EdgeID) { return 1; }); } } template inline void neighbors(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { - constexpr bool non_stoppable = std::is_invocable_r_v; - static_assert(non_stoppable || std::is_invocable_r_v); - - const EdgeID from = _nodes[u]; - const EdgeID degree = _nodes[u + 1] - from; - const EdgeID to = from + std::min(degree, max_num_neighbors); - - for (EdgeID edge = from; edge < to; ++edge) { - if constexpr (non_stoppable) { - l(edge, _edges[edge]); - } else { - if (l(edge, _edges[edge])) { - return; + KASSERT(u < n()); + + constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; + constexpr bool kDecodeEdgeWeights = std::is_invocable_v; + static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); + + using LambdaReturnType = std::conditional_t< + kDecodeEdgeWeights, + std::invoke_result, + std::invoke_result>::type; + constexpr bool kNonStoppable = std::is_void_v; + + const 
auto decode_neighbors = [&](auto &&decode_edge_weight) { + const auto invoke_caller = [&](const EdgeID edge) { + if constexpr (kDecodeEdgeWeights) { + return l(edge, _edges[edge], decode_edge_weight(edge)); + } else { + return l(edge, _edges[edge]); + } + }; + + const EdgeID from = _nodes[u]; + const NodeID degree = static_cast(_nodes[u + 1] - from); + const EdgeID to = from + std::min(degree, max_num_neighbors); + for (EdgeID edge = from; edge < to; ++edge) { + if constexpr (kNonStoppable) { + invoke_caller(edge); + } else { + const bool stop = invoke_caller(edge); + if (stop) { + return; + } } } + }; + + if (is_edge_weighted()) { + decode_neighbors([&](const EdgeID edge) { return _edge_weights[edge]; }); + } else { + decode_neighbors([](const EdgeID) { return 1; }); } } @@ -399,7 +476,7 @@ class DistributedCSRGraph : public AbstractDistributedGraph { return _node_weights; } - [[nodiscard]] inline const StaticArray &edge_weights() const final { + [[nodiscard]] inline const StaticArray &edge_weights() const { return _edge_weights; } diff --git a/kaminpar-dist/datastructures/distributed_graph.cc b/kaminpar-dist/datastructures/distributed_graph.cc index 5f8af086..75154bc1 100644 --- a/kaminpar-dist/datastructures/distributed_graph.cc +++ b/kaminpar-dist/datastructures/distributed_graph.cc @@ -75,11 +75,11 @@ void print_graph(const DistributedGraph &graph) { if (graph.is_owned_node(u)) { buf << " | "; - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight e_weight) { const char v_prefix = graph.is_owned_node(v) ?
' ' : '!'; buf << v_prefix << "L" << std::setw(w) << v << " G" << std::setw(w) - << graph.local_to_global_node(v) << " EW" << std::setw(w) << graph.edge_weight(e) - << " NW" << std::setw(w) << graph.node_weight(v) << "\t"; + << graph.local_to_global_node(v) << " EW" << std::setw(w) << e_weight << " NW" << std::setw(w) + << graph.node_weight(v) << "\t"; }); if (graph.degree(u) == 0) { buf << ""; @@ -261,7 +261,7 @@ bool validate_graph(const DistributedGraph &graph) { const auto recvbufs = mpi::graph::sparse_alltoall_interface_to_ghost_get( graph, - [&](const NodeID u, const EdgeID, const NodeID v) -> GhostNodeEdge { + [&](const NodeID u, EdgeID, const NodeID v, EdgeWeight) -> GhostNodeEdge { return {.owned = graph.local_to_global_node(u), .ghost = graph.local_to_global_node(v)}; } ); diff --git a/kaminpar-dist/datastructures/distributed_graph.h b/kaminpar-dist/datastructures/distributed_graph.h index 0d107530..2fa559be 100644 --- a/kaminpar-dist/datastructures/distributed_graph.h +++ b/kaminpar-dist/datastructures/distributed_graph.h @@ -11,20 +11,14 @@ ******************************************************************************/ #pragma once -#include #include -#include - -#include "kaminpar-mpi/utils.h" #include "kaminpar-dist/datastructures/abstract_distributed_graph.h" #include "kaminpar-dist/datastructures/distributed_compressed_graph.h" #include "kaminpar-dist/datastructures/distributed_csr_graph.h" -#include "kaminpar-dist/datastructures/growt.h" #include "kaminpar-dist/dkaminpar.h" #include "kaminpar-common/datastructures/static_array.h" -#include "kaminpar-common/degree_buckets.h" #include "kaminpar-common/ranges.h" namespace kaminpar::dist { @@ -138,10 +132,6 @@ class DistributedGraph : public AbstractDistributedGraph { return _underlying_graph->is_edge_weighted(); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const final { - return _underlying_graph->edge_weight(e); - } - [[nodiscard]] inline EdgeWeight total_edge_weight() const 
final {
return _underlying_graph->total_edge_weight(); } @@ -235,10 +225,6 @@ class DistributedGraph : public AbstractDistributedGraph { return _underlying_graph->node_weights(); } - [[nodiscard]] inline const StaticArray &edge_weights() const final { - return _underlying_graph->edge_weights(); - } - inline void set_ghost_node_weight(const NodeID ghost_node, const NodeWeight weight) final { _underlying_graph->set_ghost_node_weight(ghost_node, weight); } @@ -338,11 +324,11 @@ class DistributedGraph : public AbstractDistributedGraph { // High degree classification // - void init_high_degree_info(const EdgeID high_degree_threshold) const final { + inline void init_high_degree_info(const EdgeID high_degree_threshold) const final { _underlying_graph->init_high_degree_info(high_degree_threshold); } - [[nodiscard]] bool is_high_degree_node(const NodeID node) const final { + [[nodiscard]] inline bool is_high_degree_node(const NodeID node) const final { return _underlying_graph->is_high_degree_node(node); } @@ -350,7 +336,7 @@ class DistributedGraph : public AbstractDistributedGraph { // Graph permutation // - void set_permutation(StaticArray permutation) final { + inline void set_permutation(StaticArray permutation) final { _underlying_graph->set_permutation(std::move(permutation)); } @@ -390,7 +376,7 @@ class DistributedGraph : public AbstractDistributedGraph { // Graph permutation by coloring // - void set_color_sorted(StaticArray color_sizes) final { + inline void set_color_sorted(StaticArray color_sizes) final { _underlying_graph->set_color_sorted(std::move(color_sizes)); } @@ -398,15 +384,15 @@ class DistributedGraph : public AbstractDistributedGraph { return _underlying_graph->color_sorted(); } - [[nodiscard]] std::size_t number_of_colors() const final { + [[nodiscard]] inline std::size_t number_of_colors() const final { return _underlying_graph->number_of_colors(); } - [[nodiscard]] NodeID color_size(const std::size_t c) const final { + [[nodiscard]] inline NodeID 
color_size(const std::size_t c) const final { return _underlying_graph->color_size(c); } - [[nodiscard]] const StaticArray &get_color_sizes() const final { + [[nodiscard]] inline const StaticArray &get_color_sizes() const final { return _underlying_graph->get_color_sizes(); } @@ -414,25 +400,30 @@ class DistributedGraph : public AbstractDistributedGraph { // Access to underlying graph // - [[nodiscard]] AbstractDistributedGraph *underlying_graph() { + [[nodiscard]] inline AbstractDistributedGraph *underlying_graph() { return _underlying_graph.get(); } - [[nodiscard]] const AbstractDistributedGraph *underlying_graph() const { + [[nodiscard]] inline const AbstractDistributedGraph *underlying_graph() const { return _underlying_graph.get(); } - [[nodiscard]] AbstractDistributedGraph *take_underlying_graph() { + [[nodiscard]] inline AbstractDistributedGraph *take_underlying_graph() { return _underlying_graph.release(); } - [[nodiscard]] const DistributedCompressedGraph &compressed_graph() const { + [[nodiscard]] inline const DistributedCSRGraph &csr_graph() const { + const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); + return *dynamic_cast(abstract_graph); + } + + [[nodiscard]] inline const DistributedCompressedGraph &compressed_graph() const { const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); return *dynamic_cast(abstract_graph); } template - decltype(auto) reified(Lambda1 &&l1, Lambda2 &&l2) const { + inline decltype(auto) reified(Lambda1 &&l1, Lambda2 &&l2) const { const AbstractDistributedGraph *abstract_graph = _underlying_graph.get(); if (const auto *graph = dynamic_cast(abstract_graph); @@ -446,7 +437,7 @@ class DistributedGraph : public AbstractDistributedGraph { __builtin_unreachable(); } - template decltype(auto) reified(Lambda &&l) const { + template inline decltype(auto) reified(Lambda &&l) const { return reified(std::forward(l), std::forward(l)); } diff --git 
a/kaminpar-dist/datastructures/distributed_partitioned_graph.h b/kaminpar-dist/datastructures/distributed_partitioned_graph.h index 12e518c4..0312a8ae 100644 --- a/kaminpar-dist/datastructures/distributed_partitioned_graph.h +++ b/kaminpar-dist/datastructures/distributed_partitioned_graph.h @@ -102,7 +102,6 @@ class DistributedPartitionedGraph { [[nodiscard]] inline NodeID map_remote_node(const NodeID lnode, const PEID owner) const { return _graph->map_remote_node(lnode, owner); } [[nodiscard]] inline NodeID global_to_local_node(const GlobalNodeID global_u) const { return _graph->global_to_local_node(global_u); } [[nodiscard]] inline NodeWeight node_weight(const NodeID u) const { return _graph->node_weight(u); } - [[nodiscard]] inline EdgeWeight edge_weight(const EdgeID e) const { return _graph->edge_weight(e); } [[nodiscard]] inline NodeID degree(const NodeID u) const { return _graph->degree(u); } [[nodiscard]] inline const auto &node_distribution() const { return _graph->node_distribution(); } [[nodiscard]] inline GlobalNodeID node_distribution(const PEID pe) const { return _graph->node_distribution(pe); } diff --git a/kaminpar-dist/debug.cc b/kaminpar-dist/debug.cc index 58edbcd7..13b3c9b9 100644 --- a/kaminpar-dist/debug.cc +++ b/kaminpar-dist/debug.cc @@ -65,10 +65,10 @@ void write_metis_graph(const std::string &filename, const DistributedGraph &grap out << graph.node_weight(lu) << " "; } - graph.neighbors(lu, [&](const EdgeID e, const NodeID lv) { + graph.adjacent_nodes(lu, [&](const NodeID lv, const EdgeWeight w) { out << graph.local_to_global_node(lv) + 1 << " "; if (graph.is_edge_weighted()) { - out << graph.edge_weight(e) << " "; + out << w << " "; } }); out << "\n"; diff --git a/kaminpar-dist/distributed_label_propagation.h b/kaminpar-dist/distributed_label_propagation.h index dd872e91..9312fd78 100644 --- a/kaminpar-dist/distributed_label_propagation.h +++ b/kaminpar-dist/distributed_label_propagation.h @@ -317,12 +317,10 @@ template class LabelPropagat 
bool is_interface_node = false; - _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID e, const NodeID v) { + _graph->neighbors(u, _max_num_neighbors, [&](EdgeID, const NodeID v, const EdgeWeight w) { if (derived_accept_neighbor(u, v)) { const ClusterID v_cluster = derived_cluster(v); - const EdgeWeight rating = _graph->edge_weight(e); - - map[v_cluster] += rating; + map[v_cluster] += w; if constexpr (Config::kUseLocalActiveSetStrategy) { is_interface_node |= v >= _num_active_nodes; diff --git a/kaminpar-dist/graphutils/bfs_extractor.cc b/kaminpar-dist/graphutils/bfs_extractor.cc index e8a74b19..0340e328 100644 --- a/kaminpar-dist/graphutils/bfs_extractor.cc +++ b/kaminpar-dist/graphutils/bfs_extractor.cc @@ -328,7 +328,7 @@ auto BfsExtractor::bfs( external_degrees_map.clear(); - explore_outgoing_edges(node, [&](const EdgeID edge, const NodeID neighbor) { + explore_outgoing_edges(node, [&](const NodeID neighbor, const EdgeWeight weight) { const bool is_real_target = taken.get(neighbor) || // (_graph->is_owned_node(neighbor) && !is_distance_border_node) || // @@ -347,7 +347,7 @@ auto BfsExtractor::bfs( if (is_real_target) { edges.push_back(_graph->local_to_global_node(neighbor)); - edge_weights.push_back(_graph->edge_weight(edge)); + edge_weights.push_back(weight); if (!taken.get(neighbor)) { taken.set(neighbor); @@ -372,7 +372,7 @@ auto BfsExtractor::bfs( next_ghost_seed_edges.emplace_back(node, neighbor, current_distance + 1); } } else { - external_degrees_map[_p_graph->block(neighbor)] += _graph->edge_weight(edge); + external_degrees_map[_p_graph->block(neighbor)] += weight; } return true; @@ -409,21 +409,25 @@ void BfsExtractor::explore_outgoing_edges(const NodeID node, Lambda &&lambda) { const bool is_high_degree_node = _graph->degree(node) >= _high_degree_threshold; if (!is_high_degree_node || _high_degree_strategy == HighDegreeStrategy::TAKE_ALL) { - _graph->neighbors(node, [&](const EdgeID e, const NodeID v) { - const bool abort = !lambda(e, v); + 
_graph->adjacent_nodes(node, [&](const NodeID v, const EdgeWeight w) { + const bool abort = !lambda(v, w); return abort; }); } else if (_high_degree_strategy == HighDegreeStrategy::CUT) { - _graph->neighbors(node, _high_degree_threshold, [&](const EdgeID e, const NodeID v) { - const bool abort = !lambda(e, v); - return abort; - }); + _graph->neighbors( + node, + _high_degree_threshold, + [&](const EdgeID, const NodeID v, const EdgeWeight w) { + const bool abort = !lambda(v, w); + return abort; + } + ); } else if (_high_degree_strategy == HighDegreeStrategy::SAMPLE) { const double skip_prob = 1.0 * _high_degree_threshold / _graph->degree(node); std::geometric_distribution skip_dist(skip_prob); - _graph->neighbors(node, [&](const EdgeID e, const NodeID v) { - const bool abort = !lambda(e, v); + _graph->adjacent_nodes(node, [&](const NodeID v, const EdgeWeight w) { + const bool abort = !lambda(v, w); return abort; }); // @todo @@ -588,9 +592,8 @@ void BfsExtractor::init_external_degrees() { }); _graph->pfor_nodes([&](const NodeID u) { - _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight e_weight) { const BlockID v_block = _p_graph->block(v); - const EdgeWeight e_weight = _graph->edge_weight(e); external_degree(u, v_block) += e_weight; }); }); diff --git a/kaminpar-dist/graphutils/communication.h b/kaminpar-dist/graphutils/communication.h index 5fc06ebd..8a1b9abf 100644 --- a/kaminpar-dist/graphutils/communication.h +++ b/kaminpar-dist/graphutils/communication.h @@ -126,9 +126,9 @@ void sparse_alltoall_interface_to_ghost_custom_range( SCOPED_TIMER("Sparse AllToAll"); constexpr bool builder_invocable_with_pe = - std::is_invocable_r_v; + std::is_invocable_r_v; constexpr bool builder_invocable_without_pe = - std::is_invocable_r_v; + std::is_invocable_r_v; static_assert(builder_invocable_with_pe || builder_invocable_without_pe, "bad builder type"); constexpr bool receiver_invocable_with_pe = @@ -138,7 
+138,7 @@ void sparse_alltoall_interface_to_ghost_custom_range( static_assert(receiver_invocable_with_pe || receiver_invocable_without_pe, "bad receiver type"); constexpr bool filter_invocable_with_edge = - std::is_invocable_r_v; + std::is_invocable_r_v; constexpr bool filter_invocable_with_node = std::is_invocable_r_v; static_assert(filter_invocable_with_edge || filter_invocable_with_node, "bad filter type"); @@ -165,10 +165,10 @@ void sparse_alltoall_interface_to_ghost_custom_range( const PEID thread = omp_get_thread_num(); - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if (graph.is_ghost_node(v)) { if constexpr (filter_invocable_with_edge) { - if (!filter(u, e, v)) { + if (!filter(u, e, v, w)) { return; } } @@ -200,10 +200,10 @@ void sparse_alltoall_interface_to_ghost_custom_range( } const PEID thread = omp_get_thread_num(); - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if (graph.is_ghost_node(v)) { if constexpr (filter_invocable_with_edge) { - if (!filter(u, e, v)) { + if (!filter(u, e, v, w)) { return; } } @@ -211,9 +211,9 @@ void sparse_alltoall_interface_to_ghost_custom_range( const PEID pe = graph.ghost_owner(v); const std::size_t slot = --num_messages[thread][pe]; if constexpr (builder_invocable_with_pe) { - send_buffers[pe][slot] = builder(u, e, v, pe); + send_buffers[pe][slot] = builder(u, e, v, w, pe); } else /* if (builder_invocable_without_pe) */ { - send_buffers[pe][slot] = builder(u, e, v); + send_buffers[pe][slot] = builder(u, e, v, w); } } }); @@ -494,7 +494,7 @@ void sparse_alltoall_interface_to_pe_custom_range( } } - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v) { if (!graph.is_ghost_node(v)) { return; } diff --git a/kaminpar-dist/graphutils/replicator.cc b/kaminpar-dist/graphutils/replicator.cc index 
28eb87e5..8efaca8f 100644 --- a/kaminpar-dist/graphutils/replicator.cc +++ b/kaminpar-dist/graphutils/replicator.cc @@ -47,6 +47,24 @@ template decltype(auto) copy_raw_nodes(const Graph &graph) { } } +template decltype(auto) copy_raw_edge_weights(const Graph &graph) { + constexpr bool kIsCompressedGraph = std::is_same_v; + + // Materialize the (uncompressed) edge weights if the graph is compressed, or simply forward the + // raw edge weights if the graph is uncompressed + if constexpr (kIsCompressedGraph) { + StaticArray raw_edge_weights(graph.m()); + graph.pfor_nodes([&](const NodeID u) { + graph.neighbors(u, [&](const EdgeID e, NodeID, const EdgeWeight w) { + raw_edge_weights[e] = w; + }); + }); + return raw_edge_weights; + } else { + return graph.raw_edge_weights(); + } +} + } // namespace std::unique_ptr allgather_graph(const DistributedGraph &graph) { @@ -172,7 +190,7 @@ template shm::Graph replicate_graph_everywhere(const Graph &gra KASSERT((graph.is_edge_weighted() || graph.m() == 0)); if constexpr (std::is_same_v) { mpi::allgatherv( - graph.raw_edge_weights().data(), + copy_raw_edge_weights(graph).data(), asserting_cast(graph.m()), edge_weights.data(), edges_recvcounts.data(), @@ -182,7 +200,7 @@ template shm::Graph replicate_graph_everywhere(const Graph &gra } else { StaticArray edge_weights_buffer(graph.global_m()); mpi::allgatherv( - graph.raw_edge_weights().data(), + copy_raw_edge_weights(graph).data(), asserting_cast(graph.m()), edge_weights_buffer.data(), edges_recvcounts.data(), @@ -311,7 +329,7 @@ DistributedGraph replicate_graph(const Graph &graph, const int num_replications) if (is_edge_weighted) { KASSERT(graph.is_edge_weighted() || graph.m() == 0); mpi::allgatherv( - graph.raw_edge_weights().data(), + copy_raw_edge_weights(graph).data(), asserting_cast(graph.m()), edge_weights.data(), edges_counts.data(), diff --git a/kaminpar-dist/graphutils/subgraph_extractor.cc b/kaminpar-dist/graphutils/subgraph_extractor.cc index 81e65769..71ca79f2 100644 --- 
a/kaminpar-dist/graphutils/subgraph_extractor.cc +++ b/kaminpar-dist/graphutils/subgraph_extractor.cc @@ -207,12 +207,12 @@ extract_local_block_induced_subgraphs(const DistributedPartitionedGraph &p_graph const NodeID pos = n0 + u; const NodeID u_prime = shared_nodes[pos]; - p_graph.neighbors(u_prime, [&](const EdgeID e_prime, const NodeID v_prime) { + p_graph.adjacent_nodes(u_prime, [&](const NodeID v_prime, const EdgeWeight w_prime) { if (p_graph.block(v_prime) != b) { return; } - shared_edge_weights[e0 + e] = p_graph.edge_weight(e_prime); + shared_edge_weights[e0 + e] = w_prime; shared_edges[e0 + e] = mapping[v_prime]; ++e; }); diff --git a/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc b/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc index 44bb687c..d3a515cd 100644 --- a/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc +++ b/kaminpar-dist/initial_partitioning/mtkahypar_initial_partitioner.cc @@ -59,7 +59,7 @@ shm::PartitionedGraph MtKaHyParInitialPartitioner::initial_partition( graph.pfor_nodes([&](const NodeID u) { vertex_weights[u] = static_cast(graph.node_weight(u)); - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if (v < u) { // Only need edges in one direction return; } @@ -67,7 +67,7 @@ shm::PartitionedGraph MtKaHyParInitialPartitioner::initial_partition( EdgeID position = edge_position[e] - 1; edges[2 * position] = static_cast(u); edges[2 * position + 1] = static_cast(v); - edge_weights[position] = static_cast(graph.edge_weight(e)); + edge_weights[position] = static_cast(w); }); }); diff --git a/kaminpar-dist/metrics.cc b/kaminpar-dist/metrics.cc index 023f8d6b..19570510 100644 --- a/kaminpar-dist/metrics.cc +++ b/kaminpar-dist/metrics.cc @@ -23,9 +23,9 @@ GlobalEdgeWeight local_edge_cut(const DistributedPartitionedGraph &p_graph) { auto &cut = cut_ets.local(); for (NodeID u = r.begin(); u < r.end(); ++u) { const 
BlockID u_block = p_graph.block(u); - p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (u_block != p_graph.block(v)) { - cut += p_graph.edge_weight(e); + cut += w; } }); } diff --git a/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc b/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc index 98b54c5d..6d5ce923 100644 --- a/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc +++ b/kaminpar-dist/refinement/adapters/mtkahypar_refiner.cc @@ -115,7 +115,7 @@ bool MtKaHyParRefiner::refine() { shm_graph->pfor_nodes([&](const NodeID u) { vertex_weights[u] = static_cast(shm_graph->node_weight(u)); - shm_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + shm_graph->neighbors(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if (v < u) { // Only need edges in one direction return; } @@ -123,8 +123,7 @@ bool MtKaHyParRefiner::refine() { EdgeID position = edge_position[e] - 1; edges[2 * position] = asserting_cast(u); edges[2 * position + 1] = asserting_cast(v); - edge_weights[position] = - asserting_cast(shm_graph->edge_weight(e)); + edge_weights[position] = asserting_cast(w); }); }); diff --git a/kaminpar-dist/refinement/balancer/clusters.cc b/kaminpar-dist/refinement/balancer/clusters.cc index 229a3ef0..bee99002 100644 --- a/kaminpar-dist/refinement/balancer/clusters.cc +++ b/kaminpar-dist/refinement/balancer/clusters.cc @@ -15,6 +15,7 @@ #include "kaminpar-dist/coarsening/clusterer.h" #include "kaminpar-dist/context.h" +#include "kaminpar-dist/dkaminpar.h" #include "kaminpar-dist/factories.h" #include "kaminpar-dist/logger.h" #include "kaminpar-dist/timer.h" @@ -25,6 +26,7 @@ #include "kaminpar-common/datastructures/binary_heap.h" #include "kaminpar-common/datastructures/fast_reset_array.h" #include "kaminpar-common/datastructures/noinit_vector.h" +#include "kaminpar-common/parallel/algorithm.h" #define HEAVY assert::heavy @@ -93,12 +95,12 @@ void 
Clusters::init_ghost_node_adjacency() { for (const NodeID cluster : clusters()) { for (const NodeID u : nodes(cluster)) { - _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (!_p_graph->is_ghost_node(v)) { return; } - weight_to_ghost[v - _p_graph->n()] += _p_graph->edge_weight(e); + weight_to_ghost[v - _p_graph->n()] += w; }); } @@ -219,9 +221,9 @@ bool Clusters::dbg_check_conns(const NodeID cluster) const { std::vector actual(_p_graph->k()); for (const NodeID u : nodes(cluster)) { - _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (!_p_graph->is_owned_node(v) || cluster_of(v) != cluster_of(u)) { - actual[_p_graph->block(v)] += _p_graph->edge_weight(e); + actual[_p_graph->block(v)] += w; } }); } @@ -309,13 +311,13 @@ class BatchedClusterBuilder { add_to_cluster(u); - _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (_p_graph.is_owned_node(v) && _node_to_cluster[v] == kInvalidBlockID && _p_graph.block(v) == bu) { if (_frontier.contains(v)) { - _frontier.decrease_priority(v, _frontier.key(v) + _p_graph.edge_weight(e)); + _frontier.decrease_priority(v, _frontier.key(v) + w); } else { - _frontier.push(v, _p_graph.edge_weight(e)); + _frontier.push(v, w); } } }); @@ -338,15 +340,15 @@ class BatchedClusterBuilder { _clusters[_cur_pos] = u; ++_cur_pos; - _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (_p_graph.is_owned_node(v) && _node_to_cluster[v] == _cur_cluster) { - _cur_block_conn -= _p_graph.edge_weight(e); + _cur_block_conn -= w; } else { const BlockID bv = _p_graph.block(v); if (bv == _cur_block) { - _cur_block_conn += _p_graph.edge_weight(e); + _cur_block_conn += w; } else if (_p_graph.block_weight(bv) + _cur_weight <= 
_p_ctx.graph->max_block_weight(bv)) { - _cur_conns.change_priority(bv, _cur_conns.key(bv) + _p_graph.edge_weight(e)); + _cur_conns.change_priority(bv, _cur_conns.key(bv) + w); } else if (_cur_conns.key(bv) > 0) { // no longer a viable target _cur_conns.change_priority(bv, -1); } @@ -372,12 +374,12 @@ class BatchedClusterBuilder { // @todo should do this when updating _best_* for (NodeID pos = _cluster_indices[_cur_cluster]; pos < _best_prefix_pos; ++pos) { const NodeID u = _clusters[pos]; - _p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (_p_graph.is_owned_node(v) && _node_to_cluster[v] == _cur_cluster) { return; } const BlockID bv = _p_graph.block(v); - _conns[_cur_cluster * _p_graph.k() + bv] += _p_graph.edge_weight(e); + _conns[_cur_cluster * _p_graph.k() + bv] += w; }); } @@ -478,11 +480,11 @@ Clusters build_singleton_clusters( for (const BlockID k : p_graph.blocks()) { m_ctx.cluster_conns.push_back(0); } - p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const BlockID bv = p_graph.block(v); const std::size_t idx = cur_move_set * p_graph.k() + bv; KASSERT(idx < m_ctx.cluster_conns.size()); - m_ctx.cluster_conns[idx] += p_graph.edge_weight(e); + m_ctx.cluster_conns[idx] += w; }); ++cur_move_set; @@ -554,11 +556,11 @@ Clusters build_local_clusters( m_ctx.clusters[cluster_sizes[clustering[u]]++] = u; m_ctx.cluster_indices[ms + 1] = cluster_sizes[clustering[u]]; - p_graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + p_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { // We may not access clustering[.] 
for ghost vertices if (!p_graph.is_owned_node(v) || clustering[v] != clustering[u]) { const BlockID bv = p_graph.block(v); - m_ctx.cluster_conns[ms * p_graph.k() + bv] += p_graph.edge_weight(e); + m_ctx.cluster_conns[ms * p_graph.k() + bv] += w; } }); } else { diff --git a/kaminpar-dist/refinement/balancer/clusters.h b/kaminpar-dist/refinement/balancer/clusters.h index 97f24766..25793311 100644 --- a/kaminpar-dist/refinement/balancer/clusters.h +++ b/kaminpar-dist/refinement/balancer/clusters.h @@ -160,7 +160,7 @@ class Clusters { for (const NodeID u : nodes(set)) { KASSERT(_p_graph->is_owned_node(u)); - _p_graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _p_graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { if (!_p_graph->is_owned_node(v)) { return; } @@ -170,7 +170,7 @@ class Clusters { return; } - const EdgeWeight delta = _p_graph->edge_weight(e); + const EdgeWeight delta = w; _cluster_conns[set_v * _p_graph->k() + from] -= delta; _cluster_conns[set_v * _p_graph->k() + to] += delta; }); diff --git a/kaminpar-dist/refinement/gain_calculator.h b/kaminpar-dist/refinement/gain_calculator.h index 954a2091..136b9728 100644 --- a/kaminpar-dist/refinement/gain_calculator.h +++ b/kaminpar-dist/refinement/gain_calculator.h @@ -88,12 +88,12 @@ template class GainCalculator { BlockID max_target = b_u; auto action = [&](auto &map) { - _graph->neighbors(u, [&](const EdgeID e, const NodeID v) { + _graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { const BlockID b_v = _p_graph->block(v); if (b_u != b_v && weight_checker(b_v, _p_graph->block_weight(b_v) + w_u)) { - map[b_v] += _graph->edge_weight(e); + map[b_v] += w; } else if (b_u == b_v) { - int_conn += _graph->edge_weight(e); + int_conn += w; } }); diff --git a/kaminpar-dist/refinement/jet/jet_refiner.cc b/kaminpar-dist/refinement/jet/jet_refiner.cc index 6784f311..d539ac0e 100644 --- a/kaminpar-dist/refinement/jet/jet_refiner.cc +++ b/kaminpar-dist/refinement/jet/jet_refiner.cc @@ 
-281,17 +281,20 @@ template class JetRefiner : public GlobalRefiner { EdgeWeight projected_gain = 0; - _graph.neighbors(u, [&, gain_u = gain_u, to_u = to_u](const EdgeID e, const NodeID v) { - const auto [gain_v, to_v] = _gains_and_targets[v]; - const BlockID projected_b_v = - (gain_v > gain_u || (gain_v == gain_u && v < u)) ? to_v : _p_graph.block(v); - - if (projected_b_v == to_u) { - projected_gain += _graph.edge_weight(e); - } else if (projected_b_v == from_u) { - projected_gain -= _graph.edge_weight(e); - } - }); + _graph.adjacent_nodes( + u, + [&, gain_u = gain_u, to_u = to_u](const NodeID v, const EdgeWeight w) { + const auto [gain_v, to_v] = _gains_and_targets[v]; + const BlockID projected_b_v = + (gain_v > gain_u || (gain_v == gain_u && v < u)) ? to_v : _p_graph.block(v); + + if (projected_b_v == to_u) { + projected_gain += w; + } else if (projected_b_v == from_u) { + projected_gain -= w; + } + } + ); // Locking the node here means that the move // will be executed by move_locked_nodes() diff --git a/kaminpar-dist/refinement/lp/clp_refiner.cc b/kaminpar-dist/refinement/lp/clp_refiner.cc index 9bb433e3..dff9715e 100644 --- a/kaminpar-dist/refinement/lp/clp_refiner.cc +++ b/kaminpar-dist/refinement/lp/clp_refiner.cc @@ -392,8 +392,8 @@ NodeID ColoredLPRefiner::perform_best_moves(const ColorID c) { return num_local_moved_nodes; } -auto ColoredLPRefiner::reduce_move_candidates(std::vector &&candidates) - -> std::vector { +auto ColoredLPRefiner::reduce_move_candidates(std::vector &&candidates +) -> std::vector { const int size = mpi::get_comm_size(_p_graph.communicator()); const int rank = mpi::get_comm_rank(_p_graph.communicator()); KASSERT(math::is_power_of_2(size), "#PE must be a power of two", assert::always); @@ -822,9 +822,8 @@ NodeID ColoredLPRefiner::find_moves(const ColorID c) { auto action = [&](auto &map) { bool is_interface_node = false; - graph.neighbors(u, [&](const EdgeID e, const NodeID v) { + graph.adjacent_nodes(u, [&](const NodeID v, const 
EdgeWeight weight) { const BlockID b = _p_graph.block(v); - const EdgeWeight weight = graph.edge_weight(e); map[b] += weight; is_interface_node |= graph.is_ghost_node(v); }); diff --git a/kaminpar-shm/coarsening/clustering/legacy_lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/legacy_lp_clusterer.cc index 9ddc9f8e..89b1a3f9 100644 --- a/kaminpar-shm/coarsening/clustering/legacy_lp_clusterer.cc +++ b/kaminpar-shm/coarsening/clustering/legacy_lp_clusterer.cc @@ -250,12 +250,6 @@ void LegacyLPClustering::set_desired_cluster_count(const NodeID count) { void LegacyLPClustering::compute_clustering( StaticArray &clustering, const Graph &graph, bool ) { - if (auto *csr_graph = dynamic_cast(graph.underlying_graph()); - csr_graph != nullptr) { - _core->compute_clustering(clustering, *csr_graph, false); - return; - } - - __builtin_unreachable(); + _core->compute_clustering(clustering, graph.csr_graph(), false); } } // namespace kaminpar::shm diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc index ad4e943e..3e88ae72 100644 --- a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc +++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc @@ -303,22 +303,18 @@ class LPClusteringImpl final class LPClusteringImplWrapper { public: LPClusteringImplWrapper(const CoarseningContext &c_ctx) - : _csr_core(std::make_unique>(c_ctx, _permutations)), - _compact_csr_core(std::make_unique>(c_ctx, _permutations) - ), - _compressed_core(std::make_unique>(c_ctx, _permutations) + : _csr_impl(std::make_unique>(c_ctx, _permutations)), + _compressed_impl(std::make_unique>(c_ctx, _permutations) ) {} void set_max_cluster_weight(const NodeWeight max_cluster_weight) { - _csr_core->set_max_cluster_weight(max_cluster_weight); - _compact_csr_core->set_max_cluster_weight(max_cluster_weight); - _compressed_core->set_max_cluster_weight(max_cluster_weight); + _csr_impl->set_max_cluster_weight(max_cluster_weight); + 
_compressed_impl->set_max_cluster_weight(max_cluster_weight); } void set_desired_cluster_count(const NodeID count) { - _csr_core->set_desired_num_clusters(count); - _compact_csr_core->set_desired_num_clusters(count); - _compressed_core->set_desired_num_clusters(count); + _csr_impl->set_desired_num_clusters(count); + _compressed_impl->set_desired_num_clusters(count); } void compute_clustering( @@ -326,7 +322,7 @@ class LPClusteringImplWrapper { ) { // Compute a clustering and setup/release the data structures used by the core, so that they can // be shared by all implementations. - const auto compute = [&](auto &core, auto &graph) { + const auto compute_clustering = [&](auto &core, auto &graph) { if (_freed) { _freed = false; core.allocate(graph.n()); @@ -347,38 +343,32 @@ class LPClusteringImplWrapper { }; const NodeID num_nodes = graph.n(); - _csr_core->preinitialize(num_nodes); - _compact_csr_core->preinitialize(num_nodes); - _compressed_core->preinitialize(num_nodes); - - if (auto *csr_graph = dynamic_cast(graph.underlying_graph()); - csr_graph != nullptr) { - compute(*_csr_core, *csr_graph); - } else if (auto *compact_csr_graph = - dynamic_cast(graph.underlying_graph()); - compact_csr_graph != nullptr) { - compute(*_compact_csr_core, *compact_csr_graph); - } else if (auto *compressed_graph = - dynamic_cast(graph.underlying_graph()); - compressed_graph != nullptr) { - compute(*_compressed_core, *compressed_graph); - } + _csr_impl->preinitialize(num_nodes); + _compressed_impl->preinitialize(num_nodes); + + graph.reified( + [&](const auto &csr_graph) { + LPClusteringImpl &impl = *_csr_impl; + compute_clustering(impl, csr_graph); + }, + [&](const auto &compressed_graph) { + LPClusteringImpl &impl = *_compressed_impl; + compute_clustering(impl, compressed_graph); + } + ); // Only relabel clusters during the first iteration - _csr_core->set_relabel_before_second_phase(false); - _compact_csr_core->set_relabel_before_second_phase(false); - 
_compressed_core->set_relabel_before_second_phase(false); + _csr_impl->set_relabel_before_second_phase(false); + _compressed_impl->set_relabel_before_second_phase(false); // Only use the initially small cluster weight vector for the first lp implementation - _csr_core->set_use_small_vector_initially(false); - _compact_csr_core->set_use_small_vector_initially(false); - _compressed_core->set_use_small_vector_initially(false); + _csr_impl->set_use_small_vector_initially(false); + _compressed_impl->set_use_small_vector_initially(false); } private: - std::unique_ptr> _csr_core; - std::unique_ptr> _compact_csr_core; - std::unique_ptr> _compressed_core; + std::unique_ptr> _csr_impl; + std::unique_ptr> _compressed_impl; // The data structures that are used by the LP clusterer and are shared between the // different implementations. @@ -412,4 +402,5 @@ void LPClustering::compute_clustering( ) { return _impl_wrapper->compute_clustering(clustering, graph, free_memory_afterwards); } + } // namespace kaminpar::shm diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc index ac126f7a..174dec6a 100644 --- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc @@ -14,10 +14,10 @@ #include "kaminpar-shm/coarsening/contraction/cluster_contraction.h" #include "kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.h" -#include "kaminpar-common/datastructures/compact_static_array.h" #include "kaminpar-common/datastructures/rating_map.h" #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/parallel/algorithm.h" #include "kaminpar-common/timer.h" namespace kaminpar::shm::contraction { diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc 
b/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc index da271ef9..50a14f2c 100644 --- a/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc +++ b/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc @@ -9,6 +9,7 @@ #include "kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.h" #include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/parallel/algorithm.h" #include "kaminpar-common/timer.h" namespace kaminpar::shm::contraction { diff --git a/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc index caac97bb..1b408986 100644 --- a/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/legacy_buffered_cluster_contraction.cc @@ -13,9 +13,9 @@ #include "kaminpar-shm/coarsening/contraction/cluster_contraction.h" #include "kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.h" -#include "kaminpar-common/datastructures/compact_static_array.h" #include "kaminpar-common/datastructures/rating_map.h" #include "kaminpar-common/datastructures/static_array.h" +#include "kaminpar-common/parallel/algorithm.h" #include "kaminpar-common/timer.h" namespace kaminpar::shm::contraction { diff --git a/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc index 5b6a110a..a2015674 100644 --- a/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/naive_unbuffered_cluster_contraction.cc @@ -7,12 +7,11 @@ #include "kaminpar-shm/coarsening/contraction/cluster_contraction.h" #include "kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.h" -#include "kaminpar-shm/coarsening/contraction/unbuffered_cluster_contraction.h" -#include 
"kaminpar-common/datastructures/compact_static_array.h" #include "kaminpar-common/datastructures/rating_map.h" #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/heap_profiler.h" +#include "kaminpar-common/parallel/algorithm.h" #include "kaminpar-common/timer.h" namespace kaminpar::shm::contraction { diff --git a/kaminpar-shm/datastructures/abstract_graph.h b/kaminpar-shm/datastructures/abstract_graph.h index 9c71fe3d..e1bf7fdf 100644 --- a/kaminpar-shm/datastructures/abstract_graph.h +++ b/kaminpar-shm/datastructures/abstract_graph.h @@ -13,6 +13,7 @@ #include "kaminpar-common/ranges.h" namespace kaminpar::shm { + class AbstractGraph { public: // Data types used by this graph @@ -37,34 +38,35 @@ class AbstractGraph { // Node and edge weights [[nodiscard]] virtual bool is_node_weighted() const = 0; - [[nodiscard]] virtual NodeWeight node_weight(NodeID u) const = 0; + [[nodiscard]] virtual NodeWeight node_weight(const NodeID u) const = 0; [[nodiscard]] virtual NodeWeight max_node_weight() const = 0; [[nodiscard]] virtual NodeWeight total_node_weight() const = 0; + virtual void update_total_node_weight() = 0; [[nodiscard]] virtual bool is_edge_weighted() const = 0; [[nodiscard]] virtual EdgeWeight total_edge_weight() const = 0; - // Low-level access to the graph structure - [[nodiscard]] virtual NodeID max_degree() const = 0; - [[nodiscard]] virtual NodeID degree(NodeID u) const = 0; - // Iterators for nodes / edges [[nodiscard]] virtual IotaRange nodes() const = 0; [[nodiscard]] virtual IotaRange edges() const = 0; + [[nodiscard]] virtual IotaRange incident_edges(const NodeID u) const = 0; + + // Node degree + [[nodiscard]] virtual NodeID max_degree() const = 0; + [[nodiscard]] virtual NodeID degree(const NodeID u) const = 0; // Graph permutation virtual void set_permutation(StaticArray permutation) = 0; [[nodiscard]] virtual bool permuted() const = 0; - [[nodiscard]] virtual NodeID map_original_node(NodeID u) const = 0; + 
[[nodiscard]] virtual NodeID map_original_node(const NodeID u) const = 0; [[nodiscard]] virtual StaticArray &&take_raw_permutation() = 0; // Degree buckets - [[nodiscard]] virtual std::size_t bucket_size(std::size_t bucket) const = 0; - [[nodiscard]] virtual NodeID first_node_in_bucket(std::size_t bucket) const = 0; - [[nodiscard]] virtual NodeID first_invalid_node_in_bucket(std::size_t bucket) const = 0; - [[nodiscard]] virtual std::size_t number_of_buckets() const = 0; [[nodiscard]] virtual bool sorted() const = 0; - - virtual void update_total_node_weight() = 0; + [[nodiscard]] virtual std::size_t number_of_buckets() const = 0; + [[nodiscard]] virtual std::size_t bucket_size(const std::size_t bucket) const = 0; + [[nodiscard]] virtual NodeID first_node_in_bucket(const std::size_t bucket) const = 0; + [[nodiscard]] virtual NodeID first_invalid_node_in_bucket(const std::size_t bucket) const = 0; }; + } // namespace kaminpar::shm diff --git a/kaminpar-shm/datastructures/compressed_graph.cc b/kaminpar-shm/datastructures/compressed_graph.cc index ccf86a8f..aa0c10a1 100644 --- a/kaminpar-shm/datastructures/compressed_graph.cc +++ b/kaminpar-shm/datastructures/compressed_graph.cc @@ -1,5 +1,5 @@ /******************************************************************************* - * Compressed static graph representation. + * Static compressed graph representation. 
* * @file: compressed_graph.cc * @author: Daniel Salwasser @@ -17,36 +17,13 @@ namespace kaminpar::shm { CompressedGraph::CompressedGraph( - CompactStaticArray nodes, - StaticArray compressed_edges, + CompressedNeighborhoods compressed_neighborhoods, StaticArray node_weights, - EdgeID edge_count, - EdgeWeight total_edge_weight, - bool has_edge_weights, - NodeID max_degree, - bool sorted, - std::size_t num_high_degree_nodes, - std::size_t num_high_degree_parts, - std::size_t num_interval_nodes, - std::size_t num_intervals + bool sorted ) - : _nodes(std::move(nodes)), - _compressed_edges(std::move(compressed_edges)), + : _compressed_neighborhoods(std::move(compressed_neighborhoods)), _node_weights(std::move(node_weights)), - _edge_count(edge_count), - _total_edge_weight(total_edge_weight), - _has_edge_weights(has_edge_weights), - _max_degree(max_degree), - _sorted(sorted), - _num_high_degree_nodes(num_high_degree_nodes), - _num_high_degree_parts(num_high_degree_parts), - _num_interval_nodes(num_interval_nodes), - _num_intervals(num_intervals) { - KASSERT(kHighDegreeEncoding || _num_high_degree_nodes == 0); - KASSERT(kHighDegreeEncoding || _num_high_degree_parts == 0); - KASSERT(kIntervalEncoding || _num_interval_nodes == 0); - KASSERT(kIntervalEncoding || _num_intervals == 0); - + _sorted(sorted) { if (_node_weights.empty()) { _total_node_weight = static_cast(n()); _max_node_weight = 1; @@ -102,15 +79,15 @@ void CompressedGraph::update_total_node_weight() { } } -void CompressedGraph::remove_isolated_nodes(const NodeID isolated_nodes) { +void CompressedGraph::remove_isolated_nodes(const NodeID num_isolated_nodes) { KASSERT(sorted()); - if (isolated_nodes == 0) { + if (num_isolated_nodes == 0) { return; } - const NodeID new_n = n() - isolated_nodes; - _nodes.restrict(new_n + 1); + const NodeID new_n = n() - num_isolated_nodes; + _compressed_neighborhoods.restrict_nodes(new_n + 1); if (!_node_weights.empty()) { _node_weights.restrict(new_n); } @@ -119,7 +96,7 @@ void 
CompressedGraph::remove_isolated_nodes(const NodeID isolated_nodes) { // Update degree buckets for (std::size_t i = 0; i < _buckets.size() - 1; ++i) { - _buckets[1 + i] -= isolated_nodes; + _buckets[1 + i] -= num_isolated_nodes; } // If the graph has only isolated nodes then there are no buckets afterwards @@ -132,7 +109,7 @@ void CompressedGraph::integrate_isolated_nodes() { KASSERT(sorted()); const NodeID nonisolated_nodes = n(); - _nodes.unrestrict(); + _compressed_neighborhoods.unrestrict_nodes(); _node_weights.unrestrict(); const NodeID isolated_nodes = n() - nonisolated_nodes; diff --git a/kaminpar-shm/datastructures/compressed_graph.h b/kaminpar-shm/datastructures/compressed_graph.h index 7f5e92cc..309f57e8 100644 --- a/kaminpar-shm/datastructures/compressed_graph.h +++ b/kaminpar-shm/datastructures/compressed_graph.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Compressed static graph representation. + * Static compressed graph representation. * * @file: compressed_graph.h * @author: Daniel Salwasser @@ -7,7 +7,6 @@ ******************************************************************************/ #pragma once -#include #include #include @@ -15,144 +14,84 @@ #include "kaminpar-shm/datastructures/abstract_graph.h" -#include "kaminpar-common/constexpr_utils.h" -#include "kaminpar-common/datastructures/compact_static_array.h" #include "kaminpar-common/datastructures/static_array.h" #include "kaminpar-common/degree_buckets.h" -#include "kaminpar-common/math.h" +#include "kaminpar-common/graph-compression/compressed_neighborhoods.h" #include "kaminpar-common/ranges.h" -#include "kaminpar-common/varint_codec.h" -#include "kaminpar-common/varint_run_length_codec.h" -#include "kaminpar-common/varint_stream_codec.h" namespace kaminpar::shm { /*! * A compressed static graph that stores the nodes and edges in a compressed adjacency array. 
It * uses variable length encoding, gap encoding and interval encoding to compress the edge array. + * Additionally, it stores the edge weights interleaved with the edges and stores them with variable + * length encoding and gap encoding. */ class CompressedGraph : public AbstractGraph { + using CompressedNeighborhoods = kaminpar::CompressedNeighborhoods; + public: using AbstractGraph::EdgeID; using AbstractGraph::EdgeWeight; using AbstractGraph::NodeID; using AbstractGraph::NodeWeight; - using SignedID = std::int64_t; -#ifdef KAMINPAR_COMPRESSION_HIGH_DEGREE_ENCODING - /*! - * Whether high degree encoding is used. - */ - static constexpr bool kHighDegreeEncoding = true; -#else /*! * Whether high degree encoding is used. */ - static constexpr bool kHighDegreeEncoding = false; -#endif + static constexpr bool kHighDegreeEncoding = CompressedNeighborhoods::kHighDegreeEncoding; /*! * The minimum degree of a node to be considered high degree. */ - static constexpr NodeID kHighDegreeThreshold = 10000; + static constexpr NodeID kHighDegreeThreshold = CompressedNeighborhoods::kHighDegreeThreshold; /*! - * The length of a part when splitting the neighbourhood of a high degree node. + * The length of a part when splitting the neighbourhood of a high degree + * node. */ - static constexpr NodeID kHighDegreePartLength = 1000; + static constexpr NodeID kHighDegreePartLength = CompressedNeighborhoods::kHighDegreePartLength; -#ifdef KAMINPAR_COMPRESSION_INTERVAL_ENCODING - /*! - * Whether interval encoding is used. - */ - static constexpr bool kIntervalEncoding = true; -#else /*! * Whether interval encoding is used. */ - static constexpr bool kIntervalEncoding = false; -#endif + static constexpr bool kIntervalEncoding = CompressedNeighborhoods::kIntervalEncoding; /*! * The minimum length of an interval to encode if interval encoding is used. 
*/ - static constexpr NodeID kIntervalLengthTreshold = 3; + static constexpr NodeID kIntervalLengthTreshold = + CompressedNeighborhoods::kIntervalLengthTreshold; -#ifdef KAMINPAR_COMPRESSION_RUN_LENGTH_ENCODING /*! * Whether run-length encoding is used. */ - static constexpr bool kRunLengthEncoding = true; -#else - /*! - * Whether run-length encoding is used. - */ - static constexpr bool kRunLengthEncoding = false; -#endif + static constexpr bool kRunLengthEncoding = CompressedNeighborhoods::kRunLengthEncoding; -#ifdef KAMINPAR_COMPRESSION_STREAM_ENCODING /*! * Whether stream encoding is used. */ - static constexpr bool kStreamEncoding = true; -#else - /*! - * Whether stream encoding is used. - */ - static constexpr bool kStreamEncoding = false; -#endif - - static_assert( - !kRunLengthEncoding || !kStreamEncoding, - "Either run-length or stream encoding can be used for varints but not both." - ); + static constexpr bool kStreamEncoding = CompressedNeighborhoods::kStreamEncoding; -#ifdef KAMINPAR_COMPRESSION_ISOLATED_NODES_SEPARATION - /*! - * Whether the isolated nodes of the compressed graph are continuously stored at the end of the - * nodes array. - */ - static constexpr bool kIsolatedNodesSeparation = true; -#else /*! - * Whether the isolated nodes of the compressed graph are continuously stored at the end of the - * nodes array. + * Whether the isolated nodes of the compressed graph are continuously stored + * at the end of the nodes array. */ - static constexpr bool kIsolatedNodesSeparation = false; -#endif + static constexpr bool kIsolatedNodesSeparation = + CompressedNeighborhoods::kIsolatedNodesSeparation; /*! * Constructs a new compressed graph. * - * @param nodes The node array which stores for each node the offset in the compressed edges array - * of the first edge. - * @param compressed_edges The edge array which stores the edges for each node in a compressed - * format. 
- * @param node_weights The array of node weights in which the weights of each node in the - * respective entry are stored. - * @param edge_count The number of edges stored in the compressed edge array. - * @param max_degree The maximum degree of the graph. - * @param sorted Whether the nodes are stored by deg-buckets order. - * @param num_high_degree_nodes The number of nodes that have high degree. - * @param num_high_degree_parts The total number of parts that result from splitting high degree - * neighborhoods. - * @param num_interval_nodes The number of nodes that have at least one interval in its - * neighborhood. - * @param num_intervals The total number of intervals. + * @param compressed_neighborhoods The nodes, edges and edge weights that are stored in compressed + * form. + * @param node_weights The node weights. + * @param sorted Whether the nodes are stored in degree-buckets order. */ explicit CompressedGraph( - CompactStaticArray nodes, - StaticArray compressed_edges, + CompressedNeighborhoods compressed_neighborhoods, StaticArray node_weights, - EdgeID edge_count, - EdgeWeight total_edge_weight, - bool has_edge_weights, - NodeID max_degree, - bool sorted, - std::size_t num_high_degree_nodes, - std::size_t num_high_degree_parts, - std::size_t num_interval_nodes, - std::size_t num_intervals + bool sorted ); CompressedGraph(const CompressedGraph &) = delete; @@ -161,49 +100,22 @@ class CompressedGraph : public AbstractGraph { CompressedGraph(CompressedGraph &&) noexcept = default; CompressedGraph &operator=(CompressedGraph &&) noexcept = default; - template decltype(auto) reified(Lambda &&l) const { - return l(*this); - } - - // Direct member access -- used for some "low level" operations - [[nodiscard]] inline CompactStaticArray &raw_nodes() { - return _nodes; - } - - [[nodiscard]] inline const CompactStaticArray &raw_nodes() const { - return _nodes; - } - - [[nodiscard]] inline StaticArray &raw_node_weights() { - return _node_weights; - } - - 
[[nodiscard]] inline const StaticArray &raw_node_weights() const { - return _node_weights; - } - - [[nodiscard]] inline CompactStaticArray &&take_raw_nodes() { - return std::move(_nodes); - } - - [[nodiscard]] inline StaticArray &&take_raw_node_weights() { - return std::move(_node_weights); - } - - [[nodiscard]] const StaticArray &raw_compressed_edges() const { - return _compressed_edges; - } - + // // Size of the graph + // + [[nodiscard]] NodeID n() const final { - return static_cast(_nodes.size() - 1); + return _compressed_neighborhoods.num_nodes(); }; [[nodiscard]] EdgeID m() const final { - return _edge_count; + return _compressed_neighborhoods.num_edges(); } + // // Node and edge weights + // + [[nodiscard]] inline bool is_node_weighted() const final { return static_cast(n()) != total_node_weight(); } @@ -220,35 +132,20 @@ class CompressedGraph : public AbstractGraph { return _total_node_weight; } + void update_total_node_weight() final; + [[nodiscard]] inline bool is_edge_weighted() const final { - return _has_edge_weights; + return _compressed_neighborhoods.has_edge_weights(); } [[nodiscard]] inline EdgeWeight total_edge_weight() const final { return _total_edge_weight; } - // Low-level access to the graph structure - [[nodiscard]] inline NodeID max_degree() const final { - return _max_degree; - } - - [[nodiscard]] inline NodeID degree(const NodeID node) const final { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + _nodes[node]; - const std::uint8_t *next_node_data = data + _nodes[node + 1]; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) { - return 0; - } - - const auto [first_edge, degree, _, __] = decode_header(node, node_data, next_node_data); - return degree; - } - + // // Iterators for nodes / edges + // + [[nodiscard]] IotaRange nodes() const final { return {static_cast(0), n()}; } @@ -257,152 +154,94 @@ class CompressedGraph : public AbstractGraph { return 
{static_cast(0), m()}; } - // Parallel iteration - template inline void pfor_nodes(Lambda &&l) const { - tbb::parallel_for(static_cast(0), n(), std::forward(l)); - } - - template inline void pfor_edges(Lambda &&l) const { - tbb::parallel_for(static_cast(0), m(), std::forward(l)); + [[nodiscard]] inline IotaRange incident_edges(const NodeID node) const final { + return _compressed_neighborhoods.incident_edges(node); } - // Graph operations - [[nodiscard]] inline IotaRange incident_edges(const NodeID node) const { - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + _nodes[node]; - const std::uint8_t *next_node_data = data + _nodes[node + 1]; + // + // Node degree + // - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) { - return {0, 0}; - } + [[nodiscard]] inline NodeID max_degree() const final { + return _compressed_neighborhoods.max_degree(); + } - const auto [first_edge, degree, _, __] = decode_header(node, node_data, next_node_data); - return {first_edge, first_edge + degree}; + [[nodiscard]] inline NodeID degree(const NodeID node) const final { + return _compressed_neighborhoods.degree(node); } - template void adjacent_nodes(const NodeID u, Lambda &&l) const { - KASSERT(u < n()); + // + // Graph operations + // + template inline void adjacent_nodes(const NodeID u, Lambda &&l) const { constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - const auto invoke_caller = [&](const NodeID v, const EdgeWeight w) { + _compressed_neighborhoods.decode(u, [&](const EdgeID, const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(v, w); } else { return l(v); } - }; - - if (is_edge_weighted()) { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { - return invoke_caller(v, w); - }); - } else { - decode_neighborhood(u, 
[&](const EdgeID e, const NodeID v) { - return invoke_caller(v, 1); - }); - } + }); } - template void neighbors(const NodeID u, Lambda &&l) const { - KASSERT(u < n()); - + template inline void neighbors(const NodeID u, Lambda &&l) const { constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - const auto invoke_caller = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { + _compressed_neighborhoods.decode(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { if constexpr (kDecodeEdgeWeights) { return l(e, v, w); } else { return l(e, v); } - }; - - if (is_edge_weighted()) { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { - return invoke_caller(e, v, w); - }); - } else { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { - return invoke_caller(e, v, 1); - }); - } + }); } template - void neighbors(const NodeID u, const NodeID max_neighbor_count, Lambda &&l) const { - KASSERT(u < n()); - KASSERT(max_neighbor_count > 0); - + inline void neighbors(const NodeID u, const NodeID max_num_neighbors, Lambda &&l) const { constexpr bool kDontDecodeEdgeWeights = std::is_invocable_v; constexpr bool kDecodeEdgeWeights = std::is_invocable_v; static_assert(kDontDecodeEdgeWeights || kDecodeEdgeWeights); - using LambdaReturnType = std::conditional_t< - kDecodeEdgeWeights, - std::invoke_result, - std::invoke_result>::type; - constexpr bool kNonStoppable = std::is_void_v; - - const auto invoke_caller = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { - if constexpr (kDecodeEdgeWeights) { - return l(e, v, w); - } else { - return l(e, v); - } - }; - - NodeID num_neighbors_visited = 1; - const auto check_abort_condition = [&](const EdgeID e, const NodeID v, const EdgeWeight w) { - bool abort = num_neighbors_visited++ >= max_neighbor_count; + _compressed_neighborhoods + .decode(u, max_num_neighbors, 
[&](const EdgeID e, const NodeID v, const EdgeWeight w) { + if constexpr (kDecodeEdgeWeights) { + return l(e, v, w); + } else { + return l(e, v); + } + }); + } - if constexpr (kNonStoppable) { - invoke_caller(e, v, w); - } else { - abort |= invoke_caller(e, v, w); - } + // + // Parallel iteration + // - return abort; - }; + template inline void pfor_nodes(Lambda &&l) const { + tbb::parallel_for(static_cast(0), n(), std::forward(l)); + } - if (is_edge_weighted()) { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v, const EdgeWeight w) { - return check_abort_condition(e, v, w); - }); - } else { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { - return check_abort_condition(e, v, 1); - }); - } + template inline void pfor_edges(Lambda &&l) const { + tbb::parallel_for(static_cast(0), m(), std::forward(l)); } template - void pfor_neighbors( - const NodeID u, const NodeID max_neighbor_count, const NodeID grainsize, Lambda &&l + inline void pfor_neighbors( + const NodeID u, const NodeID max_num_neighbors, const NodeID grainsize, Lambda &&l ) const { - if (is_edge_weighted()) { - decode_neighborhood(u, std::forward(l)); - } else { - constexpr bool kInvokeDirectly = std::is_invocable_v; - - if constexpr (kInvokeDirectly) { - decode_neighborhood(u, [&](const EdgeID e, const NodeID v) { - return l(e, v, 1); - }); - } else { - decode_neighborhood(u, [&](auto &&l2) { - l([&](auto &&l3) { l2([&](const EdgeID e, const NodeID v) { return l3(e, v, 1); }); }); - }); - } - } + constexpr bool kParallelDecoding = true; + _compressed_neighborhoods.decode(u, std::forward(l)); } + // // Graph permutation + // + inline void set_permutation(StaticArray permutation) final { _permutation = std::move(permutation); } @@ -419,7 +258,18 @@ class CompressedGraph : public AbstractGraph { return std::move(_permutation); } + // // Degree buckets + // + + [[nodiscard]] inline bool sorted() const final { + return _sorted; + } + + [[nodiscard]] inline std::size_t 
number_of_buckets() const final { + return _number_of_buckets; + } + [[nodiscard]] inline std::size_t bucket_size(const std::size_t bucket) const final { return _buckets[bucket + 1] - _buckets[bucket]; } @@ -432,21 +282,17 @@ class CompressedGraph : public AbstractGraph { return first_node_in_bucket(bucket + 1); } - [[nodiscard]] inline std::size_t number_of_buckets() const final { - return _number_of_buckets; - } + // + // Isolated nodes + // - [[nodiscard]] inline bool sorted() const final { - return _sorted; - } - - void update_total_node_weight() final; - - void remove_isolated_nodes(const NodeID isolated_nodes); + void remove_isolated_nodes(const NodeID num_isolated_nodes); void integrate_isolated_nodes(); + // // Compressions statistics + // /*! * Returns the number of nodes that have high degree. @@ -454,7 +300,7 @@ class CompressedGraph : public AbstractGraph { * @returns The number of nodes that have high degree. */ [[nodiscard]] std::size_t num_high_degree_nodes() const { - return _num_high_degree_nodes; + return _compressed_neighborhoods.num_high_degree_nodes(); } /*! @@ -463,7 +309,7 @@ class CompressedGraph : public AbstractGraph { * @returns The total number of parts that result from splitting high degree neighborhoods. */ [[nodiscard]] std::size_t num_high_degree_parts() const { - return _num_high_degree_parts; + return _compressed_neighborhoods.num_high_degree_parts(); } /*! @@ -472,7 +318,7 @@ class CompressedGraph : public AbstractGraph { * @returns The number of nodes that have at least one interval. */ [[nodiscard]] std::size_t num_interval_nodes() const { - return _num_interval_nodes; + return _compressed_neighborhoods.num_interval_nodes(); } /*! @@ -481,7 +327,7 @@ class CompressedGraph : public AbstractGraph { * @returns The total number of intervals. */ [[nodiscard]] std::size_t num_intervals() const { - return _num_intervals; + return _compressed_neighborhoods.num_intervals(); } /*! 
@@ -491,7 +337,7 @@ class CompressedGraph : public AbstractGraph { */ [[nodiscard]] double compression_ratio() const { std::size_t uncompressed_size = (n() + 1) * sizeof(EdgeID) + m() * sizeof(NodeID); - std::size_t compressed_size = _nodes.allocated_size() + _compressed_edges.size(); + std::size_t compressed_size = _compressed_neighborhoods.memory_space(); if (is_node_weighted()) { uncompressed_size += n() * sizeof(NodeWeight); @@ -512,7 +358,7 @@ class CompressedGraph : public AbstractGraph { */ [[nodiscard]] std::int64_t size_reduction() const { std::size_t uncompressed_size = (n() + 1) * sizeof(EdgeID) + m() * sizeof(NodeID); - std::size_t compressed_size = _nodes.allocated_size() + _compressed_edges.size(); + std::size_t compressed_size = _compressed_neighborhoods.memory_space(); if (is_node_weighted()) { uncompressed_size += n() * sizeof(NodeWeight); @@ -532,328 +378,55 @@ class CompressedGraph : public AbstractGraph { * @return The amount of memory in bytes used by the data structure. 
*/ [[nodiscard]] std::size_t used_memory() const { - return _nodes.allocated_size() + _compressed_edges.size() + - _node_weights.size() * sizeof(NodeWeight); + return _compressed_neighborhoods.memory_space() + _node_weights.size() * sizeof(NodeWeight); } -private: - CompactStaticArray _nodes; - StaticArray _compressed_edges; - StaticArray _node_weights; - - EdgeID _edge_count; - bool _has_edge_weights; - NodeID _max_degree; - bool _sorted; - - NodeWeight _total_node_weight = kInvalidNodeWeight; - EdgeWeight _total_edge_weight = kInvalidEdgeWeight; - NodeWeight _max_node_weight = kInvalidNodeWeight; - - StaticArray _permutation; - - std::vector _buckets = std::vector(kNumberOfDegreeBuckets + 1); - std::size_t _number_of_buckets = 0; - - std::size_t _num_high_degree_nodes; - std::size_t _num_high_degree_parts; - std::size_t _num_interval_nodes; - std::size_t _num_intervals; - - void init_degree_buckets(); - - inline std::tuple decode_header( - const NodeID node, const std::uint8_t *node_data, const std::uint8_t *next_node_data - ) const { - const auto [first_edge, next_first_edge, uses_intervals, len] = [&] { - if constexpr (CompressedGraph::kIntervalEncoding) { - auto [first_edge, uses_intervals, len] = marked_varint_decode(node_data); - auto [next_first_edge, _, __] = marked_varint_decode(next_node_data); - - return std::make_tuple(first_edge, next_first_edge, uses_intervals, len); - } else { - auto [first_edge, len] = varint_decode(node_data); - auto [next_first_edge, _] = varint_decode(next_node_data); + // + // Direct member access -- used for some "low level" operations + // - return std::make_tuple(first_edge, next_first_edge, false, len); - } - }(); - - if constexpr (kIsolatedNodesSeparation) { - const EdgeID ungapped_first_edge = first_edge + node; - const NodeID degree = static_cast(1 + next_first_edge - first_edge); - return std::make_tuple(ungapped_first_edge, degree, uses_intervals, len); - } else { - const NodeID degree = static_cast(next_first_edge - 
first_edge); - return std::make_tuple(first_edge, degree, uses_intervals, len); - } + [[nodiscard]] inline CompactStaticArray &raw_nodes() { + return _compressed_neighborhoods.raw_nodes(); } - template - void decode_neighborhood(const NodeID node, Lambda &&l) const { - constexpr bool kInvokeDirectly = []() { - if constexpr (kHasEdgeWeights) { - return std::is_invocable_v; - } else { - return std::is_invocable_v; - } - }(); - - const std::uint8_t *data = _compressed_edges.data(); - - const std::uint8_t *node_data = data + _nodes[node]; - const std::uint8_t *next_node_data = data + _nodes[node + 1]; - - const bool is_isolated_node = node_data == next_node_data; - if (is_isolated_node) { - return; - } - - const auto header = decode_header(node, node_data, next_node_data); - const auto &edge = std::get<0>(header); - const auto °ree = std::get<1>(header); - const auto &uses_intervals = std::get<2>(header); - const auto &len = std::get<3>(header); - - node_data += len; - - if constexpr (kHighDegreeEncoding) { - if (degree >= kHighDegreeThreshold) { - decode_parts( - node_data, node, edge, degree, std::forward(l) - ); - return; - } - } - - invoke_indirect(std::forward(l), [&](auto &&l2) { - decode_edges( - node_data, node, edge, degree, uses_intervals, std::forward(l2) - ); - }); + [[nodiscard]] inline const CompactStaticArray &raw_nodes() const { + return _compressed_neighborhoods.raw_nodes(); } - template - void decode_parts( - const std::uint8_t *data, - const NodeID node, - const EdgeID edge, - const NodeID degree, - Lambda &&l - ) const { - constexpr bool kInvokeDirectly = []() { - if constexpr (kHasEdgeWeights) { - return std::is_invocable_v; - } else { - return std::is_invocable_v; - } - }(); - - const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); - - const auto iterate_part = [&](const NodeID part) { - const NodeID part_offset = *((NodeID *)(data + sizeof(NodeID) * part)); - const std::uint8_t *part_data = data + part_offset; - - const NodeID 
part_count_m1 = part_count - 1; - const bool last_part = part == part_count_m1; - - const EdgeID part_edge = edge + kHighDegreePartLength * part; - const NodeID part_degree = - last_part ? (degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; - - return invoke_indirect2(std::forward(l), [&](auto &&l2) { - return decode_edges( - part_data, node, part_edge, part_degree, true, std::forward(l2) - ); - }); - }; - - if constexpr (kParallelDecoding) { - tbb::parallel_for(0, part_count, std::forward(iterate_part)); - } else { - for (NodeID part = 0; part < part_count; ++part) { - const bool stop = iterate_part(part); - if (stop) { - return; - } - } - } + [[nodiscard]] inline StaticArray &raw_node_weights() { + return _node_weights; } - template - bool decode_edges( - const std::uint8_t *data, - const NodeID node, - EdgeID edge, - const NodeID degree, - bool uses_intervals, - Lambda &&l - ) const { - const EdgeID max_edge = edge + degree; - EdgeWeight prev_edge_weight = 0; - - if constexpr (kIntervalEncoding) { - if (uses_intervals) { - const bool stop = decode_intervals( - data, edge, prev_edge_weight, std::forward(l) - ); - if (stop) { - return true; - } - - if (edge == max_edge) { - return false; - } - } - } - - return decode_gaps( - data, node, edge, prev_edge_weight, max_edge, std::forward(l) - ); + [[nodiscard]] inline const StaticArray &raw_node_weights() const { + return _node_weights; } - template - bool decode_intervals( - const std::uint8_t *&data, EdgeID &edge, EdgeWeight &prev_edge_weight, Lambda &&l - ) const { - using LambdaReturnType = std::conditional_t< - kHasEdgeWeights, - std::invoke_result, - std::invoke_result>::type; - constexpr bool kNonStoppable = std::is_void_v; - - const auto invoke_caller = [&](const NodeID adjacent_node) { - if constexpr (kHasEdgeWeights) { - const auto [edge_weight_gap, length] = signed_varint_decode(data); - data += length; - - const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; - 
prev_edge_weight = edge_weight; - return l(edge, adjacent_node, edge_weight); - } else { - return l(edge, adjacent_node); - } - }; - - const NodeID interval_count = *((NodeID *)data); - data += sizeof(NodeID); - - NodeID previous_right_extreme = 2; - for (NodeID i = 0; i < interval_count; ++i) { - const auto [left_extreme_gap, left_extreme_gap_len] = varint_decode(data); - data += left_extreme_gap_len; - - const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); - data += interval_length_gap_len; - - const NodeID cur_left_extreme = left_extreme_gap + previous_right_extreme - 2; - const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; - previous_right_extreme = cur_left_extreme + cur_interval_len - 1; - - for (NodeID j = 0; j < cur_interval_len; ++j) { - if constexpr (kNonStoppable) { - invoke_caller(cur_left_extreme + j); - } else { - const bool stop = invoke_caller(cur_left_extreme + j); - if (stop) { - return true; - } - } - - edge += 1; - } - } - - return false; + [[nodiscard]] inline CompactStaticArray &&take_raw_nodes() { + return _compressed_neighborhoods.take_raw_nodes(); } - template - bool decode_gaps( - const std::uint8_t *data, - NodeID node, - EdgeID &edge, - EdgeWeight &prev_edge_weight, - const EdgeID max_edge, - Lambda &&l - ) const { - using LambdaReturnType = std::conditional_t< - kHasEdgeWeights, - std::invoke_result, - std::invoke_result>::type; - constexpr bool kNonStoppable = std::is_void_v; - - const auto invoke_caller = [&](const NodeID adjacent_node) { - if constexpr (kHasEdgeWeights) { - const auto [edge_weight_gap, length] = signed_varint_decode(data); - data += length; - - const EdgeWeight edge_weight = edge_weight_gap + prev_edge_weight; - prev_edge_weight = edge_weight; - return l(edge, adjacent_node, edge_weight); - } else { - return l(edge, adjacent_node); - } - }; - - const auto [first_gap, first_gap_len] = signed_varint_decode(data); - data += first_gap_len; - - const NodeID 
first_adjacent_node = static_cast(first_gap + node); - NodeID prev_adjacent_node = first_adjacent_node; + [[nodiscard]] inline StaticArray &&take_raw_node_weights() { + return std::move(_node_weights); + } - if constexpr (kNonStoppable) { - invoke_caller(first_adjacent_node); - } else { - const bool stop = invoke_caller(first_adjacent_node); - if (stop) { - return true; - } - } - edge += 1; + [[nodiscard]] const StaticArray &raw_compressed_edges() const { + return _compressed_neighborhoods.raw_compressed_edges(); + } - /* - const auto handle_gap = [&](const NodeID gap) { - const NodeID adjacent_node = gap + prev_adjacent_node + 1; - prev_adjacent_node = adjacent_node; +private: + CompressedNeighborhoods _compressed_neighborhoods; + StaticArray _node_weights; - if constexpr (kNonStoppable) { - l(edge++, adjacent_node); - } else { - return l(edge++, adjacent_node); - } - }; - */ - - if constexpr (kRunLengthEncoding) { - // VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); - // rl_decoder.decode(std::forward(handle_gap)); - } else if constexpr (kStreamEncoding) { - // VarIntStreamDecoder sv_encoder(data, max_edge - edge); - // sv_encoder.decode(std::forward(handle_gap)); - } else { - while (edge != max_edge) { - const auto [gap, gap_len] = varint_decode(data); - data += gap_len; - - const NodeID adjacent_node = gap + prev_adjacent_node + 1; - prev_adjacent_node = adjacent_node; - - if constexpr (kNonStoppable) { - invoke_caller(adjacent_node); - } else { - const bool stop = invoke_caller(adjacent_node); - if (stop) { - return true; - } - } + NodeWeight _max_node_weight = kInvalidNodeWeight; + NodeWeight _total_node_weight = kInvalidNodeWeight; + EdgeWeight _total_edge_weight = kInvalidEdgeWeight; - edge += 1; - } - } + StaticArray _permutation; + bool _sorted; + std::vector _buckets = std::vector(kNumberOfDegreeBuckets + 1); + std::size_t _number_of_buckets = 0; - return false; - } + void init_degree_buckets(); }; } // namespace kaminpar::shm diff --git 
a/kaminpar-shm/datastructures/compressed_graph_builder.cc b/kaminpar-shm/datastructures/compressed_graph_builder.cc deleted file mode 100644 index 74b1bf46..00000000 --- a/kaminpar-shm/datastructures/compressed_graph_builder.cc +++ /dev/null @@ -1,445 +0,0 @@ -/******************************************************************************* - * Sequential and parallel builder for compressed graphs. - * - * @file: compressed_graph_builder.cc - * @author: Daniel Salwasser - * @date: 03.05.2024 - ******************************************************************************/ -#include "kaminpar-shm/datastructures/compressed_graph_builder.h" - -#include -#include - -#include -#include -#include - -#include "kaminpar-shm/kaminpar.h" - -#include "kaminpar-common/heap_profiler.h" -#include "kaminpar-common/varint_codec.h" - -namespace kaminpar::shm { - -namespace { - -template -[[nodiscard]] std::size_t compressed_edge_array_max_size( - const NodeID num_nodes, const EdgeID num_edges, const bool has_edge_weights -) { - std::size_t edge_id_width; - if constexpr (kActualNumEdges) { - if constexpr (CompressedGraph::kIntervalEncoding) { - edge_id_width = marked_varint_length(num_edges); - } else { - edge_id_width = varint_length(num_edges); - } - } else { - edge_id_width = varint_max_length(); - } - - std::size_t max_size = num_nodes * edge_id_width + num_edges * varint_length(num_nodes); - - if constexpr (CompressedGraph::kHighDegreeEncoding) { - if constexpr (CompressedGraph::kIntervalEncoding) { - max_size += 2 * num_nodes * varint_max_length(); - } else { - max_size += num_nodes * varint_max_length(); - } - - max_size += (num_edges / CompressedGraph::kHighDegreePartLength) * varint_max_length(); - } - - if (has_edge_weights) { - max_size += num_edges * varint_max_length(); - } - - return max_size; -} - -} // namespace - -CompressedEdgesBuilder::CompressedEdgesBuilder( - const NodeID num_nodes, const EdgeID num_edges, bool has_edge_weights -) - : 
_has_edge_weights(has_edge_weights) { - const std::size_t max_size = - compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); - _compressed_data_start = heap_profiler::overcommit_memory(max_size); - _compressed_data = _compressed_data_start.get(); - _compressed_data_max_size = 0; -} - -CompressedEdgesBuilder::CompressedEdgesBuilder( - const NodeID num_nodes, const EdgeID num_edges, const NodeID max_degree, bool has_edge_weights -) - : _has_edge_weights(has_edge_weights) { - const std::size_t max_size = - compressed_edge_array_max_size(num_nodes, max_degree, has_edge_weights); - _compressed_data_start = heap_profiler::overcommit_memory(max_size); - _compressed_data = _compressed_data_start.get(); - _compressed_data_max_size = 0; -} - -CompressedEdgesBuilder::~CompressedEdgesBuilder() { - if constexpr (kHeapProfiling) { - if (_compressed_data_start) { - const auto prev_compressed_data_size = - static_cast(_compressed_data - _compressed_data_start.get()); - const std::size_t compressed_data_size = - std::max(_compressed_data_max_size, prev_compressed_data_size); - - heap_profiler::HeapProfiler::global().record_alloc( - _compressed_data_start.get(), compressed_data_size - ); - } - } -} - -void CompressedEdgesBuilder::init(const EdgeID first_edge) { - const auto prev_compressed_data_size = - static_cast(_compressed_data - _compressed_data_start.get()); - _compressed_data_max_size = std::max(_compressed_data_max_size, prev_compressed_data_size); - _compressed_data = _compressed_data_start.get(); - - _edge = first_edge; - _max_degree = 0; - _total_edge_weight = 0; - - _num_high_degree_nodes = 0; - _num_high_degree_parts = 0; - _num_interval_nodes = 0; - _num_intervals = 0; -} - -std::size_t CompressedEdgesBuilder::size() const { - return static_cast(_compressed_data - _compressed_data_start.get()); -} - -const std::uint8_t *CompressedEdgesBuilder::compressed_data() const { - return _compressed_data_start.get(); -} - -heap_profiler::unique_ptr 
CompressedEdgesBuilder::take_compressed_data() { - return std::move(_compressed_data_start); -} - -std::size_t CompressedEdgesBuilder::max_degree() const { - return _max_degree; -} - -std::int64_t CompressedEdgesBuilder::total_edge_weight() const { - return _total_edge_weight; -} - -std::size_t CompressedEdgesBuilder::num_high_degree_nodes() const { - return _num_high_degree_nodes; -} - -std::size_t CompressedEdgesBuilder::num_high_degree_parts() const { - return _num_high_degree_parts; -} - -std::size_t CompressedEdgesBuilder::num_interval_nodes() const { - return _num_interval_nodes; -} - -std::size_t CompressedEdgesBuilder::num_intervals() const { - return _num_intervals; -} - -std::size_t CompressedEdgesBuilder::num_adjacent_node_bytes() const { - return _num_adjacent_node_bytes; -} - -std::size_t CompressedEdgesBuilder::num_edge_weights_bytes() const { - return _num_edge_weights_bytes; -} - -CompressedGraph CompressedGraphBuilder::compress(const CSRGraph &graph) { - const bool store_node_weights = graph.is_node_weighted(); - const bool store_edge_weights = graph.is_edge_weighted(); - - CompressedGraphBuilder builder( - graph.n(), graph.m(), store_node_weights, store_edge_weights, graph.sorted() - ); - - std::vector> neighbourhood; - neighbourhood.reserve(graph.max_degree()); - - for (const NodeID u : graph.nodes()) { - graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight w) { - neighbourhood.emplace_back(v, w); - }); - - builder.add_node(u, neighbourhood); - if (store_node_weights) { - builder.add_node_weight(u, graph.node_weight(u)); - } - - neighbourhood.clear(); - } - - return builder.build(); -} - -CompressedGraphBuilder::CompressedGraphBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted -) - : _compressed_edges_builder(num_nodes, num_edges, has_edge_weights), - _store_edge_weights(has_edge_weights) { - KASSERT(num_nodes < std::numeric_limits::max() - 1); - 
const std::size_t max_size = - compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); - - _nodes.resize(math::byte_width(max_size), num_nodes + 1); - _sorted = sorted; - - _compressed_edges_builder.init(0); - _num_edges = num_edges; - - if (has_node_weights) { - _node_weights.resize(num_nodes); - } - - _store_node_weights = has_node_weights; - _total_node_weight = 0; -} - -void CompressedGraphBuilder::add_node( - const NodeID node, std::vector> &neighbourhood -) { - KASSERT(node + 1 < _nodes.size()); - - const EdgeID offset = _compressed_edges_builder.add(node, neighbourhood); - _nodes.write(node, offset); -} - -void CompressedGraphBuilder::add_node_weight(const NodeID node, const NodeWeight weight) { - KASSERT(_store_node_weights); - - _total_node_weight += weight; - _node_weights[node] = weight; -} - -CompressedGraph CompressedGraphBuilder::build() { - std::size_t compressed_edges_size = _compressed_edges_builder.size(); - heap_profiler::unique_ptr wrapped_compressed_edges = - _compressed_edges_builder.take_compressed_data(); - - // Store in the last entry of the node array the offset one after the last byte belonging to the - // last node. - _nodes.write(_nodes.size() - 1, static_cast(compressed_edges_size)); - - // Store at the end of the compressed edge array the (gap of the) id of the last edge. This - // ensures that the the degree of the last node can be computed from the difference between the - // last two first edge ids. 
- const EdgeID last_edge = _num_edges; - std::uint8_t *compressed_edges_end = wrapped_compressed_edges.get() + compressed_edges_size; - if constexpr (CompressedGraph::kIntervalEncoding) { - compressed_edges_size += marked_varint_encode(last_edge, false, compressed_edges_end); - } else { - compressed_edges_size += varint_encode(last_edge, compressed_edges_end); - } - - // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to - // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. - if constexpr (CompressedGraph::kStreamEncoding) { - compressed_edges_size += 15; - } - - if constexpr (kHeapProfiling) { - heap_profiler::HeapProfiler::global().record_alloc( - wrapped_compressed_edges.get(), compressed_edges_size - ); - } - - RECORD("compressed_edges") - StaticArray compressed_edges( - compressed_edges_size, std::move(wrapped_compressed_edges) - ); - - const bool unit_node_weights = static_cast(_total_node_weight + 1) == _nodes.size(); - if (unit_node_weights) { - _node_weights.free(); - } - - return CompressedGraph( - std::move(_nodes), - std::move(compressed_edges), - std::move(_node_weights), - _num_edges, - _compressed_edges_builder.total_edge_weight(), - _store_edge_weights, - _compressed_edges_builder.max_degree(), - _sorted, - _compressed_edges_builder.num_high_degree_nodes(), - _compressed_edges_builder.num_high_degree_parts(), - _compressed_edges_builder.num_interval_nodes(), - _compressed_edges_builder.num_intervals() - ); -} - -std::size_t CompressedGraphBuilder::currently_used_memory() const { - return _nodes.allocated_size() + _compressed_edges_builder.size() + - _node_weights.size() * sizeof(NodeWeight); -} - -std::int64_t CompressedGraphBuilder::total_node_weight() const { - return _total_node_weight; -} - -std::int64_t CompressedGraphBuilder::total_edge_weight() const { - return _compressed_edges_builder.total_edge_weight(); -} - -CompressedGraph 
ParallelCompressedGraphBuilder::compress(const CSRGraph &graph) { - return ParallelCompressedGraphBuilder::compress( - graph.n(), - graph.m(), - graph.is_node_weighted(), - graph.is_edge_weighted(), - graph.sorted(), - [](const NodeID u) { return u; }, - [&](const NodeID u) { return graph.degree(u); }, - [&](const NodeID u) { return graph.first_edge(u); }, - [&](const EdgeID e) { return graph.edge_target(e); }, - [&](const NodeID u) { return graph.node_weight(u); }, - [&](const EdgeID e) { return graph.edge_weight(e); } - ); -} - -ParallelCompressedGraphBuilder::ParallelCompressedGraphBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted -) { - KASSERT(num_nodes != std::numeric_limits::max() - 1); - const std::size_t max_size = - compressed_edge_array_max_size(num_nodes, num_edges, has_edge_weights); - - _nodes.resize(math::byte_width(max_size), num_nodes + 1); - _sorted = sorted; - - _compressed_edges = heap_profiler::overcommit_memory(max_size); - _compressed_edges_size = 0; - _num_edges = num_edges; - _has_edge_weights = has_edge_weights; - - if (has_node_weights) { - _node_weights.resize(num_nodes, static_array::noinit); - } - - _max_degree = 0; - _total_node_weight = 0; - _total_edge_weight = 0; - - _num_high_degree_nodes = 0; - _num_high_degree_parts = 0; - _num_interval_nodes = 0; - _num_intervals = 0; -} - -void ParallelCompressedGraphBuilder::add_node(const NodeID node, const EdgeID offset) { - _nodes.write(node, offset); -} - -void ParallelCompressedGraphBuilder::add_compressed_edges( - const EdgeID offset, const EdgeID length, const std::uint8_t *data -) { - __atomic_fetch_add(&_compressed_edges_size, length, __ATOMIC_RELAXED); - std::memcpy(_compressed_edges.get() + offset, data, length); -} - -void ParallelCompressedGraphBuilder::add_node_weight(const NodeID node, const NodeWeight weight) { - _node_weights[node] = weight; -} - -void 
ParallelCompressedGraphBuilder::record_local_statistics( - NodeID max_degree, - NodeWeight node_weight, - EdgeWeight edge_weight, - std::size_t num_high_degree_nodes, - std::size_t num_high_degree_parts, - std::size_t num_interval_nodes, - std::size_t num_intervals -) { - NodeID global_max_degree = __atomic_load_n(&_max_degree, __ATOMIC_RELAXED); - while (max_degree > global_max_degree) { - const bool success = __atomic_compare_exchange_n( - &_max_degree, &global_max_degree, max_degree, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED - ); - - if (success) { - break; - } - } - - __atomic_fetch_add(&_total_node_weight, node_weight, __ATOMIC_RELAXED); - __atomic_fetch_add(&_total_edge_weight, edge_weight, __ATOMIC_RELAXED); - - __atomic_fetch_add(&_num_high_degree_nodes, num_high_degree_nodes, __ATOMIC_RELAXED); - __atomic_fetch_add(&_num_high_degree_parts, num_high_degree_parts, __ATOMIC_RELAXED); - __atomic_fetch_add(&_num_interval_nodes, num_interval_nodes, __ATOMIC_RELAXED); - __atomic_fetch_add(&_num_intervals, num_intervals, __ATOMIC_RELAXED); -} - -CompressedGraph ParallelCompressedGraphBuilder::build() { - // Store in the last entry of the node array the offset one after the last byte belonging to the - // last node. - _nodes.write(_nodes.size() - 1, _compressed_edges_size); - - // Store at the end of the compressed edge array the (gap of the) id of the last edge. This - // ensures that the the degree of the last node can be computed from the difference between the - // last two first edge ids. 
- std::uint8_t *_compressed_edges_end = _compressed_edges.get() + _compressed_edges_size; - const EdgeID last_edge = _num_edges; - if constexpr (CompressedGraph::kIntervalEncoding) { - _compressed_edges_size += marked_varint_encode(last_edge, false, _compressed_edges_end); - } else { - _compressed_edges_size += varint_encode(last_edge, _compressed_edges_end); - } - - // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to - // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. - if constexpr (CompressedGraph::kStreamEncoding) { - _compressed_edges_size += 15; - } - - if constexpr (kHeapProfiling) { - heap_profiler::HeapProfiler::global().record_alloc( - _compressed_edges.get(), _compressed_edges_size - ); - } - - RECORD("compressed_edges") - StaticArray compressed_edges(_compressed_edges_size, std::move(_compressed_edges)); - - const bool unit_node_weights = static_cast(_total_node_weight + 1) == _nodes.size(); - if (unit_node_weights) { - _node_weights.free(); - } - - return CompressedGraph( - std::move(_nodes), - std::move(compressed_edges), - std::move(_node_weights), - _num_edges, - _total_edge_weight, - _has_edge_weights, - _max_degree, - _sorted, - _num_high_degree_nodes, - _num_high_degree_parts, - _num_interval_nodes, - _num_intervals - ); -} - -} // namespace kaminpar::shm diff --git a/kaminpar-shm/datastructures/compressed_graph_builder.h b/kaminpar-shm/datastructures/compressed_graph_builder.h deleted file mode 100644 index d34f183c..00000000 --- a/kaminpar-shm/datastructures/compressed_graph_builder.h +++ /dev/null @@ -1,1011 +0,0 @@ -/******************************************************************************* - * Sequential and parallel builder for compressed graphs. 
- * - * @file: compressed_graph_builder.h - * @author: Daniel Salwasser - * @date: 03.05.2024 - ******************************************************************************/ -#pragma once - -#include - -#include "kaminpar-shm/datastructures/compressed_graph.h" -#include "kaminpar-shm/datastructures/csr_graph.h" - -#include "kaminpar-common/datastructures/concurrent_circular_vector.h" -#include "kaminpar-common/datastructures/maxsize_vector.h" -#include "kaminpar-common/timer.h" - -namespace kaminpar::shm { -SET_DEBUG(false); - -class CompressedEdgesBuilder { - using NodeID = CompressedGraph::NodeID; - using NodeWeight = CompressedGraph::NodeWeight; - using EdgeID = CompressedGraph::EdgeID; - using EdgeWeight = CompressedGraph::EdgeWeight; - using SignedID = CompressedGraph::SignedID; - -public: - /*! - * Constructs a new CompressedEdgesBuilder. - * - * @param num_nodes The number of nodes of the graph to compress. - * @param num_edges The number of edges of the graph to compress. - * @param has_edge_weights Whether the graph to compress has edge weights. - */ - CompressedEdgesBuilder(const NodeID num_nodes, const EdgeID num_edges, bool has_edge_weights); - - /*! - * Constructs a new CompressedEdgesBuilder where the maxmimum degree specifies the number of edges - * that are compressed at once. - * - * @param num_nodes The number of nodes of the graph to compress. - * @param num_edges The number of edges of the graph to compress. - * @param max_degree The maximum degree of the graph to compress. - * @param has_edge_weights Whether the graph to compress has edge weights. 
- */ - CompressedEdgesBuilder( - const NodeID num_nodes, const EdgeID num_edges, const NodeID max_degree, bool has_edge_weights - ); - - ~CompressedEdgesBuilder(); - - CompressedEdgesBuilder(const CompressedEdgesBuilder &) = delete; - CompressedEdgesBuilder &operator=(const CompressedEdgesBuilder &) = delete; - - CompressedEdgesBuilder(CompressedEdgesBuilder &&) noexcept = default; - CompressedEdgesBuilder &operator=(CompressedEdgesBuilder &&) noexcept = delete; - - /*! - * Initializes/resets the builder. - * - * @param first_edge The first edge ID of the first node to be added. - */ - void init(const EdgeID first_edge); - - /*! - * Adds the (possibly weighted) neighborhood of a node. Note that the neighbourhood vector is - * modified. - * - * @param node The node whose neighborhood to add. - * @param neighbourhood The neighbourhood of the node to add. - * @return The offset into the compressed edge array of the node. - */ - template EdgeID add(const NodeID node, Container &neighbourhood) { - if constexpr (std::is_same_v>) { - std::sort(neighbourhood.begin(), neighbourhood.end(), [](const auto &a, const auto &b) { - return a.first < b.first; - }); - } else { - std::sort(neighbourhood.begin(), neighbourhood.end()); - } - - return add_node(node, neighbourhood); - } - - /*! - * Returns the number of bytes that the compressed data of the added neighborhoods take up. - * - * @return The number of bytes that the compressed data of the added neighborhoods take up. - */ - [[nodiscard]] std::size_t size() const; - - /*! - * Returns a pointer to the start of the compressed data. - * - * @return A pointer to the start of the compressed data. - */ - [[nodiscard]] const std::uint8_t *compressed_data() const; - - /*! - * Returns ownership of the compressed data - * - * @return Ownership of the compressed data. 
- */ - [[nodiscard]] heap_profiler::unique_ptr take_compressed_data(); - - [[nodiscard]] std::size_t max_degree() const; - [[nodiscard]] std::int64_t total_edge_weight() const; - - [[nodiscard]] std::size_t num_high_degree_nodes() const; - [[nodiscard]] std::size_t num_high_degree_parts() const; - [[nodiscard]] std::size_t num_interval_nodes() const; - [[nodiscard]] std::size_t num_intervals() const; - - [[nodiscard]] std::size_t num_adjacent_node_bytes() const; - [[nodiscard]] std::size_t num_edge_weights_bytes() const; - -private: - heap_profiler::unique_ptr _compressed_data_start; - std::uint8_t *_compressed_data; - std::size_t _compressed_data_max_size; - - bool _has_edge_weights; - - EdgeID _edge; - NodeID _max_degree; - EdgeWeight _total_edge_weight; - - // Graph compression statistics - std::size_t _num_high_degree_nodes; - std::size_t _num_high_degree_parts; - std::size_t _num_interval_nodes; - std::size_t _num_intervals; - - // Debug graph compression statistics - std::size_t _num_adjacent_node_bytes; - std::size_t _num_edge_weights_bytes; - - template EdgeID add_node(const NodeID node, Container &neighbourhood) { - // The offset into the compressed edge array to the start of the neighbourhood. - const auto offset = static_cast(_compressed_data - _compressed_data_start.get()); - - const NodeID degree = neighbourhood.size(); - if (degree == 0) { - return offset; - } - - _max_degree = std::max(_max_degree, degree); - - // Store a pointer to the first byte of the first edge of this neighborhood. This byte encodes - // in one of its bits whether interval encoding is used for this node, i.e., whether the nodes - // has intervals in its neighbourhood. - std::uint8_t *marked_byte = _compressed_data; - - // Store only the first edge for the source node. The degree can be obtained by determining the - // difference between the first edge ids of a node and the next node. 
Additionally, store the - // first edge as a gap when the isolated nodes are continuously stored at the end of the nodes - // array. - const EdgeID first_edge = _edge; - if constexpr (CompressedGraph::kIntervalEncoding) { - _compressed_data += marked_varint_encode(first_edge, false, _compressed_data); - } else { - _compressed_data += varint_encode(first_edge, _compressed_data); - } - - _edge += degree; - - // If high-degree encoding is used then split the neighborhood if the degree crosses a - // threshold. The neighborhood is split into equally sized parts (except possible the last part) - // and each part is encoded independently. Furthermore, the offset at which the part is encoded - // is also stored. - if constexpr (CompressedGraph::kHighDegreeEncoding) { - const bool split_neighbourhood = degree >= CompressedGraph::kHighDegreeThreshold; - - if (split_neighbourhood) { - const NodeID part_count = math::div_ceil(degree, CompressedGraph::kHighDegreePartLength); - const NodeID last_part_length = ((degree % CompressedGraph::kHighDegreePartLength) == 0) - ? CompressedGraph::kHighDegreePartLength - : (degree % CompressedGraph::kHighDegreePartLength); - - uint8_t *part_ptr = _compressed_data; - _compressed_data += sizeof(NodeID) * part_count; - - for (NodeID i = 0; i < part_count; ++i) { - const bool last_part = (i + 1) == part_count; - const NodeID part_length = - last_part ? 
last_part_length : CompressedGraph::kHighDegreePartLength; - - auto part_begin = neighbourhood.begin() + i * CompressedGraph::kHighDegreePartLength; - auto part_end = part_begin + part_length; - - std::uint8_t *cur_part_ptr = part_ptr + sizeof(NodeID) * i; - *((NodeID *)cur_part_ptr) = static_cast(_compressed_data - part_ptr); - - using Neighbour = typename Container::value_type; - add_edges(node, nullptr, std::span(part_begin, part_end)); - } - - _num_high_degree_nodes += 1; - _num_high_degree_parts += part_count; - return offset; - } - } - - add_edges(node, marked_byte, std::forward(neighbourhood)); - return offset; - } - - template - void add_edges(const NodeID node, std::uint8_t *marked_byte, Container &&neighbourhood) { - using Neighbour = std::remove_reference_t::value_type; - constexpr bool kHasEdgeWeights = std::is_same_v>; - - const auto fetch_adjacent_node = [&](const NodeID i) { - if constexpr (kHasEdgeWeights) { - return neighbourhood[i].first; - } else { - return neighbourhood[i]; - } - }; - - const auto set_adjacent_node = [&](const NodeID i, const NodeID value) { - if constexpr (kHasEdgeWeights) { - neighbourhood[i].first = value; - } else { - neighbourhood[i] = value; - } - }; - - NodeID local_degree = neighbourhood.size(); - EdgeWeight prev_edge_weight = 0; - - // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at - // least kIntervalLengthTreshold. Instead of storing all nodes, only encode the left extreme i - // and the length j - i + 1. Left extremes are stored using the differences between each left - // extreme and the previous right extreme minus 2 (because there must be at least one integer - // between the end of an interval and the beginning of the next one), except the first left - // extreme, which is stored directly. The lengths are decremented by kIntervalLengthTreshold, - // the minimum length of an interval. 
- if constexpr (CompressedGraph::kIntervalEncoding) { - NodeID interval_count = 0; - - // Save the pointer to the interval count and skip the amount of bytes needed to store the - // interval count as we can only determine the amount of intervals after finding all of - // them. - std::uint8_t *interval_count_ptr = _compressed_data; - _compressed_data += sizeof(NodeID); - - if (local_degree >= CompressedGraph::kIntervalLengthTreshold) { - NodeID interval_len = 1; - NodeID previous_right_extreme = 2; - NodeID prev_adjacent_node = fetch_adjacent_node(0); - - for (NodeID i = 1; i < neighbourhood.size(); ++i) { - const NodeID adjacent_node = fetch_adjacent_node(i); - - if (prev_adjacent_node + 1 == adjacent_node) { - ++interval_len; - - // The interval ends if there are no more nodes or the next node is not the increment of - // the current node. - if (i + 1 == neighbourhood.size() || fetch_adjacent_node(i + 1) != adjacent_node + 1) { - if (interval_len >= CompressedGraph::kIntervalLengthTreshold) { - const NodeID left_extreme = adjacent_node + 1 - interval_len; - const NodeID left_extreme_gap = left_extreme + 2 - previous_right_extreme; - const NodeID interval_length_gap = - interval_len - CompressedGraph::kIntervalLengthTreshold; - - const std::size_t left_extreme_gap_len = - varint_encode(left_extreme_gap, _compressed_data); - _compressed_data += left_extreme_gap_len; - IF_DBG _num_adjacent_node_bytes += left_extreme_gap_len; - - const std::size_t interval_length_gap_len = - varint_encode(interval_length_gap, _compressed_data); - _compressed_data += interval_length_gap_len; - IF_DBG _num_adjacent_node_bytes += interval_length_gap_len; - - for (NodeID j = 0; j < interval_len; ++j) { - const NodeID k = i + 1 + j - interval_len; - - // Set the adjacent node to a special value, which indicates for the gap encoder - // that the node has been encoded through an interval. 
- set_adjacent_node(k, std::numeric_limits::max()); - - if constexpr (kHasEdgeWeights) { - if (_has_edge_weights) { - const EdgeWeight edge_weight = neighbourhood[k].second; - const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; - - const std::size_t edge_weight_gap_len = - signed_varint_encode(edge_weight_gap, _compressed_data); - _compressed_data += edge_weight_gap_len; - IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; - - prev_edge_weight = edge_weight; - _total_edge_weight += edge_weight; - } - } - } - - previous_right_extreme = adjacent_node; - - local_degree -= interval_len; - interval_count += 1; - } - - interval_len = 1; - } - } - - prev_adjacent_node = adjacent_node; - } - } - - // If intervals have been encoded store the interval count and set the bit in the marked byte - // indicating that interval encoding has been used for the neighbourhood if the marked byte is - // given. Otherwise, fix the amount of bytes stored as we don't store the interval count if no - // intervals have been encoded. - if (marked_byte == nullptr) { - *((NodeID *)interval_count_ptr) = interval_count; - _num_adjacent_node_bytes += sizeof(NodeID); - } else if (interval_count > 0) { - *((NodeID *)interval_count_ptr) = interval_count; - *marked_byte |= 0b01000000; - _num_adjacent_node_bytes += sizeof(NodeID); - } else { - _compressed_data -= sizeof(NodeID); - } - - if (interval_count > 0) { - _num_interval_nodes += 1; - _num_intervals += interval_count; - } - - // If all incident edges have been compressed using intervals then gap encoding cannot be - // applied. - if (local_degree == 0) { - return; - } - } - - // Store the remaining adjacent nodes using gap encoding. That is instead of directly storing - // the nodes v_1, v_2, ..., v_{k - 1}, v_k, store the gaps v_1 - u, v_2 - v_1 - 1, ..., v_k - - // v_{k - 1} - 1 between the nodes, where u is the source node. 
Note that all gaps except the - // first one have to be positive as we sorted the nodes in ascending order. Thus, only for the - // first gap the sign is additionally stored. - NodeID i = 0; - - // Go to the first adjacent node that has not been encoded through an interval. - if constexpr (CompressedGraph::kIntervalEncoding) { - while (fetch_adjacent_node(i) == std::numeric_limits::max()) { - i += 1; - } - } - - const NodeID first_adjacent_node = fetch_adjacent_node(i); - const SignedID first_gap = first_adjacent_node - static_cast(node); - - const std::size_t first_gap_len = signed_varint_encode(first_gap, _compressed_data); - _compressed_data += first_gap_len; - IF_DBG _num_adjacent_node_bytes += first_gap_len; - - if constexpr (kHasEdgeWeights) { - if (_has_edge_weights) { - const EdgeWeight first_edge_weight = neighbourhood[i].second; - const EdgeWeight first_edge_weight_gap = first_edge_weight - prev_edge_weight; - - const std::size_t first_edge_weight_gap_len = - signed_varint_encode(first_edge_weight_gap, _compressed_data); - _compressed_data += first_edge_weight_gap_len; - IF_DBG _num_edge_weights_bytes += first_edge_weight_gap_len; - - prev_edge_weight = first_edge_weight; - _total_edge_weight += first_edge_weight; - } - } - - i += 1; - - VarIntRunLengthEncoder rl_encoder(_compressed_data); - VarIntStreamEncoder sv_encoder(_compressed_data, local_degree - 1); - - NodeID prev_adjacent_node = first_adjacent_node; - while (i < neighbourhood.size()) { - const NodeID adjacent_node = fetch_adjacent_node(i); - - // Skip the adjacent node since it has been encoded through an interval. 
- if constexpr (CompressedGraph::kIntervalEncoding) { - if (adjacent_node == std::numeric_limits::max()) { - i += 1; - continue; - } - } - - const NodeID gap = adjacent_node - prev_adjacent_node - 1; - if constexpr (CompressedGraph::kRunLengthEncoding) { - const std::size_t gap_len = rl_encoder.add(gap); - _compressed_data += gap_len; - IF_DBG _num_adjacent_node_bytes += gap_len; - } else if constexpr (CompressedGraph::kStreamEncoding) { - const std::size_t gap_len = sv_encoder.add(gap); - _compressed_data += gap_len; - IF_DBG _num_adjacent_node_bytes += gap_len; - } else { - const std::size_t gap_len = varint_encode(gap, _compressed_data); - _compressed_data += gap_len; - IF_DBG _num_adjacent_node_bytes += gap_len; - } - - if constexpr (kHasEdgeWeights) { - if (_has_edge_weights) { - const EdgeWeight edge_weight = neighbourhood[i].second; - const EdgeWeight edge_weight_gap = edge_weight - prev_edge_weight; - - const std::size_t edge_weight_gap_len = - signed_varint_encode(edge_weight_gap, _compressed_data); - _compressed_data += edge_weight_gap_len; - IF_DBG _num_edge_weights_bytes += edge_weight_gap_len; - - prev_edge_weight = edge_weight; - _total_edge_weight += edge_weight; - } - } - - prev_adjacent_node = adjacent_node; - i += 1; - } - - if constexpr (CompressedGraph::kRunLengthEncoding) { - rl_encoder.flush(); - } else if constexpr (CompressedGraph::kStreamEncoding) { - sv_encoder.flush(); - } - } -}; - -/*! - * A sequential builder that constructs compressed graphs. - */ -class CompressedGraphBuilder { - using NodeID = CompressedGraph::NodeID; - using NodeWeight = CompressedGraph::NodeWeight; - using EdgeID = CompressedGraph::EdgeID; - using EdgeWeight = CompressedGraph::EdgeWeight; - using SignedID = CompressedGraph::SignedID; - -public: - /*! - * Compresses a graph in compressed sparse row format. - * - * @param graph The graph to compress. - * @return The compressed input graph. - */ - static CompressedGraph compress(const CSRGraph &graph); - - /*! 
- * Constructs a new CompressedGraphBuilder. - * - * @param node_count The number of nodes of the graph to compress. - * @param edge_count The number of edges of the graph to compress. - * @param has_node_weights Whether node weights are stored. - * @param has_edge_weights Whether edge weights are stored. - * @param sorted Whether the nodes to add are stored in degree-bucket order. - */ - CompressedGraphBuilder( - const NodeID node_count, - const EdgeID edge_count, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted - ); - - /*! - * Adds a node to the compressed graph. Note that the neighbourhood vector is modified. - * - * @param node The node to add. - * @param neighbourhood The neighbourhood of the node to add. - */ - void add_node(const NodeID node, std::vector> &neighbourhood); - - /*! - * Adds a node weight to the compressed graph. - * - * @param node The node whose weight to add. - * @param weight The weight to store. - */ - void add_node_weight(const NodeID node, const NodeWeight weight); - - /*! - * Builds the compressed graph. The builder must then be reinitialized in order to compress - * another graph. - * - * @return The compressed graph that has been build. - */ - CompressedGraph build(); - - /*! - * Returns the used memory of the compressed edge array. - * - * @return The used memory of the compressed edge array. - */ - [[nodiscard]] std::size_t currently_used_memory() const; - - /*! - * Returns the total weight of the nodes that have been added. - * - * @return The total weight of the nodes that have been added. - */ - [[nodiscard]] std::int64_t total_node_weight() const; - - /*! - * Returns the total weight of the edges that have been added. - * - * @return The total weight of the edges that have been added. 
- */ - [[nodiscard]] std::int64_t total_edge_weight() const; - -private: - CompactStaticArray _nodes; - bool _sorted; // Whether the nodes of the graph are stored in degree-bucket order - - CompressedEdgesBuilder _compressed_edges_builder; - EdgeID _num_edges; - bool _store_edge_weights; - - bool _store_node_weights; - std::int64_t _total_node_weight; - StaticArray _node_weights; -}; - -class ParallelCompressedGraphBuilder { - using NodeID = CompressedGraph::NodeID; - using NodeWeight = CompressedGraph::NodeWeight; - using EdgeID = CompressedGraph::EdgeID; - using EdgeWeight = CompressedGraph::EdgeWeight; - -public: - /*! - * Compresses a graph. - * - * @param num_nodes The number of nodes of the graph to compress. - * @param num_edges The number of edges of the graph to compress. - * @param has_node_weights Whether node weights are stored. - * @param has_edge_weights Whether edge weights are stored. - * @param sorted Whether the nodes are stored in degree-bucket order. - * @param node_mapper Function that maps old node IDs to (possibly) new ones. - * @param degrees Function that returns the degree of a (remapped) node. - * @param nodes Function that returns the first edge of a node. - * @param edges Function that returns the (remapped) adjacent node of an edge. - * @param node_weights Function that returns the weight of a node. - * @param edge_weights Function that returns the weight of an edge. - * @return The compressed graph. - */ - template < - typename PermutationMapper, - typename DegreeMapper, - typename NodeMapper, - typename EdgeMapper, - typename NodeWeightMapper, - typename EdgeWeightMapper> - [[nodiscard]] static CompressedGraph compress( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted, - PermutationMapper &&node_mapper, - DegreeMapper &°rees, - NodeMapper &&nodes, - EdgeMapper &&edges, - NodeWeightMapper &&node_weights, - EdgeWeightMapper &&edge_weights - ); - - /*! 
- * Compresses a graph stored in compressed sparse row format. - * - * @param graph The graph to compress. - * @return The compressed graph. - */ - [[nodiscard]] static CompressedGraph compress(const CSRGraph &graph); - - /*! - * Initializes the builder by allocating memory for the various arrays. - * - * @param num_nodes The number of nodes of the graph to compress. - * @param num_edges The number of edges of the graph to compress. - * @param has_node_weights Whether node weights are stored. - * @param has_edge_weights Whether edge weights are stored. - * @param sorted Whether the nodes to add are stored in degree-bucket order. - */ - ParallelCompressedGraphBuilder( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted - ); - - /*! - * Adds a node to the compressed graph. - * - * @param node The node to add. - * @param offset The offset into the compressed edge array at which the compressed neighborhood - * of the node is stored. - */ - void add_node(const NodeID node, const EdgeID offset); - - /** - * Adds compressed neighborhoods of possible multiple consecutive nodes to the compressed graph. - * - * @param offset The offset into the compressed edge array at which the compressed neighborhoods - * are stored. - * @param length The length in bytes of the compressed neighborhoods to store. - * @param data A pointer to the start of the compressed neighborhoods to copy. - */ - void add_compressed_edges(const EdgeID offset, const EdgeID length, const std::uint8_t *data); - - /*! - * Adds a node weight to the compressed graph. - * - * @param node The node whose weight to add. - * @param weight The weight to store. - */ - void add_node_weight(const NodeID node, const NodeWeight weight); - - /*! - * Adds (cummulative) statistics about nodes of the compressed graph. 
- */ - void record_local_statistics( - NodeID max_degree, - NodeWeight node_weight, - EdgeWeight edge_weight, - std::size_t num_high_degree_nodes, - std::size_t num_high_degree_parts, - std::size_t num_interval_nodes, - std::size_t num_intervals - ); - - /*! - * Finalizes the compressed graph. Note that all nodes, compressed neighborhoods, node weights - * and edge weights have to be added at this point. - * - * @return The resulting compressed graph. - */ - [[nodiscard]] CompressedGraph build(); - -private: - // The arrays that store information about the compressed graph - CompactStaticArray _nodes; - bool _sorted; // Whether the nodes of the graph are stored in degree-bucket order - - heap_profiler::unique_ptr _compressed_edges; - EdgeID _compressed_edges_size; - EdgeID _num_edges; - bool _has_edge_weights; - - StaticArray _node_weights; - - NodeID _max_degree; - NodeWeight _total_node_weight; - EdgeWeight _total_edge_weight; - - // Statistics about graph compression - std::size_t _num_high_degree_nodes; - std::size_t _num_high_degree_parts; - std::size_t _num_interval_nodes; - std::size_t _num_intervals; -}; - -namespace debug { - -using Duration = std::chrono::high_resolution_clock::duration; - -struct Stats { - Duration compression_time{0}; - Duration sync_time{0}; - Duration copy_time{0}; - - std::size_t num_chunks{0}; - std::size_t num_edges{0}; -}; - -template decltype(auto) scoped_time(auto &elapsed, Lambda &&l) { - constexpr bool kNonReturning = std::is_void_v>; - - if constexpr (kDebug) { - if constexpr (kNonReturning) { - auto start = std::chrono::high_resolution_clock::now(); - l(); - auto end = std::chrono::high_resolution_clock::now(); - elapsed += end - start; - } else { - auto start = std::chrono::high_resolution_clock::now(); - decltype(auto) val = l(); - auto end = std::chrono::high_resolution_clock::now(); - elapsed += end - start; - return val; - } - } else { - return l(); - } -} - -void print_graph_compression_stats(const auto &stats_ets) { - 
DBG << "Chunk distribution:"; - - std::size_t cur_thread = 0; - for (const auto &stats : stats_ets) { - DBG << " t" << ++cur_thread << ": " << stats.num_chunks; - } - - DBG << "Edge distribution:"; - - cur_thread = 0; - for (const auto &stats : stats_ets) { - DBG << " t" << ++cur_thread << ": " << stats.num_edges; - } - - DBG << "Time distribution: (compression, sync, copy) [s]"; - - const auto to_sec = [&](auto elapsed) { - return std::chrono::duration_cast(elapsed).count() / 1000.0; - }; - - Duration total_time_compression(0); - Duration total_time_sync(0); - Duration total_time_copy(0); - - cur_thread = 0; - for (const auto &stats : stats_ets) { - total_time_compression += stats.compression_time; - total_time_sync += stats.sync_time; - total_time_copy += stats.copy_time; - - DBG << " t" << ++cur_thread << ": " << to_sec(stats.compression_time) << ' ' - << to_sec(stats.sync_time) << ' ' << to_sec(stats.copy_time); - } - - DBG << " sum: " << to_sec(total_time_compression) << ' ' << to_sec(total_time_sync) << ' ' - << to_sec(total_time_copy); -} - -void print_compressed_graph_stats(const auto &stats_ets) { - std::size_t _total_adjacent_nodes_num_bytes = 0; - std::size_t _total_edge_weights_num_bytes = 0; - - for (const auto &neighbourhood_builder : stats_ets) { - _total_adjacent_nodes_num_bytes += neighbourhood_builder.num_adjacent_node_bytes(); - _total_edge_weights_num_bytes += neighbourhood_builder.num_edge_weights_bytes(); - } - - const auto to_mb = [](const auto num_bytes) { - return num_bytes / static_cast(1024 * 1024); - }; - - DBG << "Compressed adjacent nodes memory space: " << to_mb(_total_adjacent_nodes_num_bytes) - << " MiB"; - DBG << "Compressed edge weights memory space: " << to_mb(_total_edge_weights_num_bytes) << " MiB"; -} - -} // namespace debug - -namespace { - -template < - bool kHasEdgeWeights, - typename PermutationMapper, - typename DegreeMapper, - typename NodeMapper, - typename EdgeMapper, - typename NodeWeightMapper, - typename 
EdgeWeightMapper> -CompressedGraph compute_compressed_graph( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool sorted, - PermutationMapper &&node_mapper, - DegreeMapper &°rees, - NodeMapper &&nodes, - EdgeMapper &&edges, - NodeWeightMapper &&node_weights, - EdgeWeightMapper &&edge_weights -) { - // To compress the graph in parallel the nodes are split into chunks. Each parallel task fetches - // a chunk and compresses the neighbourhoods of the corresponding nodes. The compressed - // neighborhoods are meanwhile stored in a buffer. They are moved into the compressed edge array - // when the (total) length of the compressed neighborhoods of the previous chunks is determined. - - // First step: create the chunks so that each chunk has about the same number of edges. - constexpr std::size_t kNumChunks = 5000; - const EdgeID max_chunk_order = num_edges / kNumChunks; - std::vector> chunks; - - NodeID max_degree = 0; - NodeID max_chunk_size = 0; - TIMED_SCOPE("Compute chunks") { - NodeID cur_chunk_start = 0; - EdgeID cur_chunk_size = 0; - EdgeID cur_first_edge = 0; - for (NodeID i = 0; i < num_nodes; ++i) { - const NodeID node = node_mapper(i); - const NodeID degree = degrees(node); - - max_degree = std::max(max_degree, degree); - cur_chunk_size += degree; - - if (cur_chunk_size >= max_chunk_order) { - // If there is a node whose neighborhood is larger than the chunk size limit, create a chunk - // consisting only of this node. 
- const bool singleton_chunk = cur_chunk_start == i; - if (singleton_chunk) { - chunks.emplace_back(cur_chunk_start, i + 1, cur_first_edge); - max_chunk_size = std::max(max_chunk_size, 1); - - cur_chunk_start = i + 1; - cur_first_edge += degree; - cur_chunk_size = 0; - continue; - } - - chunks.emplace_back(cur_chunk_start, i, cur_first_edge); - max_chunk_size = std::max(max_chunk_size, i - cur_chunk_start); - - cur_chunk_start = i; - cur_first_edge += cur_chunk_size - degree; - cur_chunk_size = degree; - } - } - - // If the last chunk is smaller than the chunk size limit, add it explicitly. - if (cur_chunk_start != num_nodes) { - chunks.emplace_back(cur_chunk_start, num_nodes, cur_first_edge); - max_chunk_size = std::max(max_chunk_size, num_nodes - cur_chunk_start); - } - }; - - // Second step: Initializes the data structures used to build the compressed graph in parallel. - ParallelCompressedGraphBuilder builder( - num_nodes, num_edges, has_node_weights, kHasEdgeWeights, sorted - ); - - tbb::enumerable_thread_specific> offsets_ets([&] { - return MaxSizeVector(max_chunk_size); - }); - - using Neighbourhood = std::conditional_t< - kHasEdgeWeights, - MaxSizeVector>, - MaxSizeVector>; - tbb::enumerable_thread_specific neighbourhood_ets([&] { - const std::size_t max_capacity = std::max(max_chunk_order, max_degree); - return Neighbourhood(max_capacity); - }); - - tbb::enumerable_thread_specific neighbourhood_builder_ets([&] { - return CompressedEdgesBuilder(num_nodes, num_edges, max_degree, kHasEdgeWeights); - }); - - const std::size_t num_threads = tbb::this_task_arena::max_concurrency(); - ConcurrentCircularVectorMutex buffer(num_threads); - - // Third step: Compress the chunks in parallel. 
- tbb::enumerable_thread_specific dbg_ets; - tbb::parallel_for(0, chunks.size(), [&](const auto) { - auto &dbg = dbg_ets.local(); - IF_DBG dbg.num_chunks++; - - auto &offsets = offsets_ets.local(); - auto &neighbourhood = neighbourhood_ets.local(); - auto &neighbourhood_builder = neighbourhood_builder_ets.local(); - - const NodeID chunk = buffer.next(); - const auto [start, end, first_edge] = chunks[chunk]; - - NodeWeight local_node_weight = 0; - neighbourhood_builder.init(first_edge); - - // Compress the neighborhoods of the nodes in the fetched chunk. - debug::scoped_time(dbg.compression_time, [&] { - for (NodeID i = start; i < end; ++i) { - const NodeID node = node_mapper(i); - const NodeID degree = degrees(node); - IF_DBG dbg.num_edges += degree; - - EdgeID edge = nodes(node); - for (NodeID j = 0; j < degree; ++j) { - const NodeID adjacent_node = edges(edge); - - if constexpr (kHasEdgeWeights) { - const EdgeWeight edge_weight = edge_weights(edge); - neighbourhood.emplace_back(adjacent_node, edge_weight); - } else { - neighbourhood.push_back(adjacent_node); - } - - edge += 1; - } - - const EdgeID local_offset = neighbourhood_builder.add(i, neighbourhood); - offsets.push_back(local_offset); - - neighbourhood.clear(); - } - }); - - // Wait for the parallel tasks that process the previous chunks to finish. - const EdgeID offset = debug::scoped_time(dbg.sync_time, [&] { - const EdgeID compressed_neighborhoods_size = neighbourhood_builder.size(); - return buffer.fetch_and_update(chunk, compressed_neighborhoods_size); - }); - - // Store the edge offset and node weight for each node in the chunk and copy the compressed - // neighborhoods into the actual compressed edge array. 
- debug::scoped_time(dbg.copy_time, [&] { - for (NodeID i = start; i < end; ++i) { - const EdgeID local_offset = offsets[i - start]; - - builder.add_node(i, offset + local_offset); - - if (has_node_weights) [[unlikely]] { - const NodeID node = node_mapper(i); - const NodeWeight node_weight = node_weights(node); - local_node_weight += node_weight; - - builder.add_node_weight(i, node_weight); - } - } - offsets.clear(); - - builder.add_compressed_edges( - offset, neighbourhood_builder.size(), neighbourhood_builder.compressed_data() - ); - - builder.record_local_statistics( - neighbourhood_builder.max_degree(), - local_node_weight, - neighbourhood_builder.total_edge_weight(), - neighbourhood_builder.num_high_degree_nodes(), - neighbourhood_builder.num_high_degree_parts(), - neighbourhood_builder.num_interval_nodes(), - neighbourhood_builder.num_intervals() - ); - }); - }); - - IF_DBG debug::print_graph_compression_stats(dbg_ets); - IF_DBG debug::print_compressed_graph_stats(neighbourhood_builder_ets); - - return builder.build(); -} - -} // namespace - -template < - typename PermutationMapper, - typename DegreeMapper, - typename NodeMapper, - typename EdgeMapper, - typename NodeWeightMapper, - typename EdgeWeightMapper> -CompressedGraph ParallelCompressedGraphBuilder::compress( - const NodeID num_nodes, - const EdgeID num_edges, - const bool has_node_weights, - const bool has_edge_weights, - const bool sorted, - PermutationMapper &&node_mapper, - DegreeMapper &°rees, - NodeMapper &&nodes, - EdgeMapper &&edges, - NodeWeightMapper &&node_weights, - EdgeWeightMapper &&edge_weights -) { - // To reduce memory usage, we distinguish between graphs with and without edge weights and only - // store edge weights during compression if they are present. 
- if (has_edge_weights) { - constexpr bool kHasEdgeWeights = true; - return compute_compressed_graph( - num_nodes, - num_edges, - has_node_weights, - sorted, - std::forward(node_mapper), - std::forward(degrees), - std::forward(nodes), - std::forward(edges), - std::forward(node_weights), - std::forward(edge_weights) - ); - } else { - constexpr bool kHasEdgeWeights = false; - return compute_compressed_graph( - num_nodes, - num_edges, - has_node_weights, - sorted, - std::forward(node_mapper), - std::forward(degrees), - std::forward(nodes), - std::forward(edges), - std::forward(node_weights), - std::forward(edge_weights) - ); - } -} - -} // namespace kaminpar::shm diff --git a/kaminpar-shm/datastructures/csr_graph.cc b/kaminpar-shm/datastructures/csr_graph.cc index eeeeec5c..cb80cd19 100644 --- a/kaminpar-shm/datastructures/csr_graph.cc +++ b/kaminpar-shm/datastructures/csr_graph.cc @@ -7,13 +7,19 @@ ******************************************************************************/ #include "kaminpar-shm/datastructures/csr_graph.h" +#include + +#include +#include + #include "kaminpar-shm/datastructures/graph.h" #include "kaminpar-common/logger.h" +#include "kaminpar-common/parallel/algorithm.h" namespace kaminpar::shm { -template