From a750723adda40a192a5e02c64d4efb0b492c6924 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 10:48:51 +0200 Subject: [PATCH 1/7] feat(label-propagation): add initially small cluster weight vector --- kaminpar-cli/kaminpar_arguments.cc | 13 +++- .../coarsening/clustering/lp_clusterer.cc | 7 +- kaminpar-shm/context_io.cc | 42 +++++++---- kaminpar-shm/context_io.h | 4 ++ kaminpar-shm/kaminpar.cc | 7 ++ kaminpar-shm/kaminpar.h | 8 ++- kaminpar-shm/label_propagation.h | 70 ++++++++++++++++--- kaminpar-shm/presets.cc | 3 +- 8 files changed, 124 insertions(+), 30 deletions(-) diff --git a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc index 1f67d8e4..b4ff477a 100644 --- a/kaminpar-cli/kaminpar_arguments.cc +++ b/kaminpar-cli/kaminpar_arguments.cc @@ -206,10 +206,17 @@ CLI::Option_group *create_lp_coarsening_options(CLI::App *app, Context &ctx) { ->capture_default_str(); lp->add_option( - "--c-lp-use-two-level-cluster-weight-vector", - ctx.coarsening.clustering.lp.use_two_level_cluster_weight_vector, - "Whether to use the two level cluster weight vector" + "--c-lp-cluster-weights-struct", ctx.coarsening.clustering.lp.cluster_weights_structure ) + ->transform(CLI::CheckedTransformer(get_cluster_weight_structures()).description("")) + ->description( + R"(Determines the data structure for storing the cluster weights. +Options are: + - vec: Uses a fixed-width vector + - two-level-vec: Uses a two-level vector + - initially-small-vec: Uses a small fixed-width vector initially and switches to a bigger fixed-width vector after relabeling (Requires two-phase lp with relabeling) + )" + ) ->capture_default_str(); lp->add_option( diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc index bdbf8095..46624c02 100644 --- a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc +++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc @@ -41,7 +41,7 @@ class LPClusteringImpl final LPClusteringImpl(const CoarseningContext &c_ctx, Permutations &permutations) : Base(permutations), - ClusterWeightBase(c_ctx.clustering.lp.use_two_level_cluster_weight_vector), + ClusterWeightBase(c_ctx.clustering.lp.cluster_weights_structure), _lp_ctx(c_ctx.clustering.lp) { Base::set_max_degree(_lp_ctx.large_degree_threshold); Base::set_max_num_neighbors(_lp_ctx.max_num_neighbors); @@ -323,6 +323,11 @@ class LPClusteringImplWrapper { _csr_core->set_relabel_before_second_phase(false); _compact_csr_core->set_relabel_before_second_phase(false); _compressed_core->set_relabel_before_second_phase(false); + + // Only use the initially small cluster weight vector for the first lp implementation + _csr_core->set_use_small_vector_initially(false); + _compact_csr_core->set_use_small_vector_initially(false); + _compressed_core->set_use_small_vector_initially(false); } private: diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc index a5f604d6..334909f4 100644 --- a/kaminpar-shm/context_io.cc +++ b/kaminpar-shm/context_io.cc @@ -123,6 +123,26 @@ std::ostream &operator<<(std::ostream &out, const ClusterWeightLimit limit) { return out << ""; } +std::unordered_map get_cluster_weight_structures() { + return { + {"vec", ClusterWeightsStructure::VEC}, + {"two-level-vec", ClusterWeightsStructure::TWO_LEVEL_VEC}, + {"initially-small-vec", ClusterWeightsStructure::INITIALLY_SMALL_VEC}, + }; +} + +std::ostream &operator<<(std::ostream &out, const ClusterWeightsStructure structure) { + switch (structure) { + case ClusterWeightsStructure::VEC: + return out << "vector"; + case ClusterWeightsStructure::TWO_LEVEL_VEC: + return out << "two-level vector"; + case ClusterWeightsStructure::INITIALLY_SMALL_VEC: + return out << "initially small vector"; + } + return out << ""; +} + std::unordered_map get_kway_refinement_algorithms() { return { {"noop", RefinementAlgorithm::NOOP}, @@ -304,7 +324,7 @@ std::ostream &operator<<(std::ostream &out, SecondPhaseSelectMode strategy) { std::unordered_map get_second_phase_select_modes() { return { {"high-degree", SecondPhaseSelectMode::HIGH_DEGREE}, - {"full-rating-map", SecondPhaseSelectMode::FULL_RATING_MAP} + {"full-rating-map", SecondPhaseSelectMode::FULL_RATING_MAP}, }; } @@ -325,7 +345,7 @@ std::unordered_map get_second_phase_agg return { {"none", SecondPhaseAggregationMode::NONE}, {"direct", SecondPhaseAggregationMode::DIRECT}, - {"buffered", SecondPhaseAggregationMode::BUFFERED} + {"buffered", SecondPhaseAggregationMode::BUFFERED}, }; } @@ -343,7 +363,7 @@ get_isolated_nodes_clustering_strategies() { void print(const GraphCompressionContext &c_ctx, std::ostream &out) { out << "Enabled: " << (c_ctx.enabled ? "yes" : "no") << "\n"; if (c_ctx.enabled) { - out << "Compression Scheme: " << "Gap Encoding + "; + out << "Compression Scheme: Gap Encoding + "; if (c_ctx.run_length_encoding) { out << "VarInt Run-Length Encoding\n"; } else if (c_ctx.stream_encoding) { @@ -452,16 +472,8 @@ void print(const LabelPropagationCoarseningContext &lp_ctx, std::ostream &out) { out << " Number of iterations: " << lp_ctx.num_iterations << "\n"; out << " High degree threshold: " << lp_ctx.large_degree_threshold << "\n"; out << " Max degree: " << lp_ctx.max_num_neighbors << "\n"; - out << " Two-level weight vector: " - << (lp_ctx.use_two_level_cluster_weight_vector ? -#ifdef KAMINPAR_USES_GROWT - "yes (growt)" -#else - "yes (tbb)" -#endif - : "no") - << "\n"; - out << " Uses two phases: " << (lp_ctx.use_two_phases ? "yes" : "no") << "\n"; + out << " Cluster weights struct: " << lp_ctx.cluster_weights_structure << "\n"; + out << " Use two phases: " << (lp_ctx.use_two_phases ? "yes" : "no") << "\n"; if (lp_ctx.use_two_phases) { out << " Select mode: " << lp_ctx.second_phase_select_mode << '\n'; out << " Aggregation mode: " << lp_ctx.second_phase_aggregation_mode << '\n'; @@ -554,8 +566,8 @@ void print(const Context &ctx, std::ostream &out) { out << "Execution mode: " << ctx.parallel.num_threads << "\n"; out << "Seed: " << Random::get_seed() << "\n"; out << "Graph: " << ctx.debug.graph_name - << " [node ordering: " << ctx.node_ordering << "]" << " [edge ordering: " << ctx.edge_ordering - << "]\n"; + << " [node ordering: " << ctx.node_ordering << "]" + << " [edge ordering: " << ctx.edge_ordering << "]\n"; print(ctx.partition, out); cio::print_delimiter("Graph Compression", '-'); print(ctx.compression, out); diff --git a/kaminpar-shm/context_io.h b/kaminpar-shm/context_io.h index eaf90a78..56f17eee 100644 --- a/kaminpar-shm/context_io.h +++ b/kaminpar-shm/context_io.h @@ -35,6 +35,10 @@ std::unordered_map get_cluster_weight_limits(); std::ostream &operator<<(std::ostream &out, RefinementAlgorithm algorithm); +std::unordered_map get_cluster_weight_structures(); + +std::ostream &operator<<(std::ostream &out, const ClusterWeightsStructure structure); + std::unordered_map get_kway_refinement_algorithms(); std::ostream &operator<<(std::ostream &out, FMStoppingRule rule); diff --git a/kaminpar-shm/kaminpar.cc b/kaminpar-shm/kaminpar.cc index 194692d0..d5ac480b 100644 --- a/kaminpar-shm/kaminpar.cc +++ b/kaminpar-shm/kaminpar.cc @@ -75,6 +75,13 @@ KaMinPar::KaMinPar(const int num_threads, Context ctx) : _num_threads(num_threads), _ctx(std::move(ctx)), _gc(tbb::global_control::max_allowed_parallelism, num_threads) { + // The use of the initially small vector requires two-phase lp with relabeling + auto &lp_ctx = _ctx.coarsening.clustering.lp; + if ((!lp_ctx.use_two_phases || !lp_ctx.relabel_before_second_phase) && + (lp_ctx.cluster_weights_structure == ClusterWeightsStructure::INITIALLY_SMALL_VEC)) { + lp_ctx.cluster_weights_structure = ClusterWeightsStructure::VEC; + } + #ifdef KAMINPAR_ENABLE_TIMERS GLOBAL_TIMER.reset(); #endif // KAMINPAR_ENABLE_TIMERS diff --git a/kaminpar-shm/kaminpar.h b/kaminpar-shm/kaminpar.h index d1718711..f5fcca43 100644 --- a/kaminpar-shm/kaminpar.h +++ b/kaminpar-shm/kaminpar.h @@ -95,6 +95,12 @@ enum class ClusterWeightLimit { ZERO, }; +enum class ClusterWeightsStructure { + VEC, + TWO_LEVEL_VEC, + INITIALLY_SMALL_VEC +}; + enum class SecondPhaseSelectMode { HIGH_DEGREE, FULL_RATING_MAP @@ -135,7 +141,7 @@ struct LabelPropagationCoarseningContext { NodeID large_degree_threshold; NodeID max_num_neighbors; - bool use_two_level_cluster_weight_vector; + ClusterWeightsStructure cluster_weights_structure; bool use_two_phases; SecondPhaseSelectMode second_phase_select_mode; diff --git a/kaminpar-shm/label_propagation.h b/kaminpar-shm/label_propagation.h index c5298024..d0d65cc4 100644 --- a/kaminpar-shm/label_propagation.h +++ b/kaminpar-shm/label_propagation.h @@ -16,6 +16,8 @@ #include #include +#include "kaminpar-shm/kaminpar.h" + #include "kaminpar-common/assert.h" #include "kaminpar-common/datastructures/concurrent_fast_reset_array.h" #include "kaminpar-common/datastructures/concurrent_two_level_vector.h" @@ -23,6 +25,7 @@ #include "kaminpar-common/datastructures/rating_map.h" #include "kaminpar-common/heap_profiler.h" #include "kaminpar-common/logger.h" +#include "kaminpar-common/parallel/algorithm.h" #include "kaminpar-common/parallel/atomic.h" #include "kaminpar-common/random.h" #include "kaminpar-common/tags.h" @@ -1729,24 +1732,38 @@ class ChunkRandomLabelPropagation : public LabelPropagation class OwnedRelaxedClusterWeightVector { - using FirstLevelClusterWeight = typename std:: - conditional_t, std::int16_t, std::int32_t>; + using Structure = shm::ClusterWeightsStructure; using ClusterWeightVec = StaticArray; + + using SmallClusterWeight = std::uint8_t; + using SmallClusterWeightVec = StaticArray; + + using FirstLevelClusterWeight = typename std:: + conditional_t, std::int16_t, std::int32_t>; using ClusterWeightTwoLevelVec = ConcurrentTwoLevelVector; public: using ClusterWeights = std::pair; - OwnedRelaxedClusterWeightVector(const bool use_two_level_vector) - : _use_two_level_vector(use_two_level_vector) {} + OwnedRelaxedClusterWeightVector(const Structure structure) + : _use_two_level_vector(structure == Structure::TWO_LEVEL_VEC), + _use_small_vector_initially(structure == Structure::INITIALLY_SMALL_VEC) {} + + void set_use_small_vector_initially(const bool use_small_vector_initially) { + _use_small_vector_initially = use_small_vector_initially; + } void allocate_cluster_weights(const ClusterID num_clusters) { if (_use_two_level_vector) { if (_two_level_cluster_weights.capacity() < num_clusters) { _two_level_cluster_weights.resize(num_clusters); } + } else if (_use_small_vector_initially) { + if (_small_cluster_weights.size() < num_clusters) { + _small_cluster_weights.resize(num_clusters); + } } else { if (_cluster_weights.size() < num_clusters) { _cluster_weights.resize(num_clusters); @@ -1757,6 +1774,8 @@ template class OwnedRelaxedClusterW void free() { if (_use_two_level_vector) { _two_level_cluster_weights.free(); + } else if (_use_small_vector_initially) { + _small_cluster_weights.free(); } else { _cluster_weights.free(); } @@ -1781,6 +1800,11 @@ template class OwnedRelaxedClusterW void init_cluster_weight(const ClusterID cluster, const ClusterWeight weight) { if (_use_two_level_vector) { _two_level_cluster_weights.insert(cluster, weight); + } else if (_use_small_vector_initially) { + // Can cause problems for graphs with node weights. + KASSERT(weight <= std::numeric_limits::max()); + + _small_cluster_weights[cluster] = static_cast(weight); } else { _cluster_weights[cluster] = weight; } @@ -1789,6 +1813,10 @@ template class OwnedRelaxedClusterW ClusterWeight cluster_weight(const ClusterID cluster) { if (_use_two_level_vector) { return _two_level_cluster_weights[cluster]; + } else if (_use_small_vector_initially) { + return static_cast( + __atomic_load_n(&_small_cluster_weights[cluster], __ATOMIC_RELAXED) + ); } else { return __atomic_load_n(&_cluster_weights[cluster], __ATOMIC_RELAXED); } @@ -1806,6 +1834,17 @@ template class OwnedRelaxedClusterW _two_level_cluster_weights.atomic_sub(old_cluster, delta); return true; } + } else if (_use_small_vector_initially) { + const ClusterWeight actual_max_weight = std::min( + max_weight, static_cast(std::numeric_limits::max()) + ); + + if (static_cast(_small_cluster_weights[new_cluster]) + delta <= + actual_max_weight) { + __atomic_fetch_add(&_small_cluster_weights[new_cluster], delta, __ATOMIC_RELAXED); + __atomic_fetch_sub(&_small_cluster_weights[old_cluster], delta, __ATOMIC_RELAXED); + return true; + } } else { if (_cluster_weights[new_cluster] + delta <= max_weight) { __atomic_fetch_add(&_cluster_weights[new_cluster], delta, __ATOMIC_RELAXED); @@ -1822,14 +1861,17 @@ template class OwnedRelaxedClusterW ) { if (_use_two_level_vector) { _two_level_cluster_weights.reassign(mapping, num_new_clusters); - } else { + return; + } + + const auto reassign = [&](const auto &old_cluster_weights) { RECORD("new_cluster_weights") ClusterWeightVec new_cluster_weights(num_new_clusters); tbb::parallel_for( - tbb::blocked_range(0, _cluster_weights.size()), + tbb::blocked_range(0, old_cluster_weights.size()), [&](const auto &r) { for (ClusterID u = r.begin(); u != r.end(); ++u) { - ClusterWeight weight = _cluster_weights[u]; + ClusterWeight weight = old_cluster_weights[u]; if (weight != 0) { ClusterID new_cluster_id = mapping[u] - 1; @@ -1840,13 +1882,25 @@ template class OwnedRelaxedClusterW ); _cluster_weights = std::move(new_cluster_weights); + }; + + if (_use_small_vector_initially) { + reassign(_small_cluster_weights); + _small_cluster_weights.free(); + _use_small_vector_initially = false; + } else { + reassign(_cluster_weights); } } private: - const bool _use_two_level_vector; ClusterWeightVec _cluster_weights; + + const bool _use_two_level_vector; ClusterWeightTwoLevelVec _two_level_cluster_weights; + + bool _use_small_vector_initially; + SmallClusterWeightVec _small_cluster_weights; }; template class NonatomicClusterVectorRef { diff --git a/kaminpar-shm/presets.cc b/kaminpar-shm/presets.cc index 55c790a1..006e5f4a 100644 --- a/kaminpar-shm/presets.cc +++ b/kaminpar-shm/presets.cc @@ -79,7 +79,7 @@ Context create_default_context() { .num_iterations = 5, .large_degree_threshold = 1000000, .max_num_neighbors = 200000, - .use_two_level_cluster_weight_vector = false, + .cluster_weights_structure = ClusterWeightsStructure::VEC, .use_two_phases = false, .second_phase_select_mode = SecondPhaseSelectMode::FULL_RATING_MAP, .second_phase_aggregation_mode = SecondPhaseAggregationMode::BUFFERED, @@ -206,7 +206,6 @@ Context create_memory_context() { ctx.compression.may_dismiss = true; ctx.coarsening.clustering.algorithm = ClusteringAlgorithm::LABEL_PROPAGATION; ctx.coarsening.clustering.lp.use_two_phases = true; - ctx.coarsening.clustering.lp.use_two_level_cluster_weight_vector = true; ctx.coarsening.clustering.max_mem_free_coarsening_level = 1; ctx.coarsening.contraction.mode = ContractionMode::UNBUFFERED; ctx.coarsening.contraction.use_compact_mapping = true; From ab0c4fa57af4a9d89013809365494bd3810974d1 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 11:08:49 +0200 Subject: [PATCH 2/7] refactor(label-propagation): rename modes to strategies --- kaminpar-cli/kaminpar_arguments.cc | 43 ++++++++------ .../coarsening/clustering/lp_clusterer.cc | 4 +- kaminpar-shm/context_io.cc | 38 +++++++------ kaminpar-shm/context_io.h | 10 ++-- kaminpar-shm/kaminpar.h | 12 ++-- kaminpar-shm/label_propagation.h | 56 ++++++++++--------- kaminpar-shm/presets.cc | 11 ++-- kaminpar-shm/refinement/lp/lp_refiner.cc | 4 +- 8 files changed, 97 insertions(+), 81 deletions(-) diff --git a/kaminpar-cli/kaminpar_arguments.cc b/kaminpar-cli/kaminpar_arguments.cc index b4ff477a..e6627aab 100644 --- a/kaminpar-cli/kaminpar_arguments.cc +++ b/kaminpar-cli/kaminpar_arguments.cc @@ -227,24 +227,26 @@ Options are: ) ->capture_default_str(); lp->add_option( - "--c-lp-second-phase-select-mode", ctx.coarsening.clustering.lp.second_phase_select_mode + "--c-lp-second-phase-selection-strategy", + ctx.coarsening.clustering.lp.second_phase_selection_strategy ) - ->transform(CLI::CheckedTransformer(get_second_phase_select_modes()).description("")) + ->transform(CLI::CheckedTransformer(get_second_phase_selection_strategies()).description("")) ->description( - R"(Determines the mode for selecting nodes for the second phase of label propagation. + R"(Determines the strategy for selecting nodes for the second phase of label propagation. Options are: - high-degree: Select nodes with high degree - - full-rating-map: Select nodes which have a full rating map in the first phase + - full-rating-map: Select nodes that have a full rating map in the first phase )" ) ->capture_default_str(); lp->add_option( - "--c-lp-second-phase-aggregation-mode", - ctx.coarsening.clustering.lp.second_phase_aggregation_mode + "--c-lp-second-phase-aggregation-strategy", + ctx.coarsening.clustering.lp.second_phase_aggregation_strategy ) - ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_modes()).description("")) + ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_strategies()).description("") + ) ->description( - R"(Determines the mode for aggregating ratings in the second phase of label propagation. + R"(Determines the strategy for aggregating ratings in the second phase of label propagation. Options are: - none: Skip the second phase - direct: Write the ratings directly into the global vector (shared between threads) @@ -302,9 +304,10 @@ CLI::Option_group *create_contraction_coarsening_options(CLI::App *app, Context ->transform(CLI::CheckedTransformer(get_contraction_modes()).description("")) ->description(R"(The mode useed for contraction. Options are: - - edge-buffer: Use an edge buffer to store edges temporarily - - no-edge-buffer-naive: Use no edge buffer by computing the neighborhood of each coarse node twice - - no-edge-buffer-remap: Use no edge buffer by remapping the coarse nodes afterwards + - buffered: Use an edge buffer that is partially filled + - buffered-legacy: Use an edge buffer + - unbuffered: Use no edge buffer by remapping the coarse nodes + - unbuffered-naive: Use no edge buffer by computing twice )") ->capture_default_str(); contraction @@ -386,22 +389,26 @@ CLI::Option_group *create_lp_refinement_options(CLI::App *app, Context &ctx) { "treated separately" ) ->capture_default_str(); - lp->add_option("--r-lp-second-phase-select-mode", ctx.refinement.lp.second_phase_select_mode) - ->transform(CLI::CheckedTransformer(get_second_phase_select_modes()).description("")) + lp->add_option( + "--r-lp-second-phase-selection-strategy", ctx.refinement.lp.second_phase_selection_strategy + ) + ->transform(CLI::CheckedTransformer(get_second_phase_selection_strategies()).description("")) ->description( - R"(Determines the mode for selecting nodes for the second phase of label propagation. + R"(Determines the strategy for selecting nodes for the second phase of label propagation. Options are: - high-degree: Select nodes with high degree - - full-rating-map: Select nodes which have a full rating map in the first phase + - full-rating-map: Select nodes that have a full rating map in the first phase )" ) ->capture_default_str(); lp->add_option( - "--r-lp-second-phase-aggregation-mode", ctx.refinement.lp.second_phase_aggregation_mode + "--r-lp-second-phase-aggregation-strategy", + ctx.refinement.lp.second_phase_aggregation_strategy ) - ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_modes()).description("")) + ->transform(CLI::CheckedTransformer(get_second_phase_aggregation_strategies()).description("") + ) ->description( - R"(Determines the mode for aggregating ratings in the second phase of label propagation. + R"(Determines the strategy for aggregating ratings in the second phase of label propagation. Options are: - none: Skip the second phase - direct: Write the ratings directly into the global vector (shared between threads) diff --git a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc index 46624c02..7f84b4a6 100644 --- a/kaminpar-shm/coarsening/clustering/lp_clusterer.cc +++ b/kaminpar-shm/coarsening/clustering/lp_clusterer.cc @@ -46,8 +46,8 @@ class LPClusteringImpl final Base::set_max_degree(_lp_ctx.large_degree_threshold); Base::set_max_num_neighbors(_lp_ctx.max_num_neighbors); Base::set_use_two_phases(_lp_ctx.use_two_phases); - Base::set_second_phase_select_mode(_lp_ctx.second_phase_select_mode); - Base::set_second_phase_aggregation_mode(_lp_ctx.second_phase_aggregation_mode); + Base::set_second_phase_selection_strategy(_lp_ctx.second_phase_selection_strategy); + Base::set_second_phase_aggregation_strategy(_lp_ctx.second_phase_aggregation_strategy); Base::set_relabel_before_second_phase(_lp_ctx.relabel_before_second_phase); } diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc index 334909f4..65ce9a0e 100644 --- a/kaminpar-shm/context_io.cc +++ b/kaminpar-shm/context_io.cc @@ -310,42 +310,44 @@ std::ostream &operator<<(std::ostream &out, IsolatedNodesClusteringStrategy stra return out << ""; } -std::ostream &operator<<(std::ostream &out, SecondPhaseSelectMode strategy) { +std::ostream &operator<<(std::ostream &out, SecondPhaseSelectionStrategy strategy) { switch (strategy) { - case SecondPhaseSelectMode::HIGH_DEGREE: + case SecondPhaseSelectionStrategy::HIGH_DEGREE: return out << "high-degree"; - case SecondPhaseSelectMode::FULL_RATING_MAP: + case SecondPhaseSelectionStrategy::FULL_RATING_MAP: return out << "full-rating-map"; } return out << ""; } -std::unordered_map get_second_phase_select_modes() { +std::unordered_map +get_second_phase_selection_strategies() { return { - {"high-degree", SecondPhaseSelectMode::HIGH_DEGREE}, - {"full-rating-map", SecondPhaseSelectMode::FULL_RATING_MAP}, + {"high-degree", SecondPhaseSelectionStrategy::HIGH_DEGREE}, + {"full-rating-map", SecondPhaseSelectionStrategy::FULL_RATING_MAP}, }; } -std::ostream &operator<<(std::ostream &out, SecondPhaseAggregationMode strategy) { +std::ostream &operator<<(std::ostream &out, SecondPhaseAggregationStrategy strategy) { switch (strategy) { - case SecondPhaseAggregationMode::NONE: + case SecondPhaseAggregationStrategy::NONE: return out << "none"; - case SecondPhaseAggregationMode::DIRECT: + case SecondPhaseAggregationStrategy::DIRECT: return out << "direct"; - case SecondPhaseAggregationMode::BUFFERED: + case SecondPhaseAggregationStrategy::BUFFERED: return out << "buffered"; } return out << ""; } -std::unordered_map get_second_phase_aggregation_modes() { +std::unordered_map +get_second_phase_aggregation_strategies() { return { - {"none", SecondPhaseAggregationMode::NONE}, - {"direct", SecondPhaseAggregationMode::DIRECT}, - {"buffered", SecondPhaseAggregationMode::BUFFERED}, + {"none", SecondPhaseAggregationStrategy::NONE}, + {"direct", SecondPhaseAggregationStrategy::DIRECT}, + {"buffered", SecondPhaseAggregationStrategy::BUFFERED}, }; } @@ -475,8 +477,8 @@ void print(const LabelPropagationCoarseningContext &lp_ctx, std::ostream &out) { out << " Cluster weights struct: " << lp_ctx.cluster_weights_structure << "\n"; out << " Use two phases: " << (lp_ctx.use_two_phases ? "yes" : "no") << "\n"; if (lp_ctx.use_two_phases) { - out << " Select mode: " << lp_ctx.second_phase_select_mode << '\n'; - out << " Aggregation mode: " << lp_ctx.second_phase_aggregation_mode << '\n'; + out << " Selection strategy: " << lp_ctx.second_phase_selection_strategy << '\n'; + out << " Aggregation strategy: " << lp_ctx.second_phase_aggregation_strategy << '\n'; out << " Relabel: " << (lp_ctx.relabel_before_second_phase ? "yes" : "no") << '\n'; } @@ -497,8 +499,8 @@ void print(const RefinementContext &r_ctx, std::ostream &out) { out << " Number of iterations: " << r_ctx.lp.num_iterations << "\n"; out << " Uses two phases: " << (r_ctx.lp.use_two_phases ? "yes" : "no") << "\n"; if (r_ctx.lp.use_two_phases) { - out << " Select mode: " << r_ctx.lp.second_phase_select_mode << '\n'; - out << " Aggregation mode: " << r_ctx.lp.second_phase_aggregation_mode << '\n'; + out << " Selection strategy: " << r_ctx.lp.second_phase_selection_strategy << '\n'; + out << " Aggregation strategy: " << r_ctx.lp.second_phase_aggregation_strategy << '\n'; } } if (r_ctx.includes_algorithm(RefinementAlgorithm::KWAY_FM)) { diff --git a/kaminpar-shm/context_io.h b/kaminpar-shm/context_io.h index 56f17eee..76b41f78 100644 --- a/kaminpar-shm/context_io.h +++ b/kaminpar-shm/context_io.h @@ -55,13 +55,15 @@ std::unordered_map get_initial_partitionin std::ostream &operator<<(std::ostream &out, GainCacheStrategy strategy); -std::ostream &operator<<(std::ostream &out, SecondPhaseSelectMode strategy); +std::ostream &operator<<(std::ostream &out, SecondPhaseSelectionStrategy strategy); -std::unordered_map get_second_phase_select_modes(); +std::unordered_map +get_second_phase_selection_strategies(); -std::ostream &operator<<(std::ostream &out, SecondPhaseAggregationMode strategy); +std::ostream &operator<<(std::ostream &out, SecondPhaseAggregationStrategy strategy); -std::unordered_map get_second_phase_aggregation_modes(); +std::unordered_map +get_second_phase_aggregation_strategies(); std::unordered_map get_gain_cache_strategies(); diff --git a/kaminpar-shm/kaminpar.h b/kaminpar-shm/kaminpar.h index f5fcca43..b39ac780 100644 --- a/kaminpar-shm/kaminpar.h +++ b/kaminpar-shm/kaminpar.h @@ -101,12 +101,12 @@ enum class ClusterWeightsStructure { INITIALLY_SMALL_VEC }; -enum class SecondPhaseSelectMode { +enum class SecondPhaseSelectionStrategy { HIGH_DEGREE, FULL_RATING_MAP }; -enum class SecondPhaseAggregationMode { +enum class SecondPhaseAggregationStrategy { NONE, DIRECT, BUFFERED @@ -144,8 +144,8 @@ struct LabelPropagationCoarseningContext { ClusterWeightsStructure cluster_weights_structure; bool use_two_phases; - SecondPhaseSelectMode second_phase_select_mode; - SecondPhaseAggregationMode second_phase_aggregation_mode; + SecondPhaseSelectionStrategy second_phase_selection_strategy; + SecondPhaseAggregationStrategy second_phase_aggregation_strategy; bool relabel_before_second_phase; TwoHopStrategy two_hop_strategy; @@ -214,8 +214,8 @@ struct LabelPropagationRefinementContext { NodeID max_num_neighbors; bool use_two_phases; - SecondPhaseSelectMode second_phase_select_mode; - SecondPhaseAggregationMode second_phase_aggregation_mode; + SecondPhaseSelectionStrategy second_phase_selection_strategy; + SecondPhaseAggregationStrategy second_phase_aggregation_strategy; }; struct KwayFMRefinementContext { diff --git a/kaminpar-shm/label_propagation.h b/kaminpar-shm/label_propagation.h index d0d65cc4..a0a82fff 100644 --- a/kaminpar-shm/label_propagation.h +++ b/kaminpar-shm/label_propagation.h @@ -93,8 +93,8 @@ template class LabelPropagat using ClusterWeight = typename Config::ClusterWeight; using RatingMap = typename Config::RatingMap; - using SecondPhaseSelectMode = shm::SecondPhaseSelectMode; - using SecondPhaseAggregationMode = shm::SecondPhaseAggregationMode; + using SecondPhaseSelectionStrategy = shm::SecondPhaseSelectionStrategy; + using SecondPhaseAggregationStrategy = shm::SecondPhaseAggregationStrategy; public: void set_max_degree(const NodeID max_degree) { @@ -125,18 +125,18 @@ template class LabelPropagat return _use_two_phases; } - void set_second_phase_select_mode(const SecondPhaseSelectMode mode) { - _second_phase_select_mode = mode; + void set_second_phase_selection_strategy(const SecondPhaseSelectionStrategy strategy) { + _second_phase_selection_strategy = strategy; } - [[nodiscard]] SecondPhaseSelectMode second_phase_select_mode() const { - return _second_phase_select_mode; + [[nodiscard]] SecondPhaseSelectionStrategy second_phase_selection_strategy() const { + return _second_phase_selection_strategy; } - void set_second_phase_aggregation_mode(const SecondPhaseAggregationMode mode) { - _second_phase_aggregation_mode = mode; + void set_second_phase_aggregation_strategy(const SecondPhaseAggregationStrategy strategy) { + _second_phase_aggregation_strategy = strategy; } - [[nodiscard]] SecondPhaseAggregationMode second_phase_aggregation_mode() const { - return _second_phase_aggregation_mode; + [[nodiscard]] SecondPhaseAggregationStrategy second_phase_aggregation_strategy() const { + return _second_phase_aggregation_strategy; } void set_relabel_before_second_phase(const bool relabel) { @@ -369,7 +369,8 @@ template class LabelPropagat if constexpr (first_phase) { std::size_t upper_bound_size = std::min(_graph->degree(u), _initial_num_clusters); - if (_use_two_phases && _second_phase_select_mode == SecondPhaseSelectMode::FULL_RATING_MAP) { + if (_use_two_phases && + _second_phase_selection_strategy == SecondPhaseSelectionStrategy::FULL_RATING_MAP) { upper_bound_size = std::min(upper_bound_size, Config::kRatingMapThreshold); } @@ -442,9 +443,10 @@ template class LabelPropagat if constexpr (first_phase) { const bool use_frm_selection = - _use_two_phases && _second_phase_select_mode == SecondPhaseSelectMode::FULL_RATING_MAP; + _use_two_phases && + _second_phase_selection_strategy == SecondPhaseSelectionStrategy::FULL_RATING_MAP; const bool aggregate_during_second_phase = - _second_phase_aggregation_mode != SecondPhaseAggregationMode::NONE; + _second_phase_aggregation_strategy != SecondPhaseAggregationStrategy::NONE; bool second_phase_node = false; _graph->neighbors(u, _max_num_neighbors, [&](const EdgeID e, const NodeID v) { @@ -476,8 +478,8 @@ template class LabelPropagat return std::nullopt; } } else { - switch (_second_phase_aggregation_mode) { - case SecondPhaseAggregationMode::DIRECT: { + switch (_second_phase_aggregation_strategy) { + case SecondPhaseAggregationStrategy::DIRECT: { _graph->pfor_neighbors(u, _max_num_neighbors, 2000, [&](const EdgeID e, const NodeID v) { if (derived_accept_neighbor(u, v)) { const ClusterID v_cluster = derived_cluster(v); @@ -497,7 +499,7 @@ template class LabelPropagat }); break; } - case SecondPhaseAggregationMode::BUFFERED: { + case SecondPhaseAggregationStrategy::BUFFERED: { const auto flush_local_rating_map = [&](auto &local_rating_map) { for (const auto [cluster, rating] : local_rating_map.entries()) { const EdgeWeight prev_rating = @@ -536,7 +538,7 @@ template class LabelPropagat }); break; } - case SecondPhaseAggregationMode::NONE: + case SecondPhaseAggregationStrategy::NONE: __builtin_unreachable(); } } @@ -1150,11 +1152,11 @@ template class LabelPropagat //! parallel over their neighbors. bool _use_two_phases{false}; - //! The mode by which the nodes for the second phase are selected. - SecondPhaseSelectMode _second_phase_select_mode; + //! The strategy by which the nodes for the second phase are selected. + SecondPhaseSelectionStrategy _second_phase_selection_strategy; - //! The mode by which the ratings for nodes in the second phase are aggregated. - SecondPhaseAggregationMode _second_phase_aggregation_mode; + //! The strategy by which the ratings for nodes in the second phase are aggregated. + SecondPhaseAggregationStrategy _second_phase_aggregation_strategy; //! Whether to relabel the clusters before the second phase. bool _relabel_before_second_phase; @@ -1310,8 +1312,8 @@ class ChunkRandomLabelPropagation : public LabelPropagation= Config::kRatingMapThreshold && - _second_phase_select_mode == SecondPhaseSelectMode::HIGH_DEGREE; + _second_phase_selection_strategy == SecondPhaseSelectionStrategy::HIGH_DEGREE; const bool aggregate_during_second_phase = - _second_phase_aggregation_mode != SecondPhaseAggregationMode::NONE; + _second_phase_aggregation_strategy != SecondPhaseAggregationStrategy::NONE; parallel::Atomic next_chunk = 0; tbb::parallel_for(static_cast(0), _chunks.size(), [&](const std::size_t) { @@ -1718,9 +1720,9 @@ class ChunkRandomLabelPropagation : public LabelPropagation::max(), .use_two_phases = false, - .second_phase_select_mode = SecondPhaseSelectMode::FULL_RATING_MAP, - .second_phase_aggregation_mode = SecondPhaseAggregationMode::BUFFERED, + .second_phase_selection_strategy = + SecondPhaseSelectionStrategy::FULL_RATING_MAP, + .second_phase_aggregation_strategy = SecondPhaseAggregationStrategy::BUFFERED, }, .kway_fm = { diff --git a/kaminpar-shm/refinement/lp/lp_refiner.cc b/kaminpar-shm/refinement/lp/lp_refiner.cc index 0c19160f..917fd852 100644 --- a/kaminpar-shm/refinement/lp/lp_refiner.cc +++ b/kaminpar-shm/refinement/lp/lp_refiner.cc @@ -44,8 +44,8 @@ class LPRefinerImpl final Base::set_max_degree(_r_ctx.lp.large_degree_threshold); Base::set_max_num_neighbors(_r_ctx.lp.max_num_neighbors); Base::set_use_two_phases(_r_ctx.lp.use_two_phases); - Base::set_second_phase_select_mode(_r_ctx.lp.second_phase_select_mode); - Base::set_second_phase_aggregation_mode(_r_ctx.lp.second_phase_aggregation_mode); + Base::set_second_phase_selection_strategy(_r_ctx.lp.second_phase_selection_strategy); + Base::set_second_phase_aggregation_strategy(_r_ctx.lp.second_phase_aggregation_strategy); Base::set_relabel_before_second_phase(false); } From 9901949727103d948137b85e37b815044c63c4ea Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 11:40:54 +0200 Subject: [PATCH 3/7] refactor(compressed-graph): restructure code --- kaminpar-common/constexpr_utils.h | 27 +- kaminpar-common/varint_run_length_codec.h | 27 +- kaminpar-common/varint_stream_codec.h | 23 +- kaminpar-shm/context.cc | 7 +- kaminpar-shm/context_io.cc | 7 +- .../datastructures/compressed_graph.cc | 157 ++++---- .../datastructures/compressed_graph.h | 363 +++++++++--------- kaminpar-shm/datastructures/csr_graph.h | 2 +- kaminpar-shm/kaminpar.h | 7 +- tests/common/varint_run_length_codec_test.cc | 4 +- tests/common/varint_stream_codec_test.cc | 2 +- .../coarsening/cluster_contraction_test.cc | 2 +- .../datastructures/compressed_graph_test.cc | 52 +-- 13 files changed, 329 insertions(+), 351 deletions(-) diff --git a/kaminpar-common/constexpr_utils.h b/kaminpar-common/constexpr_utils.h index e0c58fc3..9a43211a 100644 --- a/kaminpar-common/constexpr_utils.h +++ b/kaminpar-common/constexpr_utils.h @@ -13,7 +13,7 @@ namespace kaminpar { /*! - * Invokes a function either directly or indirectly depending on a lambda. + * Invokes a function either directly or indirectly. * * @tparam direct Whether to call the function directly. * @tparam Lambda The type of the lambda to pass to the function. @@ -22,14 +22,35 @@ namespace kaminpar { * @param fun The function to invoke. */ template -constexpr void invoke_maybe_indirect(Lambda &&l, Function &&fun) { +constexpr void invoke_indirect(Lambda &&l, Function &&fun) { if constexpr (direct) { - fun(std::forward(l)); + return fun(std::forward(l)); } else { l([&](auto &&l2) { fun(std::forward(l2)); }); } } +/*! + * Invokes a function either directly or indirectly and returns its return value. + * + * @tparam direct Whether to call the function directly. + * @tparam Value The type of the return value of the function. + * @tparam Lambda The type of the lambda to pass to the function. + * @tparam Function The type of the function to invoke. + * @param l The lambda to pass to the function. + * @param fun The function to invoke. + */ +template +constexpr Value invoke_indirect2(Lambda &&l, Function &&fun) { + if constexpr (direct) { + return fun(std::forward(l)); + } else { + Value val; + l([&](auto &&l2) { val = fun(std::forward(l2)); }); + return val; + } +} + // Utility functions for constexpr loops based on https://stackoverflow.com/a/47563100 template struct Number { static const constexpr auto value = N; diff --git a/kaminpar-common/varint_run_length_codec.h b/kaminpar-common/varint_run_length_codec.h index 6120bfb8..8e545fe1 100644 --- a/kaminpar-common/varint_run_length_codec.h +++ b/kaminpar-common/varint_run_length_codec.h @@ -114,31 +114,30 @@ template class VarIntRunLengthDecoder { * Constructs a new VarIntRunLengthDecoder. * * @param ptr The pointer to the memory location where the encoded integers are stored. + * @param count The number of integers that are encoded. */ - VarIntRunLengthDecoder(const std::uint8_t *ptr) : _ptr(ptr) {} + VarIntRunLengthDecoder(const std::uint8_t *ptr, const std::size_t count) + : _ptr(ptr), + _count(count) {} /*! * Decodes the encoded integers. * - * @param max_decoded The amount of integers to decode. * @param l The function to be called with the decoded integers, i.e. the function has one * parameter of type Int. */ - template void decode(const std::size_t max_decoded, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + template void decode(Lambda &&l) { + constexpr bool non_stoppable = std::is_void_v>; std::size_t decoded = 0; - while (decoded < max_decoded) { + while (decoded < _count) { const std::uint8_t run_header = *_ptr++; if constexpr (sizeof(Int) == 4) { - std::uint8_t run_length = (run_header >> 2) + 1; + const std::uint8_t run_length = (run_header >> 2) + 1; const std::uint8_t run_size = (run_header & 0b00000011) + 1; decoded += run_length; - if (decoded > max_decoded) { - run_length -= decoded - max_decoded; - } if constexpr (non_stoppable) { decode32(run_length, run_size, std::forward(l)); @@ -149,13 +148,10 @@ template class VarIntRunLengthDecoder { } } } else if constexpr (sizeof(Int) == 8) { - std::uint8_t run_length = (run_header >> 3) + 1; + const std::uint8_t run_length = (run_header >> 3) + 1; const std::uint8_t run_size = (run_header & 0b00000111) + 1; decoded += run_length; - if (decoded > max_decoded) { - run_length -= decoded - max_decoded; - } if constexpr (non_stoppable) { decode64(run_length, run_size, std::forward(l)); @@ -171,10 +167,11 @@ template class VarIntRunLengthDecoder { private: const std::uint8_t *_ptr; + const std::size_t _count; template bool decode32(const std::uint8_t run_length, const std::uint8_t run_size, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + constexpr bool non_stoppable = std::is_void_v>; switch (run_size) { case 1: @@ -246,7 +243,7 @@ template class VarIntRunLengthDecoder { template bool decode64(const std::uint8_t run_length, const std::uint8_t run_size, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + constexpr bool non_stoppable = std::is_void_v>; switch (run_size) { case 1: diff --git a/kaminpar-common/varint_stream_codec.h b/kaminpar-common/varint_stream_codec.h index f6db0742..23712e60 100644 --- a/kaminpar-common/varint_stream_codec.h +++ b/kaminpar-common/varint_stream_codec.h @@ -164,9 +164,9 @@ template class VarIntStreamDecoder { return shuffle_table; } - static const constexpr std::array kLengthTable = create_length_table(); + static constexpr const std::array kLengthTable = create_length_table(); - static const constexpr std::array, 256> kShuffleTable = + static constexpr const std::array, 256> kShuffleTable = create_shuffle_table(); public: @@ -185,18 +185,13 @@ template class VarIntStreamDecoder { /*! * Decodes the encoded integers. * - * @param max_count The amount of integers to decode, it has to be less then the amount of - * integers stored that are stored. * @param l The function to be called with the decoded integers, i.e. the function has one * parameter of type Int. */ - template void decode(const std::size_t max_count, Lambda &&l) { - constexpr bool non_stoppable = std::is_void>::value; + template void decode(Lambda &&l) { + constexpr bool non_stoppable = std::is_void_v>; - // max_count = std::min(max_count, _count); - - const std::size_t control_bytes = max_count / 4; - for (std::size_t i = 0; i < control_bytes; ++i) { + for (std::size_t i = 0; i < _control_bytes; ++i) { const std::uint8_t control_byte = _control_bytes_ptr[i]; const std::uint8_t length = kLengthTable[control_byte]; @@ -230,9 +225,9 @@ template class VarIntStreamDecoder { } } - switch (max_count % 4) { + switch (_count % 4) { case 1: { - const std::uint8_t control_byte = _control_bytes_ptr[control_bytes]; + const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); @@ -248,7 +243,7 @@ template class VarIntStreamDecoder { break; } case 2: { - const std::uint8_t control_byte = _control_bytes_ptr[control_bytes]; + const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); @@ -269,7 +264,7 @@ template class VarIntStreamDecoder { break; } case 3: { - const std::uint8_t control_byte = _control_bytes_ptr[control_bytes]; + const std::uint8_t control_byte = _control_bytes_ptr[_control_bytes]; const std::uint8_t *shuffle_mask = kShuffleTable[control_byte].data(); __m128i data = _mm_loadu_si128((const __m128i *)_data_ptr); diff --git a/kaminpar-shm/context.cc b/kaminpar-shm/context.cc index 666237ee..2d6f469f 100644 --- a/kaminpar-shm/context.cc +++ b/kaminpar-shm/context.cc @@ -31,9 +31,10 @@ void GraphCompressionContext::setup(const Graph &graph) { dismissed = false; compression_ratio = compressed_graph->compression_ratio(); size_reduction = compressed_graph->size_reduction(); - high_degree_count = compressed_graph->high_degree_count(); - part_count = compressed_graph->part_count(); - interval_count = compressed_graph->interval_count(); + num_high_degree_nodes = compressed_graph->num_high_degree_nodes(); + num_high_degree_parts = compressed_graph->num_high_degree_parts(); + num_interval_nodes = compressed_graph->num_interval_nodes(); + num_intervals = compressed_graph->num_intervals(); } else { dismissed = true; } diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc index 65ce9a0e..5be44de6 100644 --- a/kaminpar-shm/context_io.cc +++ b/kaminpar-shm/context_io.cc @@ -392,9 +392,10 @@ void print(const GraphCompressionContext &c_ctx, std::ostream &out) { out << c_ctx.compression_ratio << " [size reduction: " << (c_ctx.size_reduction / (float)(1024 * 1024)) << " mb]" << "\n"; - out << " High Degree Count: " << c_ctx.high_degree_count << "\n"; - out << " Part Count: " << c_ctx.part_count << "\n"; - out << " Interval Count: " << c_ctx.interval_count << "\n"; + out << " High Degree Node Count: " << c_ctx.num_high_degree_nodes << "\n"; + out << " High Degree Part Count: " << c_ctx.num_high_degree_parts << "\n"; + out << " Interval Node Count: " << c_ctx.num_interval_nodes << "\n"; + out << " Interval Count: " << c_ctx.num_intervals << "\n"; if (debug::kTrackVarintStats) { const auto &stats = debug::varint_stats_global(); diff --git a/kaminpar-shm/datastructures/compressed_graph.cc b/kaminpar-shm/datastructures/compressed_graph.cc index 5091ac46..e683db89 100644 --- a/kaminpar-shm/datastructures/compressed_graph.cc +++ b/kaminpar-shm/datastructures/compressed_graph.cc @@ -1,5 +1,5 @@ /******************************************************************************* - * Compressed static graph representations. + * Compressed static graph representation. * * @file: compressed_graph.cc * @author: Daniel Salwasser @@ -21,9 +21,10 @@ CompressedGraph::CompressedGraph( EdgeID edge_count, NodeID max_degree, bool sorted, - std::size_t high_degree_count, - std::size_t part_count, - std::size_t interval_count + std::size_t num_high_degree_nodes, + std::size_t num_high_degree_parts, + std::size_t num_interval_nodes, + std::size_t num_intervals ) : _nodes(std::move(nodes)), _compressed_edges(std::move(compressed_edges)), @@ -32,27 +33,27 @@ CompressedGraph::CompressedGraph( _edge_count(edge_count), _max_degree(max_degree), _sorted(sorted), - _high_degree_count(high_degree_count), - _part_count(part_count), - _interval_count(interval_count) { - KASSERT(kHighDegreeEncoding || _high_degree_count == 0); - KASSERT(kHighDegreeEncoding || _part_count == 0); - KASSERT(kIntervalEncoding || interval_count == 0); + _num_high_degree_nodes(num_high_degree_nodes), + _num_high_degree_parts(num_high_degree_parts), + _num_interval_nodes(num_interval_nodes), + _num_intervals(num_intervals) { + KASSERT(kHighDegreeEncoding || _num_high_degree_nodes == 0); + KASSERT(kHighDegreeEncoding || _num_high_degree_parts == 0); + KASSERT(kIntervalEncoding || _num_interval_nodes == 0); + KASSERT(kIntervalEncoding || _num_intervals == 0); if (_node_weights.empty()) { _total_node_weight = static_cast(n()); _max_node_weight = 1; } else { - _total_node_weight = - std::accumulate(_node_weights.begin(), _node_weights.end(), static_cast(0)); - _max_node_weight = *std::max_element(_node_weights.begin(), _node_weights.end()); + _total_node_weight = parallel::accumulate(_node_weights, static_cast(0)); + _max_node_weight = parallel::max_element(_node_weights); } if (_edge_weights.empty()) { _total_edge_weight = static_cast(m()); } else { - _total_edge_weight = - std::accumulate(_edge_weights.begin(), _edge_weights.end(), static_cast(0)); + _total_edge_weight = parallel::accumulate(_edge_weights, static_cast(0)); } init_degree_buckets(); @@ -62,9 +63,25 @@ void CompressedGraph::init_degree_buckets() { KASSERT(std::all_of(_buckets.begin(), _buckets.end(), [](const auto n) { return n == 0; })); if (sorted()) { - for (const NodeID u : nodes()) { - ++_buckets[degree_bucket(degree(u)) + 1]; + constexpr std::size_t kNumBuckets = kNumberOfDegreeBuckets + 1; + tbb::enumerable_thread_specific> buckets_ets([&] { + return std::array{}; + }); + + tbb::parallel_for(tbb::blocked_range(0, n()), [&](const auto &r) { + auto &buckets = buckets_ets.local(); + for (NodeID u = r.begin(); u != r.end(); ++u) { + ++buckets[degree_bucket(degree(u)) + 1]; + } + }); + + std::fill(_buckets.begin(), _buckets.end(), 0); + for (auto &local_buckets : buckets_ets) { + for (std::size_t i = 0; i < kNumBuckets; ++i) { + _buckets[i] += local_buckets[i]; + } } + auto last_nonempty_bucket = std::find_if(_buckets.rbegin(), _buckets.rend(), [](const auto n) { return n > 0; }); _number_of_buckets = std::distance(_buckets.begin(), (last_nonempty_bucket + 1).base()); @@ -81,9 +98,8 @@ void CompressedGraph::update_total_node_weight() { _total_node_weight = n(); _max_node_weight = 1; } else { - _total_node_weight = - std::accumulate(_node_weights.begin(), _node_weights.end(), static_cast(0)); - _max_node_weight = *std::max_element(_node_weights.begin(), _node_weights.end()); + _total_node_weight = parallel::accumulate(_node_weights, static_cast(0)); + _max_node_weight = parallel::max_element(_node_weights); } } @@ -128,7 +144,7 @@ void CompressedGraph::integrate_isolated_nodes() { _buckets[1 + i] += isolated_nodes; } - // If the graph has only isolated nodes then there is one afterwards + // If the graph has only isolated nodes then there is one bucket afterwards if (_number_of_buckets == 0) { _number_of_buckets = 1; } @@ -169,7 +185,7 @@ CompressedGraph CompressedGraphBuilder::compress(const CSRGraph &graph) { } for (const auto [incident_edge, adjacent_node] : graph.neighbors(node)) { - neighbourhood.push_back(std::make_pair(adjacent_node, graph.edge_weight(incident_edge))); + neighbourhood.emplace_back(adjacent_node, graph.edge_weight(incident_edge)); } builder.add_node(node, neighbourhood); @@ -220,17 +236,18 @@ void CompressedGraphBuilder::init( _max_degree = 0; _first_isolated_node = true; - _last_real_edge = 0; + _effective_last_edge_offset = 0; - _high_degree_count = 0; - _part_count = 0; - _interval_count = 0; + _num_high_degree_nodes = 0; + _num_high_degree_parts = 0; + _num_interval_nodes = 0; + _num_intervals = 0; } void CompressedGraphBuilder::add_node( const NodeID node, std::vector> &neighbourhood ) { - // Store the index into the compressed edge array of the start of the neighbourhood of the node + // Store the offset into the compressed edge array of the start of the neighbourhood for the node // in its entry in the node array. _nodes.write(node, static_cast(_cur_compressed_edges - _compressed_edges)); @@ -246,7 +263,8 @@ void CompressedGraphBuilder::add_node( if constexpr (CompressedGraph::kIsolatedNodesSeparation) { if (_first_isolated_node) { _first_isolated_node = false; - _last_real_edge = static_cast(_cur_compressed_edges - _compressed_edges); + _effective_last_edge_offset = + static_cast(_cur_compressed_edges - _compressed_edges); const EdgeID first_edge_gap = _edge_count - node; if constexpr (CompressedGraph::kIntervalEncoding) { @@ -256,7 +274,7 @@ void CompressedGraphBuilder::add_node( _cur_compressed_edges += varint_encode(first_edge_gap, _cur_compressed_edges); } } else { - _nodes.write(node, _last_real_edge); + _nodes.write(node, _effective_last_edge_offset); } } @@ -266,12 +284,12 @@ void CompressedGraphBuilder::add_node( KASSERT(!CompressedGraph::kIsolatedNodesSeparation || _first_isolated_node); _max_degree = std::max(_max_degree, degree); - // Store a pointer to the first byte of the first edge in the compressed edge array which encodes - // in one of its bits whether interval encoding is used for this node, i.e. whether the nodes has + // Store a pointer to the first byte of the first edge of this neighborhood. This byte encodes in + // one of its bits whether interval encoding is used for this node, i.e., whether the nodes has // intervals in its neighbourhood. std::uint8_t *marked_byte = _cur_compressed_edges; - // Store only the first edge for the source node. The degree can be obtained from determining the + // Store only the first edge for the source node. The degree can be obtained by determining the // difference between the first edge ids of a node and the next node. Additionally, store the // first edge as a gap when the isolated nodes are continuously stored at the end of the nodes // array. @@ -303,6 +321,10 @@ void CompressedGraphBuilder::add_node( return a.first < b.first; }); + // If high-degree encoding is used then split the neighborhood if the degree crosses a threshold. + // The neighborhood is split into equally sized parts (except possible the last part) and each + // part is encoded independently. Furthermore, the offset at which the part is encoded is also + // stored. if constexpr (CompressedGraph::kHighDegreeEncoding) { const bool split_neighbourhood = degree >= CompressedGraph::kHighDegreeThreshold; @@ -316,21 +338,22 @@ void CompressedGraphBuilder::add_node( _cur_compressed_edges += sizeof(NodeID) * part_count; for (NodeID i = 0; i < part_count; ++i) { - auto part_begin = neighbourhood.begin() + i * CompressedGraph::kHighDegreePartLength; + const bool last_part = (i + 1) == part_count; const NodeID part_length = - (i + 1 == part_count) ? last_part_length : CompressedGraph::kHighDegreePartLength; + last_part ? last_part_length : CompressedGraph::kHighDegreePartLength; + + auto part_begin = neighbourhood.begin() + i * CompressedGraph::kHighDegreePartLength; + auto part_end = part_begin + part_length; std::uint8_t *cur_part_ptr = part_ptr + sizeof(NodeID) * i; *((NodeID *)cur_part_ptr) = static_cast(_cur_compressed_edges - part_ptr); - std::vector> part_neighbourhood( - part_begin, part_begin + part_length - ); + std::vector> part_neighbourhood(part_begin, part_end); add_edges(node, nullptr, part_neighbourhood); } - _part_count += part_count; - _high_degree_count += 1; + _num_high_degree_nodes += 1; + _num_high_degree_parts += part_count; return; } } @@ -346,13 +369,13 @@ void CompressedGraphBuilder::set_node_weight(const NodeID node, const NodeWeight } CompressedGraph CompressedGraphBuilder::build() { - // Store in the last entry of the node array the index into the compressed edge array one after - // the last byte belonging to the last node. + // Store in the last entry of the node array the offset one after the last byte belonging to the + // last node. _nodes.write(_nodes.size() - 1, static_cast(_cur_compressed_edges - _compressed_edges)); - // Store at the end of the compressed edge array the (gap of the) edge id of the last edge such - // that the degree of the last node can be computed from the difference between the last two first - // edge ids. + // Store at the end of the compressed edge array the (gap of the) id of the last edge. This + // ensures that the the degree of the last node can be computed from the difference between the + // last two first edge ids. const EdgeID last_edge = _edge_count; if constexpr (CompressedGraph::kIsolatedNodesSeparation) { if (_first_isolated_node) { @@ -364,7 +387,7 @@ CompressedGraph CompressedGraphBuilder::build() { _cur_compressed_edges += varint_encode(last_edge_gap, _cur_compressed_edges); } } else { - _nodes.write(_nodes.size() - 1, _last_real_edge); + _nodes.write(_nodes.size() - 1, _effective_last_edge_offset); } } else { if constexpr (CompressedGraph::kIntervalEncoding) { @@ -375,7 +398,7 @@ CompressedGraph CompressedGraphBuilder::build() { } // Add an additional 15 bytes to the compressed edge array when stream encoding is enabled to - // avoid a possible segmentation fault as the stream decoder reads in 16-byte chunks. + // avoid a possible segmentation fault as the stream decoder reads 16-byte chunks. if constexpr (CompressedGraph::kStreamEncoding) { _cur_compressed_edges += 15; } @@ -407,9 +430,10 @@ CompressedGraph CompressedGraphBuilder::build() { _edge_count, _max_degree, _sorted, - _high_degree_count, - _part_count, - _interval_count + _num_high_degree_nodes, + _num_high_degree_parts, + _num_interval_nodes, + _num_intervals ); } @@ -435,25 +459,25 @@ void CompressedGraphBuilder::add_edges( _edge_weights[_edge_count++] = edge_weight; }; - NodeID neighbour_count = neighbourhood.size(); + NodeID local_degree = neighbourhood.size(); - // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at - // least kIntervalLengthTreshold. Instead of storing all nodes, only store a representation of - // the left extreme i and the length j - i + 1. Left extremes are compressed using the - // differences between each left extreme and the previous right extreme minus 2 (because there - // must be at least one integer between the end of an interval and the beginning of the next - // one), except the first left extreme which is stored directly. The lengths are decremented by - // kIntervalLengthTreshold, the minimum length of an interval. + // Find intervals [i, j] of consecutive adjacent nodes i, i + 1, ..., j - 1, j of length at least + // kIntervalLengthTreshold. Instead of storing all nodes, only encode the left extreme i and the + // length j - i + 1. Left extremes are stored using the differences between each left extreme and + // the previous right extreme minus 2 (because there must be at least one integer between the end + // of an interval and the beginning of the next one), except the first left extreme, which is + // stored directly. The lengths are decremented by kIntervalLengthTreshold, the minimum length of + // an interval. if constexpr (CompressedGraph::kIntervalEncoding) { NodeID interval_count = 0; - // Store the pointer to the interval count and skip the amount of bytes needed to store the + // Save the pointer to the interval count and skip the amount of bytes needed to store the // interval count as we can only determine the amount of intervals after finding all of // them. std::uint8_t *interval_count_ptr = _cur_compressed_edges; _cur_compressed_edges += sizeof(NodeID); - if (neighbourhood.size() >= CompressedGraph::kIntervalLengthTreshold) { + if (local_degree >= CompressedGraph::kIntervalLengthTreshold) { NodeID interval_len = 1; NodeID previous_right_extreme = 2; NodeID prev_adjacent_node = (*neighbourhood.begin()).first; @@ -479,8 +503,8 @@ void CompressedGraphBuilder::add_edges( for (NodeID i = 0; i < interval_len; ++i) { std::pair &incident_edge = *(iter + 1 + i - interval_len); - // Set the adjacent node to the max id to indicate for the gap encoding part that - // the node has been encoded through an interval. + // Set the adjacent node to a special value, which indicates for the gap encoder + // that the node has been encoded through an interval. incident_edge.first = std::numeric_limits::max(); if (_store_edge_weights) { @@ -490,7 +514,7 @@ void CompressedGraphBuilder::add_edges( previous_right_extreme = adjacent_node; - neighbour_count -= interval_len; + local_degree -= interval_len; interval_count += 1; } @@ -516,12 +540,13 @@ void CompressedGraphBuilder::add_edges( } if (interval_count > 0) { - _interval_count += 1; + _num_interval_nodes += 1; + _num_intervals += interval_count; } // If all incident edges have been compressed using intervals then gap encoding cannot be // applied. - if (neighbour_count == 0) { + if (local_degree == 0) { return; } } @@ -547,11 +572,13 @@ void CompressedGraphBuilder::add_edges( } VarIntRunLengthEncoder rl_encoder(_cur_compressed_edges); - VarIntStreamEncoder sv_encoder(_cur_compressed_edges, neighbour_count - 1); + VarIntStreamEncoder sv_encoder(_cur_compressed_edges, local_degree - 1); NodeID prev_adjacent_node = first_adjacent_node; while (iter != neighbourhood.end()) { const auto [adjacent_node, edge_weight] = *iter++; + + // Skip the adjacent node since it has been encoded through an interval. if (adjacent_node == std::numeric_limits::max()) { continue; } diff --git a/kaminpar-shm/datastructures/compressed_graph.h b/kaminpar-shm/datastructures/compressed_graph.h index e7a21cc8..88e07219 100644 --- a/kaminpar-shm/datastructures/compressed_graph.h +++ b/kaminpar-shm/datastructures/compressed_graph.h @@ -1,5 +1,5 @@ /******************************************************************************* - * Compressed static graph representations. + * Compressed static graph representation. * * @file: compressed_graph.h * @author: Daniel Salwasser @@ -132,10 +132,12 @@ class CompressedGraph : public AbstractGraph { * @param edge_count The number of edges stored in the compressed edge array. * @param max_degree The maximum degree of the graph. * @param sorted Whether the nodes are stored by deg-buckets order. - * @param high_degree_count The number of nodes which have high degree. - * @param part_count The number of parts that result from splitting the neighbourhood of high - * degree nodes. - * @param interval_count The number of nodes/parts which use interval encoding. + * @param num_high_degree_nodes The number of nodes that have high degree. + * @param num_high_degree_parts The total number of parts that result from splitting high degree + * neighborhoods. + * @param num_interval_nodes The number of nodes that have at least one interval in its + * neighborhood. + * @param num_intervals The total number of intervals. */ explicit CompressedGraph( CompactStaticArray nodes, @@ -145,9 +147,10 @@ class CompressedGraph : public AbstractGraph { EdgeID edge_count, NodeID max_degree, bool sorted, - std::size_t high_degree_count, - std::size_t part_count, - std::size_t interval_count + std::size_t num_high_degree_nodes, + std::size_t num_high_degree_parts, + std::size_t num_interval_nodes, + std::size_t num_intervals ); CompressedGraph(const CompressedGraph &) = delete; @@ -253,11 +256,11 @@ class CompressedGraph : public AbstractGraph { // Iterators for nodes / edges [[nodiscard]] IotaRange nodes() const final { - return IotaRange(static_cast(0), n()); + return {static_cast(0), n()}; } [[nodiscard]] inline IotaRange edges() const final { - return IotaRange(static_cast(0), m()); + return {static_cast(0), m()}; } // Parallel iteration @@ -278,33 +281,33 @@ class CompressedGraph : public AbstractGraph { const bool is_isolated_node = node_data == next_node_data; if (is_isolated_node) { - return IotaRange(0, 0); + return {0, 0}; } const auto [first_edge, degree, _, __] = decode_header(node, node_data, next_node_data); - return IotaRange(first_edge, first_edge + degree); + return {first_edge, first_edge + degree}; } - template inline void adjacent_nodes(const NodeID node, Lambda &&l) const { - iterate_neighborhood(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { + template void adjacent_nodes(const NodeID node, Lambda &&l) const { + decode_neighborhood(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { l(adjacent_node); }); } - template inline void neighbors(const NodeID node, Lambda &&l) const { - iterate_neighborhood(node, std::forward(l)); + template void neighbors(const NodeID node, Lambda &&l) const { + decode_neighborhood(node, std::forward(l)); } template - inline void neighbors(const NodeID node, const NodeID max_neighbor_count, Lambda &&l) const { - iterate_neighborhood(node, std::forward(l), max_neighbor_count); + void neighbors(const NodeID node, const NodeID max_neighbor_count, Lambda &&l) const { + decode_neighborhood(node, std::forward(l)); } template - inline void pfor_neighbors( + void pfor_neighbors( const NodeID node, const NodeID max_neighbor_count, const NodeID grainsize, Lambda &&l ) const { - iterate_neighborhood(node, std::forward(l), max_neighbor_count); + decode_neighborhood(node, std::forward(l)); } // Graph permutation @@ -320,7 +323,7 @@ class CompressedGraph : public AbstractGraph { return _permutation[node]; } - [[nodiscard]] inline StaticArray &&take_raw_permutation() { + [[nodiscard]] inline StaticArray &&take_raw_permutation() final { return std::move(_permutation); } @@ -354,30 +357,39 @@ class CompressedGraph : public AbstractGraph { // Compressions statistics /*! - * Returns the number of nodes which have high degree. + * Returns the number of nodes that have high degree. * - * @returns The number of nodes which have high degree. + * @returns The number of nodes that have high degree. */ - [[nodiscard]] std::size_t high_degree_count() const { - return _high_degree_count; + [[nodiscard]] std::size_t num_high_degree_nodes() const { + return _num_high_degree_nodes; } /*! - * Returns the number of parts that result from splitting the neighborhood of high degree nodes. + * Returns the total number of parts that result from splitting high degree neighborhoods. * - * @returns The number of parts that result from splitting the neighborhood of high degree nodes. + * @returns The total number of parts that result from splitting high degree neighborhoods. */ - [[nodiscard]] std::size_t part_count() const { - return _part_count; + [[nodiscard]] std::size_t num_high_degree_parts() const { + return _num_high_degree_parts; } /*! - * Returns the number of nodes/parts which use interval encoding. + * Returns the number of nodes that have at least one interval. * - * @returns The number of nodes/parts which use interval encoding. + * @returns The number of nodes that have at least one interval. */ - [[nodiscard]] std::size_t interval_count() const { - return _interval_count; + [[nodiscard]] std::size_t num_interval_nodes() const { + return _num_interval_nodes; + } + + /*! + * Returns the total number of intervals. + * + * @returns The total number of intervals. + */ + [[nodiscard]] std::size_t num_intervals() const { + return _num_intervals; } /*! @@ -399,7 +411,7 @@ class CompressedGraph : public AbstractGraph { compressed_size += m() * sizeof(EdgeWeight); } - return uncompressed_size / (double)compressed_size; + return uncompressed_size / static_cast(compressed_size); } /** @@ -442,7 +454,6 @@ class CompressedGraph : public AbstractGraph { EdgeID _edge_count; NodeID _max_degree; - bool _sorted; NodeWeight _total_node_weight = kInvalidNodeWeight; @@ -454,9 +465,10 @@ class CompressedGraph : public AbstractGraph { std::vector _buckets = std::vector(kNumberOfDegreeBuckets + 1); std::size_t _number_of_buckets = 0; - std::size_t _high_degree_count; - std::size_t _part_count; - std::size_t _interval_count; + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; void init_degree_buckets(); @@ -465,11 +477,10 @@ class CompressedGraph : public AbstractGraph { ) const { const auto [first_edge, next_first_edge, uses_intervals, len] = [&] { if constexpr (CompressedGraph::kIntervalEncoding) { - auto [first_edge, marker_set, len] = marked_varint_decode(node_data); + auto [first_edge, uses_intervals, len] = marked_varint_decode(node_data); auto [next_first_edge, _, __] = marked_varint_decode(next_node_data); - return std::make_tuple(first_edge, next_first_edge, marker_set, len); - + return std::make_tuple(first_edge, next_first_edge, uses_intervals, len); } else { auto [first_edge, len] = varint_decode(node_data); auto [next_first_edge, _] = varint_decode(next_node_data); @@ -488,10 +499,8 @@ class CompressedGraph : public AbstractGraph { } } - template - inline void iterate_neighborhood( - const NodeID node, Lambda &&l, NodeID max_neighbor_count = std::numeric_limits::max() - ) const { + template + void decode_neighborhood(const NodeID node, Lambda &&l) const { const std::uint8_t *data = _compressed_edges.data(); const std::uint8_t *node_data = data + _nodes[node]; @@ -502,174 +511,138 @@ class CompressedGraph : public AbstractGraph { return; } - const auto [first_edge, degree, uses_intervals, len] = - decode_header(node, node_data, next_node_data); + const auto [edge, degree, uses_intervals, len] = decode_header(node, node_data, next_node_data); node_data += len; - max_neighbor_count = std::min(max_neighbor_count, degree); - if constexpr (kHighDegreeEncoding) { - const bool split_neighbourhood = degree >= kHighDegreeThreshold; - - if (split_neighbourhood) { - iterate_high_degree_neighborhood( - node_data, node, first_edge, degree, max_neighbor_count, std::forward(l) - ); + if (degree >= kHighDegreeThreshold) { + decode_parts(node_data, node, edge, degree, std::forward(l)); return; } } - const EdgeID max_edge = first_edge + max_neighbor_count; - invoke_maybe_indirect>( + invoke_indirect>( std::forward(l), - [&, first_edge = first_edge, degree = degree, uses_intervals = uses_intervals](auto &&l2) { - iterate_edges( - node_data, - node, - degree, - first_edge, - max_edge, - uses_intervals, - std::forward(l2) + [&](auto &&l2) { + decode_edges( + node_data, node, edge, degree, uses_intervals, std::forward(l2) ); } ); } - template - inline void iterate_high_degree_neighborhood( + template + void decode_parts( const std::uint8_t *data, const NodeID node, - const NodeID first_edge, + const EdgeID edge, const NodeID degree, - const NodeID max_neighbor_count, Lambda &&l ) const { const NodeID part_count = math::div_ceil(degree, kHighDegreePartLength); - const NodeID max_part_count = - std::min(part_count, math::div_ceil(max_neighbor_count, kHighDegreePartLength)); - const NodeID max_neighbor_rem = ((max_neighbor_count % kHighDegreePartLength) == 0) - ? kHighDegreePartLength - : (max_neighbor_count % kHighDegreePartLength); const auto iterate_part = [&](const NodeID part) { - const std::uint8_t *part_data = data + *((NodeID *)(data + sizeof(NodeID) * part)); - const EdgeID part_first_edge = first_edge + kHighDegreePartLength * part; - - const bool last_part = part + 1 == max_part_count; - - if (last_part) { - const NodeID part_degree = (part == part_count - 1) - ? (degree - kHighDegreePartLength * (part_count - 1)) - : kHighDegreePartLength; - const EdgeID part_max_edge = part_first_edge + max_neighbor_rem; - - invoke_maybe_indirect>( - std::forward(l), - [&](auto &&l2) { - iterate_edges( - part_data, - node, - part_degree, - part_first_edge, - part_max_edge, - true, - std::forward(l2) - ); - } - ); - } else { - const NodeID part_degree = kHighDegreePartLength; - const EdgeID part_max_edge = part_first_edge + part_degree; - - invoke_maybe_indirect>( - std::forward(l), - [&](auto &&l2) { - iterate_edges( - part_data, - node, - part_degree, - part_first_edge, - part_max_edge, - true, - std::forward(l2) - ); - } - ); - } + const NodeID part_offset = *((NodeID *)(data + sizeof(NodeID) * part)); + const std::uint8_t *part_data = data + part_offset; + + const NodeID part_count_m1 = part_count - 1; + const bool last_part = part == part_count_m1; + + const EdgeID part_edge = edge + kHighDegreePartLength * part; + const NodeID part_degree = + last_part ? (degree - kHighDegreePartLength * part_count_m1) : kHighDegreePartLength; + + return invoke_indirect2, bool>( + std::forward(l), + [&](auto &&l2) { + return decode_edges( + part_data, node, part_edge, part_degree, true, std::forward(l2) + ); + } + ); }; if constexpr (parallel) { - tbb::parallel_for( - 0, max_part_count, std::forward(iterate_part) - ); + tbb::parallel_for(0, part_count, std::forward(iterate_part)); } else { - for (NodeID part = 0; part < max_part_count; ++part) { - iterate_part(part); + for (NodeID part = 0; part < part_count; ++part) { + const bool stop = iterate_part(part); + if (stop) { + return; + } } } } - template - inline void iterate_edges( + template + bool decode_edges( const std::uint8_t *data, const NodeID node, + EdgeID edge, const NodeID degree, - const EdgeID first_edge, - const EdgeID max_edge, - const bool uses_intervals, + bool uses_intervals, Lambda &&l ) const { - constexpr bool non_stoppable = - std::is_void>::value; - - EdgeID edge = first_edge; - EdgeID gap_edges = degree - 1; + const EdgeID max_edge = edge + degree; if constexpr (kIntervalEncoding) { if (uses_intervals) { - const NodeID interval_count = *((NodeID *)data); - data += sizeof(NodeID); - - NodeID previous_right_extreme = 2; - for (NodeID i = 0; i < interval_count; ++i) { - const auto [left_extreme_gap, left_extreme_gap_len] = varint_decode(data); - data += left_extreme_gap_len; - - const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); - data += interval_length_gap_len; - - const NodeID cur_left_extreme = left_extreme_gap + previous_right_extreme - 2; - const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; - previous_right_extreme = cur_left_extreme + cur_interval_len - 1; - - const NodeID max_interval_len = [&] { - if constexpr (max_edges) { - return std::min(cur_interval_len, static_cast(max_edge - edge)); - } else { - return cur_interval_len; - } - }(); - gap_edges -= cur_interval_len; - - for (NodeID j = 0; j < max_interval_len; ++j) { - if constexpr (non_stoppable) { - l(edge++, cur_left_extreme + j); - } else { - const bool stop = l(edge++, cur_left_extreme + j); - if (stop) { - return; - } - } - } + const bool stop = decode_intervals(data, edge, std::forward(l)); + if (stop) { + return true; + } + + if (edge == max_edge) { + return false; } } } - if (edge == max_edge) { - return; + return decode_gaps(data, node, edge, max_edge, std::forward(l)); + } + + template + bool decode_intervals(const std::uint8_t *&data, EdgeID &edge, Lambda &&l) const { + constexpr bool non_stoppable = std::is_void_v>; + + const NodeID interval_count = *((NodeID *)data); + data += sizeof(NodeID); + + NodeID previous_right_extreme = 2; + for (NodeID i = 0; i < interval_count; ++i) { + const auto [left_extreme_gap, left_extreme_gap_len] = varint_decode(data); + data += left_extreme_gap_len; + + const auto [interval_length_gap, interval_length_gap_len] = varint_decode(data); + data += interval_length_gap_len; + + const NodeID cur_left_extreme = left_extreme_gap + previous_right_extreme - 2; + const NodeID cur_interval_len = interval_length_gap + kIntervalLengthTreshold; + previous_right_extreme = cur_left_extreme + cur_interval_len - 1; + + for (NodeID j = 0; j < cur_interval_len; ++j) { + if constexpr (non_stoppable) { + l(edge, cur_left_extreme + j); + } else { + const bool stop = l(edge, cur_left_extreme + j); + if (stop) { + return true; + } + } + + edge += 1; + } } + return false; + } + + template + bool decode_gaps( + const std::uint8_t *data, NodeID node, EdgeID &edge, const EdgeID max_edge, Lambda &&l + ) const { + constexpr bool non_stoppable = std::is_void_v>; + const auto [first_gap, first_gap_len] = signed_varint_decode(data); data += first_gap_len; @@ -677,13 +650,14 @@ class CompressedGraph : public AbstractGraph { NodeID prev_adjacent_node = first_adjacent_node; if constexpr (non_stoppable) { - l(edge++, first_adjacent_node); + l(edge, first_adjacent_node); } else { - const bool stop = l(edge++, first_adjacent_node); + const bool stop = l(edge, first_adjacent_node); if (stop) { - return; + return true; } } + edge += 1; const auto handle_gap = [&](const NodeID gap) { const NodeID adjacent_node = gap + prev_adjacent_node + 1; @@ -697,11 +671,11 @@ class CompressedGraph : public AbstractGraph { }; if constexpr (kRunLengthEncoding) { - VarIntRunLengthDecoder rl_decoder(data); - rl_decoder.decode(max_edge - edge, std::forward(handle_gap)); + VarIntRunLengthDecoder rl_decoder(data, max_edge - edge); + rl_decoder.decode(std::forward(handle_gap)); } else if constexpr (kStreamEncoding) { - VarIntStreamDecoder sv_encoder(data, gap_edges); - sv_encoder.decode(max_edge - edge, std::forward(handle_gap)); + VarIntStreamDecoder sv_encoder(data, max_edge - edge); + sv_encoder.decode(std::forward(handle_gap)); } else { while (edge != max_edge) { const auto [gap, gap_len] = varint_decode(data); @@ -711,21 +685,25 @@ class CompressedGraph : public AbstractGraph { prev_adjacent_node = adjacent_node; if constexpr (non_stoppable) { - l(edge++, adjacent_node); + l(edge, adjacent_node); } else { - const bool stop = l(edge++, adjacent_node); + const bool stop = l(edge, adjacent_node); if (stop) { - return; + return true; } } + + edge += 1; } } + + return false; } }; /*! - * A builder that constructs compressed graphs in a single read pass. It does this by overcommiting - * memory for the compressed edge array. + * A builder that constructs compressed graphs in a single read pass. It does this by + * overcommiting memory for the compressed edge array. */ class CompressedGraphBuilder { public: @@ -760,7 +738,7 @@ class CompressedGraphBuilder { * @param edge_count The number of edges of the graph to compress. * @param store_node_weights Whether node weights are stored. * @param store_edge_weights Whether edge weights are stored. - * @param sorted Whether the nodes to add are stored by deg-buckets order. + * @param sorted Whether the nodes to add are stored in degree-bucket order. */ void init( const NodeID node_count, @@ -774,8 +752,8 @@ class CompressedGraphBuilder { * Adds a node to the compressed graph, modifying the neighbourhood vector. * * @param node The node to add. - * @param neighbourhood The neighbourhood of the node to add, i.e. the adjacent nodes and the edge - * weight. + * @param neighbourhood The neighbourhood of the node to add, which consits of the adjacent + * nodes and the corresponding edge weights. */ void add_node(const NodeID node, std::vector> &neighbourhood); @@ -800,21 +778,21 @@ class CompressedGraphBuilder { * * @return The used memory of the compressed edge array. */ - std::size_t edge_array_size() const; + [[nodiscard]] std::size_t edge_array_size() const; /*! * Returns the total weight of the nodes that have been added. * * @return The total weight of the nodes that have been added. */ - std::int64_t total_node_weight() const; + [[nodiscard]] std::int64_t total_node_weight() const; /*! * Returns the total weight of the edges that have been added. * * @return The total weight of the edges that have been added. */ - std::int64_t total_edge_weight() const; + [[nodiscard]] std::int64_t total_edge_weight() const; private: CompactStaticArray _nodes; @@ -835,11 +813,12 @@ class CompressedGraphBuilder { NodeID _max_degree; bool _first_isolated_node; - EdgeID _last_real_edge; + EdgeID _effective_last_edge_offset; - std::size_t _high_degree_count; - std::size_t _part_count; - std::size_t _interval_count; + std::size_t _num_high_degree_nodes; + std::size_t _num_high_degree_parts; + std::size_t _num_interval_nodes; + std::size_t _num_intervals; void add_edges( const NodeID node, diff --git a/kaminpar-shm/datastructures/csr_graph.h b/kaminpar-shm/datastructures/csr_graph.h index c06997c2..4fc5b71b 100644 --- a/kaminpar-shm/datastructures/csr_graph.h +++ b/kaminpar-shm/datastructures/csr_graph.h @@ -322,7 +322,7 @@ class AbstractCSRGraph : public AbstractGraph { [&](const tbb::blocked_range range) { const auto end = range.end(); - invoke_maybe_indirect>( + invoke_indirect>( std::forward(l), [&](auto &&l2) { for (EdgeID e = range.begin(); e < end; ++e) { diff --git a/kaminpar-shm/kaminpar.h b/kaminpar-shm/kaminpar.h index b39ac780..e7ef5e93 100644 --- a/kaminpar-shm/kaminpar.h +++ b/kaminpar-shm/kaminpar.h @@ -399,9 +399,10 @@ struct GraphCompressionContext { bool dismissed; double compression_ratio; std::int64_t size_reduction; - std::size_t high_degree_count; - std::size_t part_count; - std::size_t interval_count; + std::size_t num_high_degree_nodes; + std::size_t num_high_degree_parts; + std::size_t num_interval_nodes; + std::size_t num_intervals; void setup(const Graph &graph); }; diff --git a/tests/common/varint_run_length_codec_test.cc b/tests/common/varint_run_length_codec_test.cc index 0d876fcd..a5e30aa4 100644 --- a/tests/common/varint_run_length_codec_test.cc +++ b/tests/common/varint_run_length_codec_test.cc @@ -28,9 +28,9 @@ template void test_run_length_codec() { } rl_encoder.flush(); - VarIntRunLengthDecoder rl_decoder(ptr.get()); + VarIntRunLengthDecoder rl_decoder(ptr.get(), values.size()); std::size_t i = 0; - rl_decoder.decode(values.size(), [&](const Int value) { EXPECT_EQ(values[i++], value); }); + rl_decoder.decode([&](const Int value) { EXPECT_EQ(values[i++], value); }); EXPECT_EQ(i, values.size()); } diff --git a/tests/common/varint_stream_codec_test.cc b/tests/common/varint_stream_codec_test.cc index 01977d53..bc60d75e 100644 --- a/tests/common/varint_stream_codec_test.cc +++ b/tests/common/varint_stream_codec_test.cc @@ -15,7 +15,7 @@ template void test_varint_stream(const std::vector &values) VarIntStreamDecoder decoder(ptr.get(), values.size()); std::size_t i = 0; - decoder.decode(values.size(), [&](const Int value) { EXPECT_EQ(values[i++], value); }); + decoder.decode([&](const Int value) { EXPECT_EQ(values[i++], value); }); EXPECT_EQ(i, values.size()); } diff --git a/tests/shm/coarsening/cluster_contraction_test.cc b/tests/shm/coarsening/cluster_contraction_test.cc index 5180b59b..2f8ad835 100644 --- a/tests/shm/coarsening/cluster_contraction_test.cc +++ b/tests/shm/coarsening/cluster_contraction_test.cc @@ -149,7 +149,7 @@ TEST(GraphPermutationTest, PermutationByNodeDegreeIsCorrect) { // 1-2-0 // |/ // 4 - const StaticArray nodes = static_array::create({0, 2, 3, 7, 8, 10, 10}); + const StaticArray nodes = static_array::create({0, 2, 3, 7, 8, 10, 10}); const auto permutations = graph::sort_by_degree_buckets(nodes); const auto &permutation = permutations.old_to_new; diff --git a/tests/shm/datastructures/compressed_graph_test.cc b/tests/shm/datastructures/compressed_graph_test.cc index 76ee7a2e..b707eef7 100644 --- a/tests/shm/datastructures/compressed_graph_test.cc +++ b/tests/shm/datastructures/compressed_graph_test.cc @@ -1,4 +1,3 @@ -#include #include #include @@ -6,6 +5,7 @@ #include "tests/shm/graph_factories.h" #include "kaminpar-shm/datastructures/compressed_graph.h" +#include "kaminpar-shm/datastructures/csr_graph.h" #include "kaminpar-shm/graphutils/permutator.h" #define HIGH_DEGREE_NUM (CompressedGraph::kHighDegreeThreshold * 5) @@ -37,50 +37,6 @@ template static bool operator==(const IotaRange &a, const IotaRa return a.begin() == b.begin() && a.end() == b.end(); }; -static void print_csr_graph(const CSRGraph &graph) { - std::cout << "Nodes: " << graph.n() << ", edges: " << graph.m() - << ", edge weights: " << (graph.edge_weighted() ? "yes" : "no") << "\n"; - - for (const NodeID node : graph.nodes()) { - std::cout << "Node " << node << ": "; - - for (const auto [incident_edge, adjacent_node] : graph.neighbors(node)) { - std::cout << adjacent_node; - - if (graph.edge_weighted()) { - std::cout << ' ' << graph.edge_weight(incident_edge); - } - - std::cout << ", "; - } - - std::cout << '\n'; - } -} - -static void print_compressed_graph(const Graph &graph) { - const auto &csr_graph = *dynamic_cast(graph.underlying_graph()); - const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); - - const auto &nodes = compressed_graph.raw_nodes(); - const auto &compressed_edges = compressed_graph.raw_compressed_edges(); - - std::cout << "Nodes: " << nodes.size() << ", edges: " << compressed_edges.size() << "\n\n"; - for (NodeID node = 0; node < nodes.size() - 1; ++node) { - std::cout << "Node: " << node << ", offset: " << nodes[node] << '\n'; - - const std::uint8_t *start = compressed_edges.data() + nodes[node]; - const std::uint8_t *end = compressed_edges.data() + nodes[node + 1]; - - while (start < end) { - std::cout << std::bitset<8>(*start++) << ' '; - } - std::cout << '\n'; - } - - std::cout << '\n'; -} - static void test_compressed_graph_size(const Graph &graph) { const auto &csr_graph = *dynamic_cast(graph.underlying_graph()); const auto compressed_graph = CompressedGraphBuilder::compress(csr_graph); @@ -162,7 +118,7 @@ template static void test_compressed_graph_adjacent_nodes_opera EXPECT_EQ(graph_neighbours.size(), compressed_graph_neighbours.size()); - if (!rearrange) { + if constexpr (!rearrange) { std::sort(graph_neighbours.begin(), graph_neighbours.end()); std::sort(compressed_graph_neighbours.begin(), compressed_graph_neighbours.end()); } @@ -204,7 +160,7 @@ template static void test_compressed_graph_neighbors_operation( EXPECT_EQ(graph_incident_edges.size(), compressed_graph_incident_edges.size()); - if (!rearrange) { + if constexpr (!rearrange) { std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); @@ -269,7 +225,7 @@ static void test_compressed_graph_neighbors_lambda_max_operation(Graph graph) { } TEST(CompressedGraphTest, compressed_graph_neighbors_lambda_max_operation) { - TEST_ON_ALL_GRAPHS(test_compressed_graph_neighbors_lambda_max_operation); + // TEST_ON_ALL_GRAPHS(test_compressed_graph_neighbors_lambda_max_operation); } static void test_compressed_graph_pfor_neighbors_operation(const Graph &graph) { From e3894606f2b2d3890de3b0b3db8de3ad2a78f2f2 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 11:45:02 +0200 Subject: [PATCH 4/7] fix(benchmarks): adapt benchmarks to code changes --- .../shm_compressed_graph_benchmark.cc | 386 +----------------- .../shm_label_propagation_benchmark.cc | 18 +- .../shm_variable_length_codec_benchmark.cc | 6 +- apps/tools/shm_graph_compression_tool.cc | 6 + apps/tools/shm_graph_properties_tool.cc | 3 +- 5 files changed, 23 insertions(+), 396 deletions(-) diff --git a/apps/benchmarks/shm_compressed_graph_benchmark.cc b/apps/benchmarks/shm_compressed_graph_benchmark.cc index 74490f9c..c6f40de3 100644 --- a/apps/benchmarks/shm_compressed_graph_benchmark.cc +++ b/apps/benchmarks/shm_compressed_graph_benchmark.cc @@ -40,7 +40,6 @@ template static inline void do_not_optimize(T value) { } template static void benchmark_degree(const Graph &graph) { - SCOPED_HEAP_PROFILER("Degree"); SCOPED_TIMER("Degree"); for (const auto node : graph.nodes()) { @@ -49,7 +48,6 @@ template static void benchmark_degree(const Graph &graph) { } template static void benchmark_incident_edges(const Graph &graph) { - SCOPED_HEAP_PROFILER("Incident Edges"); SCOPED_TIMER("Incident Edges"); for (const auto node : graph.nodes()) { @@ -60,7 +58,6 @@ template static void benchmark_incident_edges(const Graph &grap } template static void benchmark_adjacent_nodes(const Graph &graph) { - SCOPED_HEAP_PROFILER("Adjacent Nodes"); SCOPED_TIMER("Adjacent Nodes"); for (const auto node : graph.nodes()) { @@ -69,7 +66,6 @@ template static void benchmark_adjacent_nodes(const Graph &grap } template static void benchmark_neighbors(const Graph &graph) { - SCOPED_HEAP_PROFILER("Neighbors"); SCOPED_TIMER("Neighbors"); for (const auto node : graph.nodes()) { @@ -81,7 +77,6 @@ template static void benchmark_neighbors(const Graph &graph) { } template static void benchmark_pfor_neighbors(const Graph &graph) { - SCOPED_HEAP_PROFILER("Parallel For Neighbors"); SCOPED_TIMER("Parallel For Neighbors"); for (const auto node : graph.nodes()) { @@ -97,326 +92,9 @@ template static void benchmark_pfor_neighbors(const Graph &grap } } -static void expect_equal_size(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - if (graph.n() != compressed_graph.n()) { - LOG_ERROR << "The uncompressed graph has " << graph.n() - << " nodes and the compressed graph has " << compressed_graph.n() << " nodes!"; - return; - } - - if (graph.m() != compressed_graph.m()) { - LOG_ERROR << "The uncompressed graph has " << graph.m() - << " edges and the compressed graph has " << compressed_graph.m() << " edges!"; - return; - } -} - -static void expect_equal_nodes(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - if (graph.nodes() != compressed_graph.nodes()) { - LOG_ERROR << "The nodes of the compressed and uncompressed graph do not match!"; - return; - } -} - -static void expect_equal_edges(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - if (graph.edges() != compressed_graph.edges()) { - LOG_ERROR << "The edges of the compressed and uncompressed graph do not match!"; - return; - } -} - -static void expect_equal_degree(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - for (const auto node : graph.nodes()) { - if (graph.degree(node) != compressed_graph.degree(node)) { - LOG_ERROR << "The node " << node << " has degree " << compressed_graph.degree(node) - << " in the compressed graph and degree " << graph.degree(node) - << " in the uncompressed graph!"; - return; - } - } -} - -static void -expect_equal_incident_edges(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - for (const auto node : graph.nodes()) { - if (graph.incident_edges(node) != compressed_graph.incident_edges(node)) { - LOG_ERROR << "The incident edges of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - } -} - -static void -expect_equal_adjacent_nodes(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - std::vector graph_neighbours; - std::vector compressed_graph_neighbours; - - for (const NodeID node : graph.nodes()) { - graph.adjacent_nodes(node, [&](const NodeID adjacent_node) { - graph_neighbours.push_back(adjacent_node); - }); - - compressed_graph.adjacent_nodes(node, [&](const NodeID adjacent_node) { - compressed_graph_neighbours.push_back(adjacent_node); - }); - - if (graph_neighbours.size() != compressed_graph_neighbours.size()) { - LOG_ERROR << "Node " << node << " has " << graph_neighbours.size() - << " neighbours in the uncompressed graph but " - << compressed_graph_neighbours.size() << " neighbours in the compressed graph!"; - return; - } - - std::sort(graph_neighbours.begin(), graph_neighbours.end()); - std::sort(compressed_graph_neighbours.begin(), compressed_graph_neighbours.end()); - if (graph_neighbours != compressed_graph_neighbours) { - LOG_ERROR << "The neighbourhood of node " << node - << " in the compressed and uncompressed graph does not match!"; - return; - } - - graph_neighbours.clear(); - compressed_graph_neighbours.clear(); - } -} - -static void -expect_equal_neighbours(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - std::vector graph_incident_edges; - std::vector graph_adjacent_node; - std::vector compressed_graph_incident_edges; - std::vector compressed_graph_adjacent_node; - - for (const NodeID node : graph.nodes()) { - graph.neighbors(node, [&](const auto incident_edge, const auto adjacent_node) { - graph_incident_edges.push_back(incident_edge); - graph_adjacent_node.push_back(adjacent_node); - }); - - compressed_graph.neighbors(node, [&](const auto incident_edge, const auto adjacent_node) { - compressed_graph_incident_edges.push_back(incident_edge); - compressed_graph_adjacent_node.push_back(adjacent_node); - }); - - if (graph_incident_edges.size() != compressed_graph_incident_edges.size()) { - LOG_ERROR << "Node " << node << " has " << graph_incident_edges.size() - << " neighbours in the uncompressed graph but " - << compressed_graph_incident_edges.size() << " neighbours in the compressed graph!"; - return; - } - - std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); - std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); - std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); - std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); - - if (graph_incident_edges != compressed_graph_incident_edges) { - LOG_ERROR << "The incident edges of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - if (graph_adjacent_node != compressed_graph_adjacent_node) { - LOG_ERROR << "The adjacent nodes of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - graph_incident_edges.clear(); - graph_adjacent_node.clear(); - compressed_graph_incident_edges.clear(); - compressed_graph_adjacent_node.clear(); - } -} - -static void expect_equal_neighbours_max(CSRGraph &graph, const CompressedGraph &compressed_graph) { - std::vector graph_incident_edges; - std::vector graph_adjacent_node; - std::vector compressed_graph_incident_edges; - std::vector compressed_graph_adjacent_node; - - graph::reorder_edges_by_compression(graph); - - for (const NodeID node : graph.nodes()) { - const NodeID max_neighbor_count = graph.degree(node) / 2; - - graph.neighbors( - node, - max_neighbor_count, - [&](const auto incident_edge, const auto adjacent_node) { - graph_incident_edges.push_back(incident_edge); - graph_adjacent_node.push_back(adjacent_node); - } - ); - - compressed_graph.neighbors( - node, - max_neighbor_count, - [&](const auto incident_edge, const auto adjacent_node) { - compressed_graph_incident_edges.push_back(incident_edge); - compressed_graph_adjacent_node.push_back(adjacent_node); - } - ); - - if (graph_incident_edges.size() != compressed_graph_incident_edges.size()) { - LOG_ERROR << "Node " << node << " has " << graph_incident_edges.size() - << " neighbours in the uncompressed graph but " - << compressed_graph_incident_edges.size() << " neighbours in the compressed graph!"; - return; - } - - std::sort(graph_incident_edges.begin(), graph_incident_edges.end()); - std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); - std::sort(compressed_graph_incident_edges.begin(), compressed_graph_incident_edges.end()); - std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); - - if (graph_incident_edges != compressed_graph_incident_edges) { - LOG_ERROR << "The incident edges of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - if (graph_adjacent_node != compressed_graph_adjacent_node) { - LOG_ERROR << "The adjacent nodes of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - graph_incident_edges.clear(); - graph_adjacent_node.clear(); - compressed_graph_incident_edges.clear(); - compressed_graph_adjacent_node.clear(); - } -} - -static void -expect_equal_pfor_neighbors(const CSRGraph &graph, const CompressedGraph &compressed_graph) { - tbb::concurrent_vector graph_adjacent_node; - tbb::concurrent_vector compressed_graph_adjacent_node; - - for (const NodeID node : graph.nodes()) { - graph.pfor_neighbors( - node, - std::numeric_limits::max(), - std::numeric_limits::max(), - [&](const EdgeID e, const NodeID v) { graph_adjacent_node.push_back(v); } - ); - - compressed_graph.pfor_neighbors( - node, - std::numeric_limits::max(), - std::numeric_limits::max(), - [&](const EdgeID e, const NodeID v) { compressed_graph_adjacent_node.push_back(v); } - ); - - if (graph_adjacent_node.size() != compressed_graph_adjacent_node.size()) { - LOG_ERROR << "Node " << node << " has " << graph_adjacent_node.size() - << " adjacent nodes in the uncompressed graph but " - << compressed_graph_adjacent_node.size() - << " adjacent node in the compressed graph!"; - return; - } - - std::sort(graph_adjacent_node.begin(), graph_adjacent_node.end()); - std::sort(compressed_graph_adjacent_node.begin(), compressed_graph_adjacent_node.end()); - - if (graph_adjacent_node != compressed_graph_adjacent_node) { - LOG_ERROR << "The adjacent nodes of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - graph_adjacent_node.clear(); - compressed_graph_adjacent_node.clear(); - } -} - -static void expect_equal_compressed_graph_edge_weights( - const CSRGraph &graph, const CompressedGraph &compressed_graph -) { - std::vector> csr_graph_edge_weights; - std::vector> compressed_graph_edge_weights; - - for (const NodeID node : graph.nodes()) { - graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - csr_graph_edge_weights.emplace_back(adjacent_node, graph.edge_weight(incident_edge)); - }); - - compressed_graph.neighbors(node, [&](const EdgeID incident_edge, const NodeID adjacent_node) { - compressed_graph_edge_weights.emplace_back( - adjacent_node, compressed_graph.edge_weight(incident_edge) - ); - }); - - if (csr_graph_edge_weights.size() != compressed_graph_edge_weights.size()) { - LOG_ERROR << "Node " << node << " has " << csr_graph_edge_weights.size() - << " adjacent nodes in the uncompressed graph but " - << compressed_graph_edge_weights.size() - << " adjacent node in the compressed graph!"; - return; - } - - std::sort( - csr_graph_edge_weights.begin(), - csr_graph_edge_weights.end(), - [](const auto &a, const auto &b) { return a.first < b.first; } - ); - - std::sort( - compressed_graph_edge_weights.begin(), - compressed_graph_edge_weights.end(), - [](const auto &a, const auto &b) { return a.first < b.first; } - ); - - if (csr_graph_edge_weights != compressed_graph_edge_weights) { - LOG_ERROR << "The edge weights of node " << node - << " in the compressed and uncompressed graph do not match!"; - return; - } - - csr_graph_edge_weights.clear(); - compressed_graph_edge_weights.clear(); - } -} - -static void expect_equal_rearrange_compressed_edge_weights( - CSRGraph &graph, const CompressedGraph &compressed_graph -) { - graph::reorder_edges_by_compression(graph); - - for (const NodeID node : graph.nodes()) { - for (const auto [incident_edge, adjacent_node] : graph.neighbors(node)) { - if (graph.edge_weight(incident_edge) != compressed_graph.edge_weight(incident_edge)) { - LOG_ERROR << "Edge " << incident_edge << " has weight " << graph.edge_weight(incident_edge) - << " in the rearranged uncompressed graph but weight " - << compressed_graph.edge_weight(incident_edge) << " in the compressed graph!"; - return; - } - } - } -} - -static void run_checks(CSRGraph &graph, const CompressedGraph &compressed_graph) { - LOG << "Checking if the graph operations are valid..."; - - expect_equal_size(graph, compressed_graph); - expect_equal_nodes(graph, compressed_graph); - expect_equal_edges(graph, compressed_graph); - expect_equal_degree(graph, compressed_graph); - expect_equal_incident_edges(graph, compressed_graph); - expect_equal_adjacent_nodes(graph, compressed_graph); - expect_equal_neighbours(graph, compressed_graph); - expect_equal_neighbours_max(graph, compressed_graph); - expect_equal_pfor_neighbors(graph, compressed_graph); - expect_equal_compressed_graph_edge_weights(graph, compressed_graph); - expect_equal_rearrange_compressed_edge_weights(graph, compressed_graph); -} - static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { - LOG << "Running the benchmark..."; + LOG << "Running the benchmarks..."; - START_HEAP_PROFILER("Uncompressed graph operations"); TIMED_SCOPE("Uncompressed graph operations") { benchmark_degree(graph); benchmark_incident_edges(graph); @@ -424,9 +102,7 @@ static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { benchmark_neighbors(graph); benchmark_pfor_neighbors(graph); }; - STOP_HEAP_PROFILER(); - START_HEAP_PROFILER("Compressed graph operations"); TIMED_SCOPE("Compressed graph operations") { benchmark_degree(compressed_graph); benchmark_incident_edges(compressed_graph); @@ -434,29 +110,6 @@ static void run_benchmark(CSRGraph graph, CompressedGraph compressed_graph) { benchmark_neighbors(compressed_graph); benchmark_pfor_neighbors(compressed_graph); }; - STOP_HEAP_PROFILER(); - - Graph graph_csr(std::make_unique(std::move(graph))); - START_HEAP_PROFILER("Uncompressed underlying graph operations"); - TIMED_SCOPE("Uncompressed underlying graph operations") { - benchmark_degree(graph_csr); - benchmark_incident_edges(graph_csr); - benchmark_adjacent_nodes(graph_csr); - benchmark_neighbors(graph_csr); - benchmark_pfor_neighbors(graph_csr); - }; - STOP_HEAP_PROFILER(); - - Graph graph_compressed(std::make_unique(std::move(compressed_graph))); - START_HEAP_PROFILER("Compressed underlying graph operations"); - TIMED_SCOPE("Compressed underlying graph operations") { - benchmark_degree(graph_compressed); - benchmark_incident_edges(graph_compressed); - benchmark_adjacent_nodes(graph_compressed); - benchmark_neighbors(graph_compressed); - benchmark_pfor_neighbors(graph_compressed); - }; - STOP_HEAP_PROFILER(); } int main(int argc, char *argv[]) { @@ -471,11 +124,6 @@ int main(int argc, char *argv[]) { app.add_option("-t,--threads", num_threads, "Number of threads") ->check(CLI::NonNegativeNumber) ->default_val(num_threads); - app.add_option("-b,--benchmark", enable_benchmarks, "Enable graph operations benchmark") - ->default_val(enable_benchmarks); - app.add_option("-c,--checks", enable_checks, "Enable compressed graph operations check") - ->default_val(enable_checks); - CLI11_PARSE(app, argc, argv); tbb::global_control gc(tbb::global_control::max_allowed_parallelism, num_threads); @@ -498,22 +146,8 @@ int main(int argc, char *argv[]) { }; STOP_HEAP_PROFILER(); - // Capture graph statistics - std::size_t csr_size = graph.raw_nodes().size() * sizeof(Graph::EdgeID) + - graph.raw_edges().size() * sizeof(Graph::NodeID); - std::size_t compressed_size = compressed_graph.used_memory(); - std::size_t high_degree_count = compressed_graph.high_degree_count(); - std::size_t part_count = compressed_graph.part_count(); - std::size_t interval_count = compressed_graph.interval_count(); - - // Run checks and benchmarks - if (enable_checks) { - run_checks(graph, compressed_graph); - } - - if (enable_benchmarks) { - run_benchmark(std::move(graph), std::move(compressed_graph)); - } + // Run benchmarks + run_benchmark(std::move(graph), std::move(compressed_graph)); STOP_TIMER(); DISABLE_HEAP_PROFILER(); @@ -528,20 +162,6 @@ int main(int argc, char *argv[]) { << ", edge weights: " << (graph.edge_weighted() ? "yes" : "no"); LOG; - LOG << "The uncompressed graph uses " << to_megabytes(csr_size) << " mb (" << csr_size - << " bytes)."; - LOG << "The compressed graph uses " << to_megabytes(compressed_size) << " mb (" << compressed_size - << " bytes)."; - float compression_factor = csr_size / (float)compressed_size; - LOG << "Thats a compression ratio of " << compression_factor << '.'; - LOG; - - LOG << high_degree_count << " (" << (high_degree_count / (float)graph.n()) - << "%) vertices have high degree."; - LOG << part_count << " parts result from splitting the neighborhood of high degree nodes."; - LOG << interval_count << " vertices/parts use interval encoding."; - LOG; - Timer::global().print_human_readable(std::cout); LOG; PRINT_HEAP_PROFILE(std::cout); diff --git a/apps/benchmarks/shm_label_propagation_benchmark.cc b/apps/benchmarks/shm_label_propagation_benchmark.cc index ac8d2481..32bb6cdd 100644 --- a/apps/benchmarks/shm_label_propagation_benchmark.cc +++ b/apps/benchmarks/shm_label_propagation_benchmark.cc @@ -11,10 +11,10 @@ #include -#include "kaminpar-shm/coarsening/lp_clustering.h" +#include "kaminpar-shm/coarsening/clustering/lp_clusterer.h" +#include "kaminpar-shm/coarsening/max_cluster_weights.h" #include "kaminpar-shm/context_io.h" #include "kaminpar-shm/graphutils/permutator.h" -#include "kaminpar-shm/partition_utils.h" #include "kaminpar-common/console_io.h" #include "kaminpar-common/logger.h" @@ -74,19 +74,21 @@ int main(int argc, char *argv[]) { graph::remove_isolated_nodes(graph, ctx.partition); } - const NodeWeight max_cluster_weight = - compute_max_cluster_weight(ctx.coarsening, graph, ctx.partition); - - LPClustering lp_clustering(graph.n(), ctx.coarsening); - lp_clustering.set_max_cluster_weight(max_cluster_weight); + LPClustering lp_clustering(ctx.coarsening); + lp_clustering.set_max_cluster_weight(compute_max_cluster_weight( + ctx.coarsening, ctx.partition, graph.n(), graph.total_node_weight() + )); lp_clustering.set_desired_cluster_count(0); GLOBAL_TIMER.reset(); ENABLE_HEAP_PROFILER(); + START_HEAP_PROFILER("Allocation"); + StaticArray clustering(graph.n()); + STOP_HEAP_PROFILER(); START_HEAP_PROFILER("Label Propagation"); TIMED_SCOPE("Label Propagation") { - lp_clustering.compute_clustering(graph, false); + lp_clustering.compute_clustering(clustering, graph, false); }; STOP_HEAP_PROFILER(); DISABLE_HEAP_PROFILER(); diff --git a/apps/benchmarks/shm_variable_length_codec_benchmark.cc b/apps/benchmarks/shm_variable_length_codec_benchmark.cc index 9e9db459..fc5bc1d0 100644 --- a/apps/benchmarks/shm_variable_length_codec_benchmark.cc +++ b/apps/benchmarks/shm_variable_length_codec_benchmark.cc @@ -229,8 +229,8 @@ template void benchmark_rle(std::string_view name, const std::size_t count, const std::uint8_t *values_ptr) { SCOPED_TIMER(name); - VarIntRunLengthDecoder decoder(values_ptr); - decoder.decode(count, [](const Int value) { do_not_optimize(value); }); + VarIntRunLengthDecoder decoder(values_ptr, count); + decoder.decode([](const Int value) { do_not_optimize(value); }); } template @@ -238,7 +238,7 @@ void benchmark_sve(std::string_view name, const std::size_t count, const std::ui SCOPED_TIMER(name); VarIntStreamDecoder decoder(values_ptr, count); - decoder.decode(count, [](const Int value) { do_not_optimize(value); }); + decoder.decode([](const Int value) { do_not_optimize(value); }); } template diff --git a/apps/tools/shm_graph_compression_tool.cc b/apps/tools/shm_graph_compression_tool.cc index 7db47575..4163611d 100644 --- a/apps/tools/shm_graph_compression_tool.cc +++ b/apps/tools/shm_graph_compression_tool.cc @@ -23,12 +23,18 @@ int main(int argc, char *argv[]) { // Parse CLI arguments std::string graph_filename; std::string compressed_graph_filename; + io::GraphFileFormat graph_file_format = io::GraphFileFormat::METIS; int num_threads = 1; CLI::App app("Shared-memory graph compression tool"); app.add_option("-G,--graph", graph_filename, "Input graph in METIS format")->required(); app.add_option("--out", compressed_graph_filename, "Ouput file for saving the compressed graph") ->required(); + app.add_option("-f,--graph-file-format", graph_file_format) + ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description("")) + ->description(R"(Graph file formats: + - metis + - parhip)"); app.add_option("-t,--threads", num_threads, "Number of threads"); CLI11_PARSE(app, argc, argv); diff --git a/apps/tools/shm_graph_properties_tool.cc b/apps/tools/shm_graph_properties_tool.cc index 35c2e82a..0872808a 100644 --- a/apps/tools/shm_graph_properties_tool.cc +++ b/apps/tools/shm_graph_properties_tool.cc @@ -85,8 +85,7 @@ int main(int argc, char *argv[]) { ->transform(CLI::CheckedTransformer(io::get_graph_file_formats()).description("")) ->description(R"(Graph file formats: - metis - - parhip)") - ->capture_default_str(); + - parhip)"); create_graph_compression_options(&app, ctx); CLI11_PARSE(app, argc, argv); From 4bbb1010bc84804adec60b5f393d4d11b241a000 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 12:05:39 +0200 Subject: [PATCH 5/7] refactor(compressed-graph-binary): restructure code --- apps/io/parhip_parser.cc | 1 + apps/io/shm_compressed_graph_binary.cc | 337 +++++++++++++++---------- apps/io/shm_compressed_graph_binary.h | 6 +- 3 files changed, 213 insertions(+), 131 deletions(-) diff --git a/apps/io/parhip_parser.cc b/apps/io/parhip_parser.cc index 06ad71e9..5201971d 100644 --- a/apps/io/parhip_parser.cc +++ b/apps/io/parhip_parser.cc @@ -15,6 +15,7 @@ #include #include #include +#include #include "kaminpar-common/logger.h" diff --git a/apps/io/shm_compressed_graph_binary.cc b/apps/io/shm_compressed_graph_binary.cc index b1c3ad23..95d259e7 100644 --- a/apps/io/shm_compressed_graph_binary.cc +++ b/apps/io/shm_compressed_graph_binary.cc @@ -14,10 +14,99 @@ namespace kaminpar::shm::io::compressed_binary { +struct CompressedBinaryHeader { + bool has_node_weights; + bool has_edge_weights; + + bool has_64_bit_node_id; + bool has_64_bit_edge_id; + + bool has_64_bit_node_weight; + bool has_64_bit_edge_weight; + + bool use_degree_bucket_order; + + bool use_high_degree_encoding; + bool use_interval_encoding; + bool use_run_length_encoding; + bool use_stream_vbyte_encoding; + bool use_isolated_nodes_separation; + + std::uint64_t high_degree_threshold; + std::uint64_t high_degree_part_length; + std::uint64_t interval_length_threshold; + + std::uint64_t num_nodes; + std::uint64_t num_edges; + std::uint64_t max_degree; + + std::uint64_t num_high_degree_nodes; + std::uint64_t num_high_degree_parts; + std::uint64_t num_interval_nodes; + std::uint64_t num_intervals; +}; + +CompressedBinaryHeader create_header(const CompressedGraph &graph) { + return { + graph.node_weighted(), + graph.edge_weighted(), + + sizeof(CompressedGraph::NodeID) == 8, + sizeof(CompressedGraph::EdgeID) == 8, + + sizeof(CompressedGraph::NodeWeight) == 8, + sizeof(CompressedGraph::EdgeWeight) == 8, + + graph.sorted(), + + CompressedGraph::kHighDegreeEncoding, + CompressedGraph::kIntervalEncoding, + CompressedGraph::kRunLengthEncoding, + CompressedGraph::kStreamEncoding, + CompressedGraph::kIsolatedNodesSeparation, + + CompressedGraph::kHighDegreeThreshold, + CompressedGraph::kHighDegreePartLength, + CompressedGraph::kIntervalLengthTreshold, + + graph.n(), + graph.m(), + graph.max_degree(), + + graph.num_high_degree_nodes(), + graph.num_high_degree_parts(), + graph.num_interval_nodes(), + graph.num_intervals()}; +} + template static void write_int(std::ofstream &out, const T id) { out.write(reinterpret_cast(&id), sizeof(T)); } +static void write_header(std::ofstream &out, const CompressedBinaryHeader header) { + const std::uint16_t boolean_values = + (header.use_isolated_nodes_separation << 12) | (header.use_stream_vbyte_encoding << 11) | + (header.use_run_length_encoding << 9) | (header.use_interval_encoding << 8) | + (header.use_high_degree_encoding << 7) | (header.use_degree_bucket_order << 6) | + (header.has_64_bit_edge_weight << 5) | (header.has_64_bit_node_weight << 4) | + (header.has_64_bit_edge_id << 3) | (header.has_64_bit_node_id << 2) | + (header.has_edge_weights << 1) | (header.has_node_weights); + write_int(out, boolean_values); + + write_int(out, header.high_degree_threshold); + write_int(out, header.high_degree_part_length); + write_int(out, header.interval_length_threshold); + + write_int(out, header.num_nodes); + write_int(out, header.num_edges); + write_int(out, header.max_degree); + + write_int(out, header.num_high_degree_nodes); + write_int(out, header.num_high_degree_parts); + write_int(out, header.num_interval_nodes); + write_int(out, header.num_intervals); +} + template static void write_compact_static_array(std::ofstream &out, const CompactStaticArray &array) { write_int(out, array.byte_width()); @@ -27,42 +116,18 @@ static void write_compact_static_array(std::ofstream &out, const CompactStaticAr template static void write_static_array(std::ofstream &out, const StaticArray &static_array) { + write_int(out, static_array.size()); out.write(reinterpret_cast(static_array.data()), static_array.size() * sizeof(T)); } void write(const std::string &filename, const CompressedGraph &graph) { std::ofstream out(filename, std::ios::binary); - write_int(out, kMagicNumber); - write_int(out, static_cast(sizeof(CompressedGraph::NodeID))); - write_int(out, static_cast(sizeof(CompressedGraph::EdgeID))); - write_int(out, static_cast(sizeof(CompressedGraph::NodeWeight))); - write_int(out, static_cast(sizeof(CompressedGraph::EdgeWeight))); - - write_int(out, static_cast(CompressedGraph::kHighDegreeEncoding)); - write_int(out, CompressedGraph::kHighDegreeThreshold); - write_int(out, CompressedGraph::kHighDegreePartLength); - write_int(out, static_cast(CompressedGraph::kIntervalEncoding)); - write_int(out, CompressedGraph::kIntervalLengthTreshold); - write_int(out, static_cast(CompressedGraph::kRunLengthEncoding)); - write_int(out, static_cast(CompressedGraph::kStreamEncoding)); - write_int(out, static_cast(CompressedGraph::kIsolatedNodesSeparation)); - - write_int(out, graph.n()); - write_int(out, graph.m()); - write_int(out, graph.max_degree()); - write_int(out, static_cast(graph.sorted())); - write_int(out, static_cast(graph.node_weighted())); - write_int(out, static_cast(graph.edge_weighted())); - - write_int(out, graph.high_degree_count()); - write_int(out, graph.part_count()); - write_int(out, graph.interval_count()); + CompressedBinaryHeader header = create_header(graph); + write_header(out, header); write_compact_static_array(out, graph.raw_nodes()); - - write_int(out, graph.raw_compressed_edges().size()); write_static_array(out, graph.raw_compressed_edges()); if (graph.node_weighted()) { @@ -80,68 +145,78 @@ template static T read_int(std::ifstream &in) { return t; } -template static CompactStaticArray read_compact_static_array(std::ifstream &in) { - std::uint8_t byte_width = read_int(in); - std::size_t allocated_size = read_int(in); - - auto data = std::make_unique(allocated_size); - in.read(reinterpret_cast(data.get()), allocated_size); - return CompactStaticArray(byte_width, allocated_size, std::move(data)); -} - -template -static StaticArray read_static_array(std::ifstream &in, const std::size_t size) { - T *ptr = static_cast(std::malloc(sizeof(T) * size)); - in.read(reinterpret_cast(ptr), sizeof(T) * size); - return StaticArray(ptr, size); +CompressedBinaryHeader read_header(std::ifstream &in) { + const auto boolean_values = read_int(in); + return { + (boolean_values & 1) != 0, (boolean_values & 2) != 0, (boolean_values & 4) != 0, + (boolean_values & 8) != 0, (boolean_values & 16) != 0, (boolean_values & 32) != 0, + (boolean_values & 64) != 0, (boolean_values & 128) != 0, (boolean_values & 256) != 0, + (boolean_values & 512) != 0, (boolean_values & 1024) != 0, (boolean_values & 2048) != 0, + read_int(in), read_int(in), read_int(in), + read_int(in), read_int(in), read_int(in), + read_int(in), read_int(in), read_int(in), + read_int(in), + }; } -CompressedGraph read(const std::string &filename) { +void verify_header(const CompressedBinaryHeader header) { using NodeID = CompressedGraph::NodeID; using EdgeID = CompressedGraph::EdgeID; using NodeWeight = CompressedGraph::NodeWeight; using EdgeWeight = CompressedGraph::EdgeWeight; - std::ifstream in(filename, std::ios::binary); - - if (kMagicNumber != read_int(in)) { - LOG_ERROR << "The magic number of the file is not correct!"; - std::exit(1); - } - - std::uint8_t stored_node_id_size = read_int(in); - if (stored_node_id_size != sizeof(NodeID)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_node_id_size * 8) - << "-Bit NodeIDs but this build uses " << (sizeof(NodeID) * 8) << "-Bit NodeIDs."; + if (header.has_64_bit_node_id) { + if (sizeof(NodeID) != 8) { + LOG_ERROR << "The stored compressed graph uses 64-Bit node IDs but this build uses 32-Bit " + "node IDs."; + std::exit(1); + } + } else if (sizeof(NodeID) != 4) { + LOG_ERROR + << "The stored compressed graph uses 32-Bit node IDs but this build uses 64-Bit node IDs."; std::exit(1); } - std::uint8_t stored_edge_id_size = read_int(in); - if (stored_edge_id_size != sizeof(EdgeID)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_edge_id_size * 8) - << "-Bit EdgeIDs but this build uses " << (sizeof(EdgeID) * 8) << "-Bit EdgeIDs."; + if (header.has_64_bit_edge_id) { + if (sizeof(EdgeID) != 8) { + LOG_ERROR << "The stored compressed graph uses 64-Bit edge IDs but this build uses 32-Bit " + "edge IDs."; + std::exit(1); + } + } else if (sizeof(EdgeID) != 4) { + LOG_ERROR + << "The stored compressed graph uses 32-Bit edge IDs but this build uses 64-Bit edge IDs."; std::exit(1); } - std::uint8_t stored_node_weight_size = read_int(in); - if (stored_node_weight_size != sizeof(NodeWeight)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_node_weight_size * 8) - << "-Bit NodeWeights but this build uses " << (sizeof(NodeWeight) * 8) - << "-Bit NodeWeights."; + if (header.has_64_bit_node_weight) { + if (sizeof(NodeWeight) != 8) { + LOG_ERROR + << "The stored compressed graph uses 64-Bit node weights but this build uses 32-Bit " + "node weights."; + std::exit(1); + } + } else if (sizeof(NodeWeight) != 4) { + LOG_ERROR << "The stored compressed graph uses 32-Bit node weights but this build uses 64-Bit " + "node weights."; std::exit(1); } - std::uint8_t stored_edge_weight_size = read_int(in); - if (stored_edge_weight_size != sizeof(EdgeWeight)) { - LOG_ERROR << "The stored compressed graph uses " << (stored_edge_weight_size * 8) - << "-Bit EdgeWeights but this build uses " << (sizeof(EdgeWeight) * 8) - << "-Bit EdgeWeights."; + if (header.has_64_bit_edge_weight) { + if (sizeof(EdgeWeight) != 8) { + LOG_ERROR + << "The stored compressed graph uses 64-Bit edge weights but this build uses 32-Bit " + "edge weights."; + std::exit(1); + } + } else if (sizeof(EdgeWeight) != 4) { + LOG_ERROR << "The stored compressed graph uses 32-Bit edge weights but this build uses 64-Bit " + "edge weights."; std::exit(1); } - bool high_degree_encoding = static_cast(read_int(in)); - if (high_degree_encoding != CompressedGraph::kHighDegreeEncoding) { - if (high_degree_encoding) { + if (header.use_high_degree_encoding != CompressedGraph::kHighDegreeEncoding) { + if (header.use_high_degree_encoding) { LOG_ERROR << "The stored compressed graph uses high degree encoding but this build does not."; } else { LOG_ERROR @@ -150,25 +225,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - NodeID high_degree_threshold = read_int(in); - if (high_degree_threshold != CompressedGraph::kHighDegreeThreshold) { - LOG_ERROR << "The stored compressed graph uses " << high_degree_threshold - << " as the high degree threshold but this build uses " - << (CompressedGraph::kHighDegreeThreshold) << " as the high degree threshold."; - std::exit(1); - } - - NodeID high_degree_part_length = read_int(in); - if (high_degree_part_length != CompressedGraph::kHighDegreePartLength) { - LOG_ERROR << "The stored compressed graph uses " << high_degree_part_length - << " as the high degree part length but this build uses " - << (CompressedGraph::kHighDegreePartLength) << " as the high degree part length."; - std::exit(1); - } - - bool interval_encoding = static_cast(read_int(in)); - if (interval_encoding != CompressedGraph::kIntervalEncoding) { - if (interval_encoding) { + if (header.use_interval_encoding != CompressedGraph::kIntervalEncoding) { + if (header.use_interval_encoding) { LOG_ERROR << "The stored compressed graph uses interval encoding but this build does not."; } else { LOG_ERROR @@ -177,17 +235,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - NodeID interval_length_threshold = read_int(in); - if (interval_length_threshold != CompressedGraph::kIntervalLengthTreshold) { - LOG_ERROR << "The stored compressed graph uses " << interval_length_threshold - << " as the interval length threshold but this build uses " - << (CompressedGraph::kIntervalLengthTreshold) << " as the interval length threshold."; - std::exit(1); - } - - bool run_length_encoding = static_cast(read_int(in)); - if (run_length_encoding != CompressedGraph::kRunLengthEncoding) { - if (run_length_encoding) { + if (header.use_run_length_encoding != CompressedGraph::kRunLengthEncoding) { + if (header.use_run_length_encoding) { LOG_ERROR << "The stored compressed graph uses run-length encoding but this build does not."; } else { LOG_ERROR @@ -196,9 +245,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - bool stream_encoding = static_cast(read_int(in)); - if (stream_encoding != CompressedGraph::kStreamEncoding) { - if (stream_encoding) { + if (header.use_stream_vbyte_encoding != CompressedGraph::kStreamEncoding) { + if (header.use_stream_vbyte_encoding) { LOG_ERROR << "The stored compressed graph uses stream encoding but this build does not."; } else { LOG_ERROR << "The stored compressed graph does not use stream encoding but this build does."; @@ -206,9 +254,8 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - bool isolated_nodes_separation = static_cast(read_int(in)); - if (isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) { - if (isolated_nodes_separation) { + if (header.use_isolated_nodes_separation != CompressedGraph::kIsolatedNodesSeparation) { + if (header.use_isolated_nodes_separation) { LOG_ERROR << "The stored compressed graph uses isolated nodes separation but this build does not."; } else { @@ -218,40 +265,74 @@ CompressedGraph read(const std::string &filename) { std::exit(1); } - NodeID n = read_int(in); - EdgeID m = read_int(in); - NodeID max_degree = read_int(in); - bool sorted = static_cast(read_int(in)); - bool is_node_weighted = static_cast(read_int(in)); - bool is_edge_weighted = static_cast(read_int(in)); + if (header.high_degree_threshold != CompressedGraph::kHighDegreeThreshold) { + LOG_ERROR << "The stored compressed graph uses " << header.high_degree_threshold + << " as the high degree threshold but this build uses " + << (CompressedGraph::kHighDegreeThreshold) << " as the high degree threshold."; + std::exit(1); + } - std::size_t high_degree_count = read_int(in); - std::size_t part_count = read_int(in); - std::size_t interval_count = read_int(in); + if (header.high_degree_part_length != CompressedGraph::kHighDegreePartLength) { + LOG_ERROR << "The stored compressed graph uses " << header.high_degree_part_length + << " as the high degree part length but this build uses " + << (CompressedGraph::kHighDegreePartLength) << " as the high degree part length."; + std::exit(1); + } - CompactStaticArray nodes = read_compact_static_array(in); + if (header.interval_length_threshold != CompressedGraph::kIntervalLengthTreshold) { + LOG_ERROR << "The stored compressed graph uses " << header.interval_length_threshold + << " as the interval length threshold but this build uses " + << (CompressedGraph::kIntervalLengthTreshold) << " as the interval length threshold."; + std::exit(1); + } +} - std::size_t compressed_edges_size = read_int(in); - StaticArray compressed_edges = - read_static_array(in, compressed_edges_size); - StaticArray node_weights = +template static CompactStaticArray read_compact_static_array(std::ifstream &in) { + const auto byte_width = read_int(in); + const auto allocated_size = read_int(in); + + auto data = std::make_unique(allocated_size); + in.read(reinterpret_cast(data.get()), allocated_size); + return CompactStaticArray(byte_width, allocated_size, std::move(data)); +} + +template static StaticArray read_static_array(std::ifstream &in) { + const auto size = read_int(in); + T *ptr = static_cast(std::malloc(sizeof(T) * size)); + in.read(reinterpret_cast(ptr), sizeof(T) * size); + return StaticArray(ptr, size); +} + +CompressedGraph read(const std::string &filename) { + std::ifstream in(filename, std::ios::binary); + if (kMagicNumber != read_int(in)) { + LOG_ERROR << "The magic number of the file is not correct!"; + std::exit(1); + } - is_node_weighted ? read_static_array(in, n) : StaticArray(); + CompressedBinaryHeader header = read_header(in); + verify_header(header); + CompactStaticArray nodes = read_compact_static_array(in); + StaticArray compressed_edges = read_static_array(in); + + StaticArray node_weights = + header.has_node_weights ? read_static_array(in) : StaticArray(); StaticArray edge_weights = - is_edge_weighted ? read_static_array(in, m) : StaticArray(); + header.has_edge_weights ? read_static_array(in) : StaticArray(); return CompressedGraph( std::move(nodes), std::move(compressed_edges), std::move(node_weights), std::move(edge_weights), - m, - max_degree, - sorted, - high_degree_count, - part_count, - interval_count + header.num_edges, + header.max_degree, + header.use_degree_bucket_order, + header.num_high_degree_nodes, + header.num_high_degree_parts, + header.num_interval_nodes, + header.num_intervals ); } diff --git a/apps/io/shm_compressed_graph_binary.h b/apps/io/shm_compressed_graph_binary.h index 0362e3d2..ef6567a3 100644 --- a/apps/io/shm_compressed_graph_binary.h +++ b/apps/io/shm_compressed_graph_binary.h @@ -17,7 +17,7 @@ namespace kaminpar::shm::io::compressed_binary { constexpr std::uint64_t kMagicNumber = 0x434F4D5052455353; /*! - * Writes a graph to a file as a compressed graph binary. + * Writes a compressed graph to a file in binary format. * * @param filename The name of the file to write to. * @param graph The compressed graph to write. @@ -25,7 +25,7 @@ constexpr std::uint64_t kMagicNumber = 0x434F4D5052455353; void write(const std::string &filename, const CompressedGraph &graph); /*! - * Reads the graph from a compressed graph binary file. If the paramters of the compressed graph + * Reads a compressed graph from a file with binary format. If the paramters of the compressed graph * stored in the file do not match with this build, exit is called. * * @param filename The name of the file to read from. @@ -34,7 +34,7 @@ void write(const std::string &filename, const CompressedGraph &graph); CompressedGraph read(const std::string &filename); /*! - * Checks whether a graph is stored in compressed format. + * Checks whether a graph is stored in compressed binary format. * * @param filename The name of the file to check. * @return Whether the graph is stored in compressed format. From 55495f9285216102b19b080e1eac9098ce2ac880 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 13:27:06 +0200 Subject: [PATCH 6/7] fix(label-propagation): use unsigned integers for the first level of the two-level vector --- .../concurrent_two_level_vector.h | 40 ++++++++++++++----- kaminpar-shm/context_io.cc | 8 ++-- kaminpar-shm/label_propagation.h | 2 +- 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/kaminpar-common/datastructures/concurrent_two_level_vector.h b/kaminpar-common/datastructures/concurrent_two_level_vector.h index 056f2c9f..63d12a50 100644 --- a/kaminpar-common/datastructures/concurrent_two_level_vector.h +++ b/kaminpar-common/datastructures/concurrent_two_level_vector.h @@ -114,7 +114,7 @@ class ConcurrentTwoLevelVector { tbb::parallel_for(tbb::blocked_range(0, _values.size()), [&](const auto &r) { for (Size pos = r.begin(); pos != r.end(); ++pos) { - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value == kMaxFirstValue) { Size new_pos = mapping[pos] - 1; @@ -143,7 +143,7 @@ class ConcurrentTwoLevelVector { [[nodiscard]] Value operator[](const Size pos) { KASSERT(pos < _values.size()); - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value < kMaxFirstValue) { return value; } @@ -167,7 +167,7 @@ class ConcurrentTwoLevelVector { KASSERT(pos < _values.size()); if (value < kMaxFirstValue) { - _values[pos] = value; + _values[pos] = static_cast(value); } else { _values[pos] = kMaxFirstValue; _table.get_handle().insert(pos, value); @@ -196,7 +196,12 @@ class ConcurrentTwoLevelVector { const Value new_value = static_cast(value) + delta; if (new_value < kMaxFirstValue) { success = __atomic_compare_exchange_n( - &_values[pos], &value, new_value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(new_value), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } else { success = __atomic_compare_exchange_n( @@ -234,7 +239,12 @@ class ConcurrentTwoLevelVector { } success = __atomic_compare_exchange_n( - &_values[pos], &value, value - delta, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(value - delta), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } while (!success); } @@ -327,7 +337,7 @@ class ConcurrentTwoLevelVector { tbb::parallel_for(tbb::blocked_range(0, _values.size()), [&](const auto &r) { for (Size pos = r.begin(); pos != r.end(); ++pos) { - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value == kMaxFirstValue) { Size new_pos = mapping[pos] - 1; @@ -363,7 +373,7 @@ class ConcurrentTwoLevelVector { [[nodiscard]] Value operator[](const Size pos) { KASSERT(pos < _values.size()); - const Value value = _values[pos]; + const auto value = static_cast(_values[pos]); if (value < kMaxFirstValue) { return value; } @@ -387,7 +397,7 @@ class ConcurrentTwoLevelVector { KASSERT(pos < _values.size()); if (value < kMaxFirstValue) { - _values[pos] = value; + _values[pos] = static_cast(value); } else { _values[pos] = kMaxFirstValue; @@ -423,7 +433,12 @@ class ConcurrentTwoLevelVector { const Value new_value = static_cast(value) + delta; if (new_value < kMaxFirstValue) { success = __atomic_compare_exchange_n( - &_values[pos], &value, new_value, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(new_value), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } else { success = __atomic_compare_exchange_n( @@ -469,7 +484,12 @@ class ConcurrentTwoLevelVector { } success = __atomic_compare_exchange_n( - &_values[pos], &value, value - delta, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED + &_values[pos], + &value, + static_cast(value - delta), + false, + __ATOMIC_RELAXED, + __ATOMIC_RELAXED ); } while (!success); } diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc index 5be44de6..2f5a7819 100644 --- a/kaminpar-shm/context_io.cc +++ b/kaminpar-shm/context_io.cc @@ -478,8 +478,8 @@ void print(const LabelPropagationCoarseningContext &lp_ctx, std::ostream &out) { out << " Cluster weights struct: " << lp_ctx.cluster_weights_structure << "\n"; out << " Use two phases: " << (lp_ctx.use_two_phases ? "yes" : "no") << "\n"; if (lp_ctx.use_two_phases) { - out << " Selection strategy: " << lp_ctx.second_phase_selection_strategy << '\n'; - out << " Aggregation strategy: " << lp_ctx.second_phase_aggregation_strategy << '\n'; + out << " Selection strategy: " << lp_ctx.second_phase_selection_strategy << '\n'; + out << " Aggregation strategy: " << lp_ctx.second_phase_aggregation_strategy << '\n'; out << " Relabel: " << (lp_ctx.relabel_before_second_phase ? "yes" : "no") << '\n'; } @@ -500,8 +500,8 @@ void print(const RefinementContext &r_ctx, std::ostream &out) { out << " Number of iterations: " << r_ctx.lp.num_iterations << "\n"; out << " Uses two phases: " << (r_ctx.lp.use_two_phases ? "yes" : "no") << "\n"; if (r_ctx.lp.use_two_phases) { - out << " Selection strategy: " << r_ctx.lp.second_phase_selection_strategy << '\n'; - out << " Aggregation strategy: " << r_ctx.lp.second_phase_aggregation_strategy << '\n'; + out << " Selection strategy: " << r_ctx.lp.second_phase_selection_strategy << '\n'; + out << " Aggregation strategy: " << r_ctx.lp.second_phase_aggregation_strategy << '\n'; } } if (r_ctx.includes_algorithm(RefinementAlgorithm::KWAY_FM)) { diff --git a/kaminpar-shm/label_propagation.h b/kaminpar-shm/label_propagation.h index a0a82fff..7710a358 100644 --- a/kaminpar-shm/label_propagation.h +++ b/kaminpar-shm/label_propagation.h @@ -1742,7 +1742,7 @@ template class OwnedRelaxedClusterW using SmallClusterWeightVec = StaticArray; using FirstLevelClusterWeight = typename std:: - conditional_t, std::int16_t, std::int32_t>; + conditional_t, std::uint16_t, std::uint32_t>; using ClusterWeightTwoLevelVec = ConcurrentTwoLevelVector; From 97ab05803f7c89cf4d69fd6c1163c29b10e15244 Mon Sep 17 00:00:00 2001 From: Daniel Salwasser Date: Fri, 26 Apr 2024 14:14:00 +0200 Subject: [PATCH 7/7] feat(contraction): reuse the memory space of a clustering for the mapping --- kaminpar-shm/coarsening/cluster_coarsener.cc | 14 +-- kaminpar-shm/coarsening/cluster_coarsener.h | 1 - .../buffered_cluster_contraction.cc | 6 +- .../buffered_cluster_contraction.h | 2 +- .../contraction/cluster_contraction.cc | 14 +-- .../contraction/cluster_contraction.h | 4 +- .../cluster_contraction_preprocessing.cc | 111 ++++++++---------- .../cluster_contraction_preprocessing.h | 6 +- .../legacy_buffered_cluster_contraction.cc | 7 +- .../legacy_buffered_cluster_contraction.h | 3 +- .../naive_unbuffered_cluster_contraction.cc | 6 +- .../naive_unbuffered_cluster_contraction.h | 2 +- .../unbuffered_cluster_contraction.cc | 9 +- .../unbuffered_cluster_contraction.h | 3 +- 14 files changed, 86 insertions(+), 102 deletions(-) diff --git a/kaminpar-shm/coarsening/cluster_coarsener.cc b/kaminpar-shm/coarsening/cluster_coarsener.cc index d512568c..42f534fe 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.cc +++ b/kaminpar-shm/coarsening/cluster_coarsener.cc @@ -31,11 +31,9 @@ bool ClusteringCoarsener::coarsen() { SCOPED_HEAP_PROFILER("Level", std::to_string(_hierarchy.size())); SCOPED_TIMER("Level", std::to_string(_hierarchy.size())); - if (_clustering.size() < current().n()) { - SCOPED_HEAP_PROFILER("Allocation"); - SCOPED_TIMER("Allocation"); - _clustering.resize(current().n()); - } + START_HEAP_PROFILER("Allocation"); + RECORD("clustering") StaticArray clustering(current().n(), static_array::noinit); + STOP_HEAP_PROFILER(); const bool free_allocated_memory = !keep_allocated_memory(); const NodeWeight total_node_weight = current().total_node_weight(); @@ -47,13 +45,15 @@ bool ClusteringCoarsener::coarsen() { compute_max_cluster_weight(_c_ctx, _p_ctx, prev_n, total_node_weight) ); _clustering_algorithm->set_desired_cluster_count(0); - _clustering_algorithm->compute_clustering(_clustering, current(), free_allocated_memory); + _clustering_algorithm->compute_clustering(clustering, current(), free_allocated_memory); STOP_TIMER(); STOP_HEAP_PROFILER(); START_HEAP_PROFILER("Contract graph"); auto coarsened = TIMED_SCOPE("Contract graph") { - return contract_clustering(current(), _clustering, _c_ctx.contraction, _contraction_m_ctx); + return contract_clustering( + current(), std::move(clustering), _c_ctx.contraction, _contraction_m_ctx + ); }; _hierarchy.push_back(std::move(coarsened)); STOP_HEAP_PROFILER(); diff --git a/kaminpar-shm/coarsening/cluster_coarsener.h b/kaminpar-shm/coarsening/cluster_coarsener.h index 833cca35..6f443a02 100644 --- a/kaminpar-shm/coarsening/cluster_coarsener.h +++ b/kaminpar-shm/coarsening/cluster_coarsener.h @@ -49,7 +49,6 @@ class ClusteringCoarsener : public Coarsener { const Graph *_input_graph; std::vector> _hierarchy; - StaticArray _clustering{}; std::unique_ptr _clustering_algorithm; contraction::MemoryContext _contraction_m_ctx{}; diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc index 41f6f5b3..f56f7528 100644 --- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.cc @@ -277,18 +277,18 @@ std::unique_ptr contract_clustering_buffered( std::unique_ptr contract_clustering_buffered( const Graph &graph, - StaticArray &clustering, + StaticArray clustering, const ContractionCoarseningContext &con_ctx, MemoryContext &m_ctx ) { if (con_ctx.use_compact_mapping) { - auto [c_n, mapping] = compute_mapping(graph, clustering, m_ctx); + auto [c_n, mapping] = compute_mapping(graph, std::move(clustering), m_ctx); fill_cluster_buckets(c_n, graph, mapping, m_ctx.buckets_index, m_ctx.buckets); return graph.reified([&](auto &graph) { return contract_clustering_buffered(graph, c_n, std::move(mapping), con_ctx, m_ctx); }); } else { - auto [c_n, mapping] = compute_mapping(graph, clustering, m_ctx); + auto [c_n, mapping] = compute_mapping(graph, std::move(clustering), m_ctx); fill_cluster_buckets(c_n, graph, mapping, m_ctx.buckets_index, m_ctx.buckets); return graph.reified([&](auto &graph) { return contract_clustering_buffered(graph, c_n, std::move(mapping), con_ctx, m_ctx); diff --git a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h index 78937eb3..a41da0ce 100644 --- a/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h +++ b/kaminpar-shm/coarsening/contraction/buffered_cluster_contraction.h @@ -18,7 +18,7 @@ namespace kaminpar::shm::contraction { std::unique_ptr contract_clustering_buffered( const Graph &graph, - StaticArray &clustering, + StaticArray clustering, const ContractionCoarseningContext &con_ctx, MemoryContext &m_ctx ); diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction.cc b/kaminpar-shm/coarsening/contraction/cluster_contraction.cc index 5b68379d..50d6f050 100644 --- a/kaminpar-shm/coarsening/contraction/cluster_contraction.cc +++ b/kaminpar-shm/coarsening/contraction/cluster_contraction.cc @@ -25,27 +25,27 @@ namespace kaminpar::shm { using namespace contraction; std::unique_ptr contract_clustering( - const Graph &graph, StaticArray &clustering, const ContractionCoarseningContext &con_ctx + const Graph &graph, StaticArray clustering, const ContractionCoarseningContext &con_ctx ) { MemoryContext m_ctx; - return contract_clustering(graph, clustering, con_ctx, m_ctx); + return contract_clustering(graph, std::move(clustering), con_ctx, m_ctx); } std::unique_ptr contract_clustering( const Graph &graph, - StaticArray &clustering, + StaticArray clustering, const ContractionCoarseningContext &con_ctx, MemoryContext &m_ctx ) { switch (con_ctx.mode) { case ContractionMode::BUFFERED: - return contract_clustering_buffered(graph, clustering, con_ctx, m_ctx); + return contract_clustering_buffered(graph, std::move(clustering), con_ctx, m_ctx); case ContractionMode::BUFFERED_LEGACY: - return contract_clustering_buffered_legacy(graph, clustering, con_ctx, m_ctx); + return contract_clustering_buffered_legacy(graph, std::move(clustering), con_ctx, m_ctx); case ContractionMode::UNBUFFERED: - return contract_clustering_unbuffered(graph, clustering, con_ctx, m_ctx); + return contract_clustering_unbuffered(graph, std::move(clustering), con_ctx, m_ctx); case ContractionMode::UNBUFFERED_NAIVE: - return contract_clustering_unbuffered_naive(graph, clustering, con_ctx, m_ctx); + return contract_clustering_unbuffered_naive(graph, std::move(clustering), con_ctx, m_ctx); } __builtin_unreachable(); diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction.h b/kaminpar-shm/coarsening/contraction/cluster_contraction.h index b119785c..65af9328 100644 --- a/kaminpar-shm/coarsening/contraction/cluster_contraction.h +++ b/kaminpar-shm/coarsening/contraction/cluster_contraction.h @@ -41,12 +41,12 @@ struct MemoryContext { } // namespace contraction std::unique_ptr contract_clustering( - const Graph &graph, StaticArray &clustering, const ContractionCoarseningContext &con_ctx + const Graph &graph, StaticArray clustering, const ContractionCoarseningContext &con_ctx ); std::unique_ptr contract_clustering( const Graph &graph, - StaticArray &clustering, + StaticArray clustering, const ContractionCoarseningContext &con_ctx, contraction::MemoryContext &m_ctx ); diff --git a/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc b/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc index a5fbec31..b32ba773 100644 --- a/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc +++ b/kaminpar-shm/coarsening/contraction/cluster_contraction_preprocessing.cc @@ -21,50 +21,40 @@ namespace kaminpar::shm::contraction { void fill_leader_mapping( const Graph &graph, const StaticArray &clustering, StaticArray &leader_mapping ) { - START_TIMER("Allocation"); - if (leader_mapping.size() < graph.n()) { - leader_mapping.resize(graph.n()); - } - STOP_TIMER(); - - RECORD("leader_mapping"); - RECORD_LOCAL_DATA_STRUCT("StaticArray", leader_mapping.size() * sizeof(NodeID)); + TIMED_SCOPE("Allocation") { + if (leader_mapping.size() < graph.n()) { + RECORD("leader_mapping") leader_mapping.resize(graph.n(), static_array::noinit); + RECORD_LOCAL_DATA_STRUCT("StaticArray", leader_mapping.size() * sizeof(NodeID)); + } + }; - START_TIMER("Preprocessing"); - graph.pfor_nodes([&](const NodeID u) { leader_mapping[u] = 0; }); - graph.pfor_nodes([&](const NodeID u) { - __atomic_store_n(&leader_mapping[clustering[u]], 1, __ATOMIC_RELAXED); - }); - parallel::prefix_sum( - leader_mapping.begin(), leader_mapping.begin() + graph.n(), leader_mapping.begin() - ); - STOP_TIMER(); + TIMED_SCOPE("Preprocessing") { + graph.pfor_nodes([&](const NodeID u) { leader_mapping[u] = 0; }); + graph.pfor_nodes([&](const NodeID u) { + __atomic_store_n(&leader_mapping[clustering[u]], 1, __ATOMIC_RELAXED); + }); + parallel::prefix_sum( + leader_mapping.begin(), leader_mapping.begin() + graph.n(), leader_mapping.begin() + ); + }; } template <> StaticArray compute_mapping( - const Graph &graph, - const StaticArray &clustering, - const StaticArray &leader_mapping + const Graph &graph, StaticArray clustering, const StaticArray &leader_mapping ) { - START_TIMER("Allocation"); - RECORD("mapping") StaticArray mapping(graph.n()); - STOP_TIMER(); - START_TIMER("Preprocessing"); graph.pfor_nodes([&](const NodeID u) { - mapping[u] = __atomic_load_n(&leader_mapping[clustering[u]], __ATOMIC_RELAXED) - 1; + clustering[u] = __atomic_load_n(&leader_mapping[clustering[u]], __ATOMIC_RELAXED) - 1; }); STOP_TIMER(); - return mapping; + return std::move(clustering); } template <> CompactStaticArray compute_mapping( - const Graph &graph, - const StaticArray &clustering, - const StaticArray &leader_mapping + const Graph &graph, StaticArray clustering, const StaticArray &leader_mapping ) { const NodeID c_n = leader_mapping[graph.n() - 1]; @@ -83,25 +73,27 @@ CompactStaticArray compute_mapping( template