fix: make refactored gain cache more like the original to track performance degradation
DanielSeemaier committed Aug 9, 2024
1 parent c6b1e49 commit 4d9ba81
Showing 2 changed files with 24 additions and 21 deletions.
10 changes: 5 additions & 5 deletions kaminpar-shm/refinement/fm/fm_refiner.cc
@@ -451,11 +451,11 @@ template <typename Graph, typename GainCache> class FMRefinerCore : public Refin
SCOPED_HEAP_PROFILER("FM");
SCOPED_TIMER("FM");

- const auto &graph = p_graph.concretize<Graph>();
+ const Graph &graph = p_graph.concretize<Graph>();

- START_TIMER("Initialize gain cache");
- _shared->gain_cache.initialize(graph, p_graph);
- STOP_TIMER();
+ TIMED_SCOPE("Initialize gain cache") {
+ _shared->gain_cache.initialize(graph, p_graph);
+ };

const EdgeWeight initial_cut = metrics::edge_cut(p_graph);
EdgeWeight cut_before_current_iteration = initial_cut;
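Note on the change above: the explicit START_TIMER/STOP_TIMER pair is replaced by a TIMED_SCOPE block, which ties the measurement to a scope. KaMinPar's actual TIMED_SCOPE macro is not shown in this diff; the following is only a minimal sketch of the scoped-timing pattern it stands for, with all names hypothetical:

```cpp
#include <chrono>
#include <cstdio>

// Hypothetical scoped timer (NOT KaMinPar's TIMED_SCOPE): timing stops when
// the object goes out of scope, so a forgotten STOP_TIMER() cannot occur and
// early returns still produce a measurement.
class ScopedTimer {
public:
  explicit ScopedTimer(const char *name)
      : _name(name), _start(std::chrono::steady_clock::now()) {}

  ~ScopedTimer() {
    const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                        std::chrono::steady_clock::now() - _start)
                        .count();
    std::printf("%s: %lld ms\n", _name, static_cast<long long>(ms));
  }

private:
  const char *_name;
  std::chrono::steady_clock::time_point _start;
};

int main() {
  {
    ScopedTimer timer("Initialize gain cache");
    // ... initialization work to be timed ...
  } // timer reports here, at the end of the scope
  return 0;
}
```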
@@ -576,7 +576,7 @@ FMRefiner::~FMRefiner() = default;

void FMRefiner::initialize(const PartitionedGraph &p_graph) {
if (!_core) {
- p_graph.graph().reified([&]<typename Graph>(Graph &graph) {
+ p_graph.reified([&]<typename Graph>(Graph &graph) {
switch (_ctx.refinement.kway_fm.gain_cache_strategy) {
case GainCacheStrategy::SPARSE:
_core = std::make_unique<FMRefinerCore<Graph, NormalSparseGainCache<Graph>>>(_ctx);
35 changes: 19 additions & 16 deletions kaminpar-shm/refinement/gains/sparse_gain_cache.h
@@ -120,6 +120,9 @@ class SparseGainCache {
_graph = &graph;
_p_graph = &p_graph;

+ _n = _graph->n();
+ _k = _p_graph->k();

_node_threshold = 0;
_bucket_threshold = 0;
_cache_offsets[0] = 0;
@@ -129,7 +132,7 @@
// value in the same 32 bit / 64 bit integer (depending on the size of the EdgeWeight data
// type).
// Thus, we compute the number of bits that we must reserve for the block IDs.
- _bits_for_key = math::ceil_log2(_p_graph->k());
+ _bits_for_key = math::ceil_log2(_k);
DBG << "Reserve " << (sizeof(UnsignedEdgeWeight) * 8 - _bits_for_key) << " of "
<< sizeof(UnsignedEdgeWeight) * 8 << " bits for gain values, " << _bits_for_key
<< " bits for block IDs";
@@ -144,13 +147,13 @@
// up to the one determined by this degree are assigned to the dense part, the other ones to
// the sparse part.
const EdgeID degree_threshold = std::max<EdgeID>(
- _p_graph->k() * _ctx.refinement.kway_fm.k_based_high_degree_threshold, // Usually k * 1
+ _k * _ctx.refinement.kway_fm.k_based_high_degree_threshold, // Usually k * 1
_ctx.refinement.kway_fm.constant_high_degree_threshold // Usually 0
);

// (i) compute size of the dense part (== hash tables) ...
for (_bucket_threshold = 0;
- _node_threshold < _graph->n() && _graph->degree(_node_threshold) < degree_threshold;
+ _node_threshold < _n && _graph->degree(_node_threshold) < degree_threshold;
++_bucket_threshold) {
_cache_offsets[_bucket_threshold] = gc_size;
_node_threshold += _graph->bucket_size(_bucket_threshold);
@@ -160,14 +163,14 @@
std::fill(_cache_offsets.begin() + _bucket_threshold, _cache_offsets.end(), gc_size);

// + ... (ii) size of the sparse part (table with k entries per node)
- gc_size += static_cast<std::size_t>(_graph->n() - _node_threshold) * _p_graph->k();
+ gc_size += static_cast<std::size_t>(_n - _node_threshold) * _k;

DBG << "Initialized with degree threshold: " << degree_threshold
<< ", node threshold: " << _node_threshold << ", bucket threshold: " << _bucket_threshold;
DBG << "Cache offsets: " << _cache_offsets;
} else {
// For graphs that do not have degree buckets, assign all nodes to the sparse part
- gc_size = 1ul * _graph->n() * _p_graph->k();
+ gc_size = 1ul * _n * _k;

DBG << "Graph was *not* rearranged by degree buckets: using the sparse strategy only (i.e., "
"using node threshold: "
@@ -186,9 +189,9 @@
_gain_cache.resize(gc_size);
}

- if (_weighted_degrees.size() < _graph->n()) {
+ if (_weighted_degrees.size() < _n) {
SCOPED_TIMER("Allocation");
- _weighted_degrees.resize(_graph->n());
+ _weighted_degrees.resize(_n);
}

init_buckets();
@@ -220,7 +223,7 @@
if (in_sparse_part(node)) {
const EdgeWeight conn_from = kIteratesExactGains ? conn_sparse(node, from) : 0;

- for (BlockID to = 0; to < _p_graph->k(); ++to) {
+ for (BlockID to = 0; to < _k; ++to) {
if (from == to) {
continue;
}
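For context, the gain that this loop reports for moving `node` from block `from` to block `to` is the connectivity difference, i.e. edge weight towards `to` minus edge weight towards `from` (as the `buffer.get(to) - conn_from` expression in the next hunk suggests). A self-contained sketch of that definition on a plain adjacency list, not KaMinPar's data structures:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

using NodeID = std::uint32_t;
using BlockID = std::uint32_t;
using EdgeWeight = std::int32_t;

// Sketch only: gain of moving a node from block `from` to block `to` is the
// weight of its edges into `to` minus the weight of its edges into `from`.
EdgeWeight gain(const std::vector<std::pair<NodeID, EdgeWeight>> &neighbors,
                const std::vector<BlockID> &block, const BlockID from, const BlockID to) {
  EdgeWeight conn_from = 0;
  EdgeWeight conn_to = 0;
  for (const auto &[v, weight] : neighbors) {
    if (block[v] == from) {
      conn_from += weight;
    } else if (block[v] == to) {
      conn_to += weight;
    }
  }
  return conn_to - conn_from;
}
```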
@@ -239,15 +242,12 @@

if constexpr (kIteratesNonadjacentBlocks) {
auto &buffer = _dense_buffer_ets.local();
- if (buffer.capacity() < _p_graph->k()) {
- buffer.resize(_p_graph->k());
- }

create_dense_wrapper(node).for_each([&](const BlockID to, const EdgeWeight conn_to) {
buffer.set(to, conn_to);
});

- for (BlockID to = 0; to < _p_graph->k(); ++to) {
+ for (BlockID to = 0; to < _k; ++to) {
if (from != to) {
lambda(to, [&] { return buffer.get(to) - conn_from; });
}
@@ -438,7 +438,7 @@ class SparseGainCache {

[[nodiscard]] KAMINPAR_INLINE std::size_t
index_sparse(const NodeID node, const BlockID block) const {
- return _sparse_offset + 1ull * (node - _node_threshold) * _p_graph->k() + block;
+ return _sparse_offset + 1ull * (node - _node_threshold) * _k + block;
}

[[nodiscard]] KAMINPAR_INLINE EdgeWeight
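The index computation above treats the sparse part as a flat row-major table: each node at or above the node threshold owns k consecutive entries starting at `_sparse_offset`. A hedged sketch of the same addressing scheme with hypothetical names:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using NodeID = std::uint32_t;
using BlockID = std::uint32_t;
using EdgeWeight = std::int64_t;

// Sketch of row-major addressing for the sparse part: the entries of node u
// (u >= node_threshold) occupy positions
// [sparse_offset + (u - node_threshold) * k, sparse_offset + (u - node_threshold) * k + k).
struct SparsePart {
  std::size_t sparse_offset;  // where the sparse part starts in the shared array
  NodeID node_threshold;      // first node stored in the sparse part
  BlockID k;                  // number of blocks
  std::vector<EdgeWeight> storage;

  std::size_t index(const NodeID node, const BlockID block) const {
    return sparse_offset + static_cast<std::size_t>(node - node_threshold) * k + block;
  }

  EdgeWeight conn(const NodeID node, const BlockID block) const {
    return storage[index(node, block)];
  }
};
```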
@@ -508,7 +508,7 @@

[[nodiscard]] bool dbg_check_cached_gain_for_node(const NodeID u) const {
const BlockID block_u = _p_graph->block(u);
- std::vector<EdgeWeight> actual_external_degrees(_p_graph->k(), 0);
+ std::vector<EdgeWeight> actual_external_degrees(_k, 0);
EdgeWeight actual_weighted_degree = 0;

_graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) {
@@ -518,7 +518,7 @@
actual_external_degrees[block_v] += weight;
});

- for (BlockID b = 0; b < _p_graph->k(); ++b) {
+ for (BlockID b = 0; b < _k; ++b) {
if (actual_external_degrees[b] != weighted_degree_to(u, b)) {
LOG_WARNING << "For node " << u << ": cached weighted degree to block " << b << " is "
<< weighted_degree_to(u, b) << " but should be " << actual_external_degrees[b];
@@ -540,6 +540,9 @@
const Graph *_graph = nullptr;
const PartitionedGraph *_p_graph = nullptr;

+ NodeID _n = kInvalidNodeID;
+ BlockID _k = kInvalidBlockID;

// First node ID assigned to the sparse part of the gain cache
NodeID _node_threshold = kInvalidNodeID;

@@ -562,7 +565,7 @@

mutable tbb::enumerable_thread_specific<Statistics> _stats_ets;
mutable tbb::enumerable_thread_specific<FastResetArray<EdgeWeight>> _dense_buffer_ets{[&] {
- return FastResetArray<EdgeWeight>(0);
+ return FastResetArray<EdgeWeight>(_k);
}};
};
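Constructing each thread-local buffer with capacity `_k` up front (instead of 0) pairs with the removal of the capacity check and lazy `resize()` from the gains loop in the earlier hunk, so the hot path no longer branches on buffer size. KaMinPar's FastResetArray is not shown in this diff; the sketch below only mirrors the `set`/`get` calls visible above plus a reset step, and is not the real implementation:

```cpp
#include <cstddef>
#include <vector>

// Sketch of a "fast reset" buffer: values are stored densely, and the written
// positions are remembered so a reset only clears those positions instead of
// the whole array. NOT KaMinPar's FastResetArray.
template <typename Value> class FastResetBuffer {
public:
  explicit FastResetBuffer(const std::size_t capacity) : _values(capacity) {}

  void set(const std::size_t pos, const Value value) {
    if (_values[pos] == Value()) {
      _used.push_back(pos); // remember positions that now hold data
    }
    _values[pos] = value;
  }

  Value get(const std::size_t pos) const { return _values[pos]; }

  void reset() {
    for (const std::size_t pos : _used) {
      _values[pos] = Value();
    }
    _used.clear();
  }

private:
  std::vector<Value> _values;
  std::vector<std::size_t> _used;
};
```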
