From 5c530ba3323889018031b71f76047d97335d5c24 Mon Sep 17 00:00:00 2001
From: Daniel Seemaier <daniel+github@seemaier.de>
Date: Wed, 10 Jul 2024 12:58:30 +0200
Subject: [PATCH] wip(shm-fm): more duplicated gain cache

---
 kaminpar-shm/context_io.cc                    |   9 +-
 kaminpar-shm/kaminpar.h                       |   3 +-
 kaminpar-shm/presets.cc                       |   2 +-
 kaminpar-shm/refinement/fm/fm_refiner.cc      |   7 +-
 .../refinement/gains/denser_gain_cache.h      | 136 ++++--------------
 5 files changed, 38 insertions(+), 119 deletions(-)
diff --git a/kaminpar-shm/context_io.cc b/kaminpar-shm/context_io.cc
index bfbcf603..a246d76a 100644
--- a/kaminpar-shm/context_io.cc
+++ b/kaminpar-shm/context_io.cc
@@ -262,8 +262,9 @@ std::unordered_map<std::string, GainCacheStrategy> get_gain_cache_strategies() {
   return {
       {"sparse", GainCacheStrategy::SPARSE},
       {"dense", GainCacheStrategy::DENSE},
+      {"dense-largek", GainCacheStrategy::DENSE_LARGE_K},
       {"denser", GainCacheStrategy::DENSER},
-      {"largek", GainCacheStrategy::LARGE_K},
+      {"denser-largek", GainCacheStrategy::DENSER_LARGE_K},
       {"on-the-fly", GainCacheStrategy::ON_THE_FLY},
       {"hybrid", GainCacheStrategy::HYBRID},
       {"tracing", GainCacheStrategy::TRACING},
@@ -276,10 +277,12 @@ std::ostream &operator<<(std::ostream &out, const GainCacheStrategy strategy) {
     return out << "sparse";
   case GainCacheStrategy::DENSE:
     return out << "dense";
+  case GainCacheStrategy::DENSE_LARGE_K:
+    return out << "dense-largek";
   case GainCacheStrategy::DENSER:
     return out << "denser";
-  case GainCacheStrategy::LARGE_K:
-    return out << "largek";
+  case GainCacheStrategy::DENSER_LARGE_K:
+    return out << "denser-largek";
   case GainCacheStrategy::ON_THE_FLY:
     return out << "on-the-fly";
   case GainCacheStrategy::HYBRID:
diff --git a/kaminpar-shm/kaminpar.h b/kaminpar-shm/kaminpar.h
index 3ef089cc..b5e79ddb 100644
--- a/kaminpar-shm/kaminpar.h
+++ b/kaminpar-shm/kaminpar.h
@@ -208,8 +208,9 @@ enum class FMStoppingRule {
 enum class GainCacheStrategy {
   SPARSE,
   DENSE,
+  DENSE_LARGE_K,
   DENSER,
-  LARGE_K,
+  DENSER_LARGE_K,
   ON_THE_FLY,
   HYBRID,
   TRACING,
diff --git a/kaminpar-shm/presets.cc b/kaminpar-shm/presets.cc
index 27eb3e52..ddceb2e5 100644
--- a/kaminpar-shm/presets.cc
+++ b/kaminpar-shm/presets.cc
@@ -313,7 +313,7 @@ Context create_largek_fm_context() {
       RefinementAlgorithm::GREEDY_BALANCER,
   };
 
-  ctx.refinement.kway_fm.gain_cache_strategy = GainCacheStrategy::LARGE_K;
+  ctx.refinement.kway_fm.gain_cache_strategy = GainCacheStrategy::DENSE_LARGE_K;
 
   return ctx;
 }
diff --git a/kaminpar-shm/refinement/fm/fm_refiner.cc b/kaminpar-shm/refinement/fm/fm_refiner.cc
index 46904249..c4101471 100644
--- a/kaminpar-shm/refinement/fm/fm_refiner.cc
+++ b/kaminpar-shm/refinement/fm/fm_refiner.cc
@@ -39,12 +39,15 @@ std::unique_ptr<Refiner> create_fm_refiner(const Context &ctx) {
   case GainCacheStrategy::DENSE:
     return std::make_unique<FMRefiner<DenseGainCache<true, DenseDeltaGainCache>>>(ctx);
 
-  case GainCacheStrategy::LARGE_K:
+  case GainCacheStrategy::DENSE_LARGE_K:
     return std::make_unique<FMRefiner<DenseGainCache<false, LargeKDenseDeltaGainCache>>>(ctx);
 
 #ifdef KAMINPAR_EXPERIMENTAL
   case GainCacheStrategy::DENSER:
-    return std::make_unique<FMRefiner<DenserGainCache<true, DenserDeltaGainCache>>>(ctx);
+    return std::make_unique<FMRefiner<DenserGainCache<true, DenseDeltaGainCache>>>(ctx);
+
+  case GainCacheStrategy::DENSER_LARGE_K:
+    return std::make_unique<FMRefiner<DenserGainCache<true, LargeKDenseDeltaGainCache>>>(ctx);
 
   case GainCacheStrategy::SPARSE:
     return std::make_unique<FMRefiner<SparseGainCache<true>>>(ctx);
diff --git a/kaminpar-shm/refinement/gains/denser_gain_cache.h b/kaminpar-shm/refinement/gains/denser_gain_cache.h
index 52afbafa..70f66be9 100644
--- a/kaminpar-shm/refinement/gains/denser_gain_cache.h
+++ b/kaminpar-shm/refinement/gains/denser_gain_cache.h
@@ -22,10 +22,10 @@
 #include <tbb/parallel_invoke.h>
 
 #include "kaminpar-shm/datastructures/partitioned_graph.h"
+#include "kaminpar-shm/refinement/gains/dense_gain_cache.h"
 
 #include "kaminpar-common/assert.h"
 #include "kaminpar-common/datastructures/compact_hash_map.h"
-#include "kaminpar-common/datastructures/dynamic_map.h"
 #include "kaminpar-common/datastructures/fast_reset_array.h"
 #include "kaminpar-common/datastructures/static_array.h"
 #include "kaminpar-common/degree_buckets.h"
@@ -34,12 +34,9 @@
 #include "kaminpar-common/timer.h"
 
 namespace kaminpar::shm {
-template <typename DeltaPartitionedGraph, typename GainCache> class DenserDeltaGainCache;
-template <typename DeltaPartitionedGraph, typename GainCache> class LargeKDenserDeltaGainCache;
-
 template <
     bool iterate_nonadjacent_blocks = true,
-    template <typename, typename> typename DeltaGainCache = DenserDeltaGainCache,
+    template <typename, typename> typename DeltaGainCache = DenseDeltaGainCache,
     bool iterate_exact_gains = false>
 class DenserGainCache {
   SET_DEBUG(false);
@@ -120,43 +117,31 @@ class DenserGainCache {
     // Size of the gain cache (dense + sparse part)
     std::size_t gc_size = 0;
 
-    if (p_graph.sorted()) {
-      DBG << "Graph was rearranged by degree buckets: using the mixed dense-sparse strategy";
-
-      // Compute the degree that we use to determine the threshold degree bucket: nodes in buckets
-      // up to the one determined by this degree are assigned to the dense part, the other ones to
-      // the sparse part.
-      const EdgeID degree_threshold = std::max<EdgeID>(
-          _k * _ctx.refinement.kway_fm.k_based_high_degree_threshold, // usually k * 1
-          _ctx.refinement.kway_fm.constant_high_degree_threshold      // usually 0
-      );
-
-      // (i) compute size of the dense part (== hash tables) ...
-      for (_bucket_threshold = 0;
-           _node_threshold < p_graph.n() && p_graph.degree(_node_threshold) < degree_threshold;
-           ++_bucket_threshold) {
-        _cache_offsets[_bucket_threshold] = gc_size;
-        _node_threshold += p_graph.bucket_size(_bucket_threshold);
-        gc_size += p_graph.bucket_size(_bucket_threshold) *
-                   (lowest_degree_in_bucket<NodeID>(_bucket_threshold + 1));
-      }
-      std::fill(_cache_offsets.begin() + _bucket_threshold, _cache_offsets.end(), gc_size);
+    // Compute the degree that we use to determine the threshold degree bucket: nodes in buckets
+    // up to the one determined by this degree are assigned to the dense part, the other ones to
+    // the sparse part.
+    const EdgeID degree_threshold = std::max<EdgeID>(
+        _k * _ctx.refinement.kway_fm.k_based_high_degree_threshold, // usually k * 1
+        _ctx.refinement.kway_fm.constant_high_degree_threshold      // usually 0
+    );
 
-      // + ... (ii) size of the sparse part (table with k entries per node)
-      gc_size += static_cast<std::size_t>(p_graph.n() - _node_threshold) * _k;
+    // (i) compute size of the dense part (== hash tables) ...
+    for (_bucket_threshold = 0;
+         _node_threshold < p_graph.n() && p_graph.degree(_node_threshold) < degree_threshold;
+         ++_bucket_threshold) {
+      _cache_offsets[_bucket_threshold] = gc_size;
+      _node_threshold += p_graph.bucket_size(_bucket_threshold);
+      gc_size += p_graph.bucket_size(_bucket_threshold) *
+                 (lowest_degree_in_bucket<NodeID>(_bucket_threshold + 1));
+    }
+    std::fill(_cache_offsets.begin() + _bucket_threshold, _cache_offsets.end(), gc_size);
 
-      DBG << "Initialized with degree threshold: " << degree_threshold
-          << ", node threshold: " << _node_threshold << ", bucket threshold: " << _bucket_threshold;
-      DBG << "Cache offsets: " << _cache_offsets;
-    } else {
-      // For graphs that do not have degree buckets, assign all nodes to the sparse part
-      gc_size = 1ul * _n * _k;
+    // + ... (ii) size of the sparse part (table with k entries per node)
+    gc_size += static_cast<std::size_t>(p_graph.n() - _node_threshold) * _k;
 
-      DBG << "Graph was *not* rearranged by degree buckets: using the sparse strategy only (i.e., "
-             "using node threshold: "
-          << _node_threshold << ", bucket threshold: " << _bucket_threshold << ")";
-      DBG << "Cache offsets: " << _cache_offsets;
-    }
+    DBG << "Initialized with degree threshold: " << degree_threshold
+        << ", node threshold: " << _node_threshold << ", bucket threshold: " << _bucket_threshold;
+    DBG << "Cache offsets: " << _cache_offsets;
 
     _sparse_offset = _cache_offsets[_bucket_threshold];
 
@@ -552,77 +537,4 @@ class DenserGainCache {
     return FastResetArray<EdgeWeight>(_k);
   }};
 };
-
-template <typename _DeltaPartitionedGraph, typename _GainCache> class DenserDeltaGainCache {
-public:
-  using DeltaPartitionedGraph = _DeltaPartitionedGraph;
-  using GainCache = _GainCache;
-
-  // Delta gain caches should only be used with GainCaches that iterate over all blocks, since there
-  // might be new connections to non-adjacent blocks in the delta graph. These connections might be
-  // missed if the gain cache does not iterate over all blocks.
-  constexpr static bool kIteratesExactGains = GainCache::kIteratesExactGains;
-  static_assert(GainCache::kIteratesNonadjacentBlocks);
-
-  DenserDeltaGainCache(const GainCache &gain_cache, const DeltaPartitionedGraph &d_graph)
-      : _k(d_graph.k()),
-        _gain_cache(gain_cache) {}
-
-  [[nodiscard]] KAMINPAR_INLINE EdgeWeight conn(const NodeID node, const BlockID block) const {
-    return _gain_cache.conn(node, block) + conn_delta(node, block);
-  }
-
-  [[nodiscard]] KAMINPAR_INLINE EdgeWeight
-  gain(const NodeID node, const BlockID from, const BlockID to) const {
-    return _gain_cache.gain(node, from, to) + conn_delta(node, to) - conn_delta(node, from);
-  }
-
-  [[nodiscard]] KAMINPAR_INLINE std::pair<EdgeWeight, EdgeWeight>
-  gain(const NodeID node, const BlockID b_node, const std::pair<BlockID, BlockID> &targets) {
-    return {gain(node, b_node, targets.first), gain(node, b_node, targets.second)};
-  }
-
-  template <typename Lambda>
-  KAMINPAR_INLINE void gains(const NodeID node, const BlockID from, Lambda &&lambda) const {
-    const EdgeWeight conn_from_delta = kIteratesExactGains ? conn_delta(node, from) : 0;
-
-    _gain_cache.gains(node, from, [&](const BlockID to, auto &&gain) {
-      lambda(to, [&] { return gain() + conn_delta(node, to) - conn_from_delta; });
-    });
-  }
-
-  KAMINPAR_INLINE void move(
-      const DeltaPartitionedGraph &d_graph,
-      const NodeID u,
-      const BlockID block_from,
-      const BlockID block_to
-  ) {
-    d_graph.adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) {
-      _gain_cache_delta[index(v, block_from)] -= weight;
-      _gain_cache_delta[index(v, block_to)] += weight;
-    });
-  }
-
-  KAMINPAR_INLINE void clear() {
-    _gain_cache_delta.clear();
-  }
-
-private:
-  [[nodiscard]] KAMINPAR_INLINE std::size_t index(const NodeID node, const BlockID block) const {
-    // Note: this increases running times substantially due to the shifts
-    // return index_sparse(node, block);
-
-    return 1ull * node * _k + block;
-  }
-
-  [[nodiscard]] KAMINPAR_INLINE EdgeWeight
-  conn_delta(const NodeID node, const BlockID block) const {
-    const auto it = _gain_cache_delta.get_if_contained(index(node, block));
-    return it != _gain_cache_delta.end() ? *it : 0;
-  }
-
-  BlockID _k;
-  const GainCache &_gain_cache;
-  DynamicFlatMap<std::size_t, EdgeWeight> _gain_cache_delta;
-};
 } // namespace kaminpar::shm