fix: make refactored gain cache more like the original to track performance degradation
DanielSeemaier committed Aug 9, 2024
1 parent c6b1e49 commit 4d9ba81
Showing 2 changed files with 24 additions and 21 deletions.
10 changes: 5 additions & 5 deletions kaminpar-shm/refinement/fm/fm_refiner.cc
@@ -451,11 +451,11 @@ template <typename Graph, typename GainCache> class FMRefinerCore : public Refin
SCOPED_HEAP_PROFILER("FM");
SCOPED_TIMER("FM");

- const auto &graph = p_graph.concretize<Graph>();
+ const Graph &graph = p_graph.concretize<Graph>();

- START_TIMER("Initialize gain cache");
- _shared->gain_cache.initialize(graph, p_graph);
- STOP_TIMER();
+ TIMED_SCOPE("Initialize gain cache") {
+ _shared->gain_cache.initialize(graph, p_graph);
+ };

const EdgeWeight initial_cut = metrics::edge_cut(p_graph);
EdgeWeight cut_before_current_iteration = initial_cut;
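Note on the change above: the explicit START_TIMER/STOP_TIMER pair is replaced by a TIMED_SCOPE block, which ties the measurement to a scope. KaMinPar's actual TIMED_SCOPE macro is not shown in this diff; the following is only a minimal sketch of the scoped-timing pattern it stands for, with all names hypothetical:

```cpp
#include <chrono>
#include <cstdio>

// Hypothetical scoped timer (NOT KaMinPar's TIMED_SCOPE): timing stops when
// the object goes out of scope, so a forgotten STOP_TIMER() cannot occur and
// early returns still produce a measurement.
class ScopedTimer {
public:
  explicit ScopedTimer(const char *name)
      : _name(name), _start(std::chrono::steady_clock::now()) {}

  ~ScopedTimer() {
    const auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(
                        std::chrono::steady_clock::now() - _start)
                        .count();
    std::printf("%s: %lld ms\n", _name, static_cast<long long>(ms));
  }

private:
  const char *_name;
  std::chrono::steady_clock::time_point _start;
};

int main() {
  {
    ScopedTimer timer("Initialize gain cache");
    // ... initialization work to be timed ...
  } // timer reports here, at the end of the scope
  return 0;
}
```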
@@ -576,7 +576,7 @@ FMRefiner::~FMRefiner() = default;

void FMRefiner::initialize(const PartitionedGraph &p_graph) {
if (!_core) {
- p_graph.graph().reified([&]<typename Graph>(Graph &graph) {
+ p_graph.reified([&]<typename Graph>(Graph &graph) {
switch (_ctx.refinement.kway_fm.gain_cache_strategy) {
case GainCacheStrategy::SPARSE:
_core = std::make_unique<FMRefinerCore<Graph, NormalSparseGainCache<Graph>>>(_ctx);
35 changes: 19 additions & 16 deletions kaminpar-shm/refinement/gains/sparse_gain_cache.h
@@ -120,6 +120,9 @@ class SparseGainCache {
_graph = &graph;
_p_graph = &p_graph;

+ _n = _graph->n();
+ _k = _p_graph->k();

_node_threshold = 0;
_bucket_threshold = 0;
_cache_offsets[0] = 0;
@@ -129,7 +132,7 @@
// value in the same 32 bit / 64 bit integer (depending on the size of the EdgeWeight data
// type).
// Thus, we compute the number of bits that we must reserve for the block IDs.
- _bits_for_key = math::ceil_log2(_p_graph->k());
+ _bits_for_key = math::ceil_log2(_k);
DBG << "Reserve " << (sizeof(UnsignedEdgeWeight) * 8 - _bits_for_key) << " of "
<< sizeof(UnsignedEdgeWeight) * 8 << " bits for gain values, " << _bits_for_key
<< " bits for block IDs";
@@ -144,13 +147,13 @@
// up to the one determined by this degree are assigned to the dense part, the other ones to
// the sparse part.
const EdgeID degree_threshold = std::max<EdgeID>(
- _p_graph->k() * _ctx.refinement.kway_fm.k_based_high_degree_threshold, // Usually k * 1
+ _k * _ctx.refinement.kway_fm.k_based_high_degree_threshold, // Usually k * 1
_ctx.refinement.kway_fm.constant_high_degree_threshold // Usually 0
);

// (i) compute size of the dense part (== hash tables) ...
for (_bucket_threshold = 0;
- _node_threshold < _graph->n() && _graph->degree(_node_threshold) < degree_threshold;
+ _node_threshold < _n && _graph->degree(_node_threshold) < degree_threshold;
++_bucket_threshold) {
_cache_offsets[_bucket_threshold] = gc_size;
_node_threshold += _graph->bucket_size(_bucket_threshold);
@@ -160,14 +163,14 @@
std::fill(_cache_offsets.begin() + _bucket_threshold, _cache_offsets.end(), gc_size);

// + ... (ii) size of the sparse part (table with k entries per node)
- gc_size += static_cast<std::size_t>(_graph->n() - _node_threshold) * _p_graph->k();
+ gc_size += static_cast<std::size_t>(_n - _node_threshold) * _k;

DBG << "Initialized with degree threshold: " << degree_threshold
<< ", node threshold: " << _node_threshold << ", bucket threshold: " << _bucket_threshold;
DBG << "Cache offsets: " << _cache_offsets;
} else {
// For graphs that do not have degree buckets, assign all nodes to the sparse part
- gc_size = 1ul * _graph->n() * _p_graph->k();
+ gc_size = 1ul * _n * _k;

DBG << "Graph was *not* rearranged by degree buckets: using the sparse strategy only (i.e., "
"using node threshold: "
@@ -186,9 +189,9 @@
_gain_cache.resize(gc_size);
}

- if (_weighted_degrees.size() < _graph->n()) {
+ if (_weighted_degrees.size() < _n) {
SCOPED_TIMER("Allocation");
- _weighted_degrees.resize(_graph->n());
+ _weighted_degrees.resize(_n);
}

init_buckets();
@@ -220,7 +223,7 @@
if (in_sparse_part(node)) {
const EdgeWeight conn_from = kIteratesExactGains ? conn_sparse(node, from) : 0;

- for (BlockID to = 0; to < _p_graph->k(); ++to) {
+ for (BlockID to = 0; to < _k; ++to) {
if (from == to) {
continue;
}
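For context, the gain that this loop reports for moving `node` from block `from` to block `to` is the connectivity difference, i.e. edge weight towards `to` minus edge weight towards `from` (as the `buffer.get(to) - conn_from` expression in the next hunk suggests). A self-contained sketch of that definition on a plain adjacency list, not KaMinPar's data structures:

```cpp
#include <cstdint>
#include <utility>
#include <vector>

using NodeID = std::uint32_t;
using BlockID = std::uint32_t;
using EdgeWeight = std::int32_t;

// Sketch only: gain of moving a node from block `from` to block `to` is the
// weight of its edges into `to` minus the weight of its edges into `from`.
EdgeWeight gain(const std::vector<std::pair<NodeID, EdgeWeight>> &neighbors,
                const std::vector<BlockID> &block, const BlockID from, const BlockID to) {
  EdgeWeight conn_from = 0;
  EdgeWeight conn_to = 0;
  for (const auto &[v, weight] : neighbors) {
    if (block[v] == from) {
      conn_from += weight;
    } else if (block[v] == to) {
      conn_to += weight;
    }
  }
  return conn_to - conn_from;
}
```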
@@ -239,15 +242,12 @@

if constexpr (kIteratesNonadjacentBlocks) {
auto &buffer = _dense_buffer_ets.local();
- if (buffer.capacity() < _p_graph->k()) {
- buffer.resize(_p_graph->k());
- }

create_dense_wrapper(node).for_each([&](const BlockID to, const EdgeWeight conn_to) {
buffer.set(to, conn_to);
});

- for (BlockID to = 0; to < _p_graph->k(); ++to) {
+ for (BlockID to = 0; to < _k; ++to) {
if (from != to) {
lambda(to, [&] { return buffer.get(to) - conn_from; });
}
@@ -438,7 +438,7 @@ class SparseGainCache {

[[nodiscard]] KAMINPAR_INLINE std::size_t
index_sparse(const NodeID node, const BlockID block) const {
- return _sparse_offset + 1ull * (node - _node_threshold) * _p_graph->k() + block;
+ return _sparse_offset + 1ull * (node - _node_threshold) * _k + block;
}

[[nodiscard]] KAMINPAR_INLINE EdgeWeight
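The index computation above treats the sparse part as a flat row-major table: each node at or above the node threshold owns k consecutive entries starting at `_sparse_offset`. A hedged sketch of the same addressing scheme with hypothetical names:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

using NodeID = std::uint32_t;
using BlockID = std::uint32_t;
using EdgeWeight = std::int64_t;

// Sketch of row-major addressing for the sparse part: the entries of node u
// (u >= node_threshold) occupy positions
// [sparse_offset + (u - node_threshold) * k, sparse_offset + (u - node_threshold) * k + k).
struct SparsePart {
  std::size_t sparse_offset;  // where the sparse part starts in the shared array
  NodeID node_threshold;      // first node stored in the sparse part
  BlockID k;                  // number of blocks
  std::vector<EdgeWeight> storage;

  std::size_t index(const NodeID node, const BlockID block) const {
    return sparse_offset + static_cast<std::size_t>(node - node_threshold) * k + block;
  }

  EdgeWeight conn(const NodeID node, const BlockID block) const {
    return storage[index(node, block)];
  }
};
```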
@@ -508,7 +508,7 @@

[[nodiscard]] bool dbg_check_cached_gain_for_node(const NodeID u) const {
const BlockID block_u = _p_graph->block(u);
- std::vector<EdgeWeight> actual_external_degrees(_p_graph->k(), 0);
+ std::vector<EdgeWeight> actual_external_degrees(_k, 0);
EdgeWeight actual_weighted_degree = 0;

_graph->adjacent_nodes(u, [&](const NodeID v, const EdgeWeight weight) {
@@ -518,7 +518,7 @@
actual_external_degrees[block_v] += weight;
});

- for (BlockID b = 0; b < _p_graph->k(); ++b) {
+ for (BlockID b = 0; b < _k; ++b) {
if (actual_external_degrees[b] != weighted_degree_to(u, b)) {
LOG_WARNING << "For node " << u << ": cached weighted degree to block " << b << " is "
<< weighted_degree_to(u, b) << " but should be " << actual_external_degrees[b];
@@ -540,6 +540,9 @@
const Graph *_graph = nullptr;
const PartitionedGraph *_p_graph = nullptr;

+ NodeID _n = kInvalidNodeID;
+ BlockID _k = kInvalidBlockID;

// First node ID assigned to the sparse part of the gain cache
NodeID _node_threshold = kInvalidNodeID;

@@ -562,7 +565,7 @@

mutable tbb::enumerable_thread_specific<Statistics> _stats_ets;
mutable tbb::enumerable_thread_specific<FastResetArray<EdgeWeight>> _dense_buffer_ets{[&] {
- return FastResetArray<EdgeWeight>(0);
+ return FastResetArray<EdgeWeight>(_k);
}};
};
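Constructing each thread-local buffer with capacity `_k` up front (instead of 0) pairs with the removal of the capacity check and lazy `resize()` from the gains loop in the earlier hunk, so the hot path no longer branches on buffer size. KaMinPar's FastResetArray is not shown in this diff; the sketch below only mirrors the `set`/`get` calls visible above plus a reset step, and is not the real implementation:

```cpp
#include <cstddef>
#include <vector>

// Sketch of a "fast reset" buffer: values are stored densely, and the written
// positions are remembered so a reset only clears those positions instead of
// the whole array. NOT KaMinPar's FastResetArray.
template <typename Value> class FastResetBuffer {
public:
  explicit FastResetBuffer(const std::size_t capacity) : _values(capacity) {}

  void set(const std::size_t pos, const Value value) {
    if (_values[pos] == Value()) {
      _used.push_back(pos); // remember positions that now hold data
    }
    _values[pos] = value;
  }

  Value get(const std::size_t pos) const { return _values[pos]; }

  void reset() {
    for (const std::size_t pos : _used) {
      _values[pos] = Value();
    }
    _used.clear();
  }

private:
  std::vector<Value> _values;
  std::vector<std::size_t> _used;
};
```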
