Skip to content

Commit

Permalink
KaMinPar: clean up timer tree, track time for freeing the dense gain cache
Browse files Browse the repository at this point in the history
  • Loading branch information
DanielSeemaier committed Sep 4, 2023
1 parent d7f41ae commit 7ea7006
Show file tree
Hide file tree
Showing 9 changed files with 75 additions and 35 deletions.
9 changes: 9 additions & 0 deletions common/datastructures/static_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@
#include "common/parallel/tbb_malloc.h"

namespace kaminpar {
namespace static_array {
// Tag type for selecting the "allocate without initialization" StaticArray
// constructor overload.
struct noinit_t {};
// Tag value callers pass, e.g. StaticArray<T>(static_array::noinit, n).
constexpr noinit_t noinit{};
} // namespace static_array

template <typename T> class StaticArray {
public:
class StaticArrayIterator {
Expand Down Expand Up @@ -125,6 +130,10 @@ template <typename T> class StaticArray {
resize(size, no_init{});
}

// Tag-dispatch constructor: allocates storage for `size` elements, selected by
// passing static_array::noinit. Delegates to resize with the no_init tag, so
// elements are presumably left uninitialized -- confirm against resize().
StaticArray(static_array::noinit_t, const std::size_t size) {
resize(size, no_init{});
}

template <typename Iterator>
StaticArray(Iterator first, Iterator last) : StaticArray(std::distance(first, last)) {
tbb::parallel_for<std::size_t>(0, _size, [&](const std::size_t i) { _data[i] = *(first + i); });
Expand Down
4 changes: 2 additions & 2 deletions common/timer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,12 @@ Timer &Timer::global() {

// Record the start timestamp on the current timer-tree node.
// The noinline attribute plus the empty asm memory clobber act as a compiler
// barrier so timed code cannot be reordered across the timestamp read.
// NOTE(review): the original span contained two consecutive assignments to
// _tree.current->start (a stale `timer::now()` line followed by the chrono
// call); the first assignment was dead code and is removed here.
void __attribute__((noinline)) Timer::start_timer_impl() {
asm volatile("" ::: "memory");
_tree.current->start = std::chrono::high_resolution_clock::now();
}

// Stop the current timer-tree node: take an end timestamp and accumulate the
// elapsed duration into the node. The asm memory clobber is a compiler barrier
// (see start_timer_impl).
// NOTE(review): the original span initialized `end` twice (stale
// `timer::now()` line plus the chrono call) -- a redeclaration that would not
// compile; only the chrono initialization is kept.
void __attribute__((noinline)) Timer::stop_timer_impl() {
asm volatile("" ::: "memory");
const TimePoint end = std::chrono::high_resolution_clock::now();
_tree.current->elapsed += end - _tree.current->start;
}

Expand Down
15 changes: 11 additions & 4 deletions kaminpar/graphutils/subgraph_extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include "kaminpar/definitions.h"

#include "common/datastructures/scalable_vector.h"
#include "common/timer.h"

namespace kaminpar::shm::graph {
struct SubgraphMemoryStartPosition {
Expand All @@ -41,10 +42,16 @@ struct SubgraphMemory {
const bool is_node_weighted = true,
const bool is_edge_weighted = true
)
: nodes(n + k),
edges(m),
node_weights(is_node_weighted * (n + k)),
edge_weights(is_edge_weighted * m) {}
: nodes(),
edges(),
node_weights(),
edge_weights() {
SCOPED_TIMER("Allocation");
nodes.resize(n + k);
edges.resize(m);
node_weights.resize(is_node_weighted * (n + k));
edge_weights.resize(is_edge_weighted * m);
}

explicit SubgraphMemory(const PartitionedGraph &p_graph)
: SubgraphMemory(
Expand Down
2 changes: 2 additions & 0 deletions kaminpar/kaminpar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "kaminpar/definitions.h"
#include "kaminpar/graphutils/permutator.h"
#include "kaminpar/metrics.h"
#include "kaminpar/partitioning/deep_multilevel.h"
#include "kaminpar/partitioning/partitioning.h"
#include "kaminpar/presets.h"

Expand Down Expand Up @@ -173,6 +174,7 @@ EdgeWeight KaMinPar::compute_partition(const int seed, const BlockID k, BlockID
graph::integrate_isolated_nodes(*_graph_ptr, original_epsilon, _ctx);
p_graph = graph::assign_isolated_nodes(std::move(p_graph), num_isolated_nodes, _ctx.partition);
}
STOP_TIMER();

START_TIMER("IO");
if (_graph_ptr->permuted()) {
Expand Down
3 changes: 1 addition & 2 deletions kaminpar/partitioning/deep_multilevel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,12 @@ DeepMultilevelPartitioner::DeepMultilevelPartitioner(
_current_p_ctx(input_ctx.partition),
_coarsener(factory::create_coarsener(input_graph, input_ctx.coarsening)),
_refiner(factory::create_refiner(input_ctx)),
_subgraph_memory(input_graph.n(), input_ctx.partition.k, input_graph.m(), true, true) {}
_subgraph_memory(input_graph.n(), input_ctx.partition.k, input_graph.m()) {}

PartitionedGraph DeepMultilevelPartitioner::partition() {
cio::print_delimiter("Partitioning");

const Graph *c_graph = coarsen();

PartitionedGraph p_graph = initial_partition(c_graph);

bool refined;
Expand Down
1 change: 1 addition & 0 deletions kaminpar/partitioning/deep_multilevel.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class DeepMultilevelPartitioner {

DeepMultilevelPartitioner(const DeepMultilevelPartitioner &) = delete;
DeepMultilevelPartitioner &operator=(const DeepMultilevelPartitioner &) = delete;

DeepMultilevelPartitioner(DeepMultilevelPartitioner &&) = delete;
DeepMultilevelPartitioner &operator=(DeepMultilevelPartitioner &&) = delete;

Expand Down
16 changes: 4 additions & 12 deletions kaminpar/partitioning/partitioning.cc
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,14 @@ namespace kaminpar::shm::partitioning {
PartitionedGraph partition(const Graph &graph, const Context &ctx) {
switch (ctx.mode) {
case PartitioningMode::DEEP: {
START_TIMER("Partitioning");
START_TIMER("Allocation");
DeepMultilevelPartitioner rb{graph, ctx};
STOP_TIMER();
auto p_graph = rb.partition();
STOP_TIMER();
DeepMultilevelPartitioner deep(graph, ctx);
PartitionedGraph p_graph = deep.partition();
return p_graph;
}

case PartitioningMode::RB: {
START_TIMER("Partitioning");
START_TIMER("Allocation");
RBMultilevelPartitioner rb{graph, ctx};
STOP_TIMER();
auto p_graph = rb.partition();
STOP_TIMER();
RBMultilevelPartitioner rb(graph, ctx);
PartitionedGraph p_graph = rb.partition();
return p_graph;
}

Expand Down
39 changes: 28 additions & 11 deletions kaminpar/refinement/fm_refiner.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

#include <cmath>

#include <tbb/parallel_invoke.h>

#include "kaminpar/context.h"
#include "kaminpar/datastructures/delta_partitioned_graph.h"
#include "kaminpar/datastructures/graph.h"
Expand Down Expand Up @@ -69,9 +71,7 @@ class NodeTracker {
static constexpr int MOVED_LOCALLY = -1;
static constexpr int MOVED_GLOBALLY = -2;

// Construct a tracker with one state slot per node.
// NOTE(review): the original span contained two definitions of this
// constructor (diff artifact): an older one that zeroed _state via
// tbb::parallel_for, and this empty-bodied one. Only one definition can exist;
// the empty body is kept on the assumption that StaticArray<int> (unlike the
// former NoinitVector) value-initializes its elements to zero -- TODO confirm
// against StaticArray's resize/construction semantics.
NodeTracker(const NodeID max_n) : _state(max_n) {}

bool lock(const NodeID u, const int id) {
int free = 0;
Expand All @@ -89,8 +89,12 @@ class NodeTracker {
__atomic_store_n(&_state[node], value, __ATOMIC_RELAXED);
}

// Eagerly release the backing storage of the per-node state array, rather than
// waiting for the tracker's destructor (callers time this deallocation; see
// the FM refiner's shared-state teardown).
void free() {
_state.free();
}

private:
NoinitVector<int> _state;
StaticArray<int> _state;
};

class BorderNodes {
Expand Down Expand Up @@ -169,18 +173,31 @@ struct SharedData {
: node_tracker(max_n),
gain_cache(max_n, max_k),
border_nodes(gain_cache, node_tracker),
shared_pq_handles(max_n),
target_blocks(max_n) {
tbb::parallel_for<std::size_t>(0, shared_pq_handles.size(), [&](std::size_t i) {
shared_pq_handles[i] = SharedBinaryMaxHeap<EdgeWeight>::kInvalidID;
});
shared_pq_handles(max_n, SharedBinaryMaxHeap<EdgeWeight>::kInvalidID),
target_blocks(static_array::noinit, max_n) {}

SharedData(const SharedData &) = delete;
SharedData &operator=(const SharedData &) = delete;

SharedData(SharedData &&) noexcept = default;
SharedData &operator=(SharedData &&) = delete;

// Explicitly free the four large FM data structures in parallel on teardown,
// and attribute the deallocation time to a dedicated timer entry ("Free
// shared FM refiner state") instead of letting it hide in member destructors.
~SharedData() {
START_TIMER("Free shared FM refiner state");
tbb::parallel_invoke(
[&] { shared_pq_handles.free(); },
[&] { target_blocks.free(); },
[&] { node_tracker.free(); },
[&] { gain_cache.free(); }
);
STOP_TIMER();
}

NodeTracker node_tracker;
DenseGainCache gain_cache;
BorderNodes border_nodes;
NoinitVector<std::size_t> shared_pq_handles;
NoinitVector<BlockID> target_blocks;
StaticArray<std::size_t> shared_pq_handles;
StaticArray<BlockID> target_blocks;
GlobalStats stats;
GlobalBatchStats batch_stats;
};
Expand Down
21 changes: 17 additions & 4 deletions kaminpar/refinement/gain_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include <kassert/kassert.hpp>
#include <tbb/parallel_for.h>
#include <tbb/parallel_invoke.h>

#include "kaminpar/context.h"
#include "kaminpar/datastructures/delta_partitioned_graph.h"
Expand All @@ -18,6 +19,7 @@
#include "common/datastructures/dynamic_map.h"
#include "common/datastructures/noinit_vector.h"
#include "common/logger.h"
#include "common/timer.h"

namespace kaminpar::shm {
template <typename GainCache, bool use_sparsehash = false> class DeltaGainCache;
Expand All @@ -29,8 +31,11 @@ class DenseGainCache {
DenseGainCache(const NodeID max_n, const BlockID max_k)
: _max_n(max_n),
_max_k(max_k),
_gain_cache(static_cast<std::size_t>(_max_n) * static_cast<std::size_t>(_max_k)),
_weighted_degrees(_max_n) {}
_gain_cache(
static_array::noinit,
static_cast<std::size_t>(_max_n) * static_cast<std::size_t>(_max_k)
),
_weighted_degrees(static_array::noinit, _max_n) {}

void initialize(const PartitionedGraph &p_graph) {
KASSERT(p_graph.n() <= _max_n, "gain cache is too small");
Expand All @@ -39,8 +44,16 @@ class DenseGainCache {
_n = p_graph.n();
_k = p_graph.k();

START_TIMER("Reset");
reset();
STOP_TIMER();
START_TIMER("Recompute");
recompute_all(p_graph);
STOP_TIMER();
}

// Eagerly release both backing arrays in parallel (the gain cache can be very
// large: max_n * max_k entries), so memory is returned before destruction.
void free() {
tbb::parallel_invoke([&] { _gain_cache.free(); }, [&] { _weighted_degrees.free(); });
}

EdgeWeight gain(const NodeID node, const BlockID block_from, const BlockID block_to) const {
Expand Down Expand Up @@ -153,8 +166,8 @@ class DenseGainCache {
NodeID _n;
BlockID _k;

NoinitVector<EdgeWeight> _gain_cache;
NoinitVector<EdgeWeight> _weighted_degrees;
StaticArray<EdgeWeight> _gain_cache;
StaticArray<EdgeWeight> _weighted_degrees;
};

template <typename GainCache, bool use_sparsehash> class DeltaGainCache {
Expand Down

0 comments on commit 7ea7006

Please sign in to comment.