integrate community detection
N-Maas committed Oct 7, 2024
1 parent 46fe967 commit 14feffd
Showing 8 changed files with 111 additions and 39 deletions.
3 changes: 2 additions & 1 deletion mt-kahypar/datastructures/CMakeLists.txt
@@ -47,7 +47,8 @@ set(ToolsDatastructureSources
dynamic_graph.cpp
dynamic_graph_factory.cpp
dynamic_adjacency_array.cpp
fixed_vertex_support.cpp)
fixed_vertex_support.cpp
graph.cpp)

foreach(modtarget IN LISTS TOOLS_TARGETS)
target_sources(${modtarget} PRIVATE ${ToolsDatastructureSources})
1 change: 1 addition & 0 deletions mt-kahypar/datastructures/buffered_vector.h
@@ -28,6 +28,7 @@
#pragma once

#include <vector>
#include <cassert>
#include <tbb/scalable_allocator.h>
#include <tbb/enumerable_thread_specific.h>

2 changes: 1 addition & 1 deletion mt-kahypar/partition/partitioner.cpp
@@ -274,7 +274,7 @@ namespace mt_kahypar {
}
timer.stop_timer("construct_graph");
timer.start_timer("perform_community_detection", "Perform Community Detection");
ds::Clustering communities = community_detection::run_parallel_louvain(graph, context);
ds::Clustering communities = community_detection::run_parallel_louvain(graph, context).back().first;
graph.restrictClusteringToHypernodes(hypergraph, communities);
hypergraph.setCommunityIDs(std::move(communities));
timer.stop_timer("perform_community_detection");
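
Note: run_parallel_louvain now returns the full Louvain hierarchy as (clustering, modularity) pairs, ordered from the first local-moving pass to the coarsest level, so .back().first reproduces the single clustering the old interface returned. A minimal sketch of how a caller might inspect the stack (the loop, logging, and variable names are illustrative, not part of this commit):

// Sketch: walk the hierarchy returned by run_parallel_louvain.
// Assumes `graph`, `context`, and <iostream> are available as in partitioner.cpp above.
auto stack = community_detection::run_parallel_louvain(graph, context);
for (size_t level = 0; level < stack.size(); ++level) {
  const auto& [clustering, modularity] = stack[level];
  std::cout << "level " << level << ": " << clustering.size()
            << " nodes, modularity " << modularity << std::endl;
}
ds::Clustering communities = stack.back().first;  // coarsest level, as used above
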
6 changes: 5 additions & 1 deletion mt-kahypar/partition/preprocessing/CMakeLists.txt
@@ -4,4 +4,8 @@ set(PreprocessingSources

foreach(modtarget IN LISTS PARTITIONING_SUITE_TARGETS)
target_sources(${modtarget} PRIVATE ${PreprocessingSources})
endforeach()
endforeach()

foreach(modtarget IN LISTS TOOLS_TARGETS)
target_sources(${modtarget} PRIVATE ${PreprocessingSources})
endforeach()
@@ -34,49 +34,66 @@
namespace mt_kahypar::community_detection {

template<typename Hypergraph>
ds::Clustering local_moving_contract_recurse(Graph<Hypergraph>& fine_graph,
ParallelLocalMovingModularity<Hypergraph>& mlv,
const Context& context) {
std::vector<std::pair<ds::Clustering, double>> local_moving_contract_recurse(Graph<Hypergraph>& fine_graph,
ParallelLocalMovingModularity<Hypergraph>& mlv,
const Context& context) {
utils::Timer& timer = utils::Utilities::instance().getTimer(context.utility_id);
timer.start_timer("local_moving", "Local Moving");
ds::Clustering communities(fine_graph.numNodes());
bool communities_changed = mlv.localMoving(fine_graph, communities);
ds::Clustering own_communities(fine_graph.numNodes());
bool communities_changed = mlv.localMoving(fine_graph, own_communities);
timer.stop_timer("local_moving");
std::vector<std::pair<ds::Clustering, double>> result;

if (communities_changed) {
timer.start_timer("contraction_cd", "Contraction");
// Contract Communities
Graph<Hypergraph> coarse_graph = fine_graph.contract(communities, context.preprocessing.community_detection.low_memory_contraction);
Graph<Hypergraph> coarse_graph = fine_graph.contract(own_communities, context.preprocessing.community_detection.low_memory_contraction);
ASSERT(coarse_graph.totalVolume() == fine_graph.totalVolume());
timer.stop_timer("contraction_cd");

double new_modularity = 0;
double factor = 1 / coarse_graph.totalVolume();
for (NodeID node: coarse_graph.nodes()) {
double contribution = coarse_graph.nodeVolume(node);
for (const Arc& arc : coarse_graph.arcsOf(node)) {
contribution -= arc.weight; // only count internal edges
}
contribution -= factor * coarse_graph.nodeVolume(node) * coarse_graph.nodeVolume(node);
new_modularity += factor * contribution;
}
result.emplace_back(own_communities, new_modularity);

// Recurse on contracted graph
ds::Clustering coarse_communities = local_moving_contract_recurse(coarse_graph, mlv, context);
auto coarse_communities = local_moving_contract_recurse(coarse_graph, mlv, context);

timer.start_timer("project", "Project");
// Prolong Clustering
tbb::parallel_for(UL(0), fine_graph.numNodes(), [&](const NodeID u) {
ASSERT(communities[u] < static_cast<PartitionID>(coarse_communities.size()));
communities[u] = coarse_communities[communities[u]];
});
for (const auto& [comm, modularity]: coarse_communities) {
ds::Clustering communities(own_communities); // yes, this is an intentional copy
tbb::parallel_for(UL(0), fine_graph.numNodes(), [&](const NodeID u) {
ASSERT(communities[u] < static_cast<PartitionID>(comm.size()));
communities[u] = comm[communities[u]];
});
result.emplace_back(std::move(communities), modularity);
}
timer.stop_timer("project");
}

return communities;
return result;
}

template<typename Hypergraph>
ds::Clustering run_parallel_louvain(Graph<Hypergraph>& graph,
std::vector<std::pair<ds::Clustering, double>> run_parallel_louvain(Graph<Hypergraph>& graph,
const Context& context,
bool disable_randomization) {
ParallelLocalMovingModularity<Hypergraph> mlv(context, graph.numNodes(), disable_randomization);
ds::Clustering communities = local_moving_contract_recurse(graph, mlv, context);
return communities;
auto result = local_moving_contract_recurse(graph, mlv, context);
return result;
}

namespace {
#define LOCAL_MOVING(X) ds::Clustering local_moving_contract_recurse(Graph<X>&, ParallelLocalMovingModularity<X>&, const Context&)
#define PARALLEL_LOUVAIN(X) ds::Clustering run_parallel_louvain(Graph<X>&, const Context&, bool)
#define LOCAL_MOVING(X) std::vector<std::pair<ds::Clustering, double>> local_moving_contract_recurse(Graph<X>&, ParallelLocalMovingModularity<X>&, const Context&)
#define PARALLEL_LOUVAIN(X) std::vector<std::pair<ds::Clustering, double>> run_parallel_louvain(Graph<X>&, const Context&, bool)
}

INSTANTIATE_FUNC_WITH_HYPERGRAPHS(LOCAL_MOVING)
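
Note on the modularity computed per level above: each coarse node represents one community c, coarse_graph.nodeVolume(node) is vol(c), the subtracted arc weights form the cut between c and the rest of the graph, and factor is 1 / vol(V). Under that reading of the snippet (mine, not stated in the commit), the loop accumulates the standard modularity

Q = \sum_{c} \left[ \frac{\mathrm{vol}(c) - \mathrm{cut}(c)}{\mathrm{vol}(V)} - \left( \frac{\mathrm{vol}(c)}{\mathrm{vol}(V)} \right)^{2} \right]

where vol(c) - cut(c) equals twice the total weight of edges internal to c.
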
@@ -32,11 +32,11 @@

namespace mt_kahypar::community_detection {
template<typename Hypergraph>
ds::Clustering local_moving_contract_recurse(Graph<Hypergraph>& fine_graph,
std::vector<std::pair<ds::Clustering, double>> local_moving_contract_recurse(Graph<Hypergraph>& fine_graph,
ParallelLocalMovingModularity<Hypergraph>& mlv,
const Context& context);
template<typename Hypergraph>
ds::Clustering run_parallel_louvain(Graph<Hypergraph>& graph,
std::vector<std::pair<ds::Clustering, double>> run_parallel_louvain(Graph<Hypergraph>& graph,
const Context& context,
bool disable_randomization = false);
}
75 changes: 62 additions & 13 deletions tools/ml_graph_stats.cc
@@ -48,6 +48,7 @@
#include "mt-kahypar/partition/context.h"
#include "mt-kahypar/io/hypergraph_factory.h"
#include "mt-kahypar/io/hypergraph_io.h"
#include "mt-kahypar/partition/preprocessing/community_detection/parallel_louvain.h"
#include "mt-kahypar/utils/cast.h"
#include "mt-kahypar/utils/delete.h"
#include "mt-kahypar/utils/hypergraph_statistics.h"
@@ -59,8 +60,8 @@
using namespace mt_kahypar;
namespace po = boost::program_options;

using Graph = ds::StaticGraph;

using StaticGraph = ds::StaticGraph;
using LouvainGraph = ds::Graph<ds::StaticGraph>;

enum class FeatureType {
floatingpoint,
@@ -468,7 +469,8 @@ bool float_eq(double left, double right) {
}


std::pair<GlobalFeatures, std::vector<uint64_t>> computeGlobalFeatures(const Graph& graph) {
std::pair<GlobalFeatures, std::vector<uint64_t>> computeGlobalFeatures(const StaticGraph& graph,
std::vector<std::pair<ds::Clustering, double>>& community_stack) {
GlobalFeatures features;

std::vector<uint64_t> hn_degrees;
@@ -479,7 +481,7 @@ std::pair<GlobalFeatures, std::vector<uint64_t>> computeGlobalFeatures(const Gra
});

HypernodeID num_nodes = graph.initialNumNodes();
HyperedgeID num_edges = Graph::is_graph ? graph.initialNumEdges() / 2 : graph.initialNumEdges();
HyperedgeID num_edges = StaticGraph::is_graph ? graph.initialNumEdges() / 2 : graph.initialNumEdges();
Statistic degree_stats = createStats(hn_degrees, true);
features.n = num_nodes;
features.m = num_edges;
@@ -497,11 +499,33 @@
}
}

// TODO: modularity
// modularity features
ds::DynamicSparseMap<PartitionID, uint32_t> comm_set;
auto modularity_features = [&](size_t i) {
const auto& [clustering, modularity] = community_stack.at(community_stack.size() - i - 1);
comm_set.clear();
for (PartitionID c: clustering) {
comm_set[c] = 0;
}
uint64_t n_comms = 0;
for (auto _: comm_set) {
n_comms++;
}
return std::make_pair(n_comms, modularity);
};
std::tie(features.n_communities_0, features.modularity_0) = modularity_features(0);
std::tie(features.n_communities_1, features.modularity_1) = modularity_features(1);
std::tie(features.n_communities_2, features.modularity_2) = modularity_features(2);
if (community_stack.size() > 3 && features.n_communities_1 < 2 * features.n_communities_0) {
// small hack to get more meaningful features
std::tie(features.n_communities_1, features.modularity_1) = modularity_features(2);
std::tie(features.n_communities_2, features.modularity_2) = modularity_features(3);
}

return {features, hn_degrees};
}

N1Features n1FeaturesFromNeighborhood(const Graph& graph, const std::vector<uint64_t>& global_degrees, const NeighborhoodResult& data, CliqueComputation* c_comp) {
N1Features n1FeaturesFromNeighborhood(const StaticGraph& graph, const std::vector<uint64_t>& global_degrees, const NeighborhoodResult& data, CliqueComputation* c_comp) {
N1Features result;
HypernodeID num_nodes = data.n1_list.size();
result.degree = num_nodes;
@@ -562,7 +586,7 @@ N1Features n1FeaturesFromNeighborhood(const Graph& graph, const std::vector<uint
return result;
}

N2Features n2FeaturesFromNeighborhood(const Graph& graph, const NeighborhoodResult& data) {
N2Features n2FeaturesFromNeighborhood(const StaticGraph& graph, const NeighborhoodResult& data) {
ALWAYS_ASSERT(data.includes_two_hop);
N2Features result;
HypernodeID num_nodes = data.n2_list.size();
@@ -604,7 +628,7 @@ N2Features n2FeaturesFromNeighborhood(const Graph& graph, const NeighborhoodResu
return result;
}

std::vector<std::tuple<HypernodeID, N1Features, N2Features>> computeNodeFeatures(const Graph& graph, const std::vector<uint64_t>& global_degrees) {
std::vector<std::tuple<HypernodeID, N1Features, N2Features>> computeNodeFeatures(const StaticGraph& graph, const std::vector<uint64_t>& global_degrees) {
std::vector<std::tuple<HypernodeID, N1Features, N2Features>> result;
result.resize(graph.initialNumNodes());

Expand All @@ -622,7 +646,8 @@ std::vector<std::tuple<HypernodeID, N1Features, N2Features>> computeNodeFeatures
return result;
}

std::vector<std::tuple<HypernodeID, HypernodeID, EdgeFeatures>> computeEdgeFeatures(const Graph& graph, const std::vector<uint64_t>& global_degrees) {
std::vector<std::tuple<HypernodeID, HypernodeID, EdgeFeatures>> computeEdgeFeatures(const StaticGraph& graph, const std::vector<uint64_t>& global_degrees,
const std::vector<std::pair<ds::Clustering, double>>& community_stack) {
tbb::enumerable_thread_specific<std::vector<std::tuple<HypernodeID, HypernodeID, EdgeFeatures>>> result_list;
tbb::enumerable_thread_specific<NeighborhoodComputation> base_neighborhood(graph.initialNumNodes());
tbb::enumerable_thread_specific<NeighborhoodComputation> result_neighborhood(graph.initialNumNodes());
@@ -670,6 +695,16 @@ std::vector<std::tuple<HypernodeID, HypernodeID, EdgeFeatures>> computeEdgeFeatu
HypernodeID dice_divisor = result.intersect_features.degree + result.intersect_features.to_n1_edges + result.intersect_features.to_n2_edges;
result.dice_similarity = intersect_size / static_cast<double>(dice_divisor);
}

// community detection
auto equal_communities = [&](size_t i) {
const auto& [clustering, _] = community_stack.at(community_stack.size() - i - 1);
return clustering[u] == clustering[v];
};
result.comm_0_equal = equal_communities(0);
result.comm_1_equal = equal_communities(1);
result.comm_2_equal = equal_communities(2);

result_list.local().emplace_back(u, v, result);
}
});
@@ -719,7 +754,15 @@ int main(int argc, char* argv[]) {
})->default_value("metis"),
"Input file format: \n"
" - hmetis : hMETIS hypergraph file format \n"
" - metis : METIS graph file format");
" - metis : METIS graph file format")
("p-louvain-min-vertex-move-fraction",
po::value<long double>(&context.preprocessing.community_detection.min_vertex_move_fraction)->value_name(
"<long double>")->default_value(0.01),
"Louvain pass terminates if less than that fraction of nodes moves during a pass")
("p-max-louvain-pass-iterations",
po::value<uint32_t>(&context.preprocessing.community_detection.max_pass_iterations)->value_name(
"<uint32_t>")->default_value(5),
"Maximum number of iterations over all nodes of one louvain pass");

po::variables_map cmd_vm;
po::store(po::parse_command_line(argc, argv, options), cmd_vm);
@@ -741,17 +784,23 @@
mt_kahypar::io::readInputFile(
context.partition.graph_filename, PresetType::default_preset,
InstanceType::graph, context.partition.file_format, true);
Graph& graph = utils::cast<Graph>(hypergraph);
StaticGraph& graph = utils::cast<StaticGraph>(hypergraph);

double time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
std::cout << "Starting global feature computation [" << time << "s]" << std::endl;
auto [global_features, degrees] = computeGlobalFeatures(graph); // does not contain locality
LouvainGraph louvain_graph(graph, LouvainEdgeWeight::uniform, StaticGraph::is_graph);
auto community_stack = community_detection::run_parallel_louvain(louvain_graph, context);
ALWAYS_ASSERT(community_stack.size() > 0);
while (community_stack.size() < 3) {
community_stack.insert(community_stack.begin(), community_stack.front());
}
auto [global_features, degrees] = computeGlobalFeatures(graph, community_stack); // does not contain locality
time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
std::cout << "Starting node feature computation [" << time << "s]" << std::endl;
auto node_features = computeNodeFeatures(graph, degrees);
time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
std::cout << "Starting Edge feature computation [" << time << "s]" << std::endl;
auto edge_features = computeEdgeFeatures(graph, degrees);
auto edge_features = computeEdgeFeatures(graph, degrees, community_stack);
time = std::chrono::duration<double>(std::chrono::high_resolution_clock::now() - start).count();
std::cout << "Feature computation complete [" << time << "s]" << std::endl;

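
Note on the feature extraction in ml_graph_stats.cc: modularity_features(i) indexes community_stack from the coarsest level backwards and counts distinct community IDs by filling the DynamicSparseMap, while the while-loop in main() pads the stack by duplicating its first (finest) entry so that three feature slots always exist. Conceptually the counting step is just the number of unique values in a clustering; a standalone sketch using std::unordered_set in place of the project's DynamicSparseMap (the helper name and toy data are illustrative only):

#include <cstdint>
#include <iostream>
#include <unordered_set>
#include <vector>

// Number of distinct community IDs in a flat clustering vector,
// i.e. the quantity reported as n_communities_* in the global features.
std::uint64_t countCommunities(const std::vector<int>& clustering) {
  std::unordered_set<int> ids(clustering.begin(), clustering.end());
  return ids.size();
}

int main() {
  std::vector<int> clustering = {0, 0, 2, 1, 2, 2, 1};  // toy example
  std::cout << countCommunities(clustering) << '\n';    // prints 3
  return 0;
}
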
8 changes: 4 additions & 4 deletions tools/neighborhood_computation.h
@@ -43,7 +43,7 @@
using namespace mt_kahypar;
using FastResetArray = kahypar::ds::FastResetFlagArray<>;

using Graph = ds::StaticGraph;
using StaticGraph = ds::StaticGraph;

struct NeighborhoodResult {
std::array<HypernodeID, 2> roots;
@@ -83,12 +83,12 @@ class NeighborhoodComputation {
}

template<size_t N>
NeighborhoodResult computeNeighborhood(const Graph& graph, std::array<HypernodeID, N> roots, bool include_two_hop) {
NeighborhoodResult computeNeighborhood(const StaticGraph& graph, std::array<HypernodeID, N> roots, bool include_two_hop) {
return computeNeighborhood(graph, roots, include_two_hop, [](HypernodeID){ return true; });
}

template<size_t N, typename F>
NeighborhoodResult computeNeighborhood(const Graph& graph, std::array<HypernodeID, N> roots, bool include_two_hop, F filter) {
NeighborhoodResult computeNeighborhood(const StaticGraph& graph, std::array<HypernodeID, N> roots, bool include_two_hop, F filter) {
static_assert(N > 0 && N <= 2);
ALWAYS_ASSERT(n1_list.empty());
NeighborhoodResult result {{roots[0], roots[0]}, n1_list, n1_set, n2_list, n2_set, include_two_hop};
@@ -143,7 +143,7 @@ class CliqueComputation {
child = nullptr;
}

uint64_t computeMaxCliqueSize(const Graph& graph, const std::vector<HypernodeID>& nodes) {
uint64_t computeMaxCliqueSize(const StaticGraph& graph, const std::vector<HypernodeID>& nodes) {
current_set.reset();
forbidden.reset();
list.resize(nodes.size());
