Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
N-Maas committed Oct 1, 2024
1 parent 2caae05 commit dc4584c
Showing 1 changed file with 41 additions and 0 deletions.
41 changes: 41 additions & 0 deletions tools/ml_graph_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

#include "mt-kahypar/macros.h"
#include "mt-kahypar/datastructures/static_graph.h"
#include "mt-kahypar/datastructures/sparse_map.h"
#include "mt-kahypar/partition/context.h"
#include "mt-kahypar/io/hypergraph_factory.h"
#include "mt-kahypar/io/hypergraph_io.h"
Expand Down Expand Up @@ -377,6 +378,43 @@ struct EdgeFeatures {


// ################ Feature Computation ################
// Computes the base-2 Shannon entropy of a categorical distribution given as
// occurrence counts, scaled by log2 of the number of observed categories.
//
// occurence: maps a category key to the number of times it was observed
//            (iterated entries expose the count as `.value`).
// total:     number of observations; each count is divided by it to obtain
//            the probability p_x. Assumes the counts sum up to `total`.
//
// Returns entropy / log2(#categories with a positive count). Returns 0 for the
// degenerate cases where the quotient is undefined: `total` is 0 or fewer than
// two categories have a positive count.
double ScaledEntropyFromOccurenceCounts(const ds::DynamicSparseMap<int64_t, int64_t>& occurence, size_t total) {
  if (total == 0) {
    // no observations: avoid dividing by zero below
    return 0;
  }

  // collect the non-negative entropy terms -p_x * log2(p_x)
  std::vector<double> terms;
  for (const auto& element : occurence) {
    if (element.value <= 0) {
      // p * log2(p) tends to 0 for p -> 0, but evaluating it directly yields NaN
      continue;
    }
    const double p_x = static_cast<double>(element.value) / static_cast<double>(total);
    terms.push_back(-p_x * log2(p_x));
  }

  if (terms.size() < 2) {
    // log2(1) == 0 and log2(0) == -inf: scaling is undefined, entropy is 0
    return 0;
  }

  // Sum the smallest terms first to limit floating point rounding error.
  // The terms are stored with positive sign so that a plain sort suffices;
  // comparing via an unqualified abs() may silently pick the integer overload
  // and truncate every term to 0.
  std::sort(terms.begin(), terms.end());
  double entropy = 0;
  for (const double term : terms) {
    entropy += term;
  }

  // scale by log of number of categories
  return entropy / log2(static_cast<double>(terms.size()));
}

double ScaledEntropy(const std::vector<double>& distribution) {
ds::DynamicSparseMap<int64_t, int64_t> occurence;
for (double value : distribution) {
// snap to 3 digits after decimal point
int64_t snap = static_cast<int64_t>(std::round(1000 * value));
occurence[snap]++;
}
return ScaledEntropyFromOccurenceCounts(occurence, distribution.size());
}

double ScaledEntropy(const std::vector<uint64_t>& distribution) {
ds::DynamicSparseMap<int64_t, int64_t> occurence;
for (auto value : distribution) {
occurence[value]++;
}
return ScaledEntropyFromOccurenceCounts(occurence, distribution.size());
}



template <typename T>
Expand Down Expand Up @@ -676,6 +714,9 @@ int main(int argc, char* argv[]) {
}
po::notify(cmd_vm);

// context.shared_memory.num_threads = 1;
// TBBInitializer::instance(context.shared_memory.num_threads);

std::ofstream global(global_out);
std::ofstream nodes(nodes_out);
std::ofstream edges(edges_out);
Expand Down

0 comments on commit dc4584c

Please sign in to comment.