forked from Geophagus96/Manifold-EM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
nnstomerge.cpp
69 lines (61 loc) · 1.74 KB
/
nnstomerge.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#include <RcppArmadillo.h>
#include <math.h>
#include <iostream>
using namespace Rcpp;
using namespace arma;
//[[Rcpp::plugins(cpp11)]]
//[[Rcpp::depends(RcppArmadillo)]]
int closenp(const mat&tempdistm);
/*
Selecting initial guess for EM algorithm by merging the important nodes
until there are exactly k left (we assume that there are k clusters)
*/
//[[Rcpp::export]]
uvec IPstomerge(const mat&knng, const mat&distm, const uvec&nnnums, const int&cats, const int&tresh){
/*
cats: number of clusters
tresh: treshold for labeling a point as important, i.e. with more than a certain number of neighbors
output a uvec, denoting the indices of the centers
*/
int n = knng.n_rows;
int del;
double r;
uvec imnode_ind = find(nnnums >= tresh);
int imnode_num = imnode_ind.n_elem;
mat tempdistm(n, n);
tempdistm = distm + (n+1) * eye(n,n);
tempdistm = tempdistm(imnode_ind, imnode_ind);
for (int i = imnode_num; i > cats ; i--){
int nodelabel = closenp(tempdistm) + 1;
int nodencol = nodelabel / i;
int nodenrow = nodelabel - i * nodencol;
if (nodenrow == 0){
nodencol = nodencol - 1;
nodenrow = i;
}
r = randu();
if (r >= 0.5){
del = nodenrow - 1;
}
else{
del = nodencol;
}
tempdistm.shed_row(del);
tempdistm.shed_col(del);
imnode_ind = imnode_ind(find(imnode_ind != imnode_ind(del)));
}
return imnode_ind;
}
int closenp(const mat&tempdistm){
/*
To find nodes pair with closest distance
every return a pair of nodes with smallest distance in the matrix
input: tempdistm is a s-by-s matrix
*/
uvec nodepair;
double mindist;
mindist = min(min(tempdistm));
nodepair = find(tempdistm == mindist, 1, "first");
int out = nodepair[0];
return out;
}