function [k, v, s2] = ScaledKMedians(x, v0, ScoreThresh)
% [k, v, s2] = ScaledKMedians(x, v0, ScoreThresh);
%
% does a clustering that minimizes the norm of x_i - g_i * v_{k_i}, where:
% x_i is the i'th row of input matrix x (nPoints by nDims)
% g_i is a gain (not explicitly computed or returned)
% v_k is the k'th row of output v containing unit-norm cluster medians
%   (nClusters by nDims)
% k_i is the i'th entry of output k giving the cluster of each point
%   (nPoints by 1); k_i = 0 marks a point that could not be assigned
% s2_k is the squared norm of the un-normalised median of cluster k
%   (nClusters by 1)
% ScoreThresh: x_i is only assigned to a cluster if its dot product with
%   the best-matching v_k is at least ScoreThresh (optional, default 0)
%
% input v0 (nClusters by nDims) is the starting point. (Required.)
%
% This assigns each row of x to the row of v with the largest dot product
% between normalised x and v, then updates each row of v as the median of
% all x assigned to it, iterating until assignments stop changing.
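%
% Example (a minimal sketch with made-up data; nPoints, nDims, nClusters,
%   the noise level and the 0.8 threshold are arbitrary illustrative choices):
%   nPoints = 500; nDims = 7; nClusters = 4;
%   v0 = rand(nClusters, nDims);                    % rough initial guesses
%   gains = 1 + rand(nPoints, 1);                   % per-point scaling
%   x = gains .* v0(randi(nClusters, nPoints, 1), :) ...
%       + 0.01*randn(nPoints, nDims);               % scaled medians + noise
%   [k, v, s2] = ScaledKMedians(x, v0, 0.8);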
if nargin<3 || isempty(ScoreThresh)
    ScoreThresh = 0; % only keep good matches (increase this as a hack if not diagonal)
end
MinClusterSize = 10; % delete clusters with fewer points than this
ConvergenceCriterion = 0; % terminate if this many or fewer assignments changed
ScoreDiffThresh = 1e-4; % if a point matches its top two clusters almost equally, leave it unassigned
% normalize starting points and original data
vNorm = bsxfun(@rdivide, v0, sqrt(sum(v0.^2,2)));
v = vNorm;
xNorm = bsxfun(@rdivide, x, sqrt(sum(x.^2,2)));
[nClusters, nDims] = size(v);
s2 = zeros(nClusters,1);
MaxIter = 100;
k = nan; % scalar nan so the convergence check cannot pass on the first iteration
for i=1:MaxIter
    kOld = k;
    score = xNorm*v'; % project each point onto each cluster; both are normalized, so scores are cosines
    [SortScore,ClusterIndex] = sort(score,2,'descend');
    k = ClusterIndex(:,1); % best cluster for each point
    % unclusterable points: best score too low, or top two scores too close to call
    k(SortScore(:,1)<ScoreThresh | SortScore(:,1)-SortScore(:,2)<ScoreDiffThresh)=0;
    if sum(k~=kOld)<=ConvergenceCriterion % need a better criterion!
        break;
    end
    % find median for points assigned to each cluster
    for c=1:nClusters
        MyPoints = x(k==c,:); % don't use normalized, to avoid overweighting weak points
        nMyPoints = size(MyPoints,1); % count rows, not length(), which returns max(size) for matrices
        if nMyPoints<MinClusterSize
            v(c,:) = 0;
            continue;
        end
        TopEvec = median(MyPoints,1)'; % coordinate-wise median of the cluster's points
        %[TopEvec, s2(c)] = eigs(robustcov(MyPoints), 1); %More proper but gives too large values.
        s2(c) = norm(TopEvec); % store the median's norm (squared below)
        TopEvec = TopEvec/s2(c);
        v(c,:) = TopEvec*sign(mean(TopEvec)); % flip sign so the mean entry is positive
    end
    % figure(9046);
    % hist(k,0:max(k));
    % drawnow
end
%Square the stored norms so s2 is consistent with the output of ScaledKMeans.
s2 = s2.^2;
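% The gain g_i in the objective above is never formed explicitly. As a
% minimal sketch (not part of this file's interface), a caller could
% recover the least-squares gain for each assigned point afterwards:
%   assigned = k > 0;
%   g = zeros(size(x,1), 1);
%   g(assigned) = sum(x(assigned,:) .* v(k(assigned),:), 2); % g_i = x_i . v_{k_i}, since surviving rows of v are unit norm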