-
Notifications
You must be signed in to change notification settings - Fork 1
/
kmedoids.py
45 lines (41 loc) · 1.28 KB
/
kmedoids.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import numpy as np
def kMedoids(D, k, tmax=100):
# determine dimensions of distance matrix D
m, n = D.shape
if k > n:
raise Exception('too many medoids')
# randomly initialize an array of k medoid indices
M = np.arange(n)
np.random.shuffle(M)
M = np.sort(M[:k])
# create a copy of the array of medoid indices
Mnew = np.copy(M)
# initialize a dictionary to represent clusters
C = {}
for t in range(tmax):
if t % 5 == 0:
print('\tIteration ', t)
# determine clusters, i. e. arrays of data indices
J = np.argmin(D[:, M], axis=1)
for kappa in range(k):
C[kappa] = np.where(J == kappa)[0]
# update cluster medoids
for kappa in range(k):
try:
J = np.mean(D[np.ix_(C[kappa], C[kappa])], axis=1)
j = np.argmin(J)
Mnew[kappa] = C[kappa][j]
except:
continue
np.sort(Mnew)
# check for convergence
if np.array_equal(M, Mnew):
break
M = np.copy(Mnew)
else:
# final update of cluster memberships
J = np.argmin(D[:, M], axis=1)
for kappa in range(k):
C[kappa] = np.where(J == kappa)[0]
# return results
return M, C