-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmyKMeans.py
69 lines (58 loc) · 2.14 KB
/
myKMeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#importing the libraries
import numpy as np
import random as rd
class myKMeans:
    """K-means clustering with k-means++ centroid seeding.

    Attributes:
        X: The data as an array of shape (m, n_features).
        K: Number of clusters requested.
        m: Number of samples (rows of ``X``).
        Centroids: Array of shape (n_features, K); column ``k`` is the
            centroid of cluster ``k + 1``. Empty until ``fit`` is called.
        Output: Dict mapping the 1-based cluster label to an array of shape
            (n_members, n_features) holding the points assigned to that
            cluster. Empty until ``fit`` is called.
    """

    def __init__(self, X, K):
        """Store the data set and the number of clusters.

        Args:
            X: 2-D array-like of shape (m, n_features).
            K: Number of clusters to form.
        """
        self.X = np.asarray(X)
        self.Output = {}
        self.Centroids = np.array([]).reshape(self.X.shape[1], 0)
        self.K = K
        self.m = self.X.shape[0]

    def kmeanspp(self, X, K):
        """Choose K initial centroids from the rows of X using k-means++.

        The first centroid is drawn uniformly at random. Every further
        centroid is drawn with probability proportional to the squared
        distance between a point and its nearest already-chosen centroid.
        Randomness comes from the ``random`` module, so ``random.seed``
        makes the result reproducible.

        Args:
            X: 2-D array-like of shape (m, n_features).
            K: Number of centroids to pick. At least one centroid is always
                returned, even when K < 1.

        Returns:
            Float array of shape (n_features, max(K, 1)); one centroid per
            column.

        Raises:
            ValueError: If X has no rows.
        """
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("cannot seed centroids from an empty data set")

        # randrange excludes its upper bound, so the index is always valid.
        chosen = [rd.randrange(n_samples)]
        # Squared distance from every point to its nearest chosen centroid.
        closest_sq = np.sum((X - X[chosen[0]]) ** 2, axis=1)

        for _ in range(1, K):
            cumulative = np.cumsum(closest_sq)
            total = cumulative[-1]
            if total > 0:
                # Inverse-CDF sampling on the unnormalised weights.
                # side="right" skips zero-weight points (already chosen ones).
                pick = int(
                    np.searchsorted(cumulative, rd.random() * total, side="right")
                )
                pick = min(pick, n_samples - 1)
            else:
                # Every point coincides with a chosen centroid (or the
                # weights are not finite): fall back to a uniform draw.
                pick = rd.randrange(n_samples)
            chosen.append(pick)
            closest_sq = np.minimum(
                closest_sq, np.sum((X - X[pick]) ** 2, axis=1)
            )

        return X[chosen].T

    def fit(self, n_iter):
        """Run ``n_iter`` rounds of Lloyd's algorithm from a k-means++ start.

        Each round assigns every point to its nearest centroid (squared
        Euclidean distance, ties go to the lowest cluster index) and then
        moves each centroid to the mean of its members. A cluster that
        receives no points keeps its previous centroid instead of becoming
        NaN.

        After the call, ``self.Centroids`` holds the updated centroids and
        ``self.Output`` holds the assignment computed in the last round.
        If ``n_iter`` is less than 1 only the seeding is performed and
        ``self.Output`` is left unchanged.

        Args:
            n_iter: Number of assignment/update rounds to perform.
        """
        # Work in float so centroid means are not truncated for integer data.
        X = np.asarray(self.X, dtype=float)
        self.Centroids = self.kmeanspp(X, self.K)

        for _ in range(n_iter):
            # Column k holds the squared distance of every point to centroid k.
            sq_dist = np.stack(
                [
                    np.sum((X - self.Centroids[:, k]) ** 2, axis=1)
                    for k in range(self.K)
                ],
                axis=1,
            )
            labels = np.argmin(sq_dist, axis=1)

            clusters = {}
            for k in range(self.K):
                members = X[labels == k]
                clusters[k + 1] = members
                if members.shape[0] > 0:
                    self.Centroids[:, k] = members.mean(axis=0)
            self.Output = clusters

    def predict(self):
        """Return the clustering produced by the last ``fit`` call.

        Returns:
            A tuple ``(Output, centroids)`` where ``Output`` is the dict of
            cluster label -> member points and ``centroids`` has shape
            (K, n_features), one centroid per row.
        """
        return self.Output, self.Centroids.T

    def WCSS(self):
        """Return the within-cluster sum of squared distances.

        Sums, over all clusters, the squared Euclidean distance between each
        member point in ``self.Output`` and that cluster's centroid.

        Raises:
            KeyError: If ``fit`` has not populated ``self.Output`` yet.
        """
        wcss = 0
        for k in range(self.K):
            wcss += np.sum((self.Output[k + 1] - self.Centroids[:, k]) ** 2)
        return wcss