code.py
"""Subtractive Clustering Algorithm
"""
__author__ = 'Bhavesh Kumar'
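# Subtractive clustering, in outline:
#   1. Normalize the data and compute a potential for every point, a measure of
#      how many neighbors it has within the radius ra.
#   2. Pick the point with the highest potential as a cluster center.
#   3. Subtract a Gaussian of that potential (with the wider radius rb) from the
#      points around the new center, then repeat; the thresholds Eup and Edown
#      decide when a candidate is accepted, retried, or when the search stops.
# A k-means fit with three clusters is run on the same data for comparison.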
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 (registers the '3d' projection)
import sklearn.preprocessing as skp
ra = 0.3               # neighborhood radius used when computing each point's potential
rb = ra * 1.15         # wider radius used when reducing potentials around an accepted center
Eup = 0.5              # accept ratio: candidates above this fraction of the first potential are accepted
Edown = 0.15           # reject ratio: candidates below this fraction end the search
colors_options = 'rgb'
cluster_center = []    # centers found by subtractive clustering
def set_colors(labels, colors=colors_options):
    """Map integer cluster labels to single-character matplotlib color codes."""
    colored_labels = []
    for label in labels:
        colored_labels.append(colors[label])
    return colored_labels
DataFrame = pd.read_csv('Project2_dataset.csv', header=None)
DataMatrix = DataFrame.to_numpy()  # as_matrix() was removed in pandas 1.0
normalized_data_matrix = skp.scale(DataMatrix, axis=0)  # z-score each column
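# The CSV is expected to be headerless; k-means uses every column, while the
# 3-D plot and the subtractive-clustering potentials use only the first three.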
# Reference clustering: k-means with three clusters on the normalized data
kmeans = KMeans(n_clusters=3).fit(normalized_data_matrix)
labels = kmeans.labels_
colors = set_colors(labels)
centroids = kmeans.cluster_centers_

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(normalized_data_matrix[:, 0], normalized_data_matrix[:, 1], normalized_data_matrix[:, 2], c=colors,
           alpha=0.1)
for idx, centroid in enumerate(centroids):
    ax.scatter(*centroid, c=colors_options[idx], s=150, marker='X', label='Cluster ' + str(idx + 1))
# plt.title('K-means Clusters')
# plt.show()
print('Summary of the data set')
print(DataFrame.describe(), '\n')
print('K-means cluster centers')
print('c1 = {} \nc2 = {} \nc3 = {}\n'.format(kmeans.cluster_centers_[0],
                                             kmeans.cluster_centers_[1],
                                             kmeans.cluster_centers_[2]))
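# Each point's potential is its summed Gaussian similarity to every other point:
#     P_i = sum_{j != i} exp(-4 * ||x_i - x_j||^2 / (ra / 2)^2)
# Dense regions therefore produce high potentials; the loop below accumulates
# each symmetric pair once for both endpoints to avoid redundant work.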
# initialize potentials
size = len(normalized_data_matrix)
potential = [0.0] * size
for i in range(size):
    Xi = normalized_data_matrix[i]
    for j in range(i + 1, size):
        Xj = normalized_data_matrix[j]
        value = np.exp(-4.0 * ((Xi[0] - Xj[0]) ** 2 + (Xi[1] - Xj[1]) ** 2 + (Xi[2] - Xj[2]) ** 2) / (ra / 2) ** 2)
        potential[i] += value
        potential[j] += value

max_potential_value = max(potential)                         # P1: the highest initial potential
max_potential_index = potential.index(max_potential_value)
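# Accept/reject loop: with P1 the largest initial potential and Pk the current
# candidate's potential,
#   Pk / P1 > Eup              -> accept the candidate as a center
#   Edown < Pk / P1 <= Eup     -> accept only if dmin / ra + Pk / P1 >= 1, where
#                                 dmin is the distance to the nearest accepted
#                                 center; otherwise discard the candidate and
#                                 try the next-best point
#   Pk / P1 < Edown            -> stop searching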
# filter candidates through the accept and reject criteria
current_max_value = max_potential_value
criteria = 1
while criteria and current_max_value:
    criteria = 0
    max_potential_vector = normalized_data_matrix[max_potential_index]  # candidate center xk
    potential_ratio = current_max_value / max_potential_value           # Pk / P1
    if potential_ratio > Eup:
        # strong candidate: accept it outright
        criteria = 1
    elif potential_ratio > Edown:
        # grey zone: accept only if the candidate is far enough from the centers
        # already found (dmin is the Euclidean distance to the nearest one)
        dmin = np.sqrt(np.min([(max_potential_vector[0] - cc[0]) ** 2 +
                               (max_potential_vector[1] - cc[1]) ** 2 +
                               (max_potential_vector[2] - cc[2]) ** 2 for cc in cluster_center]))
        if ((dmin / ra) + potential_ratio) >= 1:
            criteria = 1
        else:
            criteria = 2
    elif potential_ratio < Edown:
        # remaining potentials are too weak: stop searching
        break
    if criteria == 1:
        # accept the candidate, then subtract a Gaussian of its potential
        # (with the wider radius rb) from every point's potential
        cluster_center.append(max_potential_vector)
        for i in range(size):
            Xj = normalized_data_matrix[i]
            potential_value = potential[i]
            potential_value = potential_value - current_max_value * np.exp(
                -4.0 * ((max_potential_vector[0] - Xj[0]) ** 2 +
                        (max_potential_vector[1] - Xj[1]) ** 2 +
                        (max_potential_vector[2] - Xj[2]) ** 2) / (rb / 2) ** 2)
            if potential_value < 0:
                potential_value = 0
            potential[i] = potential_value
        current_max_value = max(potential)
        max_potential_index = potential.index(current_max_value)
    elif criteria == 2:
        # reject the candidate: zero its potential and move on to the next-best point
        potential[max_potential_index] = 0
        current_max_value = max(potential)
        max_potential_index = potential.index(current_max_value)
print('Subtractive cluster centers')
for idx, cc in enumerate(cluster_center):
    print('c{} = {}'.format(idx + 1, cc))
# plot the centers produced by the subtractive clustering algorithm (black X markers)
for centroid in cluster_center:
    ax.scatter(*centroid, c='k', s=150, marker='X')
plt.title('Subtractive and K-means clusters')
ax.legend()
plt.show()