-
Notifications
You must be signed in to change notification settings - Fork 0
/
k_means_clustering_sklearn.py
128 lines (108 loc) · 2.58 KB
/
k_means_clustering_sklearn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# -*- coding: utf-8 -*-
"""k_means_clustering_sklearn.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1jsjfWtTovQRlzikm61V6XkjEo79yIHz1
"""
# Mount Google Drive so the CSV inputs stored under "My Drive" can be read.
from google.colab import drive
drive.mount("/content/gdrive")
from numpy import genfromtxt
data_path = "/content/gdrive/My Drive/data.csv"
centers_path = "/content/gdrive/My Drive/centers.csv"
# Parse both comma-separated files into NumPy arrays.
my_data = genfromtxt(data_path, delimiter=",")
centers_data = genfromtxt(centers_path, delimiter=",")
# len() of an array is the length of its first axis.
# NOTE(review): genfromtxt returns a 1-D array for a single-row file, in which
# case this counts columns rather than rows - confirm inputs have 2+ rows.
lines_data = len(my_data)
print(lines_data)
lines_centers = len(centers_data)
print(lines_centers)
# Data rows per center row: shown as a float first, then truncated to an int.
sample_cluster = lines_data / lines_centers
print(sample_cluster)
sample_cluster = int(sample_cluster)
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# Generate a reproducible synthetic 2-feature dataset spread over
# lines_centers blobs of unit standard deviation.
# NOTE(review): sample_cluster is computed as data rows per center, but an
# integer n_samples is documented as the TOTAL number of points - confirm
# this is the intended sample count.
X, y = make_blobs(
    random_state=0,
    shuffle=True,
    cluster_std=1,
    centers=lines_centers,
    n_features=2,
    n_samples=sample_cluster,
)
# Preview the unlabelled points; X.T yields the two feature columns, which
# become the x and y coordinates respectively.
plt.scatter(*X.T, s=50, marker="o", c="green", edgecolor="black")
plt.show()
from sklearn.cluster import KMeans
# k-means with one cluster per row counted in the centers file, randomly
# chosen initial centroids, and a fixed seed so reruns give the same result.
K_Means_clustering = KMeans(
    random_state=0,
    tol=1e-04,
    max_iter=100,
    n_init=10,
    init="random",
    n_clusters=lines_centers,
)
# Fit on X and keep the per-sample cluster index; labels_ is exactly what
# fit_predict(X) returns.
K_Means_clustering.fit(X)
cluster_Centers_predict = K_Means_clustering.labels_
# Plot every cluster found by k-means, then overlay the fitted centroids.
#
# The series are driven by the model's own n_clusters instead of a fixed list
# of six indices, so clusters beyond the sixth are no longer left undrawn and
# the legend lists exactly the clusters the model was fitted with.
cluster_colors = ("lightgreen", "orange", "lightblue", "gray", "purple", "yellow")
cluster_markers = ("o", "s", "^", "D", "v", "P")
for cluster_index in range(K_Means_clustering.n_clusters):
    # Rows of X assigned to this cluster.
    members = X[cluster_Centers_predict == cluster_index]
    # The first six clusters keep the original colors and the "o" marker.
    # Colors repeat after that, so the marker changes on every wrap to keep
    # clusters that share a color distinguishable.
    wraps, color_slot = divmod(cluster_index, len(cluster_colors))
    plt.scatter(
        members[:, 0],
        members[:, 1],
        s=50,
        c=cluster_colors[color_slot],
        marker=cluster_markers[wraps % len(cluster_markers)],
        edgecolor="black",
        label=f"Cluster {cluster_index + 1}",
    )
# Centroids are drawn last so the large red stars sit on top of the points.
plt.scatter(
    K_Means_clustering.cluster_centers_[:, 0],
    K_Means_clustering.cluster_centers_[:, 1],
    s=250,
    marker="*",
    c="red",
    edgecolor="black",
    label="Centroids",
)
plt.legend(scatterpoints=1)
plt.grid()
plt.show()