-
Notifications
You must be signed in to change notification settings - Fork 0
/
Exercise 16- Nearest neighbor.py
executable file
·120 lines (84 loc) · 3.57 KB
/
Exercise 16- Nearest neighbor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#Beginner C A B
#Intermediate
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# create random data with two classes: 16 two-dimensional points generated
# around 2 cluster centers (center_box bounds where those centers may lie)
X, y = make_blobs(n_samples=16, n_features=2, centers=2, center_box=(-2, 2))
# scale the data so that all values are between 0.0 and 1.0
X = MinMaxScaler().fit_transform(X)
# split two data points from the data as test data and
# use the remaining n-2 points as the training data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=2)
# place-holder for the predicted classes: one int64 slot per test point,
# written by main()
y_predict = np.empty(len(y_test), dtype=np.int64)
# produce line segments that connect the test data points
# to the nearest neighbors for drawing the chart
# (main() appends one segment per test point)
lines = []
# distance function
def dist(a, b):
    """Return the Euclidean distance between the points *a* and *b*.

    Coordinates are paired with ``zip``, so when the inputs differ in length
    the surplus trailing coordinates of the longer one are ignored.

    Args:
        a: Iterable of numeric coordinates.
        b: Iterable of numeric coordinates.

    Returns:
        The square root of the summed squared coordinate differences, as
        produced by ``np.sqrt``.
    """
    # use the built-in sum() instead of shadowing it with a local accumulator
    squared_diffs = ((ai - bi) ** 2 for ai, bi in zip(a, b))
    return np.sqrt(sum(squared_diffs))
def main(X_train, X_test, y_train, y_test):
    """Classify every test point with the label of its nearest training point.

    For each row of ``X_test`` the distance to every row of ``X_train`` is
    computed with ``dist``.  The label of the closest training point is stored
    in the module-level ``y_predict`` array, and a (test point, nearest
    training point) pair is appended to the module-level ``lines`` list for
    drawing the chart.  The filled ``y_predict`` is printed at the end.

    Args:
        X_train: Training points, one point per row.
        X_test: Test points, one point per row.
        y_train: Class labels aligned with the rows of ``X_train``.
        y_test: Labels of the test points; not used by this function.

    Note:
        ``y_predict`` must already exist with at least ``len(X_test)`` slots,
        and ``lines`` is only appended to, never cleared.
    """
    global y_predict
    global lines

    # process each of the test data points
    for i, test_item in enumerate(X_test):
        # calculate the distances to all training points
        distances = [dist(train_item, test_item) for train_item in X_train]

        # find the index of the nearest neighbor
        nearest = np.argmin(distances)

        # create a line connecting the points for the chart
        lines.append(np.stack((test_item, X_train[nearest])))

        # predict the class of the nearest training point
        y_predict[i] = y_train[nearest]

    print(y_predict)
# run the nearest-neighbor classification on the train/test split created above
main(X_train, X_test, y_train, y_test)
#Advanced
import numpy as np
from sklearn.datasets import make_blobs
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
# create random data with two classes: 16 two-dimensional points generated
# around 2 cluster centers (center_box bounds where those centers may lie)
X, Y = make_blobs(n_samples=16, n_features=2, centers=2, center_box=(-2, 2))
# scale the data so that all values are between 0.0 and 1.0
X = MinMaxScaler().fit_transform(X)
# split two data points from the data as test data and
# use the remaining n-2 points as the training data
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=2)
# place-holder for the predicted classes: one int64 slot per test point,
# written by main()
y_predict = np.empty(len(y_test), dtype=np.int64)
# produce line segments that connect the test data points
# to the nearest neighbors for drawing the chart
# (main() appends one segment per test point)
lines = []
# distance function
def dist(a, b):
    """Return the Euclidean distance between the points *a* and *b*.

    Coordinates are paired with ``zip``, so when the inputs differ in length
    the surplus trailing coordinates of the longer one are ignored.

    Args:
        a: Iterable of numeric coordinates.
        b: Iterable of numeric coordinates.

    Returns:
        The square root of the summed squared coordinate differences, as
        produced by ``np.sqrt``.
    """
    # use the built-in sum() instead of shadowing it with a local accumulator
    squared_diffs = ((ai - bi) ** 2 for ai, bi in zip(a, b))
    return np.sqrt(sum(squared_diffs))
def main(X_train, X_test, y_train, y_test, k=3):
    """Classify every test point by majority vote of its k nearest neighbors.

    For each row of ``X_test`` the distance to every row of ``X_train`` is
    computed with ``dist``.  The labels of the ``k`` closest training points
    are collected and the most frequent one is stored in the module-level
    ``y_predict`` array; on a tie the smallest label wins.  For 0/1 labels
    this matches rounding the mean of the neighbor labels.  A (test point,
    single nearest training point) pair is appended to the module-level
    ``lines`` list for drawing the chart.  The filled ``y_predict`` is
    printed at the end.

    Args:
        X_train: Training points, one point per row.
        X_test: Test points, one point per row.
        y_train: Array of class labels aligned with the rows of ``X_train``.
        y_test: Labels of the test points; not used by this function.
        k: Number of nearest neighbors that vote (default 3).  If it exceeds
            the number of training points, all training points vote.

    Raises:
        ValueError: If ``k`` is smaller than 1.

    Note:
        ``y_predict`` must already exist with at least ``len(X_test)`` slots,
        and ``lines`` is only appended to, never cleared.
    """
    global y_predict
    global lines

    if k < 1:
        raise ValueError("k must be at least 1")

    # process each of the test data points
    for i, test_item in enumerate(X_test):
        # calculate the distances to all training points
        distances = [dist(train_item, test_item) for train_item in X_train]

        # training-set indices ordered from the closest point to the farthest
        neighbor_order = np.argsort(distances)
        # class labels of the k closest training points
        neighbor_labels = y_train[neighbor_order[:k]]

        # create a line connecting the points for the chart
        # (only the single nearest neighbor is drawn)
        nearest = np.argmin(distances)
        lines.append(np.stack((test_item, X_train[nearest])))

        # majority vote: np.unique returns the labels sorted, and argmax picks
        # the first maximum, so ties resolve to the smallest label
        labels, counts = np.unique(neighbor_labels, return_counts=True)
        y_predict[i] = labels[np.argmax(counts)]

    print(y_predict)
# run the k-nearest-neighbor classification on the train/test split created above
main(X_train, X_test, y_train, y_test)