-
Notifications
You must be signed in to change notification settings - Fork 1
/
DegitRecognationKNN
89 lines (62 loc) · 2.41 KB
/
DegitRecognationKNN
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Handwritten digit recognition in Python 3 using the MNIST dataset
import os
import struct
import numpy as np
def read(dataset="training", path="."):
    """Yield ``(label, image)`` pairs from a pair of MNIST IDX files.

    The label file and the image file are both read completely into NumPy
    arrays before the first pair is yielded.

    Args:
        dataset: ``"training"`` to read ``train-images.idx3-ubyte`` and
            ``train-labels.idx1-ubyte``, or ``"testing"`` to read
            ``t10k-images.idx3-ubyte`` and ``t10k-labels.idx1-ubyte``.
        path: Directory that contains the IDX files.

    Yields:
        Tuples ``(label, image)`` where ``label`` is an ``np.int8`` scalar and
        ``image`` is a ``(rows, cols)`` ``np.uint8`` array, with ``rows`` and
        ``cols`` taken from the image file header.

    Raises:
        ValueError: If ``dataset`` is neither ``"training"`` nor ``"testing"``.
            Because this is a generator, the error surfaces on the first
            iteration rather than at call time.
    """
    # Compare by value: identity (`is`) on string literals only works by
    # accident of interning and fails for strings built at runtime.
    if dataset == "training":
        fname_img = os.path.join(path, 'train-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 'train-labels.idx1-ubyte')
    elif dataset == "testing":
        fname_img = os.path.join(path, 't10k-images.idx3-ubyte')
        fname_lbl = os.path.join(path, 't10k-labels.idx1-ubyte')
    else:
        raise ValueError("dataset must be 'testing' or 'training'")

    # Label file: an 8-byte header (two big-endian uint32 values) followed by
    # one byte per label.
    with open(fname_lbl, 'rb') as flbl:
        _magic, _num = struct.unpack(">II", flbl.read(8))
        lbl = np.fromfile(flbl, dtype=np.int8)

    # Image file: a 16-byte header (four big-endian uint32 values, the last
    # two being rows and cols) followed by one byte per pixel.
    with open(fname_img, 'rb') as fimg:
        _magic, _num, rows, cols = struct.unpack(">IIII", fimg.read(16))
        img = np.fromfile(fimg, dtype=np.uint8).reshape(len(lbl), rows, cols)

    # Hand the samples out one at a time, label first.
    for idx in range(len(lbl)):
        yield lbl[idx], img[idx]
def show(image):
    """Display a single image in a matplotlib window.

    The image is drawn with the ``Greys`` colormap and nearest-neighbour
    interpolation; x-axis ticks are placed on top and y-axis ticks on the
    left. ``pyplot.show()`` is called at the end.

    Args:
        image: The array-like passed straight to ``Axes.imshow``.
    """
    # Imported lazily so the module can be loaded without matplotlib unless
    # an image is actually displayed.
    import matplotlib as mpl
    from matplotlib import pyplot

    figure = pyplot.figure()
    axes = figure.add_subplot(1, 1, 1)

    rendered = axes.imshow(image, cmap=mpl.cm.Greys)
    rendered.set_interpolation('nearest')

    axes.xaxis.set_ticks_position('top')
    axes.yaxis.set_ticks_position('left')

    pyplot.show()
import random

from sklearn import neighbors


def _to_arrays(samples):
    """Stack ``(label, image)`` pairs into a feature matrix and a label column.

    Returns a ``(n, pixels)`` float array holding each image flattened to one
    row, and an ``(n, 1)`` float array holding the matching labels. Sizes are
    taken from ``samples`` itself rather than hard-coded.
    """
    n_features = samples[0][1].size if samples else 0
    data = np.zeros(shape=(len(samples), n_features))
    labels = np.zeros(shape=(len(samples), 1))
    for row, (label, pixels) in enumerate(samples):
        data[row, :] = pixels.reshape(-1)
        labels[row, :] = label
    return data, labels


# Load both splits completely from the current directory.
training_data = list(read(dataset='training', path='.'))
testing_data = list(read(dataset='testing', path='.'))
print(len(training_data))
print(len(testing_data))

# Flatten every image into one row of the feature matrix.
tr_dt, tr_lbl = _to_arrays(training_data)
ts_dt, ts_lbl = _to_arrays(testing_data)

# Draw a random subset of the training samples. The range covers every row
# (the previous upper bound excluded the last sample), and the subset size is
# capped at the number of rows actually available.
k = 1500
list_tr_data = random.sample(range(len(tr_dt)), min(k, len(tr_dt)))
trainset = tr_dt[list_tr_data, :]
trainlabel = tr_lbl[list_tr_data, :]

# Same for the test samples; the name `l` is kept for backward compatibility.
l = 250
list_ts_data = random.sample(range(len(ts_dt)), min(l, len(ts_dt)))
testset = ts_dt[list_ts_data, :]
testlabel = ts_lbl[list_ts_data, :]

# Fit a 1-nearest-neighbour classifier on the training subset and report its
# score on the test subset.
knn = neighbors.KNeighborsClassifier(n_neighbors=1)
knn.fit(trainset, trainlabel.ravel())
print('KNN score: %f' % knn.score(testset, testlabel.ravel()))