-
Notifications
You must be signed in to change notification settings - Fork 0
/
ml2.py
87 lines (81 loc) · 1.76 KB
/
ml2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import csv
import operator
import numpy
import time
## the csv files aren't working, weird format issues
## template from Stack Overflow
class Digit(object):
    """One sample: a label plus a flat list of numeric values.

    Callers create an empty instance, fill in ``label`` and ``data``, and
    then call ``init()`` to build the NumPy array that ``distance()``
    operates on.
    """

    def __init__(self):
        # Label of the sample; stays '' when the caller never sets one.
        self.label = ''
        # Raw values, appended to by the caller.
        self.data = []
        # NumPy copy of ``data``; None until init() (or distance()) builds it.
        self.ndata = None

    def init(self):
        """Snapshot the current contents of ``data`` into ``ndata``."""
        self.ndata = numpy.array(self.data)

    def _vector(self):
        """Return ``ndata``, building it first if ``init()`` was never called."""
        if self.ndata is None:
            self.init()
        return self.ndata

    def distance(self, digit):
        """Return the Euclidean distance between this sample and ``digit``.

        Args:
            digit: another ``Digit`` whose data has the same length (or a
                length NumPy can broadcast against this one).

        Returns:
            The square root of the sum of squared element-wise differences.
        """
        # Going through _vector() means a missing init() call on either side
        # no longer ends in an AttributeError.
        return numpy.sum((self._vector() - digit._vector()) ** 2) ** .5
def get_training_set(path='trainingset.csv'):
    """Load labelled samples from a CSV file.

    The first row is treated as a header and skipped. In every other
    non-empty row the first column is stored unchanged (as a string) in
    ``label`` and each remaining column is converted with ``int`` and
    stored in ``data``.

    Args:
        path: CSV file to read. Defaults to ``'trainingset.csv'``.

    Returns:
        A list of ``Digit`` objects, one per non-empty data row, each with
        ``init()`` already called.

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if a non-label column cannot be converted with ``int``.
    """
    import sys

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3; the wrong mode makes the reader fail.
    if sys.version_info[0] < 3:
        csvfile = open(path, 'rb')
    else:
        csvfile = open(path, newline='')

    dataset = []
    with csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip the header row (no-op on an empty file)
        for row in csvreader:
            if not row:
                # Blank lines would otherwise become samples with no label
                # and no values.
                continue
            data = Digit()
            data.label = row[0]
            data.data = [int(col) for col in row[1:]]
            data.init()
            dataset.append(data)
    return dataset
def get_test_set(path='testset.csv'):
    """Load unlabelled samples from a CSV file.

    The first row is treated as a header and skipped. Every column of every
    other non-empty row is converted with ``int`` and stored in ``data``;
    ``label`` is left at its default.

    Args:
        path: CSV file to read. Defaults to ``'testset.csv'``.

    Returns:
        A list of ``Digit`` objects, one per non-empty data row, each with
        ``init()`` already called.

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if a column cannot be converted with ``int``.
    """
    import sys

    # The csv module wants a binary file on Python 2 but a text file opened
    # with newline='' on Python 3; the wrong mode makes the reader fail.
    if sys.version_info[0] < 3:
        csvfile = open(path, 'rb')
    else:
        csvfile = open(path, newline='')

    dataset = []
    with csvfile:
        csvreader = csv.reader(csvfile)
        next(csvreader, None)  # skip the header row (no-op on an empty file)
        for row in csvreader:
            if not row:
                # Blank lines would otherwise become samples with no values.
                continue
            data = Digit()
            data.data = [int(col) for col in row]
            data.init()
            dataset.append(data)
    return dataset
# Classify every test sample by majority vote among its nearest training
# samples and write one "ImageId,Label" line per test sample to ./out1.csv.
trainingset = get_training_set()
testset = get_test_set()

# Vote among the 100 nearest neighbours, or among all of them when the
# training set is smaller than that (indexing 0..99 blindly would raise
# IndexError).
neighbours = min(100, len(trainingset))

# The with-block closes the output file even if classification fails midway.
with open('./out1.csv', 'w') as f:
    f.write('ImageId,Label\n')
    i = 1  # 1-based id written in the ImageId column
    for test in testset:
        start = time.time()

        # Distance from this test sample to every training sample,
        # nearest first. list.sort is stable, so equal distances keep
        # training-set order.
        results = [(test.distance(train), train) for train in trainingset]
        results.sort(key=operator.itemgetter(0))

        # Count how often each label occurs among the nearest neighbours.
        labelmap = {}
        for result in results[:neighbours]:
            label = result[1].label
            labelmap[label] = labelmap.get(label, 0) + 1

        # (label, votes) pairs, most votes first; items() works on both
        # Python 2 and Python 3, unlike iteritems().
        labelmap = sorted(labelmap.items(), key=operator.itemgetter(1), reverse=True)

        f.write(str(i))
        f.write(',')
        f.write(labelmap[0][0])
        f.write('\n')

        # Progress output: sample id, vote table, seconds spent on the sample.
        # Single-argument print(...) produces the same output on Python 2.
        print(i)
        print(labelmap)
        i = i + 1
        end = time.time()
        print(end - start)

print("Done")