-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathaccuracyKnn.py
106 lines (82 loc) · 2.9 KB
/
accuracyKnn.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import csv
import random
import math
import operator
def _appendNumericRows(path, target, numColumns):
    """Append each CSV row of *path* to *target* with its leading columns as floats."""
    # Text mode with newline='' is what the csv module requires on Python 3;
    # a file opened in 'rb' makes csv.reader raise there.
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile):
            # Index-based conversion keeps the original strictness: a row with
            # fewer than numColumns fields raises IndexError, and a
            # non-numeric field raises ValueError.
            for y in range(numColumns):
                row[y] = float(row[y])
            target.append(row)


def loadDataset(filename, filename1, trainingSet=None, testSet=None, numColumns=14):
    """Load two CSV files into lists of rows with numeric leading columns.

    Rows of ``filename`` are appended to ``trainingSet`` and rows of
    ``filename1`` are appended to ``testSet``. In every row the first
    ``numColumns`` fields are converted to ``float``; any further fields are
    kept as the strings read from the file.

    Args:
        filename: Path of the CSV file whose rows go into ``trainingSet``.
        filename1: Path of the CSV file whose rows go into ``testSet``.
        trainingSet: List to append to (mutated in place). A new list is
            created when omitted.
        testSet: List to append to (mutated in place). A new list is created
            when omitted.
        numColumns: Number of leading columns converted to ``float``.

    Returns:
        A ``(trainingSet, testSet)`` tuple holding the same list objects that
        were passed in, or the newly created ones.

    Raises:
        IndexError: If a row has fewer than ``numColumns`` fields.
        ValueError: If one of the leading fields is not a valid float.
    """
    # None sentinels replace the former mutable defaults, which were shared
    # between calls and kept accumulating rows.
    if trainingSet is None:
        trainingSet = []
    if testSet is None:
        testSet = []
    _appendNumericRows(filename, trainingSet, numColumns)
    _appendNumericRows(filename1, testSet, numColumns)
    return trainingSet, testSet
def getAccuracy(trainingSet, testSet, numColumns=14,
                missCountFile='missCount.txt', statsFile='statistics.txt'):
    """Compare two datasets cell by cell, write a report and return the accuracy.

    For every row index of ``trainingSet`` the first ``numColumns`` cells are
    compared with the cell at the same position in ``testSet``. The
    ``trainingSet`` value decides how a cell is scored:

    * value < 5: scored by exact equality (reported as "fixed" values);
    * value >= 5: scored by the percentage deviation
      ``abs(training - test) / training * 100`` (reported as "fuzzy" values).

    The number of missing values is read from the first line of
    ``missCountFile``; the statistics are written to ``statsFile``, which is
    overwritten.

    Args:
        trainingSet: Rows of numeric cells used as the reference side.
        testSet: Rows of numeric cells compared against ``trainingSet``.
        numColumns: Number of leading cells compared in each row.
        missCountFile: Path of the file whose first line holds the integer
            count of missing values.
        statsFile: Path of the report file to write.

    Returns:
        The percentage of compared cells that are exactly equal, relative to
        ``len(testSet) * numColumns``; ``0.0`` when that total is zero.

    Raises:
        IndexError: If ``testSet`` has fewer rows than ``trainingSet`` or a
            row has fewer than ``numColumns`` cells.
        ValueError: If the first line of ``missCountFile`` is not an integer.
    """
    correct = 0        # cells that match exactly, regardless of magnitude
    fixedTotal = 0     # cells whose training value is below 5
    fixedCorrect = 0   # ... and that match exactly
    deviation = []     # percentage deviation of every cell with value >= 5

    for x, trainRow in enumerate(trainingSet):
        testRow = testSet[x]
        for y in range(numColumns):
            expected = trainRow[y]
            actual = testRow[y]
            matches = expected == actual
            if matches:
                correct += 1
            if expected < 5:
                fixedTotal += 1
                if matches:
                    fixedCorrect += 1
            elif expected >= 5:
                deviation.append(
                    abs(float(expected) - float(actual)) / expected * 100)

    # Only non-zero deviations count as "fuzzy" missing values.
    fuzzyCount = sum(1 for value in deviation if value != 0)

    with open(missCountFile) as countFile:
        missingvalueCount = int(countFile.readline())
    fixedCount = missingvalueCount - fuzzyCount

    # Each ratio falls back to 0.0 when its denominator is zero; previously
    # these cases raised ZeroDivisionError and left a partial report behind.
    if fuzzyCount:
        fuzzyError = float(sum(deviation)) / fuzzyCount
    else:
        fuzzyError = 0.0
    if fixedCount:
        fixedError = (fixedTotal - float(fixedCorrect)) / fixedCount * 100
    else:
        fixedError = 0.0
    totalValues = len(testSet) * numColumns
    if totalValues:
        overall = correct / (float(len(testSet)) * numColumns) * 100.0
    else:
        overall = 0.0

    report = [
        'Total number of training data entries:' + str(len(trainingSet)) + '\n',
        'Total number of Data entries:' + str(len(testSet)) + '\n',
        'Total number of values' + str(totalValues) + '\n',
        'Total number of missing values:' + str(missingvalueCount) + '\n',
        'Number of fuzzy missing values:' + str(fuzzyCount) + '\n',
        'Number of fixed missing values:' + str(fixedCount) + '\n',
        'Fuzzy value Error:' + str(fuzzyError) + '%' + '\n',
        'Fixed value Error:' + str(fixedError) + '%' + '\n',
        'Accuracy after Cleaning: '
        + str(100.0 - float(fixedError + fuzzyError) / 2) + '%' + '\n',
        'Overall Data Accuracy: ' + repr(overall) + '%',
    ]
    # The context manager closes the report even if a write fails.
    with open(statsFile, 'w') as statsOut:
        statsOut.writelines(report)

    return overall
def main():
    """Load 'trainingSet.txt' and 'missingvalue.txt' and run getAccuracy on them."""
    trainRows = []
    testRows = []
    # loadDataset fills both lists in place.
    loadDataset('trainingSet.txt', 'missingvalue.txt', trainRows, testRows)
    # Called for the statistics file it writes; the returned percentage is
    # not used here.
    getAccuracy(trainRows, testRows)
# Run the evaluation only when this file is executed as a script, so that
# importing the module does not read and write files as a side effect.
if __name__ == "__main__":
    main()