-
Notifications
You must be signed in to change notification settings - Fork 2
/
SVMVS.py
114 lines (91 loc) · 3.9 KB
/
SVMVS.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
from collections import defaultdict
from os.path import expanduser
import csv
import nltk
from sklearn import cross_validation
from sklearn.svm import LinearSVC
from sklearn import metrics
def read_file():
home = expanduser("~")
path_to_file = ""
# file_name = "training.csv"
# file_name = "training_uniform.csv"
file_name = "training_limited.csv"
with open(path_to_file + file_name, 'rb') as f:
for row in csv.reader(f, delimiter='#', quoting=csv.QUOTE_ALL):
#print(row)
try:
sentences.append(row[0])
polar.append(row[1])
except Exception as e:
print(e)
if ('neg' in row[0]):
polar.append('neg')
if ('pos' in row[0]):
polar.append('pos')
if ('ind' in row[0]):
polar.append('ind')
def unigrams(sentence):
features = defaultdict(list)
words = sentence.split()
for w in words:
features[w] = True
return features
def feature_extractor(sentence):
return unigrams(sentence)
def features_from_sentences(sentences, polar):
feature_labels = []
for i in range(len(sentences)):
features = feature_extractor(sentences[i])
feature_labels.append((features, polar[i]))
return feature_labels
def train_model(sentences, polar):
train_set = features_from_sentences(sentences, polar)
# classifier = nltk.SklearnClassifier(LinearSVC()).train(train_set[traincv[0]:traincv[len(traincv) - 1]])
# classifier.train(train_set)
# return classifier
for par in ['no', 'not', 'yet', 'never', 'none', 'nobody', 'nowhere', 'nothing', 'neither']:
train_set = features_from_sentences(sentences, polar)
cv = cross_validation.KFold(len(train_set), n_folds=10)
sum_accuracy = 0
k = 0
for traincv, testcv in cv:
# classifier = nltk.SklearnClassifier(naive_bayes.MultinomialNB()).train(train_set[traincv[0]:traincv[len(traincv)-1]])
#classifier = nltk.MaxentClassifier.train(train_set[traincv[0]:traincv[len(traincv)-1]], max_iter=10)
#classifier = nltk.SklearnClassifier(LogisticRegression()).train(train_set[traincv[0]:traincv[len(traincv)-1]])
#lassifier = nltk.SklearnClassifier(DecisionTreeClassifier()).train(train_set[traincv[0]:traincv[len(traincv) - 1]])
classifier = nltk.SklearnClassifier(LinearSVC()).train(train_set[traincv[0]:traincv[len(traincv) - 1]])
#'no','not','yet','never','none','nobody','nowhere','nothing','neither'
shifters = []
#shifters.append(par)
#shifters = []
y_true = []
y_pred = []
for i in range(len(testcv)):
y_true.append(train_set[testcv[i]][1])
label = classifier.classify(train_set[testcv[i]][0])
# if any(word in 'some one long two phrase three' for word in list_):
if any(parole in str(train_set[testcv[i]][0]) for parole in shifters):
# if("saumya" in str(train_set[testcv[i]][0])):
if (label == 0):
y_pred.append(1)
elif (label == 1):
y_pred.append(1)
else:
y_pred.append(2)
else:
y_pred.append(label)
acc = metrics.accuracy_score(y_true, y_pred)
sum_accuracy += acc
k += 1
# print(str(k) + ')accuracy: ' + str(acc))
# print('true labels: ' + str(y_true))
# print('predicted labels: ' + str(y_pred))
# print('')
print('ACCURACY shifting ' + par + ' : ' + str(sum_accuracy / k))
classifier.train(train_set) # aggiungere max_iter per maxent
return classifier
sentences = []
polar = []
read_file()
train_model(sentences, polar)