-
Notifications
You must be signed in to change notification settings - Fork 0
/
FrameWorkSVM.py
134 lines (114 loc) · 5.08 KB
/
FrameWorkSVM.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from pyexpat import features
from sklearn.svm import SVC
from Context import Strategy, Context
from sklearn import svm
from matplotlib import pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_recall_fscore_support
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.model_selection import GridSearchCV
class FrameWorkSVM(Strategy):
    """SVM strategy with weighted metrics, optional grid search and a
    coefficient plot layered on top of ``Strategy``.

    ``model``, ``param``, ``accur``, ``x_test``, ``y_test`` and
    ``prediction`` are read by this class but never assigned here;
    presumably the ``Strategy`` base class provides them -- confirm there.
    """

    def __init__(self, model, TrainPath, TestPath, param=None):
        """Forward the arguments to ``Strategy`` under the name ``"SVM"``.

        Args:
            model: Estimator handed to the base class.
            TrainPath: Training-data path, passed through unchanged.
            TestPath: Test-data path, passed through unchanged.
            param: Optional parameter grid; used by ``grid_search``.
        """
        super().__init__(model, "SVM", TrainPath, TestPath, param)
        # Filled in by metrics(); None until then.
        self.recall = None
        self.prec = None
        self.f1 = None

    def _fitted_estimator(self):
        """Return the estimator that carries the SVC attributes.

        After ``grid_search`` the model is a ``GridSearchCV`` wrapper, which
        does not expose ``gamma``/``C``/``kernel``/``coef_`` itself; once it
        has been fitted (with refit enabled) they live on
        ``best_estimator_``. A plain estimator is returned unchanged.
        """
        return getattr(self.model, "best_estimator_", self.model)

    def train(self):
        """Run the base-class training, then print and return ``self.accur``."""
        super().train()
        print(self.accur)
        return self.accur

    def metrics(self):
        """Store weighted precision, recall and F1 on the instance.

        Scores ``self.prediction`` against ``self.y_test`` with
        ``average='weighted'`` and writes the first three results to
        ``self.prec``, ``self.recall`` and ``self.f1``.
        """
        precision, recall, fscore, _support = precision_recall_fscore_support(
            self.y_test, self.prediction, average='weighted'
        )
        self.prec = precision
        self.recall = recall
        self.f1 = fscore

    def grid_search(self, cv=6):
        """Replace ``self.model`` with a ``GridSearchCV`` built around it.

        Args:
            cv: Cross-validation setting passed to ``GridSearchCV``.
                Defaults to 6, the value that used to be hard-coded.
        """
        self.model = GridSearchCV(self.model, self.param, cv=cv)

    def getCsvData(self):
        """Build a one-row DataFrame summarising the current model.

        Recomputes the metrics first, then reports the model name, the
        number of columns in ``self.x_test``, the accuracy, the weighted
        precision/recall/F1 and the ``gamma``, ``C`` and ``kernel`` of the
        estimator. When the model is a fitted grid search, the
        hyper-parameters are taken from its best estimator; reading them
        off the wrapper directly would raise ``AttributeError``.

        Returns:
            pandas.DataFrame: a single row with the columns listed above.
        """
        self.metrics()
        estimator = self._fitted_estimator()
        data_df = {
            'Model name': ["SVM"],
            'Number of features': [len(self.x_test.columns.values)],
            'Accurancy': [self.accur],
            'Precision': [self.prec],
            'Recall': [self.recall],
            'Fscore': [self.f1],
            'gamma': [estimator.gamma],
            'C': [estimator.C],
            'kernel': [estimator.kernel],
        }
        return pd.DataFrame(data_df)

    def f_importances(self):
        """Plot the first row of the estimator's coefficients as a bar chart.

        Coefficients are paired with the column names of ``self.x_test``,
        sorted in ascending order and drawn as horizontal bars. For a
        grid-searched model the best estimator's coefficients are used.

        NOTE(review): per the scikit-learn SVC documentation ``coef_`` is
        only available for a linear kernel, so other kernels are expected
        to raise ``AttributeError`` here -- confirm against callers.
        """
        features_names = self.x_test.columns.values
        imp = self._fitted_estimator().coef_[0]
        # Sort (coefficient, name) pairs together so labels stay aligned.
        imp, features_names = zip(*sorted(zip(imp, features_names)))
        positions = range(len(features_names))
        plt.barh(positions, imp, align='center')
        plt.yticks(positions, features_names)
        plt.show()
'''
1. Use the framework class to build a model of type SVC.
2. Define the model's parameter search space.
3. Write the results to the results CSV.
'''
def checkSVC():
    """Grid-search an SVC through the framework and record the outcome.

    Wraps a default ``svm.SVC`` in a ``FrameWorkSVM`` strategy (files
    ``trainData1.csv`` / ``testData1.csv``) held by a ``Context``, switches
    the strategy to grid search over ``C``, ``gamma`` and ``kernel``, runs
    the model, and hands the summary frame from ``getCsvData`` to
    ``insertDataToCSV`` together with the string ``"4"``.
    """
    search_space = {
        'C': [0.1, 1, 10, 100, 1000],
        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['linear', 'rbf', 'sigmoid'],
    }
    strategy = FrameWorkSVM(
        svm.SVC(), 'trainData1.csv', 'testData1.csv', param=search_space
    )
    context = Context(strategy)

    # Always go through context.strategy, exactly as the original did.
    context.strategy.grid_search()
    context.run_model()  # return value is not used
    summary = context.strategy.getCsvData()
    context.strategy.insertDataToCSV(summary, "4")
# Module-level call: the experiment runs whenever this file is executed or
# imported. NOTE(review): consider an `if __name__ == "__main__":` guard if
# this module is ever imported from elsewhere.
checkSVC()
# def f_importances(coef, names):
# imp = coef
# imp,names = zip(*sorted(zip(imp,names)))
# plt.barh(range(len(names)), imp, align='center')
# plt.yticks(range(len(names)), names)
# plt.show()