model_evaluation.py
# Copyright (C) 2023 Antonio Rodriguez
#
# This file is part of Transfer_Learning_for_CVD_risk_calculators.
#
# Transfer_Learning_for_CVD_risk_calculators is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# Transfer_Learning_for_CVD_risk_calculators is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Transfer_Learning_for_CVD_risk_calculators.
# If not, see <http://www.gnu.org/licenses/>.

from sklearn import base
from sklearn.metrics import (RocCurveDisplay, ConfusionMatrixDisplay,
                             confusion_matrix, f1_score, accuracy_score,
                             precision_score, recall_score)
import matplotlib.pyplot as plt
from typing import List, Tuple, Dict
import pandas as pd
import copy


def perc_clas_eval(model : base.BaseEstimator, X : pd.DataFrame, y : pd.DataFrame) -> Tuple[float, float, float, float, float]:
    """Computes accuracy (acc), Area Under the Curve (AUC), the Receiver
    Operating Characteristic (ROC) curve, F1-score, Recall and Precision for a
    given SGDClassifier
    (https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html).

    Args:
    -----
        model: sklearn SGDClassifier instance.
        X: test feature set.
        y: test target variable set.

    Returns:
    --------
        acc: accuracy on the test set.
        auc: AUC on the test set.
        f1_sc: F1-score on the test set.
        recall: Recall on the test set.
        prec: Precision on the test set.
    """
    # Prediction
    y_pred = model.predict(X)
    # Accuracy
    acc = accuracy_score(y, y_pred)
    # ROC curve (plot_roc_curve was removed in scikit-learn 1.2;
    # RocCurveDisplay.from_estimator is the current equivalent)
    disp = RocCurveDisplay.from_estimator(model, X, y)
    # AUC
    auc = disp.roc_auc
    # F1 Score
    f1_sc = f1_score(y, y_pred)
    # Recall
    recall = recall_score(y, y_pred)
    # Precision
    prec = precision_score(y, y_pred)
    return acc, auc, f1_sc, recall, prec
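
# A minimal usage sketch for perc_clas_eval (the synthetic data, split and
# classifier below are illustrative assumptions, not the original pipeline):
#
#   from sklearn.datasets import make_classification
#   from sklearn.linear_model import SGDClassifier
#   from sklearn.model_selection import train_test_split
#
#   X, y = make_classification(n_samples=500, random_state=0)
#   X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
#   clf = SGDClassifier(loss='log_loss', random_state=0).fit(X_train, y_train)
#   acc, auc, f1_sc, recall, prec = perc_clas_eval(clf, X_test, y_test)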


# def conf_matrix(models):
#     for model, data in models:
#         y_pred = model[0].predict(model[1])
#         confusion_matrix(data[0], y_pred)
#         title = "Confusion Matrix - %s" % data[1]
#         # plot_confusion_matrix was removed in scikit-learn 1.2;
#         # ConfusionMatrixDisplay.from_estimator is the current equivalent.
#         disp = ConfusionMatrixDisplay.from_estimator(model[0], model[1], data[0],
#                                                      cmap=plt.cm.Blues)
#         disp.ax_.set_title(title)
#         plt.show()
#         conf_matrix = disp.confusion_matrix
#     return conf_matrix


def get_eval_dictionaries(datasets : List, classifiers : Dict, regressors : Dict,
                          class_metrics : List, reg_metrics : List) -> Dict:
    """Generates and returns one dictionary to store the evaluation metrics
    and the associated best hyperparameters of the studied Machine Learning
    (ML) models. Two of the datasets approach a classification task and
    another one approaches a regression task: F1-score, ROC and AUC have been
    used for the former and MAE for the latter.

    Args:
    -----
        datasets: list of the datasets used to evaluate these models.
        classifiers: empty dictionary containing the abovementioned ML classifiers.
        regressors: empty dictionary containing the abovementioned ML regressors
            (only for the Steno dataset).
        class_metrics: classification metrics to be computed.
        reg_metrics: regression metrics to be computed.

    Returns:
    --------
        metrics: dictionary containing the metrics.
    """
    metrics = {}
    hp = {'best_hyperparameters': {}}
    for data in datasets:
        # Steno is a regression approach, the rest are classification ones.
        # Copies are taken so every dataset and model gets its own metric
        # containers instead of all entries sharing one mutable object.
        if 'Steno' in data:
            metrics[data] = copy.deepcopy(regressors)
            for regressor in metrics[data]:
                metrics[data][regressor] = (copy.deepcopy(reg_metrics),
                                            copy.deepcopy(hp))
        else:
            metrics[data] = copy.deepcopy(classifiers)
            for classifier in metrics[data]:
                metrics[data][classifier] = (copy.deepcopy(class_metrics),
                                             copy.deepcopy(hp))
    return metrics
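
# A minimal usage sketch for get_eval_dictionaries (the dataset names, model
# names and metric lists below are illustrative assumptions, not the original
# experiment configuration):
#
#   datasets = ['Framingham', 'REGICOR', 'Steno']
#   classifiers = {'SGD': {}, 'SVM': {}}
#   regressors = {'SGD_regressor': {}}
#   class_metrics = ['acc', 'auc', 'f1', 'recall', 'precision']
#   reg_metrics = ['mae']
#   metrics = get_eval_dictionaries(datasets, classifiers, regressors,
#                                   class_metrics, reg_metrics)
#   # Each entry holds its own (metrics, hyperparameters) pair, e.g.:
#   metrics['Steno']['SGD_regressor'][1]['best_hyperparameters']['alpha'] = 0.01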