-
Notifications
You must be signed in to change notification settings - Fork 0
/
tree_evaluation.py
69 lines (61 loc) · 2.29 KB
/
tree_evaluation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
#tree_evaluation.py
import pandas as pd
import numpy as np
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
import pdb
from sklearn import tree
from plot_confusion_matrix import plot_confusion_matrix
##can optimize this to numpy but it works so i'm going to leave it
def mask(df, key, value):
# print(df[key] > value)
return df[np.abs(df[key]) > value]
def reg_eval(reg_tree, y_test, y_pred):
rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
# print('Mean Absolute Error: ', metrics.mean_absolute_error(y_test, y_pred))
# print('Mean Squared Error: ', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error: ', rmse)
print('')
# print('============================== RESULT ===============================')
#########finding assholes with biggest errors, fyi
# diff = pd.DataFrame({'diff' : (y_test - y_pred)})
diff = np.array((y_test - y_pred))
# pd.DataFrame.mask = mask
# big_errors = diff.mask('diff', 80000) # pick out the ones with errors bigger than 80000
# print('your big errors culprits:')
# print(big_errors)
# for y_test
plt.figure()
plt.scatter(range(0,np.shape(diff)[0]),diff, s=1)
plt.xlabel('Distribution of y- y_pred Error')
plt.ylabel('prediction error')
# filename = './output/regressor_result.png'
# plt.savefig(filename)
plt.show()
# # big_errors.index.tolist()
#
# # # dope graphics -- NEED LABELS
# dot_data = tree.export_graphviz(reg, feature_names = labels,
# filled=True,
# special_characters=True)
# graph = graphviz.Source(dot_data)
# graph.format = 'png'
# # graph = pydotplus.graph_from_dot_data(dot_data)
# # graph.render("./output/salary-predict", view=True) #plt.show
return rmse
def clf_eval(clf_tree, y_test, y_pred):
acc_score =accuracy_score(y_test, y_pred)
# print('accuracy score (train set): {}'.format(accuracy_score(y_train, y_pred_train_clf)))
print('accuracy score (test set): {}'.format(acc_score))
# cnf_matrix = confusion_matrix(y_test_np, y_test_np)
cnf_matrix = confusion_matrix(y_test, y_pred)
# pdb.set_trace()
plt.figure()
plt.matshow(cnf_matrix)
plt.title('Confusion matrix of the classifier')
plt.colorbar()
filename = './output/confusion_matrix.png'
plt.savefig(filename)
return acc_score