-
Notifications
You must be signed in to change notification settings - Fork 0
/
rf_tune.py
153 lines (124 loc) · 4.69 KB
/
rf_tune.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
##rf_tune.py
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

from tree_evaluation import reg_eval,clf_eval
def find_n_estimators(treetype, tree, x_train, x_test, y_train, y_test):
    """Sweep ``n_estimators`` for a random forest and plot train/test error.

    Ten forest sizes, evenly spaced between 1 and 2000 and truncated to
    integers, are each fitted on the training split. Predictions on both
    splits are scored with ``reg_eval`` and the two curves are saved to
    ``./output/rf_tuning/n_estimators_opt_<treetype>.png``.

    NOTE(review): ``reg_eval`` is used for both tree types and its result is
    labelled "RMSE" on the plot; ``clf_eval`` is imported by the module but
    not used here -- confirm this is intended for ``treetype == 'clf'``.

    Args:
        treetype: ``'reg'`` selects ``RandomForestRegressor``; ``'clf'``
            selects ``RandomForestClassifier``.
        tree: Unused; kept so existing callers keep working.
        x_train: Training features passed to ``fit`` and ``predict``.
        x_test: Held-out features passed to ``predict``.
        y_train: Training targets.
        y_test: Held-out targets.

    Raises:
        SystemExit: If ``treetype`` is neither ``'reg'`` nor ``'clf'``
            (exit status 1).
    """
    print('finding best number of estimators....')
    print('-----------------------------------------')

    if treetype == 'reg':
        maketree = RandomForestRegressor
    elif treetype == 'clf':
        maketree = RandomForestClassifier
    else:
        print('wrong tree type')
        # Raise SystemExit directly: the ``exit`` builtin is injected by
        # ``site`` for interactive use, and a bare exit() reports success
        # (status 0) even though this is an error path.
        raise SystemExit(1)

    filename = './output/rf_tuning/n_estimators_opt_' + treetype + '.png'
    # Create the output folder up front so a missing directory cannot throw
    # away the results after every forest has already been trained.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    estimators = np.linspace(1, 2000, 10).astype(int)
    rmse_train = []
    rmse_test = []
    for est in estimators:
        rf = maketree(n_estimators=est)
        rf.fit(x_train, y_train)
        rmse_train.append(reg_eval(rf, y_train, rf.predict(x_train)))
        rmse_test.append(reg_eval(rf, y_test, rf.predict(x_test)))

    # The figure is written to disk rather than shown interactively.
    plt.figure()
    plt.plot(estimators, rmse_train, 'b', label='train rmse')
    plt.plot(estimators, rmse_test, 'r', label='test rmse')
    plt.legend()
    plt.xlabel('Number of estimators')
    plt.ylabel('RMSE error')
    plt.savefig(filename)
def find_max_depth(treetype, tree, x_train, x_test, y_train, y_test):
    """Sweep ``max_depth`` for a random forest and plot train/test error.

    Twenty-five depths, evenly spaced between 1 and 50 and truncated to
    integers, are each used to fit a forest on the training split.
    Predictions on both splits are scored with ``reg_eval`` and the two
    curves are saved to
    ``./output/rf_tuning/Maximum_depth_opt_<treetype>.png``.

    NOTE(review): ``reg_eval`` is used for both tree types and its result is
    labelled "RMSE" on the plot; ``clf_eval`` is imported by the module but
    not used here -- confirm this is intended for ``treetype == 'clf'``.

    Args:
        treetype: ``'reg'`` selects ``RandomForestRegressor``; ``'clf'``
            selects ``RandomForestClassifier``.
        tree: Unused; kept so existing callers keep working.
        x_train: Training features passed to ``fit`` and ``predict``.
        x_test: Held-out features passed to ``predict``.
        y_train: Training targets.
        y_test: Held-out targets.

    Raises:
        SystemExit: If ``treetype`` is neither ``'reg'`` nor ``'clf'``
            (exit status 1).
    """
    print('finding maximum depth....')
    print('-----------------------------------------')

    if treetype == 'reg':
        maketree = RandomForestRegressor
    elif treetype == 'clf':
        maketree = RandomForestClassifier
    else:
        print('wrong tree type')
        # Raise SystemExit directly: the ``exit`` builtin is injected by
        # ``site`` for interactive use, and a bare exit() reports success
        # (status 0) even though this is an error path.
        raise SystemExit(1)

    filename = './output/rf_tuning/Maximum_depth_opt_' + treetype + '.png'
    # Create the output folder up front so a missing directory cannot throw
    # away the results after every forest has already been trained.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    depth_list = np.linspace(1, 50, 25).astype(int)
    rmse_train = []
    rmse_test = []
    for depth in depth_list:
        rf = maketree(max_depth=depth)
        rf.fit(x_train, y_train)
        rmse_train.append(reg_eval(rf, y_train, rf.predict(x_train)))
        rmse_test.append(reg_eval(rf, y_test, rf.predict(x_test)))

    # The figure is written to disk rather than shown interactively.
    plt.figure()
    plt.plot(depth_list, rmse_train, 'b', label='train rmse')
    plt.plot(depth_list, rmse_test, 'r', label='test rmse')
    plt.legend()
    plt.xlabel('Maximum Depth')
    plt.ylabel('RMSE error')
    plt.savefig(filename)
def find_min_sample_split(treetype, tree, x_train, x_test, y_train, y_test):
    """Sweep ``min_samples_split`` for a random forest and plot the error.

    Ten float values evenly spaced from 0.1 to 1.0 are each passed as
    ``min_samples_split`` to a forest fitted on the training split.
    Predictions on both splits are scored with ``reg_eval`` and the two
    curves are saved to ``./output/rf_tuning/min_split_<treetype>.png``.

    NOTE(review): ``reg_eval`` is used for both tree types and its result is
    labelled "RMSE" on the plot; ``clf_eval`` is imported by the module but
    not used here -- confirm this is intended for ``treetype == 'clf'``.

    Args:
        treetype: ``'reg'`` selects ``RandomForestRegressor``; ``'clf'``
            selects ``RandomForestClassifier``.
        tree: Unused; kept so existing callers keep working.
        x_train: Training features passed to ``fit`` and ``predict``.
        x_test: Held-out features passed to ``predict``.
        y_train: Training targets.
        y_test: Held-out targets.

    Raises:
        SystemExit: If ``treetype`` is neither ``'reg'`` nor ``'clf'``
            (exit status 1).
    """
    print('finding minimum sample split...')
    print('-----------------------------------------')

    if treetype == 'reg':
        maketree = RandomForestRegressor
    elif treetype == 'clf':
        maketree = RandomForestClassifier
    else:
        print('ERROR:: wrong tree type')
        # Raise SystemExit directly: the ``exit`` builtin is injected by
        # ``site`` for interactive use, and a bare exit() reports success
        # (status 0) even though this is an error path.
        raise SystemExit(1)

    filename = './output/rf_tuning/min_split_' + treetype + '.png'
    # Create the output folder up front so a missing directory cannot throw
    # away the results after every forest has already been trained.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Floats here; scikit-learn documents a float min_samples_split as a
    # fraction of the samples rather than an absolute count.
    how_many_split = np.linspace(0.1, 1.0, 10)
    rmse_train = []
    rmse_test = []
    for split in how_many_split:
        rf = maketree(min_samples_split=split)
        rf.fit(x_train, y_train)
        rmse_train.append(reg_eval(rf, y_train, rf.predict(x_train)))
        rmse_test.append(reg_eval(rf, y_test, rf.predict(x_test)))

    # The figure is written to disk rather than shown interactively.
    plt.figure()
    plt.plot(how_many_split, rmse_train, 'b', label='train rmse')
    plt.plot(how_many_split, rmse_test, 'r', label='test rmse')
    plt.legend()
    plt.xlabel('Minimum Sample Split')
    plt.ylabel('RMSE error')
    plt.savefig(filename)
def find_max_features(treetype, tree, x_train, x_test, y_train, y_test):
    """Sweep ``max_features`` for a random forest and plot train/test error.

    Every integer from 1 up to the number of columns of ``x_train``
    (``np.shape(x_train)[1]``) is used as ``max_features`` for a forest
    fitted on the training split. Predictions on both splits are scored
    with ``reg_eval`` and the two curves are saved to
    ``./output/rf_tuning/max_features_<treetype>.png``.

    NOTE(review): ``reg_eval`` is used for both tree types and its result is
    labelled "RMSE" on the plot; ``clf_eval`` is imported by the module but
    not used here -- confirm this is intended for ``treetype == 'clf'``.

    Args:
        treetype: ``'reg'`` selects ``RandomForestRegressor``; ``'clf'``
            selects ``RandomForestClassifier``.
        tree: Unused; kept so existing callers keep working.
        x_train: Two-dimensional training features; its second dimension
            sets the upper end of the sweep.
        x_test: Held-out features passed to ``predict``.
        y_train: Training targets.
        y_test: Held-out targets.

    Raises:
        SystemExit: If ``treetype`` is neither ``'reg'`` nor ``'clf'``
            (exit status 1).
    """
    print('finding maximum features...')
    print('-----------------------------------------')

    # Validate the tree type before touching x_train so a bad type is
    # reported as such regardless of the data that was passed in.
    if treetype == 'reg':
        maketree = RandomForestRegressor
    elif treetype == 'clf':
        maketree = RandomForestClassifier
    else:
        print('ERROR:: wrong tree type')
        # Raise SystemExit directly: the ``exit`` builtin is injected by
        # ``site`` for interactive use, and a bare exit() reports success
        # (status 0) even though this is an error path.
        raise SystemExit(1)

    filename = './output/rf_tuning/max_features_' + treetype + '.png'
    # Create the output folder up front so a missing directory cannot throw
    # away the results after every forest has already been trained.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    n_features = np.shape(x_train)[1]
    how_many_features = np.linspace(1, n_features, n_features).astype(int)
    rmse_train = []
    rmse_test = []
    for ftr in how_many_features:
        rf = maketree(max_features=ftr)
        rf.fit(x_train, y_train)
        rmse_train.append(reg_eval(rf, y_train, rf.predict(x_train)))
        rmse_test.append(reg_eval(rf, y_test, rf.predict(x_test)))

    # The figure is written to disk rather than shown interactively.
    plt.figure()
    plt.plot(how_many_features, rmse_train, 'b', label='train rmse')
    plt.plot(how_many_features, rmse_test, 'r', label='test rmse')
    plt.legend()
    plt.xlabel('Maximum Number of Features')
    plt.ylabel('RMSE error')
    plt.savefig(filename)