# genetic_other.py
import random
import warnings

import numpy as np
from sklearn.datasets import load_boston  # removed in scikit-learn 1.2; this script assumes an older release
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestRegressor
from boruta import BorutaPy

SEED = 2018
random.seed(SEED)
np.random.seed(SEED)

# Silence deprecation/convergence warnings so the CV scores printed below stay readable
warnings.filterwarnings("ignore")
#==============================================================================
# Data
#==============================================================================
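# Boston housing data: 506 samples, 13 numeric features (e.g. CRIM, RM, LSTAT),
# target is the median home value in $1000s.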
dataset = load_boston()
X, y = dataset.data, dataset.target
features = dataset.feature_names
#==============================================================================
# CV MSE before feature selection
#==============================================================================
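# Baseline: 5-fold CV MSE of a plain linear regression on all 13 features.
# Each selection method below is compared against this number.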
est = LinearRegression()
score = -1.0 * cross_val_score(est, X, y, cv=5, scoring="neg_mean_squared_error")
print("CV MSE before feature selection: {:.2f}".format(np.mean(score)))
#==============================================================================
# CV MSE after feature selection: RFE
#==============================================================================
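# RFECV: recursive feature elimination with cross-validation. Starting from all
# features, it repeatedly drops the feature the linear model weights least
# (smallest absolute coefficient) and keeps the subset size that scores best
# under 5-fold CV. rfe.support_ is a boolean mask over the original columns,
# so X[:, rfe.support_] selects only the retained features.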
rfe = RFECV(est, cv=5, scoring="neg_mean_squared_error")
rfe.fit(X, y)
score = -1.0 * cross_val_score(est, X[:,rfe.support_], y, cv=5, scoring="neg_mean_squared_error")
print("CV MSE after RFE feature selection: {:.2f}".format(np.mean(score)))
#==============================================================================
# CV MSE after feature selection: Feature Importance
#==============================================================================
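# Feature-importance filter: fit a random forest and keep every feature whose
# impurity-based importance exceeds 0.01 (an arbitrary cut-off), then re-score
# the linear model on that reduced subset.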
rf = RandomForestRegressor(n_estimators=500, random_state=SEED)
rf.fit(X, y)
support = rf.feature_importances_ > 0.01
score = -1.0 * cross_val_score(est, X[:,support], y, cv=5, scoring="neg_mean_squared_error")
print("CV MSE after Feature Importance feature selection: {:.2f}".format(np.mean(score)))
#==============================================================================
# CV MSE after feature selection: Boruta
#==============================================================================
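# Boruta: adds shuffled "shadow" copies of the features, fits a random forest
# on real + shadow columns, and keeps only features whose importance beats the
# best shadow feature significantly across iterations. n_estimators='auto'
# lets BorutaPy choose the number of trees automatically at each iteration.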
rf = RandomForestRegressor(n_estimators=500, random_state=SEED)
boruta = BorutaPy(rf, n_estimators='auto')
boruta.fit(X, y)
score = -1.0 * cross_val_score(est, X[:,boruta.support_], y, cv=5, scoring="neg_mean_squared_error")
print("CV MSE after Boruta feature selection: {:.2f}".format(np.mean(score)))