-
Notifications
You must be signed in to change notification settings - Fork 0
/
test script1.py
56 lines (43 loc) · 2.06 KB
/
test script1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy as np
from sklearn.model_selection import train_test_split
import preprocessing as pre
import pickle as pick
import pandas as pd
# regression
# Evaluate the previously trained regression models on the megastore dataset:
# load the data, replay the stored preprocessing (feature selection, NaN
# imputation, categorical encoding) and print each model's score.

# |skew| below this threshold selects the mean for imputation, otherwise the median.
SKEW_THRESHOLD = 0.5


def _load_pickle(path):
    """Load one pickled artifact from *path* and close the file afterwards."""
    # NOTE: unpickling can execute arbitrary code. Only load artifacts that
    # were produced by this project's own training run.
    with open(path, 'rb') as handle:
        return pick.load(handle)


regression_data = pd.read_csv("megastore-regression-dataset.csv")
selected_features = _load_pickle('models/features.pkl')
print(regression_data.dtypes)

# Split target and features, then apply the project's preprocessing step.
Y = regression_data.loc[:, 'Profit']
X = regression_data.drop('Profit', axis=1)
X = pre.pre_processing(X)
# Explicit copy: X is modified column by column below, so it must not be a
# view/slice of the preprocessed frame.
X = X[selected_features.columns].copy()
X_test_num, X_test_cat = pre.numerical_Categorical(X)

# `statistics` is looked up with .at[<column>, 'mean' | 'median' | 'mode'];
# presumably these are the values saved at training time - confirm against
# the training script.
statistics = _load_pickle('models/statistics1.pkl')

# handle NaN values
# NOTE(review): the mean/median choice is driven by the skew of the data being
# scored, not of the training data - confirm this is intended.
skew_values = X_test_num.skew()
for column in X_test_num.columns[X_test_num.isnull().any()]:
    fill_stat = 'mean' if abs(skew_values[column]) < SKEW_THRESHOLD else 'median'
    # Assign the result back: chained `X[column].fillna(..., inplace=True)`
    # is not guaranteed to update X itself.
    X[column] = X[column].fillna(statistics.at[column, fill_stat])

for col in X_test_cat.columns:
    # The original call discarded the return value of fillna, so categorical
    # NaNs were never filled before encoding.
    X[col] = X[col].fillna(statistics.at[col, 'mode'])

target_stat = 'mean' if abs(Y.skew()) < SKEW_THRESHOLD else 'median'
Y = Y.fillna(statistics.at['Profit', target_stat])

# Encode categorical columns with the encoder stored for each column.
for col in X_test_cat:
    encoder = _load_pickle('encoders/' + col + '.sav')
    X[col] = encoder.transform(X[col])

# Score every stored model on the prepared data.
poly_features = _load_pickle("models/Polynomial.sav")
poly_model = _load_pickle("models/poly_model.sav")
print("Polynomial model Score : ", poly_model.score(poly_features.transform(X), Y))

random_forest_reg = _load_pickle("models/random_forest.sav")
multivariable = _load_pickle("models/multivariable.sav")
elasticNet_model = _load_pickle("models/elasticNet_model.sav")
print("random_forest_reg model Score : ", random_forest_reg.score(X, Y))
print("multivariable model Score : ", multivariable.score(X, Y))
print("elasticNet model Score : ", elasticNet_model.score(X, Y))
print("\n")
# ################################################################################################################## #