-
Notifications
You must be signed in to change notification settings - Fork 1
/
random_forest.py
59 lines (45 loc) · 1.85 KB
/
random_forest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import argparse
import pickle
import os
import numpy as np
from polyaxon.tracking import Run
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from my_project.data import load_data
# Polyaxon: tracking run used by the script below to log the dataset
# references, the cross-validation metrics, and the pickled model.
experiment = Run()
def model(X, y, n_estimators, max_features, min_samples_leaf):
    """Cross-validate a random forest and return the scores with a fitted model.

    Args:
        X: Feature data handed to scikit-learn.
        y: Targets aligned with ``X``.
        n_estimators: Forwarded to ``RandomForestClassifier(n_estimators=...)``.
        max_features: Forwarded to ``RandomForestClassifier(max_features=...)``.
        min_samples_leaf: Forwarded to
            ``RandomForestClassifier(min_samples_leaf=...)``.

    Returns:
        A ``(scores, classifier)`` tuple: the 5-fold cross-validation scores
        and the classifier fitted on all of ``X`` and ``y``.
    """
    classifier = RandomForestClassifier(
        n_estimators=n_estimators,
        max_features=max_features,
        min_samples_leaf=min_samples_leaf,
    )
    scores = cross_val_score(classifier, X, y, cv=5)
    # cross_val_score fits clones of the estimator and leaves ``classifier``
    # itself untouched, so fit it on the full data here; otherwise callers
    # would receive (and persist) a model that cannot predict.
    classifier.fit(X, y)
    return scores, classifier
# Hyperparameters are exposed as command-line flags so each run can set them.
parser = argparse.ArgumentParser()
parser.add_argument('--n_estimators', type=int, default=3)
parser.add_argument('--max_features', type=int, default=3)
parser.add_argument('--min_samples_leaf', type=int, default=80)
args = parser.parse_args()

X, y = load_data()

# Polyaxon
# https://polyaxon.com/docs/experimentation/tracking/module/#log_data_ref
experiment.log_data_ref('dataset_X', content=X)
experiment.log_data_ref('dataset_y', content=y)

accuracies, classifier = model(
    X=X,
    y=y,
    n_estimators=args.n_estimators,
    max_features=args.max_features,
    min_samples_leaf=args.min_samples_leaf,
)
accuracy_mean = np.mean(accuracies)
accuracy_std = np.std(accuracies)

# Polyaxon: log the summary statistics once, then one accuracy value per
# cross-validation fold, using the fold index as the step.
experiment.log_metrics(accuracy_mean=accuracy_mean,
                       accuracy_std=accuracy_std)
for step, accuracy in enumerate(accuracies):
    experiment.log_metrics(accuracy=accuracy, step=step)

# Pickle the classifier into the run's outputs path, then register that file
# with Polyaxon as the model artifact. The file is closed before log_model
# is called so the full pickle is on disk.
outpath = os.path.join(experiment.get_outputs_path(), 'model.pkl')
with open(outpath, 'wb') as outfile:
    pickle.dump(classifier, outfile)

experiment.log_model(
    outpath,
    name='top cross validation model',
    framework='sklearn'
)