-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patha1_p1_rf.py
58 lines (49 loc) · 1.82 KB
/
a1_p1_rf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# -*- coding: utf-8 -*-
"""A1_p1_rf.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1mNWCMOVkL0KnFSdxLnKT-PBqaEvja8iY
"""
# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
# The data was saved to a CSV file; upload it to the Colab session, then read it.
from google.colab import files
# Open Colab's file-upload prompt. testdata.csv is expected to be among the
# uploaded files, since it is read by that fixed name below.
uploaded = files.upload()

# Column names are supplied explicitly, so pandas reads the first line of the
# file as data rather than as a header row.
column_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldspeak', 'slope', 'ca', 'thal', 'num',
]
data = pd.read_csv('testdata.csv', names=column_names)
#split dataset into training set and test set
from sklearn.model_selection import train_test_split
# Extract the feature columns (x: the first 13 columns) and the target column (y: 'num')
# Features are the first 13 columns (age .. thal); the target is the 14th
# column, 'num'.
x = data.iloc[:, 0:13]
y = data.iloc[:, 13]

# Hold out 20% of the rows for testing. The split is seeded so that the
# scores computed from it are reproducible from run to run, in the same way
# the forests themselves are seeded (random_state=20).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=20
)
# Used to find the best number of trees
import matplotlib.pyplot as plt
from matplotlib.pyplot import*
# Sweep the forest size to see how test accuracy varies with the number of
# trees: one forest is trained per candidate size and scored on the test split.
# NOTE(review): the size is being chosen by looking at test-set accuracy, so a
# final score on the same split will be optimistic; cross-validating on the
# training split would avoid that -- confirm the intent with the author.
tree_counts = range(10, 160)  # 150 candidate sizes: 10, 11, ..., 159
test = []  # test-set accuracy for each entry of tree_counts, in order
for n_trees in tree_counts:
    rfc = RandomForestClassifier(n_estimators=n_trees, random_state=20)
    rfc = rfc.fit(x_train, y_train)
    score = rfc.score(x_test, y_test)
    test.append(score)

# Plot against the actual n_estimators values so the x axis matches the
# forests that were trained (10..159, not 1..150).
plt.plot(tree_counts, test, color="red", label="number_trees")
plt.legend()
plt.show()
# Train the final forest with fixed, hand-picked hyperparameters, then report
# its mean accuracy on the held-out test split.
rfc = RandomForestClassifier(
    n_estimators=50,
    max_depth=6,
    max_features=8,
    min_samples_leaf=14,
    min_samples_split=19,
    random_state=20,
).fit(x_train, y_train)  # fit() returns the fitted estimator itself
score_r = rfc.score(x_test, y_test)
print("Random Forest:{}".format(score_r))