# a1_p1_rf.py

# -*- coding: utf-8 -*-
"""A1_p1_rf.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1mNWCMOVkL0KnFSdxLnKT-PBqaEvja8iY
"""

# Commented out IPython magic to ensure Python compatibility.
# %matplotlib inline
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# The dataset was saved as a CSV file. Upload it through the Colab upload
# helper, then read it by its relative path.
from google.colab import files

uploaded = files.upload()

# The column names are supplied explicitly via `names`, one per column.
# The last one ('num') is the column used as the label further down.
data = pd.read_csv(
    'testdata.csv',
    names=[
        'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
        'thalach', 'exang', 'oldspeak', 'slope', 'ca', 'thal', 'num',
    ],
)

# Split the dataset into a training set and a held-out test set.
from sklearn.model_selection import train_test_split

# Extract the inputs and the label: the first 13 columns are the features,
# the 14th column is the target.
x = data.iloc[:, 0:13]
y = data.iloc[:, 13]

# Hold out 20% of the rows for testing. The split is seeded with the same
# seed the random forests in this script use, so repeated runs produce the
# same partition and therefore comparable scores. Without a seed the rows
# are shuffled differently on every run and the reported accuracy changes
# from run to run even though the models themselves are seeded.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=20
)

# Find a good number of trees: fit one forest per candidate size and plot
# the score each one reaches on the test set against its number of trees.
# NOTE(review): the candidates are scored on the same test set that is used
# for the final evaluation, so the final score is optimistic. Consider
# scoring them with cross-validation on the training set instead.
import matplotlib.pyplot as plt
# NOTE(review): wildcard import kept only in case later cells rely on the
# names it injects; nothing in this block needs it.
from matplotlib.pyplot import*

# The 150 candidate forest sizes actually trained: 10, 11, ..., 159.
tree_counts = range(10, 160)

test = []
for n_trees in tree_counts:
    rfc = RandomForestClassifier(n_estimators=n_trees, random_state=20)
    rfc = rfc.fit(x_train, y_train)
    score = rfc.score(x_test, y_test)
    test.append(score)

# Plot against the real tree counts. Using 1..150 on the x-axis would shift
# every point 9 trees to the left of the forest size that produced it.
plt.plot(tree_counts, test, color="red", label="number_trees")
plt.xlabel("n_estimators")
plt.ylabel("test accuracy")
plt.legend()
plt.show()

# Final model: a 50-tree random forest with hand-picked hyperparameters,
# trained on the training split.
rfc = RandomForestClassifier(
    n_estimators=50,
    random_state=20,
    max_depth=6,
    min_samples_leaf=14,
    min_samples_split=19,
    max_features=8,
).fit(x_train, y_train)

# Score the fitted forest on the held-out split and report the result.
score_r = rfc.score(x_test, y_test)
print("Random Forest:{}".format(score_r))