-
Notifications
You must be signed in to change notification settings - Fork 16
/
Copy path3b2-DT-CART-R-sales.R
51 lines (40 loc) · 1.3 KB
/
3b2-DT-CART-R-sales.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# CART Models - HH Case Study - Regression
library(rpart)
library(rpart.plot)
library(forecast)
library(gsheet)
# Fetch the sales sheet from Google Sheets and coerce it to a plain data.frame
url <- "https://docs.google.com/spreadsheets/d/1PWWoMqE5o3ChwJbpexeeYkW6p4BHL9hubVb1fkKSBgA/edit#gid=1941519952"
data <- as.data.frame(
  gsheet2tbl(url)
)
# Inspect column types and the first few values of each column
str(data)
# Per-column summary statistics
summary(data)
# Column names (Annual_Sales is the modelling target used further down)
names(data)
# Random 70/30 train/test split
set.seed(777) # fixed seed so the same rows are drawn on every run
n_rows <- nrow(data)
# seq_len() stays correct for an empty data frame (1:0 would yield c(1, 0));
# floor() makes the truncation of the fractional sample size explicit.
Index <- sample(x = seq_len(n_rows), size = floor(0.7 * n_rows))
Index
# Create Train dataset: the sampled rows
train <- data[Index, ]
nrow(train)
# Create Test dataset: every row that was not sampled.
# setdiff() is used instead of data[-Index, ] because negative indexing with
# an empty Index selects no rows at all rather than all rows.
test <- data[setdiff(seq_len(n_rows), Index), ]
nrow(test)
# Sanity check: the two partitions together should cover every row
nrow(test) + nrow(train)
########################### Modeling #################################
# Fit a regression tree (method = "anova") for Annual_Sales on all remaining
# columns. The first column is dropped by position before fitting
# (presumably a row identifier -- TODO confirm against the sheet).
# drop = FALSE keeps the result a data.frame even if one column remains.
trainModel <- rpart(
  Annual_Sales ~ .,
  data = train[, -1, drop = FALSE],
  method = "anova"
)
trainModel
# Mean of the target in the training data, printed for reference next to
# the fitted tree
mean(train$Annual_Sales)
# Plot the Regression Tree
rpart.plot(trainModel, type = 4, fallen.leaves = TRUE, cex = 1.0, nn = TRUE)
# cp selection
printcp(trainModel)
# rpart() already grows the tree with its default cp = 0.01, so pruning at
# cp = 0.01 would leave the tree unchanged. Instead prune at the cp whose
# cross-validated error (xerror column of the cp table) is lowest.
cp_table <- trainModel$cptable
if ("xerror" %in% colnames(cp_table)) {
  best_cp <- cp_table[which.min(cp_table[, "xerror"]), "CP"]
} else {
  # No cross-validation results available: fall back to the previous value
  best_cp <- 0.01
}
best_cp
trainModel_prune <- prune(trainModel, cp = best_cp)
rpart.plot(trainModel_prune)
# Predict on the held-out test set and check accuracy
predictSales_test <- predict(trainModel_prune, newdata = test, type = "vector")
predictSales_test # print the predicted sales values
# Compute error measures (including RMSE and MAPE) with forecast::accuracy();
# the forecast package is already attached at the top of the script.
# First argument: predictions; second argument: observed values.
ModelAccuarcy <- accuracy(predictSales_test, test$Annual_Sales)
ModelAccuarcy
# RMSE should be as low as possible