-
Notifications
You must be signed in to change notification settings - Fork 0
/
TimeSeriesAnalysis.R
157 lines (113 loc) · 3.69 KB
/
TimeSeriesAnalysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# Load the required packages (they must already be installed) ----
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(pROC))
library(lubridate)

# Load the bookings data; the CSV path is relative to the working directory.
hotelData <- read.csv("hotel_bookings.csv")
# Preview the first rows. The original called head(hData), an object that is
# never defined, so the script stopped here with "object 'hData' not found".
head(hotelData)
# Time series analysis with a seasonal component using multiple linear
# regression ----
# Keep checked-out reservations and count them per reservation status date
# (and arrival month); the counts are summed per calendar month below.
hdd <- hotelData %>%
  filter(reservation_status == "Check-Out") %>%
  group_by(reservation_status_date, arrival_date_month) %>%
  summarise(n = n()) %>%
  ungroup() %>%
  mutate(reservation_status_date = as.Date(reservation_status_date))

# One row per calendar month of the status date:
#   Date             first day of the month
#   Timeperiod       0-based month index, derived from the number of rows
#                    instead of the hard-coded 0:26
#   Month            calendar month (1-12) as a factor, the seasonal term
#   NumberOfBookings checked-out bookings in that month
hd <- hdd %>%
  group_by(Date = floor_date(reservation_status_date, "month")) %>%
  summarise(NumberOfBookings = sum(n)) %>%
  mutate(
    Month = as.factor(month(Date)),
    Timeperiod = seq_len(n()) - 1L
  ) %>%
  select(Date, Timeperiod, Month, NumberOfBookings)

# Drop the final month (row 27 in the original data).
# NOTE(review): presumably that month is incomplete -- confirm.
hd <- head(hd, -1)
# Regression models for the monthly booking series ----
# Model 1: linear trend (Timeperiod) plus a Month factor effect.
# glm() is called without a family argument.
Qmodel1 <- glm(formula = NumberOfBookings ~ Timeperiod + Month, data = hd)
summary(Qmodel1)
# Show the model diagnostic plots on one 2 x 2 page (filled column-wise).
layout(matrix(1:4, nrow = 2, ncol = 2))
plot(Qmodel1)

# Model 2: the same model with an added squared time term. It is still
# linear in its coefficients; the trend is quadratic in Timeperiod.
hd[["Timeperiod2"]] <- with(hd, Timeperiod * Timeperiod)
Qmodel2 <- glm(
  formula = NumberOfBookings ~ Timeperiod + Timeperiod2 + Month,
  data = hd
)
summary(Qmodel2)
layout(matrix(1:4, nrow = 2, ncol = 2))
plot(Qmodel2)
# Validation of the quadratic model with a train/test split ----
library(caret)
# Install boot and carData only when they are missing, rather than
# reinstalling them on every run of the script.
for (pkg in c("boot", "carData")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(boot)
library(carData)
library(car)

# Reproducible random 85% / 15% split of the monthly rows.
set.seed(4)
n <- nrow(hd)
shuffled <- hd[sample(n), ]
n_train <- round(0.85 * n)
train <- shuffled[seq_len(n_train), ]
test <- shuffled[seq_len(n) > n_train, ]

# predict() cannot handle a Month level that the model never saw, so fail
# early with a clear message if the split produced one.
unseen_months <- setdiff(as.character(test$Month), as.character(train$Month))
if (length(unseen_months) > 0) {
  stop(
    "Test rows contain Month levels absent from the training rows: ",
    paste(unseen_months, collapse = ", "),
    call. = FALSE
  )
}

# Fit on the training rows only. The original fitted on all of `hd`, so the
# test rows were part of the fit and the metrics below were not out-of-sample.
Qmodel2 <- glm(NumberOfBookings ~ Timeperiod + Timeperiod2 + Month,
               data = train)

# Prediction on the held-out rows (generic predict() dispatches on the glm).
prediction <- predict(Qmodel2, newdata = test)
prediction
test$NumberOfBookings

# Compute metrics R2, RMSE, MAE
R2(prediction, test$NumberOfBookings)
RMSE(prediction, test$NumberOfBookings)
MAE(prediction, test$NumberOfBookings)

# Console output recorded from an earlier run. It was kept in a Python-style
# triple-quoted string, which R cannot parse, so it is preserved as comments.
# NOTE(review): these numbers appear to come from the model fitted on all of
# `hd`; rerunning with the train-only fit above will give different values.
#
# > prediction
#        1        2        3        4
# 3375.965 3651.104 3034.035 2264.035
# > test$NumberOfBookings
# [1] 3216 3348 3194 2233
# >
# > R2(prediction, test$NumberOfBookings)
# [1] 0.9012691
# > RMSE(prediction, test$NumberOfBookings)
# [1] 189.7452
# > MAE(prediction, test$NumberOfBookings)
# [1] 163.5173
# >
# Time series analysis with forecasting models ----
# Keep only checked-out reservations and count them per reservation status
# date and arrival month.
checked_out <- filter(hotelData, reservation_status == "Check-Out")
hdd <- summarise(
  group_by(checked_out, reservation_status_date, arrival_date_month),
  n = n()
)
hdd$reservation_status_date <- as.Date(hdd$reservation_status_date)

# Sum the counts per calendar month of the status date.
monthly <- group_by(hdd, Date = floor_date(reservation_status_date, "month"))
hd <- summarise(monthly, NumberOfBookings = sum(n))
# Remove the 27th monthly row.
# NOTE(review): presumably an incomplete final month -- confirm.
hd <- hd[-27, ]

# Line chart of monthly bookings over time.
ggplot(data = hd, mapping = aes(x = Date, y = NumberOfBookings)) +
  geom_line()
# Build monthly ts objects (frequency 12) from the booking counts ----
# hs      = full hotel series, July 2015 through August 2017
# trainhs = all but the last `l` observations, starting July 2015
# tesths  = the last `l` observations, ending August 2017
bookings <- hd[["NumberOfBookings"]]
n <- length(bookings)
l <- 2
train_idx <- 1:(n - l)
test_idx <- (n - l + 1):n
hs <- ts(data = bookings, start = c(2015, 7), end = c(2017, 8),
         frequency = 12)
trainhs <- ts(data = bookings[train_idx], start = c(2015, 7), frequency = 12)
tesths <- ts(data = bookings[test_idx], end = c(2017, 8), frequency = 12)
# Stationarity checks on the full series ----
# Runs the augmented Dickey-Fuller and KPSS tests from tseries.
library(tseries)
adf.test(x = hs)
kpss.test(x = hs)

# STL decomposition with a periodic seasonal window, then plot the
# resulting components.
components <- stl(x = hs, s.window = "periodic")
plot(components)
library("forecast")

# Model 1: ARIMA order selected by auto.arima on the full series ----
Afit <- auto.arima(hs, trace = TRUE)
checkresiduals(Afit)
Aforecast <- forecast(Afit)
accuracy(Aforecast)
Aforecast
# Reuse the forecast already computed instead of refitting auto.arima(hs)
# just for the plot.
plot(Aforecast, sub = "Simple plot to forecast")

# Model 2: Holt-Winters with trend and seasonal components ----
# beta and gamma are left at their defaults so they are estimated from the
# data. The original passed beta = TRUE and gamma = TRUE, which HoltWinters
# treats as the numeric value 1, fixing both smoothing parameters at 1.
Hfit <- HoltWinters(hs)
Hfit$fitted
checkresiduals(Hfit)
Hforecast <- forecast(Hfit, h = 8)
accuracy(Hforecast)
Hforecast
plot(Hforecast)

# Validation of the models ----
# In-sample (training-set) accuracy of the full-series fits.
accuracy(Aforecast)
accuracy(Hforecast)

# Hold-out accuracy: refit each model on `trainhs` and score its forecasts
# against the `tesths` observations withheld from that fit.
# NOTE(review): trainhs is short; HoltWinters needs at least two full
# seasonal periods to initialise -- confirm it fits on this data.
h_test <- length(tesths)
Aholdout <- forecast(auto.arima(trainhs), h = h_test)
Hholdout <- forecast(HoltWinters(trainhs), h = h_test)
accuracy(Aholdout, tesths)
accuracy(Hholdout, tesths)