-
Notifications
You must be signed in to change notification settings - Fork 195
/
readData.r
355 lines (273 loc) · 9.07 KB
/
readData.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
# readData.r
#
# author: E. Zivot
# created: Sept 24, 2008
# revised: October 21, 2008
#
# comments
# 1. use packages zoo and xts for representing irregularly spaced time series
# note: only need to load xts since xts requires zoo
#
# R functions used
#
# as.Date() coerce to Date object
# as.numeric() coerce to numeric object
# class() return or set class of object
# colnames() extract column names
# format() format output
# head() show first few rows of data object
# library() load package
# read.csv() read comma separated file into R
# seq() create sequence
# tail() show last few rows of data object
#
# R package zoo functions used
#
# read.zoo() read text file data and create zoo object
# zoo() create zoo object
#
# load the zoo and xts packages
# note: xts requires zoo
#
library("xts")
#
# read .csv files containing Yahoo! monthly adjusted closing price data on sbux
# and msft from March, 1993 through March 2008. The files sbuxPrices.csv and
# msftPrices.csv are assumed to be in the directory C:\classes\econ424\fall2008
#
# read the sbux prices; stringsAsFactors=FALSE keeps the Date column as
# character strings instead of converting it to a factor
sbux.df <- read.csv("C:/classes/econ424/fall2008/sbuxPrices.csv",
                    header = TRUE, stringsAsFactors = FALSE)
# sbux.df is a data.frame object
class(sbux.df)
head(sbux.df)
colnames(sbux.df)
class(sbux.df$Date)
class(sbux.df$Adj.Close)
# now read the msft prices
msft.df <- read.csv("C:/classes/econ424/fall2008/msftPrices.csv",
                    header = TRUE, stringsAsFactors = FALSE)
# notice how dates are not the end of month dates. We will fix that below
# find indices associated with the dates 3/1/1994 and 3/1/1995
idx.start <- which(sbux.df$Date == "3/1/1994")
idx.end <- which(sbux.df$Date == "3/1/1995")
idx.start
idx.end
# each date must occur exactly once, otherwise the row range is ill-defined
stopifnot(length(idx.start) == 1L, length(idx.end) == 1L)
# extract prices between 3/1/1994 and 3/1/1995 using the located rows
# (this range was previously hard-coded as rows 13:25)
sbux.df[idx.start:idx.end, ]
#
# representing time series as ts objects
#
# build ts objects from the adjusted closing prices
# a ts object suits monthly data because the observations are equally
# (regularly) spaced: frequency = 12 with (year, month) start and end points
help("ts")
sbux.ts <- ts(sbux.df$Adj.Close, start = c(1993, 3), end = c(2008, 3),
              frequency = 12)
class(sbux.ts)
str(sbux.ts)
msft.ts <- ts(msft.df$Adj.Close, start = c(1993, 3), end = c(2008, 3),
              frequency = 12)
# accessor functions specific to ts objects
head(sbux.ts)       # first few observations
start(sbux.ts)      # time of the first observation
end(sbux.ts)        # time of the last observation
frequency(sbux.ts)  # observations per unit of time
deltat(sbux.ts)     # time interval between observations
time(sbux.ts)       # vector of sampling times
tsp(sbux.ts)        # start, end and frequency in one vector
# note: dates are not displayed with ts objects
#
# there are only a few methods implemented for ts objects and one cannot
# subset a ts object using dates. In general, subsetting creates a vector or matrix
#
# subset first 5 obvs. Extracted subset is not a ts object!
tmp <- sbux.ts[1:5]
class(tmp)
tmp
# subsetting using the window() command retains the ts object information
?window
# extract first 5 obvs. using window
# both endpoints are inclusive, so March through July 1993 gives exactly 5
# monthly observations (an end of c(1993,8) would return 6)
tmp <- window(sbux.ts, start = c(1993, 3), end = c(1993, 7))
class(tmp)
tmp
# combine the two ts objects column-wise into one multivariate ts object
sbuxmsft.ts <- cbind(sbux.ts, msft.ts)
class(sbuxmsft.ts)
window(sbuxmsft.ts, start = c(1993, 3), end = c(1993, 7))
# draw the SBUX series as a line plot
plot(sbux.ts,
     main = "Monthly closing price of SBUX",
     ylab = "Adjusted close",
     col = "blue", lwd = 2)
# annotate the plot with a legend
# the legend position is given in plot coordinates: x is in units of time
# and y is in the units of the data
legend(x = 1995, y = 30, legend = "SBUX", col = "blue", lty = 1, lwd = 2)
# restrict the plot to the last 8 years
plot(window(sbux.ts, start = c(2000, 3), end = c(2008, 3)),
     main = "Monthly closing price of SBUX",
     ylab = "Adjusted close",
     col = "blue", lwd = 2)
# plot both series: first with the default layout, then on a single panel
plot(sbuxmsft.ts)
plot(sbuxmsft.ts,
     plot.type = "single",
     col = c("blue", "red"), lty = 1:2,
     main = "Monthly closing prices on SBUX and MSFT",
     ylab = "Adjusted close price")
legend(x = 1995, y = 45, legend = c("SBUX","MSFT"),
       col = c("blue", "red"), lty = 1:2)
# manipulating ts data
# lags
# with a positive k, lag() shifts the time base back by k periods, so each
# observation is paired with an earlier date (compare the cbind() output)
lag(sbux.ts)
lag(sbux.ts, k = 12)
cbind(sbux.ts, lag(sbux.ts))
# a negative k gives the usual lag (each value appears k periods later)
lag(sbux.ts, k = -1)
lag(sbux.ts, k = -12)
cbind(sbux.ts, lag(sbux.ts, k = -1))
# differences: one-period and twelve-period
diff(sbux.ts)
diff(sbux.ts, lag = 12)
cbind(sbux.ts, diff(sbux.ts))
# simple returns computed two ways
# tmp uses plain index arithmetic (the result is not a ts object);
# tmp2 uses diff() and lag() and keeps the ts attributes
n <- length(sbux.ts)
tmp <- (sbux.ts[2:n] - sbux.ts[1:(n - 1)]) / sbux.ts[1:(n - 1)]
tmp2 <- diff(sbux.ts) / lag(sbux.ts, k = -1)
sbuxRetSimple.ts <- diff(sbux.ts) / lag(sbux.ts, k = -1)
msftRetSimple.ts <- diff(msft.ts) / lag(msft.ts, k = -1)
window(cbind(sbuxRetSimple.ts, msftRetSimple.ts),
       start = c(1993, 4), end = c(1993, 7))
# simple return over 12 periods
diff(sbux.ts, lag = 12) / lag(sbux.ts, k = -12)
# continuously compounded (cc) returns: differences of log prices
sbuxRet.ts <- diff(log(sbux.ts))
msftRet.ts <- diff(log(msft.ts))
window(cbind(sbuxRet.ts, msftRet.ts), start = c(1993, 4), end = c(1993, 7))
# cc return over 12 periods
diff(log(sbux.ts), lag = 12)
#
# representing time series as zoo objects
#
# open the help page for zoo
help("zoo")
#
# Date class
# A Date object is stored internally as the number of days since
# January 1, 1970. The default input formats are YYYY/m/d and YYYY-m-d
# functions demonstrated below
# as.Date() coerce character string to Date object
# class() set the class of a number so that it is treated as a Date
# format() control how a Date object is displayed
my.date <- as.Date("1970/1/1")
my.date
class(my.date)
as.numeric(my.date)
# non-default input layouts are described with the format argument
as.Date("1/1/1970", format = "%m/%d/%Y")
as.Date("January 1, 1970", format = "%B %d, %Y")
as.Date("01JAN70", format = "%d%b%y")
# display a Date object in a different format
my.date
format(my.date, "%m/%d/%Y")
# turn a plain number into a Date object by setting its class
my.date <- 0
class(my.date) <- "Date"
my.date
# pull out components of the date
weekdays(my.date)
months(my.date)
quarters(my.date)
# create a monthly sequence of dates from March 1993 through March 2008
# dates are written as year/month/day
td <- seq(from = as.Date("1993/3/1"), to = as.Date("2008/3/1"), by = "months")
class(td)
head(td)
# another route: parse the character dates stored in the data file
# note: td2 need not equal td, because td places every observation on the
# first day of the month.
td2 <- as.Date(sbux.df$Date, format = "%m/%d/%Y")
head(td2)
# Date objects support simple arithmetic, e.g. the gap between two dates
td[2] - td[1]
#
# build zoo objects by pairing the price data with the Date index td
#
sbux.z <- zoo(sbux.df$Adj.Close, order.by = td)
msft.z <- zoo(msft.df$Adj.Close, order.by = td)
class(sbux.z)
str(sbux.z)
head(sbux.z)
# the time index and the data can be pulled out separately
index(sbux.z)
coredata(sbux.z)
# first and last dates of the series
start(sbux.z)
end(sbux.z)
# advantages of zoo objects over ts objects:
# 1. a subset is still a zoo object
sbux.z[1:5]
class(sbux.z[1:5])
# 2. observations can be selected by date
sbux.z[as.Date(c("2003/3/1", "2004/3/1"))]
# window() selects everything between two dates
window(sbux.z, start = as.Date("2003/3/1"), end = as.Date("2004/3/1"))
# combine sbux.z and msft.z. cbind() is fine when the zoo objects share
# the same time index; use merge() when the indices may differ
sbuxmsft.z <- cbind(sbux.z, msft.z)
class(sbuxmsft.z)
head(sbuxmsft.z)
# Optional (left disabled): switch the date index to class yearmon, which
# is more appropriate for monthly time series
# index(sbux.z) = as.yearmon(index(sbux.z))
# sbux.z
# plotting zoo objects
# approach 1: draw SBUX first, overlay MSFT with lines(), then add a legend
plot(sbux.z, ylim = c(0, 50), col = "blue", lty = 1, lwd = 2)
lines(msft.z, col = "red", lty = 2, lwd = 2)
legend("topleft", legend = c("SBUX","MSFT"), col = c("blue","red"), lty = 1:2)
# approach 2: plot the merged object so both series are drawn in one call
plot(sbuxmsft.z, plot.type = "single", col = c("blue","red"), lty = 1:2,
     lwd = 2)
legend("topleft", legend = c("SBUX","MSFT"), col = c("blue","red"), lty = 1:2)
# import data directly using read.zoo()
# the format argument describes how the dates are written in the file
?read.zoo
# header is spelled TRUE rather than T: T is an ordinary variable that can
# be reassigned, TRUE is a reserved constant
sbux.z2 <- read.zoo("C:/classes/econ424/fall2008/sbuxPrices.csv",
                    format = "%m/%d/%Y", sep = ",", header = TRUE)
# convert index to yearmon
index(sbux.z2) <- as.yearmon(index(sbux.z2))
head(sbux.z2)
# turn the ts object into a zoo object with as.zoo()
# note: for a ts input, as.zoo() automatically produces a zooreg object
sbux.zr <- as.zoo(sbux.ts)
class(sbux.zr)
str(sbux.zr)
head(sbux.zr)
# the data without the index
coredata(sbux.zr)
# the index without the data, and its class
index(sbux.zr)
class(index(sbux.zr))
# advantages of zooreg objects over ts objects:
# a subset keeps the object information
sbux.zr[1:5]
class(sbux.zr[1:5])
# observations can be selected by index value; because the index is
# numeric here, the values must be wrapped in I()
sbux.zr[I(c(1993.25, 1994.25, 1995.25))]
# A Date or POSIXct index is preferable to a numeric one
# replace the index with one of class "Date"
index(sbux.zr) <- as.Date(sbux.ts)
str(sbux.zr)
head(sbux.zr)
# with a Date index, subscripting by date works directly
sbux.zr[as.Date(c("2003-03-01", "2004-03-01"))]
#
# importing data from Yahoo!
#
# load the tseries package (it must already be installed)
library(tseries)
help("tseries")
# download daily adjusted closing prices for sbux from 3/1/1993 to 3/1/2008
# and return them as a zoo object
SBUX.z <- get.hist.quote(instrument = "sbux",
                         start = "1993-03-01", end = "2008-03-01",
                         quote = "AdjClose", provider = "yahoo",
                         origin = "1970-01-01", compression = "d",
                         retclass = "zoo")
class(SBUX.z)
class(index(SBUX.z))
head(SBUX.z)
start(SBUX.z)
end(SBUX.z)
plot(SBUX.z, col = "blue", ylab = "Adjusted close price",
     main = "Daily closing prices on SBUX")