-
Notifications
You must be signed in to change notification settings - Fork 0
/
ex8_2_1.R
executable file
·69 lines (55 loc) · 2.08 KB
/
ex8_2_1.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# exercise 8.2.1
rm(list=ls())
graphics.off()
source("setup.R")
# Load data
library(R.matlab)
dat <- readMat(file.path('Data', 'synth6.mat'))
X <- dat$X
N <- dat$N
attributeNames <- as.vector(unlist(dat$attributeNames))
M <- dat$M
y <- dat$y
C <- dat$C
classNames <- as.vector(unlist(dat$classNames))
# substitute spaces with dots to make handling of columns in data matrix easier
attributeNames <- gsub(' ', '.', attributeNames)
## Fit model using bootstrap aggregation (bagging)
# Number of rounds of bagging
L = 100;
# Variable for model parameters
#make a empty list
w_est = vector('list', L)
# Weights for selecting samples in each bootstrap
weights = rep(1, times=N)/N;
(fmla <- as.formula(paste("y_train ~ ", paste(attributeNames, collapse= "+"))))
# For each round of bagging
for(l in 1:L){
# Choose data objects by random sampling with replacement
i = discreternd(weights, N);
# Extract training set
X_train = X[i, ];
y_train = y[i];
X_traindf <- data.frame(X_train)
colnames(X_traindf) <- attributeNames
# Fit logistic regression model to training data and save result
w_est[[l]] = glm(fmla, family=binomial(link="logit"), data=X_traindf);
}
Xdf <- data.frame(X)
# Xdf <- data.frame(X[,])
colnames(X) <- attributeNames
# Evaluate the logistic regression on the training data
plist <- lapply(w_est, FUN=function(model, newd, typ) predict(model, newdata=newd, type=typ), newd=Xdf, typ='response')
p <- matrix(unlist(plist), nrow=N, byrow=FALSE)
# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est = rowSums(p>.5)>L/2;
# Compute error rate
ErrorRate = sum(y!=y_est)/N;
print(paste('Error rate: ', ErrorRate*100, '%', sep=''));
## Plot decision boundary
predictionFunction <- function(Xgriddf, model, typ){
plist <- lapply(w_est, FUN=function(model, newd, typ) predict(model, newdata=newd, type=typ), newd=Xgriddf, typ=typ)
p <- matrix(unlist(plist), nrow=dim(Xgriddf)[1], byrow=FALSE)
rowMeans(p)
}
dbplot(X, attributeNames, predictionFunction, y=y, contourLevels=0.5, contourCols='white', model=w_est, typ='response')