# week3_assi2_sol1_Dhar.r

# # -*- coding: utf-8 -*-
# """Week3 Assi2 Sol1.ipynb
# 
# Automatically generated by Colaboratory.
# 
# Original file is located at
#     https://colab.research.google.com/drive/1eOEr1XyJCE2RyKhWrOZoMupIwpIulA4o
# """
###########################################################################
## Week-4, Homework-2, Sol-1
## Sreya Dhar 
## Created: Sept 20, 2020
## Edited: Sept 27, 2020
###########################################################################

## installing all the libraries in the R kernel
# 
# install.packages("corrplot")
# install.packages("forecast")
# install.packages("zoo")
# install.packages("rsample")
# install.packages("leaps")
# install.packages("car")
# install.packages("caret")
# install.packages("ROCR")
# install.packages("PerformanceAnalytics")
# install.packages("funModeling")
# install.packages("hrbrthemes")

# Remove every object from the global environment before the analysis starts.
# NOTE(review): this also wipes the workspace of anyone who source()s the
# script into an existing session — running it in a fresh session is safer.
rm(list = ls())
## importing the libraries in R kernel

library(ggplot2)
library(dplyr)
library(tidyverse)
library(tidyr)
library(corrplot)
library(repr)
library(ggplot2)
library(reshape2)
library(forecast)
library(zoo)
library(rsample)
library(ROCR)
library(class)
library(readr)
library(rsample) 
library(leaps)
library(car)
library(PerformanceAnalytics)
library(funModeling)
library(caret)
library(hrbrthemes)


# Set working directory to where data file is located.
# The path is specific to one machine, so only switch to it when it exists;
# otherwise cereal.csv is looked up in the current working directory instead
# of the script aborting inside setwd().
data_dir <- "C:/File E/EAS 506 Statistical Mining I/Week 3/Assignment-2"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}

## upload the dataset
data_nut <- read.csv("cereal.csv", header = TRUE)

## some exploratory data analysis for visualization

head(data_nut)

names(data_nut)

glimpse(data_nut)

# Drop the first column before modelling.
data_nut_C <- data_nut[, -1]
glimpse(data_nut_C)

status(data_nut_C)

# Recode every categorical column as the integer code of its factor level.
# read.csv() returns strings as character (not factor) since R 4.0, so a
# factor-only conversion would leave those columns non-numeric and break the
# cor()/scale() calls further down. Character columns are therefore routed
# through as.factor() first; columns that already are factors receive exactly
# the codes a plain as.numeric() would give them.
data_nut_n <- data_nut_C %>%
  mutate_if(
    function(column) is.factor(column) || is.character(column),
    function(column) as.numeric(as.factor(column))
  )
profiling_num(data_nut_n)

glimpse(data_nut_n)

status(data_nut_n)

## Correlation chart matrix: correlation values combined with histograms and
## scatter plots of the different features.
options(
  repr.plot.width = 10,
  repr.plot.height = 10,
  repr.plot.res = 200
)
chart.Correlation(data_nut_n, histogram = TRUE, pch = 15)

# Per-variable plots and numeric summaries of the encoded data.
plot_num(data_nut_n)

describe(data_nut_n)

summary(data_nut_n)

head(data.matrix(data_nut_n))

## Scatter-plot matrix of all variables.
options(
  repr.plot.width = 7,
  repr.plot.height = 7,
  repr.plot.res = 200
)
pairs(data_nut_n, main = "Pairwise plot")

# heatmap and correlation matrix
options(
  repr.plot.width = 6,
  repr.plot.height = 6,
  repr.plot.res = 200
)
# Centre and scale every column before drawing the heatmap.
data_nut_h <- as.data.frame(scale(data_nut_n, center = TRUE, scale = TRUE))
# NOTE(review): heatmap.2() and bluered() are not provided by any package this
# file attaches explicitly (they look like gplots functions) — confirm that
# one of the loaded libraries makes them available.
heatmap.2(
  as.matrix(data_nut_h),
  scale = "none",
  col = bluered(100),
  trace = "none",
  density.info = "none"
)

# Pairwise correlations; only the lower triangle is drawn, as circles.
L <- cor(data_nut_n)
corrplot(L, method = "circle", type = "lower")

## min-max scaling on the cereal dataset prior to regression ############################################
# Column-wise extremes. They are stored under names that do not mask
# base::max() / base::min(), which later code still passes around as functions
# (e.g. as the FUN argument of tapply()).
col_max <- vapply(data_nut_n, max, numeric(1))
col_min <- vapply(data_nut_n, min, numeric(1))
# (x - min) / (max - min) maps every column onto [0, 1].
data_nut_s <- as.data.frame(
  scale(data_nut_n, center = col_min, scale = col_max - col_min)
)

################### splitting the whole data into train and test sets (75:25) ############################################
# Fix the RNG seed so the random split — and every train/test figure, axis
# range and model index derived from it further down — is the same on each run.
set.seed(123)
data_split <- initial_split(data_nut_s, prop = 0.75) ## splitting the data by library 'rsample'
data_train <- training(data_split)
data_test  <- testing(data_split)

#################################### Linear Regression #################################################
# Least-squares fit of rating on every other column of the training set.
data_lm <- lm(rating ~ ., data = data_train)
summary(data_lm)

# Default lm diagnostic plots, arranged on a 2 x 2 grid.
options(
  repr.plot.width = 5,
  repr.plot.height = 5,
  repr.plot.res = 180
)
par(mfrow = c(2, 2))
plot(data_lm)

# Other useful accessors for the fitted model
coefficients(data_lm)           # model coefficients
confint(data_lm, level = 0.95)  # confidence intervals for the coefficients
fitted(data_lm)                 # fitted values on the training data
residuals(data_lm)              # residuals
anova(data_lm)                  # anova table
vcov(data_lm)                   # covariance matrix of the coefficient estimates
influence(data_lm)              # linear regression diagnostics

anova(data_lm)["Residuals", "Mean Sq"] # residual mean square from the anova table
sigma(data_lm)                          # residual standard deviation

## predict lm model on test set
pred_test <- predict(data_lm, newdata = data_test)
# Mean squared prediction error on the held-out rows.
mse_error <- mean((pred_test - data_test$rating)^2) ## mse of test set
# R2 here is the r.squared of the training fit, reported next to the test MSE.
c(MSE = mse_error, R2 = summary(data_lm)$r.squared)

sqrt(mse_error) ## rmse of test set

## predict lm model on train set
pred_train_lm <- predict(data_lm, newdata = data_train)
mse_error_tr <- mean((pred_train_lm - data_train$rating)^2) ## mse of train set
c(MSE = mse_error_tr)

sqrt(mse_error_tr) ## rmse of train set

## MAE error (test set first, then train set)
mean(abs(pred_test - data_test$rating))
mean(abs(pred_train_lm - data_train$rating))

## predicted vs. original rating on the train and test sets
options(
  repr.plot.width = 8,
  repr.plot.height = 4,
  repr.plot.res = 200
)
par(mfrow = c(1, 2))
# Left panel: training set, with the y = x reference line.
plot(
  data_train$rating, pred_train_lm,
  xlim = c(0, 3), ylim = c(0, 3),
  xlab = "original rating",
  ylab = "predicted rating on training set",
  col = "blue"
)
abline(a = 0, b = 1, lty = 2)
# Right panel: test set, same axes and reference line.
plot(
  data_test$rating, pred_test,
  xlim = c(0, 3), ylim = c(0, 3),
  xlab = "original rating",
  ylab = "predicted rating on test set",
  col = "red"
)
abline(a = 0, b = 1, lty = 2)

################################ Backward subsets selection ####################################
# Backward search over subset sizes up to 14, fitted on the training set.
data_back <- regsubsets(
  rating ~ .,
  data = data_train,
  nvmax = 14,
  method = "backward"
)
back_sum <- summary(data_back)

# inclusion markers of the 14-variable model
back_sum$outmat[14, ]

# inclusion markers for every subset size
back_sum$outmat

# coefficients of the 14-variable model
data.frame(coef(data_back, 14))

## prediction on train and test set for backward selection
test_error <- rep(NA, 14)
train_error <- rep(NA, 14)

# Design matrices (intercept column included) for both sets.
new_test <- model.matrix(rating ~ ., data = data_test)
new_train <- model.matrix(rating ~ ., data = data_train)

for (i in seq_len(14)) {
  # Coefficients of the size-i model; their names select the matching columns.
  coeffs <- coef(data_back, id = i)
  kept <- names(coeffs)
  pred_te <- new_test[, kept] %*% coeffs
  pred_tr <- new_train[, kept] %*% coeffs
  test_error[i] <- mean((data_test$rating - pred_te)^2)   # MSE on test
  train_error[i] <- mean((data_train$rating - pred_tr)^2) # MSE on train
}

## mse plot from train and test prediction

options(repr.plot.width = 6, repr.plot.height = 6, repr.plot.res = 250)
plot(test_error, ylim = c(0.1, 0.3), col = "red", type = "b",
     xlab = "subset size", ylab = "MSE from backward selection")
# Vertical markers at the subset size with the lowest MSE of each curve.
# abline() takes only a/b/h/v plus graphical parameters, so just `v` and the
# line styling are passed.
abline(v = which.min(test_error), col = "red", lwd = 4, lty = 2)
lines(train_error, col = "blue", type = "b")
abline(v = which.min(train_error), col = "blue", lwd = 2, lty = 2)

legend(0.3, inset = .02, c("Test Set", "Train Set"), lty = c(1, 1), lwd = c(2.5, 2.5), col = c("red", "blue"))

# Subset size preferred by each criterion of the backward search.
with(
  back_sum,
  data.frame(
    Adj.R2 = which.max(adjr2),
    CP = which.min(cp),
    BIC = which.min(bic),
    RSS = which.min(rss)
  )
)

## comparison for statistical parameters from backward selection
options(repr.plot.width = 6, repr.plot.height = 6, repr.plot.res = 200)
## One panel per criterion (Cp, BIC, RSS, adjusted R^2). Each panel draws the
## criterion against subset size, marks every point, and crosses a vertical
## and a horizontal dotted line at the optimum. abline() takes only a/b/h/v
## plus graphical parameters, so both guide lines come from a single call.
par(mfrow = c(2, 2))
plot(back_sum$cp, xlab = "Number of Variables", ylab = "Mallow's Cp", type = "l")
points(x = seq_along(back_sum$cp), y = back_sum$cp, col = "red", cex = 1, pch = 20)
abline(v = which.min(back_sum$cp), h = min(back_sum$cp), col = "blue", lty = 3)

plot(back_sum$bic, xlab = "Number of Variables", ylab = "BIC", type = "l")
points(x = seq_along(back_sum$bic), y = back_sum$bic, col = "red", cex = 1, pch = 20)
abline(v = which.min(back_sum$bic), h = min(back_sum$bic), col = "blue", lty = 3)

plot(back_sum$rss, xlab = "Number of Variables", ylab = "RSS", type = "l")
points(x = seq_along(back_sum$rss), y = back_sum$rss, col = "red", cex = 1, pch = 20)
abline(v = which.min(back_sum$rss), h = min(back_sum$rss), col = "blue", lty = 3)

# Adjusted R^2 is maximised, unlike the three criteria above.
plot(back_sum$adjr2, xlab = "Number of Variables", ylab = "Adjusted R^2", type = "l")
points(x = seq_along(back_sum$adjr2), y = back_sum$adjr2, col = "red", cex = 1, pch = 20)
abline(v = which.max(back_sum$adjr2), h = max(back_sum$adjr2), col = "blue", lty = 3)

################### performing CV for cross-checking ###################################

set.seed(123) # fixed seed so the fold assignment is repeatable
k <- 10       # no. of folds in cv
# Fold label in 1..k, drawn with replacement for every row of the scaled data.
cv_folds <- sample(seq_len(k), nrow(data_nut_s), replace = TRUE)
# k x 14 matrix of per-fold MSE; columns are named "1" .. "14" by subset size.
cv_errors <- matrix(
  NA,
  nrow = k,
  ncol = 14,
  dimnames = list(NULL, as.character(1:14))
)

# Predict from one model of a subset-selection fit (from lecture slides).
#   object  : fitted object whose stored call has the model formula as its
#             second element and which has a coef() method accepting `id`
#   newdata : data frame to predict on
#   id      : index of the model whose coefficients are used
# Returns a one-column matrix of predictions, one row per row of `newdata`.
predict.regsubsets <- function(object, newdata, id, ...) {
  # Rebuild the design matrix for `newdata` from the formula in the fit's call.
  model_formula <- as.formula(object$call[[2]])
  design <- model.matrix(model_formula, newdata)
  # Coefficient names pick out the design columns used by model `id`.
  beta <- coef(object, id = id)
  design[, names(beta)] %*% beta
}

for (j in seq_len(k)) {

  # perform backward subset selection on rows not equal to j.
  # `method = "backward"` is passed explicitly: without it regsubsets() falls
  # back to its default exhaustive search, which is not the procedure this
  # cross-check of the backward selection is meant to validate.
  cv_subset <- regsubsets(
    rating ~ .,
    data_nut_s[cv_folds != j, ],
    nvmax = 14,
    method = "backward"
  )

  # prediction on the held-out fold j for every subset size
  for (i in seq_len(14)) {
    pred_cv <- predict.regsubsets(cv_subset, data_nut_s[cv_folds == j, ], id = i)
    cv_errors[j, i] <- mean((data_nut_s$rating[cv_folds == j] - pred_cv)^2)
  }
}

mean_cv_errors <- colMeans(cv_errors)             # mean held-out MSE per subset size
se_cv_errors <- apply(cv_errors, 2, sd) / sqrt(k) # standard error across the k folds

## CV mse per subset size, without and with error bars
par(mfrow = c(1, 2))
options(
  repr.plot.width = 8,
  repr.plot.height = 4,
  repr.plot.res = 200
)
# First panel: mean CV MSE as a line with point markers.
plot(
  mean_cv_errors,
  type = "l", col = "black",
  xlab = "No. of Variables", ylab = "MSE in CV (test set)",
  ylim = c(0.12, 0.24)
)
points(mean_cv_errors, col = "red", cex = 1, pch = 20)
# Second panel: the same means with bars at +/- one standard error.
errbar(
  1:14, mean_cv_errors,
  mean_cv_errors + se_cv_errors, mean_cv_errors - se_cv_errors,
  type = "l",
  xlab = "No. of Variables", ylab = "Error bars from CV (test set)",
  ylim = c(0.12, 0.24)
)
points(mean_cv_errors, col = "red", cex = 1, pch = 20)

# Summary of the regsubsets fit left in cv_subset once the CV loop has
# finished, i.e. the fit from the final fold only.
cv_sum <- summary(cv_subset)

# Subset size preferred by each criterion for that fit.
with(
  cv_sum,
  data.frame(
    Adj.R2 = which.max(adjr2),
    CP = which.min(cp),
    BIC = which.min(bic),
    RSS = which.min(rss)
  )
)

test_error

## comparison of mse error on test set from backward and CV
options(repr.plot.width = 7, repr.plot.height = 7, repr.plot.res = 250)
plot(test_error, ylim = c(0.1, 0.3), type = "l", col = "blue", xlab = "subset size", ylab = "MSE in test set")
points(test_error, col = "green", cex = 1, pch = 20)
# Vertical marker at the subset size with the lowest hold-out MSE. abline()
# takes only a/b/h/v plus graphical parameters; a horizontal line at
# 100 * min(test_error) would lie outside ylim, so none is drawn.
abline(v = which.min(test_error), col = "blue", lwd = 2, lty = 2)

# lines() adds to the existing axes, so axis labels and limits do not apply.
lines(mean_cv_errors, type = "l", col = "black")
points(mean_cv_errors, col = "red", cex = 1, pch = 20)
legend(0.3, inset = .02, c("MSE from backward subset", "MSE from CV"), lty = c(1, 1), lwd = c(2.0, 2.0), col = c("blue", "black"))

# Vertical marker at the subset size with the lowest CV MSE.
abline(v = which.min(mean_cv_errors), col = "black", lwd = 2, lty = 2)
# abline(x = which.max(mean_cv_errors),h = max(mean_cv_errors)*100, type = "l", col = "black", lty = 2)


################################### Exhaustive Subsets selection (nbest=100) ##################################

## from lecture slides
data_all <- regsubsets(
  rating ~ .,
  data = data_train,
  nbest = 100,          # keep up to 100 models for each number of predictors
  nvmax = NULL,         # NULL for no limit on number of variables
  force.in = NULL,
  force.out = NULL,
  really.big = TRUE,
  method = "exhaustive"
)
exh_all <- summary(data_all)
names(exh_all)

head(exh_all$which)

# The row names of the inclusion matrix are converted to numbers and used
# below as the subset size of each candidate model.
data_all_size <- as.numeric(rownames(exh_all$which))
data_all_size

length(data_all_size)

options(
  repr.plot.width = 5,
  repr.plot.height = 5,
  repr.plot.res = 200
)
plot(data_all)

# Smallest residual sum of squares among the candidates of each subset size.
all_rss <- exh_all$rss
all_best_rss <- tapply(all_rss, data_all_size, min)
all_best_rss

data_all_size

all_adjr2 <- exh_all$adjr2

# Subset size next to adjusted R^2, one row per candidate model.
exh_all_adjr2 <- data.frame(data_all_size, all_adjr2)
dim(exh_all_adjr2)

#exh_all_adjr2["all_adjr2"]

# Intercept-only fit supplies the RSS for subset size 0 on the train data.
all_dummy <- lm(rating ~ 1, data_train)
all_dummy_best <- c(sum(resid(all_dummy)^2), all_best_rss)
#
options(
  repr.plot.width = 6,
  repr.plot.height = 6,
  repr.plot.res = 200
)
par(mfrow = c(1, 1))
# Best RSS per size as a connected curve, every candidate as a small triangle.
plot(
  0:14, all_dummy_best,
  ylim = c(0, 2), type = "b",
  xlab = "subset size", ylab = "Residual Sum of Square (train set)",
  col = "black"
)
points(data_all_size, all_rss, pch = 2, col = "red", cex = 0.5)

## predicting on train and test set
# One entry per candidate model returned by the nbest = 100 search. The count
# is read from the summary's inclusion matrix rather than hard-coded, so the
# loop stays in step with the fit whatever the number of predictors is.
n_models_ex1 <- nrow(exh_all$which)
test_error_ex1 <- rep(NA, n_models_ex1)
train_error_ex1 <- rep(NA, n_models_ex1)

# Design matrices (intercept column included) for both sets.
new_test_ex1 <- model.matrix(rating ~ ., data = data_test)
new_train_ex1 <- model.matrix(rating ~ ., data = data_train)

for (i in seq_len(n_models_ex1)) {
  # Coefficients of candidate i; their names select the matching columns.
  coeffs_ex1 <- coef(data_all, id = i)
  pred_te_ex1 <- new_test_ex1[, names(coeffs_ex1)] %*% coeffs_ex1
  pred_tr_ex1 <- new_train_ex1[, names(coeffs_ex1)] %*% coeffs_ex1
  test_error_ex1[i] <- mean((data_test$rating - pred_te_ex1)^2)   # prediction on test
  train_error_ex1[i] <- mean((data_train$rating - pred_tr_ex1)^2) # prediction on train
}

all_adjr2 <- exh_all$adjr2
# Best adjusted R^2 per subset size: larger is better, so take the maximum.
all_best_adjr2 <- tapply(all_adjr2, data_all_size, max)
all_best_adjr2

# Lowest test / train MSE among the candidates of each subset size.
all_best_mse <- tapply(test_error_ex1, data_all_size, min)
all_best_mse_tr <- tapply(train_error_ex1, data_all_size, min)

## plot of mse on train and test set

options(repr.plot.width = 7, repr.plot.height = 7, repr.plot.res = 200)
par(mfrow = c(1, 1))
# Every candidate model: test MSE as red dots, train MSE as blue triangles.
plot(data_all_size, test_error_ex1, pch = 20, ylim = c(0.0, 0.4), col = "red", xlab = "subset size", ylab = "MSE from exhaustive model (nbest=100)", cex = 0.5)
points(data_all_size, train_error_ex1, pch = 17, col = "blue", cex = 0.5)
# Lowest MSE per subset size. lines() draws on the existing axes, so only the
# line type and colour are passed (axis labels and limits are not accepted).
lines(all_best_mse, type = "b", col = "red")
lines(all_best_mse_tr, type = "b", col = "blue")
legend(0.4, inset = .02, c("Test Set (least mse)", "Train Set (least mse)"), lty = c(1, 1), lwd = c(2.0, 2.0), col = c("red", "blue"))
exh_all_adjr2 <- as.data.frame(exh_all$adjr2)

# NOTE(review): 706 is a hard-coded model index, presumably picked from an
# earlier run — confirm it still points at the intended model.
exh_all_adjr2[706, ]

## index of the preferred model under each criterion (exhaustive, nbest=100)
with(
  exh_all,
  data.frame(
    Adj.R2 = which.max(adjr2),
    CP = which.min(cp),
    BIC = which.min(bic),
    RSS = which.min(rss)
  )
) # data_all_size

par(mfrow = c(1, 2))
# Inclusion markers of two hard-coded model indices.
# NOTE(review): 706 and 806 look like values read off an earlier run — verify.
as.data.frame(exh_all$outmat[706, ])
as.data.frame(exh_all$outmat[806, ])

# regsubsets plots of the candidate models, one panel per scale.
options(
  repr.plot.width = 8,
  repr.plot.height = 10,
  repr.plot.res = 200
)
par(mfrow = c(2, 2))
plot(data_all, scale = "r2", main = "R^2")
plot(data_all, scale = "adjr2", main = "Adjusted R^2")
plot(data_all, scale = "Cp", main = "Cp")
plot(data_all, scale = "bic", main = "BIC")

################################### Exhaustive Subsets selection, nbest=1 ##################################

data_exh <- regsubsets(
  rating ~ .,
  data = data_train,
  nbest = 1,            # only 'one' best model for each number of predictors
  nvmax = NULL,         # NULL for no limit on number of variables
  force.in = NULL,
  force.out = NULL,
  really.big = TRUE,
  method = "exhaustive"
)
exh_sum <- summary(data_exh)
names(exh_sum)

# inclusion markers for every subset size
as.data.frame(exh_sum$outmat)

# R^2 of the best model of each size
exh_sum$rsq

# coefficients of the 14-variable model
coef(data_exh, 14)

#plot of r2 for different models

options(repr.plot.width = 4, repr.plot.height = 4, repr.plot.res = 200)
exh_r2 <- as.data.frame(exh_sum$rsq)
names(exh_r2) <- "R2"
plot(x = seq_len(nrow(exh_r2)), y = exh_r2[, "R2"], xlab = "Number of Variables", ylab = "R^2", type = "l")
points(x = seq_len(nrow(exh_r2)), y = exh_r2[, "R2"], col = "red", cex = 1, pch = 20)
# Dotted guide lines through the largest R^2. abline() takes only a/b/h/v plus
# graphical parameters, so the vertical and horizontal line share one call.
abline(v = which.max(exh_r2[, "R2"]), h = max(exh_r2[, "R2"]), col = "blue", lty = 3)

options(repr.plot.width = 6, repr.plot.height = 6, repr.plot.res = 200)


## Plot Cp, BIC, RSS, Adjusted R2 for the exhaustive model (nbest=1)
## Each panel draws the criterion against subset size, marks every point, and
## crosses a vertical and a horizontal dotted line at the optimum. abline()
## takes only a/b/h/v plus graphical parameters, so both guide lines come from
## a single call.

par(mfrow = c(2, 2))
plot(exh_sum$cp, xlab = "Number of Variables", ylab = "Mallow's Cp", type = "l")
points(x = seq_along(exh_sum$cp), y = exh_sum$cp, col = "red", cex = 1, pch = 20)
abline(v = which.min(exh_sum$cp), h = min(exh_sum$cp), col = "blue", lty = 3)


plot(exh_sum$bic, xlab = "Number of Variables", ylab = "BIC", type = "l")
points(x = seq_along(exh_sum$bic), y = exh_sum$bic, col = "red", cex = 1, pch = 20)
abline(v = which.min(exh_sum$bic), h = min(exh_sum$bic), col = "blue", lty = 3)

plot(exh_sum$rss, xlab = "Number of Variables", ylab = "RSS", type = "l")
points(x = seq_along(exh_sum$rss), y = exh_sum$rss, col = "red", cex = 1, pch = 20)
abline(v = which.min(exh_sum$rss), h = min(exh_sum$rss), col = "blue", lty = 3)

# Adjusted R^2 is maximised, unlike the three criteria above.
plot(exh_sum$adjr2, xlab = "Number of Variables", ylab = "Adjusted R^2", type = "l")
points(x = seq_along(exh_sum$adjr2), y = exh_sum$adjr2, col = "red", cex = 1, pch = 20)
abline(v = which.max(exh_sum$adjr2), h = max(exh_sum$adjr2), col = "blue", lty = 3)

# Subset size preferred by each criterion of the exhaustive search (nbest=1).

with(
  exh_sum,
  data.frame(
    Adj.R2 = which.max(adjr2),
    CP = which.min(cp),
    BIC = which.min(bic),
    RSS = which.min(rss)
  )
)

# regsubsets plots of the best model per size, one panel per scale.
options(
  repr.plot.width = 8,
  repr.plot.height = 8,
  repr.plot.res = 200
)
par(mfrow = c(2, 2))
plot(data_exh, scale = "r2", main = "R^2")
plot(data_exh, scale = "adjr2", main = "Adjusted R^2")
plot(data_exh, scale = "Cp", main = "Cp")
plot(data_exh, scale = "bic", main = "BIC")

# inclusion markers of the 9-, 12- and 13-variable models
exh_sum$outmat[9, ]
exh_sum$outmat[12, ]
exh_sum$outmat[13, ]

# variables for best models
options(
  repr.plot.width = 10,
  repr.plot.height = 5,
  repr.plot.res = 200
)
par(mfrow = c(1, 2))
## Adjusted R2
res_adjr <- subsets(
  data_exh,
  statistic = "adjr2",
  legend = FALSE,
  min.size = 5,
  main = "Adjusted R^2"
)
## Mallow Cp, with a reference line of intercept 1 and slope 1
res_mcp <- subsets(
  data_exh,
  statistic = "cp",
  legend = FALSE,
  min.size = 5,
  main = "Mallow Cp"
)
abline(a = 1, b = 1, lty = 2)

res_adjr ## gives the legend in the previous plots

## prediction on train and test set

test_error_ex <- rep(NA, 14)
train_error_ex <- rep(NA, 14)

# Design matrices (intercept column included) for both sets.
new_test_ex <- model.matrix(rating ~ ., data = data_test)
new_train_ex <- model.matrix(rating ~ ., data = data_train)

for (i in seq_len(14)) {
  # Coefficients of the best size-i model; names select the matching columns.
  coeffs_ex <- coef(data_exh, id = i)
  kept_ex <- names(coeffs_ex)
  pred_te_ex <- new_test_ex[, kept_ex] %*% coeffs_ex
  pred_tr_ex <- new_train_ex[, kept_ex] %*% coeffs_ex
  test_error_ex[i] <- mean((data_test$rating - pred_te_ex)^2)   # prediction on test
  train_error_ex[i] <- mean((data_train$rating - pred_tr_ex)^2) # prediction on train
}

options(repr.plot.width = 7, repr.plot.height = 7, repr.plot.res = 200)
plot(test_error_ex, ylim = c(0.1, 0.4), col = "red", type = "b", xlab = "subset size", ylab = "MSE from exhaustive model (nbest=1)")
# Vertical marker at the lowest test MSE, in the colour of the test curve.
# abline() takes only a/b/h/v plus graphical parameters.
abline(v = which.min(test_error_ex), col = "red", lty = 2, lwd = 4)

lines(train_error_ex, col = "blue", type = "b")
# Vertical marker at the lowest train MSE, in the colour of the train curve.
abline(v = which.min(train_error_ex), col = "blue", lty = 2, lwd = 2)

legend(0.4, inset = .02, c("Test Set", "Train Set"), lty = c(1, 1), lwd = c(2.5, 2.5), col = c("red", "blue"))

test_error

test_error_ex

## comparison of mse on test set from the previous considered models.
## abline() takes only a/b/h/v plus graphical parameters, and lines() draws on
## the existing axes, so neither is given axis labels, limits or a `type`
## it cannot use.
options(repr.plot.width = 7, repr.plot.height = 7, repr.plot.res = 200)
plot(test_error, ylim = c(0.1, 0.4), type = "b", col = "blue", xlab = "subset size", ylab = "MSE in test set")
#points(test_error, col="green",cex=1,pch=20)
abline(v = which.min(test_error), col = "blue", lty = 2, lwd = 4)


# Mean CV error per subset size.
lines(mean_cv_errors, type = "b", col = "black")
#points(mean_cv_errors, col="red",cex=1,pch=20)
abline(v = which.min(mean_cv_errors), col = "black", lty = 2, lwd = 2)


# Test MSE of the exhaustive search with one model per size.
lines(test_error_ex, col = "green", type = "b")
points(test_error_ex, col = "yellow", cex = 1, pch = 20)
abline(v = which.min(test_error_ex), col = "green", lty = 2, lwd = 2)


# Lowest test MSE per size among the nbest = 100 candidates.
lines(all_best_mse, type = "b", col = "brown")
points(all_best_mse, col = "yellow", cex = 1, pch = 20)
#abline(v = which.min(all_best_mse),y = min(all_best_mse)*100, type = "d", col = "brown", lty=2, lwd=2)

# One lty/lwd value per legend entry.
legend(0.4, inset = .02, c("MSE from backward subset", "MSE from CV", "MSE from exhaustive model (nbest=1)", "MSE from exhaustive model (nbest=100)"),
      lty = rep(1, 4), lwd = rep(2.0, 4), col = c("blue", "black", "green", "brown"))

# Exhaustive subset search on the whole scaled dataset, for model comparison.
whole_exh <- regsubsets(
  rating ~ .,
  data = data_nut_s,
  nbest = 1,            # only 'one' best model for each number of predictors
  nvmax = NULL,         # NULL for no limit on number of variables
  force.in = NULL,
  force.out = NULL,
  really.big = TRUE,
  method = "exhaustive"
)
whole_sum <- summary(whole_exh)
names(whole_sum)

# Subset size preferred by each criterion on the whole dataset.
with(
  whole_sum,
  data.frame(
    Adj.R2 = which.max(adjr2),
    CP = which.min(cp),
    BIC = which.min(bic),
    RSS = which.min(rss)
  )
)

## end ###