diff --git a/DESCRIPTION b/DESCRIPTION index 5c97d85..f05ebaf 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: tswgewrapped Title: Helpful wrappers for 'tswge' and 'vars' packages -Version: 1.8.8 +Version: 1.8.9 Authors@R: c(person("David", "Josephs", email = "josephsd@smu.edu", role = c("aut", "cre")), person("Nikhil", "Gupta", email = "guptan@smu.edu", role = c("aut"))) Description: This package provides several helpful wrappers for the already useful tswge package. In the future, this package intends to move away from the tswge backend, to be faster, with more readable source code. diff --git a/R/ModelBuildMultivariateVAR.R b/R/ModelBuildMultivariateVAR.R index f706d49..046ba66 100644 --- a/R/ModelBuildMultivariateVAR.R +++ b/R/ModelBuildMultivariateVAR.R @@ -16,7 +16,7 @@ ModelBuildMultivariateVAR = R6::R6Class( #' @param data The dataframe containing the time series realizations (data should not contain time index) #' @param var_interest The output variable of interest (dependent variable) #' @param mdl_list A names list of all models (see format below) - #' @param alpha Significance level to use for filtering of variables from the recommendations + #' @param alpha Significance level to use for filtering of variables from the recommendations (Default = 0.05) #' @param verbose How much to print during the model building and other processes (Default = 0) #' @param ... Additional parameers to feed to VARSelect (if applicable) and VAR --> Most notably "exogen" #' @return A new `ModelCompareMultivariateVAR` object. @@ -96,15 +96,29 @@ ModelBuildMultivariateVAR = R6::R6Class( #' (2) The names of the significant variables to use #' (3) Lag value to use for the model get_recommendations = function(){ + + # Check if any seasonality term is significant + season_data = self$summarize_build() %>% + dplyr::group_by(Model, trend_type, season) %>% + dplyr::filter(grepl("^sd[0-9]+$", OriginalVar)) %>% + dplyr::summarise(num_sig_season_factors = n()) %>% + dplyr::ungroup() %>% + dplyr::mutate(sig_season_factors = ifelse(num_sig_season_factors >= 1, TRUE, FALSE)) %>% + dplyr::select(-num_sig_season_factors) + results = self$summarize_build() %>% dplyr::filter(!(OriginalVar %in% c("trend", "const"))) %>% - dplyr::filter(!(grepl("^s[0-9]+$", OriginalVar))) %>% # Remove seasonality factors + dplyr::filter(!(grepl("^sd[0-9]+$", OriginalVar))) %>% # Remove seasonality factors dplyr::group_by(Model, trend_type, season) %>% dplyr::summarise(num_sig_vars = dplyr::n(), lag_to_use = -min(MaxLag, na.rm = TRUE), vars_to_use = paste0(OriginalVar, collapse = ",")) %>% - dplyr::ungroup() - + dplyr::ungroup() %>% + dplyr::left_join(season_data, by = c("Model", "trend_type", "season")) %>% + dplyr::mutate(season_to_use = ifelse(sig_season_factors == TRUE, season, 0)) %>% + tidyr::replace_na(list(season_to_use = 0)) %>% + dplyr::select(-sig_season_factors) + return(results) }, @@ -125,7 +139,7 @@ ModelBuildMultivariateVAR = R6::R6Class( cat("\n\n\n") cat(paste("Model: ", name, "\n")) trend_type = recommendations[[name]][['trend_type']] - season = recommendations[[name]][['season']] + season = recommendations[[name]][['season_to_use']] if (season == 0){ season = NULL } @@ -223,7 +237,8 @@ ModelBuildMultivariateVAR = R6::R6Class( mdl_subset = list() for (name in mdl_subset_names){ - mdl_subset[[name]] = private$models[[name]][['varfit']] + mdl_subset[[name]][['varfit']] = private$models[[name]][['varfit']] + mdl_subset[[name]][['sliding_ase']] = FALSE } return(mdl_subset) diff --git a/R/ModelCompareBase.R b/R/ModelCompareBase.R index 5234819..ab8632c 100644 --- a/R/ModelCompareBase.R +++ b/R/ModelCompareBase.R @@ -93,7 +93,7 @@ ModelCompareBase = R6::R6Class( stop("The model names above already exist in this comparison object. Please provide unique names.") } - mdl_list = private$clean_model_input(mdl_list) + mdl_list = private$clean_model_input(mdl_list, batch_size) if (all(is.na(private$models))){ private$models = mdl_list @@ -162,8 +162,6 @@ ModelCompareBase = R6::R6Class( private$models[[name]][['time_test_start']] = res$time_test_start private$models[[name]][['time_test_end']] = res$time_test_end private$models[[name]][['batch_num']] = res$batch_num - # private$models[[name]][['AICs']] = res$AICs - # private$models[[name]][['BICs']] = res$BICs private$models[[name]][['f']] = res$f private$models[[name]][['ll']] = res$ll private$models[[name]][['ul']] = res$ul @@ -472,6 +470,10 @@ ModelCompareBase = R6::R6Class( private$data = data }, + get_data_subset = function(col_names){ + stop("You are calling the 'get_data_subset' method in the parent class. This should be implemented in the child class.") + }, + set_step_n.ahead = function(step_n.ahead){ private$step_n.ahead = step_n.ahead }, @@ -490,7 +492,7 @@ ModelCompareBase = R6::R6Class( return(private$verbose) }, - clean_model_input = function(mdl_list){ + clean_model_input = function(mdl_list, batch_size){ # If the inputs are missing sliding_ase, make it FALSE # Also add 'metric_has_been_computed' key for (name in names(mdl_list)){ @@ -499,7 +501,6 @@ ModelCompareBase = R6::R6Class( } mdl_list[[name]][['metric_has_been_computed']] = FALSE - } return(mdl_list) diff --git a/R/ModelCompareMultivariateVAR.R b/R/ModelCompareMultivariateVAR.R index 8752e0b..309f54b 100644 --- a/R/ModelCompareMultivariateVAR.R +++ b/R/ModelCompareMultivariateVAR.R @@ -70,19 +70,19 @@ ModelCompareMultivariateVAR = R6::R6Class( #' @description Returns the VAR model Build Summary #' @returns A dataframe containing the following columns #' 'Model': Name of the model - #' 'Selection': The selection criteria used for K value (AIC or BIC) - #' 'Trend': The trend argument used in the VARselect and VAR functions + #' 'Trend': The trend argument used in the VAR functions + #' 'Season' The season argument used in the VAR functions #' 'SlidingASE': Whether Sliding ASE will be used for this model #' 'Init_K': The K value recommended by the VARselect function #' 'Final_K': The adjusted K value to take into account the smaller batch size (only when using sliding_ase) summarize_build = function(){ - results = dplyr::tribble(~Model, ~Selection, ~Trend, ~SlidingASE, ~Init_K, ~Final_K) + results = dplyr::tribble(~Model, ~Trend, ~Season, ~SlidingASE, ~Init_K, ~Final_K) for (name in names(private$get_models())){ results = results %>% dplyr::add_row(Model = name, - Selection = private$models[[name]][['select']], Trend = private$models[[name]][['trend_type']], + Season = ifelse(is.null(private$models[[name]][['season']]), 0, private$models[[name]][['season']]), SlidingASE = private$models[[name]][['sliding_ase']], Init_K = private$models[[name]][['k_initial']], Final_K = private$models[[name]][['k_final']] @@ -102,22 +102,54 @@ ModelCompareMultivariateVAR = R6::R6Class( set_var_interest = function(var_interest){private$var_interest = var_interest}, + get_data_subset = function(col_names){ + return(self$get_data() %>% dplyr::select(col_names)) + }, + get_len_x = function(){ return(nrow(self$get_data())) }, - clean_model_input = function(mdl_list){ + clean_model_input = function(mdl_list, batch_size){ mdl_list = super$clean_model_input(mdl_list) + for (name in names(mdl_list)){ + k = mdl_list[[name]]$varfit$p + + mdl_list[[name]][['varfit_alldata']] = mdl_list[[name]]$varfit + mdl_list[[name]][['vars_to_use']] = colnames(mdl_list[[name]]$varfit$y) + mdl_list[[name]][['k_initial']] = k + mdl_list[[name]][['trend_type']] = mdl_list[[name]]$varfit$type + mdl_list[[name]][['season']] = mdl_list[[name]]$varfit$call$season + + k_final = k + ## If using sliding ASE, make sure that the batch size is large enough to support the number of lags + if (mdl_list[[name]][['sliding_ase']] == TRUE){ + k_final = private$validate_k(k, batch_size, + season = mdl_list[[name]]$varfit$call$season, + col_names = colnames(mdl_list[[name]]$varfit$y)) + } + + mdl_list[[name]][['k_final']] = k_final + + AIC = stats::AIC(mdl_list[[name]]$varfit) + BIC = stats::BIC(mdl_list[[name]]$varfit) + + mdl_list[[name]][['AIC']] = AIC + mdl_list[[name]][['BIC']] = BIC + + } + return(mdl_list) }, get_sliding_ase_results = function(name, step_n.ahead){ - res = sliding_ase_var(data = self$get_data(), + res = sliding_ase_var(data = private$get_data_subset(col_names = private$get_models()[[name]][['vars_to_use']]), var_interest = self$get_var_interest(), k = private$get_models()[[name]][['k_final']], trend_type = private$get_models()[[name]][['trend_type']], + season = private$get_models()[[name]][['season']], n.ahead = self$get_n.ahead(), batch_size = private$get_models()[[name]][['batch_size']], step_n.ahead = step_n.ahead, verbose = private$get_verbose()) @@ -172,96 +204,29 @@ ModelCompareMultivariateVAR = R6::R6Class( }, build_models = function(verbose = 0){ - for (name in names(private$get_models())){ - cat("\n\n\n") - cat(paste("Model: ", name, "\n")) - trend_type = private$get_models()[[name]][['trend_type']] - cat(paste("Trend type: ", trend_type, "\n")) - - varselect = vars::VARselect(self$get_data(), - lag.max = private$get_models()[[name]][['lag.max']], - type = trend_type, - season = NULL, exogen = NULL) - - if (verbose >= 1){ - cat("\nVARselect Object:\n") - print(varselect) - } - - selection = private$extract_correct_varselection(varselect) - - select = tolower(private$get_models()[[name]][['select']]) - if (select == 'aic'){ - k = selection[["AIC(n)"]] - } - else if (select == 'bic'){ - k = selection[["SC(n)"]] - } - else{ - stop("'select' argument must be with 'aic' or 'bic'") - } - cat(paste("Lag K to use for the VAR Model: ", k, "\n")) - - k_final = k - ## If using sliding ASE, make sure that the batch size is large enough to support the number of lags - if (private$get_models()[[name]][['sliding_ase']] == TRUE){ - k_final = private$validate_k(k) - } - - # Fit to Entire Data - # This might be needed in many places to computing it here. - varfit = vars::VAR(self$get_data(), p=k_final, type=trend_type) - - if (verbose >= 1){ - cat(paste0("\n\nPrinting summary of the VAR fit for the variable of interest: ", var_interest, "\n")) - print(summary(varfit$varresult[[var_interest]])) - } - - ## Inplace - private$models[[name]][['varselect_alldata']] = varselect - private$models[[name]][['k_initial']] = k - private$models[[name]][['k_final']] = k_final - private$models[[name]][['varfit_alldata']] = varfit - - } + }, evaluate_xIC = function(){ - for (name in names(private$get_models())){ - - varfit = private$models[[name]][['varfit_alldata']] - - AIC = stats::AIC(varfit) - BIC = stats::BIC(varfit) - - private$models[[name]][['AIC']] = AIC - private$models[[name]][['BIC']] = BIC - } + }, - extract_correct_varselection = function(vselect){ - criteria = vselect$criteria + validate_k = function(k, batch_size, season, col_names){ + # https://stats.stackexchange.com/questions/234975/how-many-endogenous-variables-in-a-var-model-with-120-observations + ## num_vars (in code) = K in the equation in link + ## k (code) = p in the equation in link + t = batch_size - self$get_n.ahead() + num_vars = length(col_names) - if(sum(criteria == -Inf, na.rm = TRUE) > 0){ - warning("VARselect produced -Inf values. These will be removed before making final 'K' selection.") - - criteria[criteria == -Inf] = max(criteria) + 1 + if (is.null(season)){ + season = 1 # So we dont subtract anthign from the numerator } - selection = data.frame(t(Rfast::rowMins(criteria))) - colnames(selection) = rownames(criteria) - - selection = selection %>% - assertr::verify(assertr::has_all_names("AIC(n)", "HQ(n)", "SC(n)", "FPE(n)")) - - return(selection) - }, - - validate_k = function(k){ new_k = k - num_vars = ncol(self$get_data()) - if (k * num_vars >= (self$get_batch_size()-1)){ - new_k = floor((self$get_batch_size()-1)/num_vars) - 1 # Being a little more conservative + + # NOTE: I changed 1 to 2 since we can also have a case with trend and const instead of just constant. + if (k * (num_vars+1) + 2 > t){ + new_k = floor((t-2-(season-1))/(num_vars + 1)) warning("Although the lag value k: ", k, " selected by VARselect will work for your full dataset, is too large for your batch size. Reducing k to allow Batch ASE calculations. New k: ", new_k, " If you do not want to reduce the k value, increase the batch size or make sliding_ase = FALSE for this model in the model list") } return((new_k)) diff --git a/R/ModelCompareUnivariate.R b/R/ModelCompareUnivariate.R index 4d4a2a0..496e28c 100644 --- a/R/ModelCompareUnivariate.R +++ b/R/ModelCompareUnivariate.R @@ -211,7 +211,7 @@ ModelCompareUnivariate = R6::R6Class( return(length(self$get_data())) }, - clean_model_input = function(mdl_list){ + clean_model_input = function(mdl_list, batch_size){ # If the inputs are missing p, d, q, or s values, this will add 0s to make it consistent for (name in names(mdl_list)){ if (is.null(mdl_list[[name]][['phi']])){ diff --git a/R/sliding_ase.R b/R/sliding_ase.R index 4ea2f5b..990b80b 100644 --- a/R/sliding_ase.R +++ b/R/sliding_ase.R @@ -133,10 +133,12 @@ sliding_ase_univariate = function(x, #' @param var_interest The output variable of interest (dependent variable) #' @param k The lag value to use for the VAR model (generally determined by the VARselect function) #' @param trend_type The trend type to use in VARselect and the VAR model. Refer to vars::VARselect and vars::VAR for valid options. +#' @param season The seasonality to use in the VAR model. #' @param batch_size Window Size used #' @param n.ahead last n.ahead data points in each batch will be used for prediction and ASE calculations #' @param step_n.ahead Whether to step each batch by n.ahead values (Default = FALSE) #' @param verbose How much to print during the model building and other processes (Default = 0) +#' @param ... Additional arguments to pass to the VAR model #' @return Named list #' 'ASEs' - ASE values #' 'time_test_start' - Time Index indicating start of test time corresponding to the ASE values @@ -150,9 +152,10 @@ sliding_ase_univariate = function(x, #' 'time.forecasts' - Time Corresponding to each forecast, upper and lower limit values #' @export sliding_ase_var = function(data, var_interest, - k, trend_type = NA, + k, trend_type = NA, season = NULL, n.ahead = NA, batch_size = NA, # Forecasting specific arguments - step_n.ahead = TRUE, verbose = 0 + step_n.ahead = TRUE, verbose = 0, + ... ) { # Sliding CV ... batches are mutually exclusive @@ -198,8 +201,6 @@ sliding_ase_var = function(data, var_interest, time_test_start = numeric(num_batches) time_test_end = numeric(num_batches) batch_num = numeric(num_batches) - # AICs = numeric(num_batches) - # BICs = numeric(num_batches) for (i in 0:(num_batches-1)) { @@ -225,7 +226,7 @@ sliding_ase_var = function(data, var_interest, test_data = data[test_start:test_end, ] # Fit model for the batch - varfit = vars::VAR(train_data, p=k, type=trend_type) + varfit = vars::VAR(train_data, p=k, type=trend_type, season = season, ...) if (verbose >= 2){ cat(paste0("\nBatch: ", i+1, "\n")) @@ -236,7 +237,7 @@ sliding_ase_var = function(data, var_interest, # Forecast for the batch forecasts = stats::predict(varfit, n.ahead=n.ahead) - forecasts = forecasts$fcst[[var_interest]] ## Get the forecasts only for the dependent variable + forecasts = forecasts[['fcst']][[var_interest]] ## Get the forecasts only for the dependent variable ASEs[i+1] = mean((test_data[, var_interest] - forecasts[, 'fcst'])^2) start = start + step_size @@ -246,18 +247,12 @@ sliding_ase_var = function(data, var_interest, forecasts.ul[test_start: test_end] = forecasts[, 'upper'] time.forecasts[test_start: test_end] = seq(test_start, test_end, 1) - # # Batch AIC and BIC - # AICs[i+1] = stats::AIC(varfit) - # BICs[i+1] = stats::BIC(varfit) - } return(list(ASEs = ASEs, time_test_start = time_test_start, time_test_end = time_test_end, batch_num = batch_num, - # AICs = AICs, - # BICs = BICs, f = forecasts.f, ll = forecasts.ll, ul = forecasts.ul, diff --git a/inst/extdata/multivar_VAR_build_final_model_no_season_list.rds b/inst/extdata/multivar_VAR_build_final_model_no_season_list.rds new file mode 100644 index 0000000..6bc2410 Binary files /dev/null and b/inst/extdata/multivar_VAR_build_final_model_no_season_list.rds differ diff --git a/inst/extdata/multivar_VAR_build_final_model_season_list.rds b/inst/extdata/multivar_VAR_build_final_model_season_list.rds new file mode 100644 index 0000000..e9e2447 Binary files /dev/null and b/inst/extdata/multivar_VAR_build_final_model_season_list.rds differ diff --git a/inst/extdata/multivar_VAR_build_no_season_recommendations.csv b/inst/extdata/multivar_VAR_build_no_season_recommendations.csv new file mode 100644 index 0000000..2d937ca --- /dev/null +++ b/inst/extdata/multivar_VAR_build_no_season_recommendations.csv @@ -0,0 +1,7 @@ +Model,trend_type,season,num_sig_vars,lag_to_use,vars_to_use,season_to_use +AIC Both,both,0,2,3,"logGNP,logM1",0 +AIC None,none,0,1,1,logGNP,0 +AIC Trend,trend,0,3,1,"logGNP,rs,rl",0 +BIC Both,both,0,1,1,logGNP,0 +BIC None,none,0,1,1,logGNP,0 +BIC Trend,trend,0,1,1,logGNP,0 diff --git a/inst/extdata/multivar_VAR_summary.csv b/inst/extdata/multivar_VAR_build_no_season_summary.csv similarity index 100% rename from inst/extdata/multivar_VAR_summary.csv rename to inst/extdata/multivar_VAR_build_no_season_summary.csv diff --git a/inst/extdata/multivar_VAR_build_season_recommendations.csv b/inst/extdata/multivar_VAR_build_season_recommendations.csv new file mode 100644 index 0000000..3ee0443 --- /dev/null +++ b/inst/extdata/multivar_VAR_build_season_recommendations.csv @@ -0,0 +1,3 @@ +"Model","trend_type","season","num_sig_vars","lag_to_use","vars_to_use","season_to_use" +"AIC Trend","trend",3,2,3,"logGNP,logM1",0 +"BIC Trend","trend",4,2,1,"logGNP,rl",4 diff --git a/inst/extdata/multivar_VAR_build_season_summary.csv b/inst/extdata/multivar_VAR_build_season_summary.csv new file mode 100644 index 0000000..a7e2ad5 --- /dev/null +++ b/inst/extdata/multivar_VAR_build_season_summary.csv @@ -0,0 +1,7 @@ +"Model","select","trend_type","season","p","SigVar","OriginalVar","Lag","MaxLag" +"AIC Trend","aic","trend",3,10,"logGNP.l1","logGNP",-1,-1 +"AIC Trend","aic","trend",3,10,"logM1.l3","logM1",-3,-3 +"AIC Trend","aic","trend",3,10,"trend","trend",0,0 +"BIC Trend","bic","trend",4,2,"logGNP.l1","logGNP",-1,-1 +"BIC Trend","bic","trend",4,2,"rl.l1","rl",-1,-1 +"BIC Trend","bic","trend",4,2,"sd1","sd1",0,0 diff --git a/inst/extdata/multivar_VAR_compare_summary.csv b/inst/extdata/multivar_VAR_compare_summary.csv new file mode 100644 index 0000000..57fe8af --- /dev/null +++ b/inst/extdata/multivar_VAR_compare_summary.csv @@ -0,0 +1,7 @@ +Model,Trend,Season,SlidingASE,Init_K,Final_K +AIC None,none,0,TRUE,3,3 +AIC Trend,trend,0,TRUE,3,3 +AIC Both,both,0,TRUE,10,6 +BIC None,none,0,TRUE,2,2 +BIC Trend,trend,0,TRUE,2,2 +BIC Both,both,0,TRUE,2,2 diff --git a/inst/extdata/multivar_VAR_final_model_list.rds b/inst/extdata/multivar_VAR_final_model_list.rds deleted file mode 100644 index e42535c..0000000 Binary files a/inst/extdata/multivar_VAR_final_model_list.rds and /dev/null differ diff --git a/inst/extdata/multivar_VAR_recommendations.csv b/inst/extdata/multivar_VAR_recommendations.csv deleted file mode 100644 index 3153657..0000000 --- a/inst/extdata/multivar_VAR_recommendations.csv +++ /dev/null @@ -1,7 +0,0 @@ -"Model","trend_type","season","num_sig_vars","lag_to_use","vars_to_use" -"AIC Both","both",0,2,3,"logGNP,logM1" -"AIC None","none",0,1,1,"logGNP" -"AIC Trend","trend",0,3,1,"logGNP,rs,rl" -"BIC Both","both",0,1,1,"logGNP" -"BIC None","none",0,1,1,"logGNP" -"BIC Trend","trend",0,1,1,"logGNP" diff --git a/man/ModelBuildMultivariateVAR.Rd b/man/ModelBuildMultivariateVAR.Rd index 4676c2b..97236a9 100644 --- a/man/ModelBuildMultivariateVAR.Rd +++ b/man/ModelBuildMultivariateVAR.Rd @@ -61,7 +61,7 @@ Initialize an object to compare several Univatiate Time Series Models \item{\code{mdl_list}}{A names list of all models (see format below)} -\item{\code{alpha}}{Significance level to use for filtering of variables from the recommendations} +\item{\code{alpha}}{Significance level to use for filtering of variables from the recommendations (Default = 0.05)} \item{\code{verbose}}{How much to print during the model building and other processes (Default = 0)} diff --git a/man/ModelCompareMultivariateVAR.Rd b/man/ModelCompareMultivariateVAR.Rd index eca7f30..8f90d8d 100644 --- a/man/ModelCompareMultivariateVAR.Rd +++ b/man/ModelCompareMultivariateVAR.Rd @@ -6,8 +6,8 @@ \value{ A dataframe containing the following columns 'Model': Name of the model - 'Selection': The selection criteria used for K value (AIC or BIC) - 'Trend': The trend argument used in the VARselect and VAR functions + 'Trend': The trend argument used in the VAR functions + 'Season' The season argument used in the VAR functions 'SlidingASE': Whether Sliding ASE will be used for this model 'Init_K': The K value recommended by the VARselect function 'Final_K': The adjusted K value to take into account the smaller batch size (only when using sliding_ase) diff --git a/man/sliding_ase_var.Rd b/man/sliding_ase_var.Rd index 2a766b1..70349a3 100644 --- a/man/sliding_ase_var.Rd +++ b/man/sliding_ase_var.Rd @@ -10,10 +10,12 @@ sliding_ase_var( var_interest, k, trend_type = NA, + season = NULL, n.ahead = NA, batch_size = NA, step_n.ahead = TRUE, - verbose = 0 + verbose = 0, + ... ) } \arguments{ @@ -25,6 +27,8 @@ sliding_ase_var( \item{trend_type}{The trend type to use in VARselect and the VAR model. Refer to vars::VARselect and vars::VAR for valid options.} +\item{season}{The seasonality to use in the VAR model.} + \item{n.ahead}{last n.ahead data points in each batch will be used for prediction and ASE calculations} \item{batch_size}{Window Size used} @@ -32,6 +36,8 @@ sliding_ase_var( \item{step_n.ahead}{Whether to step each batch by n.ahead values (Default = FALSE)} \item{verbose}{How much to print during the model building and other processes (Default = 0)} + +\item{...}{Additional arguments to pass to the VAR model} } \value{ Named list diff --git a/tests/testthat/test-MultivariateBuildVAR.R b/tests/testthat/test-MultivariateBuildVAR.R index 36e6c49..23ef385 100644 --- a/tests/testthat/test-MultivariateBuildVAR.R +++ b/tests/testthat/test-MultivariateBuildVAR.R @@ -1,5 +1,9 @@ -#### Model Build Multivariate Class #### -test_that("ModelBuildMultivariateVAR", { + +# TODO: ... may not work in the initialization. Check. +# TODO: Check if adding models after object initialization works, especially id we have asked for recommendations already + +#### Non Seasonal Model #### +test_that("Non Seasonal Model", { data("USeconomic") data = as.data.frame(USeconomic) @@ -21,20 +25,21 @@ test_that("ModelBuildMultivariateVAR", { summary_build = mdl_build$summarize_build() - #summary_build %>% write.csv(file = "multivar_VAR_summary.csv", row.names = FALSE) recommendations = mdl_build$get_recommendations() + + #summary_build %>% write.csv(file = "multivar_VAR_summary.csv", row.names = FALSE) #recommendations %>% write.csv(file = "multivar_VAR_recommendations.csv", row.names = FALSE) # https://stackoverflow.com/questions/32328802/where-should-i-put-data-for-automated-tests-with-testthat # http://r-pkgs.had.co.nz/data.html#other-data - summary_target_file = system.file("extdata", "multivar_VAR_summary.csv", package = "tswgewrapped", mustWork = TRUE) + summary_target_file = system.file("extdata", "multivar_VAR_build_no_season_summary.csv", package = "tswgewrapped", mustWork = TRUE) summary_target = read.csv(summary_target_file, header = TRUE, stringsAsFactors = FALSE) %>% dplyr::as_tibble() %>% dplyr::mutate_if(is.numeric, as.double) # Converts integer to double to match type good = all.equal(summary_build, summary_target) testthat::expect_equal(good, TRUE) - recommendation_target_file = system.file("extdata", "multivar_VAR_recommendations.csv", package = "tswgewrapped", mustWork = TRUE) + recommendation_target_file = system.file("extdata", "multivar_VAR_build_no_season_recommendations.csv", package = "tswgewrapped", mustWork = TRUE) recommendation_target = read.csv(recommendation_target_file, header = TRUE, stringsAsFactors = FALSE) %>% dplyr::as_tibble() %>% dplyr::mutate_if(is.numeric, as.double) # Converts integer to double to match type @@ -44,36 +49,64 @@ test_that("ModelBuildMultivariateVAR", { mdl_build$build_recommended_models() final_models = mdl_build$get_final_models() - final_model_list_file = system.file("extdata", "multivar_VAR_final_model_list.rds", package = "tswgewrapped", mustWork = TRUE) + # saveRDS(final_models, "multivar_VAR_final_model_list.rds") + + final_model_list_file = system.file("extdata", "multivar_VAR_build_final_model_no_season_list.rds", package = "tswgewrapped", mustWork = TRUE) final_models_target = readRDS(final_model_list_file) good = all.equal(final_models, final_models_target) testthat::expect_equal(good, TRUE) - # mdl_list = mdl_build$get_models() - # - # for (name in names(mdl_list)){ - # print(name) - # } - # - # # Get Variables Used - # colnames(mdl_list[["AIC None"]]$varfit$y) - # #mdl_list[["AIC None"]]$vars_used - # - # # colnames(mdl_list[[name]]$varfit$y) - # # mdl_list[[name]]$vars_used - # - # ## Get p value used - # mdl_list[["AIC Both"]]$p - # mdl_list[["AIC Both"]]$varfit$p - # - # mdl_list[["AIC Trend - R"]]$p - # mdl_list[["AIC Trend - R"]]$varfit$p - # - # mdl_list[["AIC Both - R"]]$p - # mdl_list[["AIC Both - R"]]$varfit$p - # - # ## Get Season Data - # mdl_list[["AIC None"]]$varfit$call$season + +}) + +#### Seasonal Model #### +test_that("Seasonal Model", { + + data("USeconomic") + data = as.data.frame(USeconomic) + colnames(data) = c("logM1", "logGNP", "rs", "rl") + + lag.max = 10 + + models = list("AIC Trend" = list(select = "aic", trend_type = "trend", season = 3, lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", season = 4, lag.max = lag.max)) + + var_interest = 'logGNP' + + mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 1) + + summary_build = mdl_build$summarize_build() + recommendations = mdl_build$get_recommendations() + + # summary_build %>% write.csv(file = "multivar_VAR_build_season_summary.csv", row.names = FALSE) + # recommendations %>% write.csv(file = "multivar_VAR_build_season_recommendations.csv", row.names = FALSE) + + # https://stackoverflow.com/questions/32328802/where-should-i-put-data-for-automated-tests-with-testthat + # http://r-pkgs.had.co.nz/data.html#other-data + summary_target_file = system.file("extdata", "multivar_VAR_build_season_summary.csv", package = "tswgewrapped", mustWork = TRUE) + summary_target = read.csv(summary_target_file, header = TRUE, stringsAsFactors = FALSE) %>% + dplyr::as_tibble() %>% + dplyr::mutate_if(is.numeric, as.double) # Converts integer to double to match type + good = all.equal(summary_build, summary_target) + testthat::expect_equal(good, TRUE) + + recommendation_target_file = system.file("extdata", "multivar_VAR_build_season_recommendations.csv", package = "tswgewrapped", mustWork = TRUE) + recommendation_target = read.csv(recommendation_target_file, header = TRUE, stringsAsFactors = FALSE) %>% + dplyr::as_tibble() %>% + dplyr::mutate_if(is.numeric, as.double) # Converts integer to double to match type + good = all.equal(recommendations %>% dplyr::mutate_if(is.numeric, as.double), recommendation_target) + testthat::expect_equal(good, TRUE) + + mdl_build$build_recommended_models() + final_models = mdl_build$get_final_models() + + # saveRDS(final_models, "multivar_VAR_build_final_model_season_list.rds") + + final_model_list_file = system.file("extdata", "multivar_VAR_build_final_model_season_list.rds", package = "tswgewrapped", mustWork = TRUE) + final_models_target = readRDS(final_model_list_file) + good = all.equal(final_models, final_models_target) + testthat::expect_equal(good, TRUE) }) diff --git a/tests/testthat/test-MultivariateCompareVAR.R b/tests/testthat/test-MultivariateCompareVAR.R index 6fa45c4..214282d 100644 --- a/tests/testthat/test-MultivariateCompareVAR.R +++ b/tests/testthat/test-MultivariateCompareVAR.R @@ -1,57 +1,66 @@ -#### Model Compare Multivariate Class #### -test_that("ModelCompareMultivariateVAR", { +# TODO: Add checks for model that have seasonality +# TODO: Add checks for adding and removing models after object initialization + + +#### Batch ASE = FALSE #### +test_that("Batch ASE = FALSE", { + + data("USeconomic") data = as.data.frame(USeconomic) colnames(data) = c("logM1", "logGNP", "rs", "rl") lag.max = 10 - models = list("VARS AIC No Trend A" = list(select = "aic", trend_type = "none", lag.max = lag.max, sliding_ase = FALSE), - "VARS AIC Trend A" = list(select = "aic", trend_type = "trend", lag.max = lag.max, sliding_ase = FALSE), - "VARS AIC Const + Trend A" = list(select = "aic", trend_type = "both", lag.max = lag.max, sliding_ase = FALSE), - "VARS BIC No Trend A" = list(select = "bic", trend_type = "none", lag.max = lag.max, sliding_ase = FALSE), - "VARS BIC Trend A" = list(select = "bic", trend_type = "trend", lag.max = lag.max, sliding_ase = FALSE), - "VARS BIC Const + Trend A" = list(select = "bic", trend_type = "both", lag.max = lag.max, sliding_ase = FALSE) - # "VARS AIC No Trend B" = list(select = "aic", trend_type = "none", lag.max = lag.max, sliding_ase = TRUE), - # "VARS AIC Trend B" = list(select = "aic", trend_type = "trend", lag.max = lag.max, sliding_ase = TRUE), - # "VARS AIC Const + Trend B" = list(select = "aic", trend_type = "both", lag.max = lag.max, sliding_ase = TRUE), - # "VARS BIC No Trend B" = list(select = "bic", trend_type = "none", lag.max = lag.max, sliding_ase = TRUE), - # "VARS BIC Trend B" = list(select = "bic", trend_type = "trend", lag.max = lag.max, sliding_ase = TRUE), - # "VARS BIC Const + Trend B" = list(select = "bic", trend_type = "both", lag.max = lag.max, sliding_ase = TRUE) - ) + models = list("AIC None" = list(select = "aic", trend_type = "none", lag.max = lag.max), + "AIC Trend" = list(select = "aic", trend_type = "trend", lag.max = lag.max), + "AIC Both" = list(select = "aic", trend_type = "both", lag.max = lag.max), + "BIC None" = list(select = "bic", trend_type = "none", lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", lag.max = lag.max), + "BIC Both" = list(select = "bic", trend_type = "both", lag.max = lag.max) + ) + var_interest = 'logGNP' + + mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 0) + + + recommendations = mdl_build$get_recommendations() + mdl_build$build_recommended_models() + + # Get only user defined models + models = mdl_build$get_final_models(subset = 'u') + + + #### With sliding ASE = FALSE + + for (name in names(models)){ + models[[name]][['sliding_ase']] = FALSE + } n.ahead = 2 - batch_size = 50 - var_interest = 'logGNP' - + #### With n_step.ahead = TRUE (Default) mdl_compare = ModelCompareMultivariateVAR$new(data = data, var_interest = var_interest, - mdl_list = models, n.ahead = n.ahead, batch_size = batch_size) - - - mdl_compare$get_xIC() ## TODO: May change this later + mdl_list = models, n.ahead = n.ahead) + mdl_compare$get_xIC() mdl_compare$plot_simple_forecasts() - - #mdl_compare$plot_batch_forecasts(only_sliding = TRUE) mdl_compare$plot_batch_forecasts(only_sliding = FALSE) - #mdl_compare$plot_batch_ases(only_sliding = TRUE) mdl_compare$plot_batch_ases(only_sliding = FALSE) mdl_compare$plot_histogram_ases() mdl_compare$statistical_compare() - # mdl_compare$plot_multiple_realizations(n.realizations = 4, seed = 100, scales = 'free_y') - ## Compute ASE values from object ASEs = mdl_compare$get_tabular_metrics(ases = TRUE) - ASE_aic_none_a = ASEs %>% dplyr::filter(Model == "VARS AIC No Trend A") %>% dplyr::select(ASE) %>% purrr::pluck(1) - ASE_aic_trend_a = ASEs %>% dplyr::filter(Model == "VARS AIC Trend A") %>% dplyr::select(ASE) %>% purrr::pluck(1) - ASE_aic_both_a = ASEs %>% dplyr::filter(Model == "VARS AIC Const + Trend A") %>% dplyr::select(ASE) %>% purrr::pluck(1) - ASE_bic_none_a = ASEs %>% dplyr::filter(Model == "VARS BIC No Trend A") %>% dplyr::select(ASE) %>% purrr::pluck(1) - ASE_bic_trend_a = ASEs %>% dplyr::filter(Model == "VARS BIC Trend A") %>% dplyr::select(ASE) %>% purrr::pluck(1) - ASE_bic_both_a = ASEs %>% dplyr::filter(Model == "VARS BIC Const + Trend A") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_aic_none_a = ASEs %>% dplyr::filter(Model == "AIC None") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_aic_trend_a = ASEs %>% dplyr::filter(Model == "AIC Trend") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_aic_both_a = ASEs %>% dplyr::filter(Model == "AIC Both") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_bic_none_a = ASEs %>% dplyr::filter(Model == "BIC None") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_bic_trend_a = ASEs %>% dplyr::filter(Model == "BIC Trend") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_bic_both_a = ASEs %>% dplyr::filter(Model == "BIC Both") %>% dplyr::select(ASE) %>% purrr::pluck(1) ## Compute Forecasts, Upper Limits and Lower Limits from object forecasts = mdl_compare$get_tabular_metrics(ases = FALSE) @@ -63,29 +72,29 @@ test_that("ModelCompareMultivariateVAR", { MeanUL = mean(ul, na.rm = TRUE) ) - meanForecast_aic_none_a = summary %>% dplyr::filter(Model == "VARS AIC No Trend A") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) - meanLL_aic_none_a = summary %>% dplyr::filter(Model == "VARS AIC No Trend A") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) - meanUL_aic_none_a = summary %>% dplyr::filter(Model == "VARS AIC No Trend A") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + meanForecast_aic_none_a = summary %>% dplyr::filter(Model == "AIC None") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_aic_none_a = summary %>% dplyr::filter(Model == "AIC None") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_aic_none_a = summary %>% dplyr::filter(Model == "AIC None") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) - meanForecast_aic_trend_a = summary %>% dplyr::filter(Model == "VARS AIC Trend A") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) - meanLL_aic_trend_a = summary %>% dplyr::filter(Model == "VARS AIC Trend A") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) - meanUL_aic_trend_a = summary %>% dplyr::filter(Model == "VARS AIC Trend A") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + meanForecast_aic_trend_a = summary %>% dplyr::filter(Model == "AIC Trend") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_aic_trend_a = summary %>% dplyr::filter(Model == "AIC Trend") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_aic_trend_a = summary %>% dplyr::filter(Model == "AIC Trend") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) - meanForecast_aic_both_a = summary %>% dplyr::filter(Model == "VARS AIC Const + Trend A") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) - meanLL_aic_both_a = summary %>% dplyr::filter(Model == "VARS AIC Const + Trend A") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) - meanUL_aic_both_a = summary %>% dplyr::filter(Model == "VARS AIC Const + Trend A") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + meanForecast_aic_both_a = summary %>% dplyr::filter(Model == "AIC Both") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_aic_both_a = summary %>% dplyr::filter(Model == "AIC Both") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_aic_both_a = summary %>% dplyr::filter(Model == "AIC Both") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) - meanForecast_bic_none_a = summary %>% dplyr::filter(Model == "VARS BIC No Trend A") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) - meanLL_bic_none_a = summary %>% dplyr::filter(Model == "VARS BIC No Trend A") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) - meanUL_bic_none_a = summary %>% dplyr::filter(Model == "VARS BIC No Trend A") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + meanForecast_bic_none_a = summary %>% dplyr::filter(Model == "BIC None") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_bic_none_a = summary %>% dplyr::filter(Model == "BIC None") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_bic_none_a = summary %>% dplyr::filter(Model == "BIC None") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) - meanForecast_bic_trend_a = summary %>% dplyr::filter(Model == "VARS BIC Trend A") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) - meanLL_bic_trend_a = summary %>% dplyr::filter(Model == "VARS BIC Trend A") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) - meanUL_bic_trend_a = summary %>% dplyr::filter(Model == "VARS BIC Trend A") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + meanForecast_bic_trend_a = summary %>% dplyr::filter(Model == "BIC Trend") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_bic_trend_a = summary %>% dplyr::filter(Model == "BIC Trend") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_bic_trend_a = summary %>% dplyr::filter(Model == "BIC Trend") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) - meanForecast_bic_both_a = summary %>% dplyr::filter(Model == "VARS BIC Const + Trend A") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) - meanLL_bic_both_a = summary %>% dplyr::filter(Model == "VARS BIC Const + Trend A") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) - meanUL_bic_both_a = summary %>% dplyr::filter(Model == "VARS BIC Const + Trend A") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + meanForecast_bic_both_a = summary %>% dplyr::filter(Model == "BIC Both") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_bic_both_a = summary %>% dplyr::filter(Model == "BIC Both") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_bic_both_a = summary %>% dplyr::filter(Model == "BIC Both") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) @@ -94,51 +103,48 @@ test_that("ModelCompareMultivariateVAR", { train_data = data %>% dplyr::filter(dplyr::row_number() <= (n - n.ahead)) test_data = data %>% dplyr::filter(dplyr::row_number() > (n - n.ahead)) - trend_types = c("none", "trend", "both") - selections = c("AIC(n)", "SC(n)") + trend_types = rep(c("none", "trend", "both"),2) + ks = c(3,3,10,2,2,2) ASEs = c() mean_forecasts = c() mean_lls = c() mean_uls = c() - for (selection in selections){ - for (trend_type in trend_types){ - ## VARselect - vselect = vars::VARselect(data, lag.max = lag.max, type = trend_type, season = NULL, exogen = NULL) - vselect # Gives AIC values for various K values - k = vselect$selection[[selection]] - - ## VAR Model - lsfit = vars::VAR(train_data, p=k, type=trend_type) - - ## Predictions - preds = stats::predict(lsfit, n.ahead=n.ahead) - - results = preds$fcst[[var_interest]] %>% - tibble::as.tibble() %>% - dplyr::mutate(Time = seq(n-n.ahead+1,n,1)) - - ## ASE - data_all = data %>% - dplyr::mutate(Time = dplyr::row_number()) - - ASE_data = data_all %>% - dplyr::full_join(results, by = "Time") %>% - na.omit() - - ASE = mean((ASE_data[[var_interest]] - ASE_data$fcst)^2, na.rm = TRUE) - ASEs = c(ASEs, ASE) + for (i in seq_along(trend_types)){ + + trend_type = trend_types[i] + k = ks[i] + + ## VAR Model + varfit = vars::VAR(train_data, p=k, type=trend_type) + + ## Predictions + preds = stats::predict(varfit, n.ahead=n.ahead) + + results = preds$fcst[[var_interest]] %>% + tibble::as_tibble() %>% + dplyr::mutate(Time = seq(n-n.ahead+1,n,1)) + + ## ASE + data_all = data %>% + dplyr::mutate(Time = dplyr::row_number()) + + ASE_data = data_all %>% + dplyr::full_join(results, by = "Time") %>% + na.omit() + + ASE = mean((ASE_data[[var_interest]] - ASE_data$fcst)^2, na.rm = TRUE) + ASEs = c(ASEs, ASE) + + mean_forecast = mean(ASE_data$fcst, na.rm = TRUE) + mean_ll = mean(ASE_data$lower, na.rm = TRUE) + mean_ul = mean(ASE_data$upper, na.rm = TRUE) - mean_forecast = mean(ASE_data$fcst, na.rm = TRUE) - mean_ll = mean(ASE_data$lower, na.rm = TRUE) - mean_ul = mean(ASE_data$upper, na.rm = TRUE) - - mean_forecasts = c(mean_forecasts, mean_forecast) - mean_lls = c(mean_lls, mean_ll) - mean_uls = c(mean_uls, mean_ul) - - } + mean_forecasts = c(mean_forecasts, mean_forecast) + mean_lls = c(mean_lls, mean_ll) + mean_uls = c(mean_uls, mean_ul) + } ## Compare ASE Values @@ -148,7 +154,7 @@ test_that("ModelCompareMultivariateVAR", { expect_equal(ASE_bic_none_a, ASEs[4]) expect_equal(ASE_bic_trend_a, ASEs[5]) expect_equal(ASE_bic_both_a, ASEs[6]) - + ## Compare Forecast, Upper Limit and Lower Limits expect_equal(meanForecast_aic_none_a, mean_forecasts[1]) expect_equal(meanForecast_aic_trend_a, mean_forecasts[2]) @@ -156,7 +162,7 @@ test_that("ModelCompareMultivariateVAR", { expect_equal(meanForecast_bic_none_a, mean_forecasts[4]) expect_equal(meanForecast_bic_trend_a, mean_forecasts[5]) expect_equal(meanForecast_bic_both_a, mean_forecasts[6]) - + ## Compare Lower Limits expect_equal(meanLL_aic_none_a, mean_lls[1]) expect_equal(meanLL_aic_trend_a, mean_lls[2]) @@ -164,7 +170,7 @@ test_that("ModelCompareMultivariateVAR", { expect_equal(meanLL_bic_none_a, mean_lls[4]) expect_equal(meanLL_bic_trend_a, mean_lls[5]) expect_equal(meanLL_bic_both_a, mean_lls[6]) - + ## Compare Upper Limits expect_equal(meanUL_aic_none_a, mean_uls[1]) expect_equal(meanUL_aic_trend_a, mean_uls[2]) @@ -174,11 +180,109 @@ test_that("ModelCompareMultivariateVAR", { expect_equal(meanUL_bic_both_a, mean_uls[6]) - #### With n_step.ahead = FALSE +}) + + +#### Batch ASE = TRUE #### +test_that("Batch ASE = TRUE", { + + data("USeconomic") + data = as.data.frame(USeconomic) + colnames(data) = c("logM1", "logGNP", "rs", "rl") + + lag.max = 10 + + models = list("AIC None" = list(select = "aic", trend_type = "none", lag.max = lag.max), + "AIC Trend" = list(select = "aic", trend_type = "trend", lag.max = lag.max), + "AIC Both" = list(select = "aic", trend_type = "both", lag.max = lag.max), + "BIC None" = list(select = "bic", trend_type = "none", lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", lag.max = lag.max), + "BIC Both" = list(select = "bic", trend_type = "both", lag.max = lag.max) + ) + var_interest = 'logGNP' + + mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 0) + + + recommendations = mdl_build$get_recommendations() + mdl_build$build_recommended_models() + + # Get only user defined models + models = mdl_build$get_final_models(subset = 'u') + + #### With sliding ASE = TRUE + + for (name in names(models)){ + models[[name]][['sliding_ase']] = TRUE + } + + batch_size = 38 + n.ahead = 2 + + #### With n_step.ahead = TRUE (Default) + mdl_compare = ModelCompareMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, n.ahead = n.ahead, batch_size = batch_size) + + + + + mdl_compare$get_xIC() + + summary_compare = mdl_compare$summarize_build() + #summary_compare %>% write.csv(file = "multivar_VAR_compare_summary.csv", row.names = FALSE) + + summary_target_file = system.file("extdata", "multivar_VAR_compare_summary.csv", package = "tswgewrapped", mustWork = TRUE) + summary_target = read.csv(summary_target_file, header = TRUE, stringsAsFactors = FALSE) %>% + dplyr::as_tibble() %>% + dplyr::mutate_if(is.numeric, as.double) # Converts integer to double to match type + good = all.equal(summary_compare %>% dplyr::mutate_if(is.numeric, as.double), summary_target) + testthat::expect_equal(good, TRUE) + + + mdl_compare$plot_simple_forecasts() + mdl_compare$plot_batch_forecasts(only_sliding = FALSE) + mdl_compare$plot_batch_ases(only_sliding = FALSE) + mdl_compare$plot_histogram_ases() + mdl_compare$statistical_compare() + +}) + +#### "n_step.ahead = FALSE #### +test_that("n_step.ahead = FALSE", { + + data("USeconomic") + data = as.data.frame(USeconomic) + colnames(data) = c("logM1", "logGNP", "rs", "rl") + + lag.max = 10 + + models = list("AIC None" = list(select = "aic", trend_type = "none", lag.max = lag.max), + "AIC Trend" = list(select = "aic", trend_type = "trend", lag.max = lag.max), + "AIC Both" = list(select = "aic", trend_type = "both", lag.max = lag.max), + "BIC None" = list(select = "bic", trend_type = "none", lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", lag.max = lag.max), + "BIC Both" = list(select = "bic", trend_type = "both", lag.max = lag.max) + ) + var_interest = 'logGNP' + + mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 0) + + recommendations = mdl_build$get_recommendations() + mdl_build$build_recommended_models() + + # Get only user defined models + models = mdl_build$get_final_models(subset = 'u') + + batch_size = 38 + n.ahead = 2 + + #### With n_step.ahead = FALSE mdl_compare = ModelCompareMultivariateVAR$new(data = data, var_interest = var_interest, - mdl_list = models, n.ahead = n.ahead, batch_size = batch_size, - step_n.ahead = FALSE) + mdl_list = models, n.ahead = n.ahead, batch_size = batch_size, + step_n.ahead = FALSE) forecasts = mdl_compare$get_tabular_metrics(ases = FALSE) @@ -190,46 +294,129 @@ test_that("ModelCompareMultivariateVAR", { ) - # meanForecast_wg_modelB = round(summary %>% dplyr::filter(Model == "Woodward Gray Model B") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1), 6) - # meanLL_wg_modelB = round(summary %>% dplyr::filter(Model == "Woodward Gray Model B") %>% dplyr::select(MeanLL) %>% purrr::pluck(1), 6) - # meanUL_wg_modelB = round(summary %>% dplyr::filter(Model == "Woodward Gray Model B") %>% dplyr::select(MeanUL) %>% purrr::pluck(1), 6) - # - # meanForecast_pz_modelB = round(summary %>% dplyr::filter(Model == "Parzen Model B") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1), 6) - # meanLL_pz_modelB = round(summary %>% dplyr::filter(Model == "Parzen Model B") %>% dplyr::select(MeanLL) %>% purrr::pluck(1), 6) - # meanUL_pz_modelB = round(summary %>% dplyr::filter(Model == "Parzen Model B") %>% dplyr::select(MeanUL) %>% purrr::pluck(1), 6) - # - # meanForecast_bx_modelB = round(summary %>% dplyr::filter(Model == "Box Model B") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1), 6) - # meanLL_bx_modelB = round(summary %>% dplyr::filter(Model == "Box Model B") %>% dplyr::select(MeanLL) %>% purrr::pluck(1), 6) - # meanUL_bx_modelB = round(summary %>% dplyr::filter(Model == "Box Model B") %>% dplyr::select(MeanUL) %>% purrr::pluck(1), 6) - # - # expect_equal(meanForecast_wg_modelB, 5.75461) - # expect_equal(meanForecast_pz_modelB, 5.697653) - # expect_equal(meanForecast_bx_modelB, 5.764886) - # - # expect_equal(meanLL_wg_modelB, 5.627753) - # expect_equal(meanLL_pz_modelB, 5.573369) - # expect_equal(meanLL_bx_modelB, 5.653335) - # - # expect_equal(meanUL_wg_modelB, 5.881467) - # expect_equal(meanUL_pz_modelB, 5.821938) - # expect_equal(meanUL_bx_modelB, 5.876438) - - - # # Generated White Noise - # wn = tswge::gen.arma.wge(n = 200, sn = 101, plot = FALSE) - # - # # Using p = 1 since I need to pass that to the ModelCompareUnivariate to make it work - # k24 = tswge::ljung.wge(wn, K = 24, p = 1) - # k48 = tswge::ljung.wge(wn, K = 48, p = 1) - # - # models = list("Model 1" = list(phi = 0.5, res = wn, sliding_ase = FALSE)) # Hypothetical Model - # - # mdl_compare = ModelCompareUnivariate$new(data = wn, mdl_list = models, n.ahead = 10) - # table = mdl_compare$evaluate_residuals() - # - # expect_equal(table %>% dplyr::filter(K == 24) %>% dplyr::select(pval) %>% purrr::pluck(1), k24$pval) - # expect_equal(table %>% dplyr::filter(K == 48) %>% dplyr::select(pval) %>% purrr::pluck(1), k48$pval) - # expect_equal(table %>% dplyr::filter(K == 24) %>% dplyr::select(Decision) %>% purrr::pluck(1), "FTR NULL") - # expect_equal(table %>% dplyr::filter(K == 48) %>% dplyr::select(Decision) %>% purrr::pluck(1), "FTR NULL") +}) + + +#### Subset of data #### +test_that("Models that used only subset of data", { + + data("USeconomic") + data = as.data.frame(USeconomic) + colnames(data) = c("logM1", "logGNP", "rs", "rl") + + var_interest = 'logGNP' + subset_cols = c("logM1", "logGNP") + + varfit1 = vars::VAR(data, p = 3, type = "none") + varfit2 = vars::VAR(data %>% dplyr::select(!!subset_cols), p = 3, type = "none") + + # Get only user defined models + models = list("All Data" = list(varfit = varfit1, sliding_ase = FALSE), + "Subset Data" = list(varfit = varfit2, sliding_ase = FALSE) + ) + + #### With sliding ASE = FALSE + n.ahead = 2 + + #### With n_step.ahead = TRUE (Default) + mdl_compare = ModelCompareMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, n.ahead = n.ahead, verbose = 0) + ## Compute ASE values from object + ASEs = mdl_compare$get_tabular_metrics(ases = TRUE) + + ASE_all_data = ASEs %>% dplyr::filter(Model == "All Data") %>% dplyr::select(ASE) %>% purrr::pluck(1) + ASE_subset_data = ASEs %>% dplyr::filter(Model == "Subset Data") %>% dplyr::select(ASE) %>% purrr::pluck(1) + + ## Compute Forecasts, Upper Limits and Lower Limits from object + forecasts = mdl_compare$get_tabular_metrics(ases = FALSE) + + summary = forecasts %>% + dplyr::group_by(Model) %>% + dplyr::summarise(MeanForecast = mean(f, na.rm = TRUE), + MeanLL = mean(ll, na.rm = TRUE), + MeanUL = mean(ul, na.rm = TRUE) + ) + + meanForecast_all_data = summary %>% dplyr::filter(Model == "All Data") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_all_data = summary %>% dplyr::filter(Model == "All Data") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_all_data = summary %>% dplyr::filter(Model == "All Data") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + + meanForecast_subset_data = summary %>% dplyr::filter(Model == "Subset Data") %>% dplyr::select(MeanForecast) %>% purrr::pluck(1) + meanLL_subset_data = summary %>% dplyr::filter(Model == "Subset Data") %>% dplyr::select(MeanLL) %>% purrr::pluck(1) + meanUL_subset_data = summary %>% dplyr::filter(Model == "Subset Data") %>% dplyr::select(MeanUL) %>% purrr::pluck(1) + + + ## Compute values computed manually to how the object calculates it + n = nrow(data) + train_data = data %>% dplyr::filter(dplyr::row_number() <= (n - n.ahead)) + test_data = data %>% dplyr::filter(dplyr::row_number() > (n - n.ahead)) + + train_data_subset = train_data %>% dplyr::select(!!subset_cols) + test_data_subset = test_data %>% dplyr::select(!!subset_cols) + + trend_types = rep(c("none"),2) + ks = c(3,3) + train_datasets = list(train_data, train_data_subset) + test_datasets = list(test_data, test_data_subset) + + ASEs = c() + mean_forecasts = c() + mean_lls = c() + mean_uls = c() + + for (i in seq_along(trend_types)){ + + trend_type = trend_types[i] + k = ks[i] + + ## VAR Model + varfit = vars::VAR(train_datasets[[i]], p=k, type=trend_type) + + ## Predictions + preds = stats::predict(varfit, n.ahead=n.ahead) + + results = preds$fcst[[var_interest]] %>% + tibble::as_tibble() %>% + dplyr::mutate(Time = seq(n-n.ahead+1,n,1)) + + ## ASE + data_all = data %>% + dplyr::mutate(Time = dplyr::row_number()) + + ASE_data = data_all %>% + dplyr::full_join(results, by = "Time") %>% + na.omit() + + ASE = mean((ASE_data[[var_interest]] - ASE_data$fcst)^2, na.rm = TRUE) + ASEs = c(ASEs, ASE) + + mean_forecast = mean(ASE_data$fcst, na.rm = TRUE) + mean_ll = mean(ASE_data$lower, na.rm = TRUE) + mean_ul = mean(ASE_data$upper, na.rm = TRUE) + + mean_forecasts = c(mean_forecasts, mean_forecast) + mean_lls = c(mean_lls, mean_ll) + mean_uls = c(mean_uls, mean_ul) + + } + + ## Compare ASE Values + testthat::expect_equal(ASE_all_data, ASEs[1]) + testthat::expect_equal(ASE_subset_data, ASEs[2]) + + ## Compare Forecast, Upper Limit and Lower Limits + testthat::expect_equal(meanForecast_all_data, mean_forecasts[1]) + testthat::expect_equal(meanForecast_subset_data, mean_forecasts[2]) + + ## Compare Lower Limits + testthat::expect_equal(meanLL_all_data, mean_lls[1]) + testthat::expect_equal(meanLL_subset_data, mean_lls[2]) + + ## Compare Upper Limits + testthat::expect_equal(meanUL_all_data, mean_uls[1]) + testthat::expect_equal(meanUL_subset_data, mean_uls[2]) + }) + + diff --git a/vignettes/ModelBuildMultivariateVAR.Rmd b/vignettes/ModelBuildMultivariateVAR.Rmd new file mode 100644 index 0000000..8af0f26 --- /dev/null +++ b/vignettes/ModelBuildMultivariateVAR.Rmd @@ -0,0 +1,130 @@ +--- +title: "ModelBuildMultivariateVAR" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{ModelBuildMultivariateVAR} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +# Setup Libraries +```{r setup} +library(tswgewrapped) +library(tseries) +``` + +# Load Data +```{r} +data("USeconomic") + +data = as.data.frame(USeconomic) +colnames(data) = c("logM1", "logGNP", "rs", "rl") +``` + + +# Basic Analysis + +## Setup Model Configurations + +```{r} +lag.max = 10 + +models = list("AIC None" = list(select = "aic", trend_type = "none", lag.max = lag.max), + "AIC Trend" = list(select = "aic", trend_type = "trend", lag.max = lag.max), + "AIC Both" = list(select = "aic", trend_type = "both", lag.max = lag.max), + "BIC None" = list(select = "bic", trend_type = "none", lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", lag.max = lag.max), + "BIC Both" = list(select = "bic", trend_type = "both", lag.max = lag.max)) + +var_interest = 'logGNP' +``` + +## Build Models +```{r} +mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 1) +``` + +## Summarize the initial build +```{r} +mdl_build$summarize_build() +``` + +## Get recommendations to prevent overfitting +```{r} +mdl_build$get_recommendations() +``` + + + +## Build the recommended models +```{r} +mdl_build$build_recommended_models() +``` + +## Get the final models +```{r} +# Get only user defined models (subset = 'u') +# Other options are ony recommended models (subset = 'r') or all models (subset = 'a') +models = mdl_build$get_final_models(subset = 'u') +print(names(models)) +``` + + +# Advanced Options + +## Adding Seasonality + +```{r} +lag.max = 10 + +models = list("AIC Trend" = list(select = "aic", trend_type = "trend", season = 3, lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", season = 4, lag.max = lag.max) + ) + +var_interest = 'logGNP' +``` + +### Build Models +```{r} +mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 1) +``` + +### Summarize the initial build +```{r} +mdl_build$summarize_build() +``` + +### Get recommendations to prevent overfitting +```{r} +mdl_build$get_recommendations() +``` + +## Build the recommended models +```{r} +mdl_build$build_recommended_models() +``` + +## Getting Final Models + +```{r} +# Get only recommended model +models = mdl_build$get_final_models(subset = 'r') +print(names(models)) +``` + + +```{r} +# Get all models +models = mdl_build$get_final_models(subset = 'a') +print(names(models)) +``` + diff --git a/vignettes/ModelCompareMultivariateVAR.Rmd b/vignettes/ModelCompareMultivariateVAR.Rmd index 0009b82..5b2958a 100644 --- a/vignettes/ModelCompareMultivariateVAR.Rmd +++ b/vignettes/ModelCompareMultivariateVAR.Rmd @@ -28,27 +28,58 @@ data = as.data.frame(USeconomic) colnames(data) = c("logM1", "logGNP", "rs", "rl") ``` -# Setup Models to be compared -```{r} + +# Get Model Recommendations + +```{r} lag.max = 10 -n.ahead = 4 -batch_size = 40 + +models = list("AIC None" = list(select = "aic", trend_type = "none", lag.max = lag.max), + "AIC Trend" = list(select = "aic", trend_type = "trend", lag.max = lag.max), + "AIC Both" = list(select = "aic", trend_type = "both", lag.max = lag.max), + "BIC None" = list(select = "bic", trend_type = "none", lag.max = lag.max), + "BIC Trend" = list(select = "bic", trend_type = "trend", lag.max = lag.max), + "BIC Both" = list(select = "bic", trend_type = "both", lag.max = lag.max)) + var_interest = 'logGNP' +``` + +```{r} +mdl_build = ModelBuildMultivariateVAR$new(data = data, var_interest = var_interest, + mdl_list = models, verbose = 1) +``` + +```{r} +mdl_build$get_recommendations() +``` + +```{r} +mdl_build$build_recommended_models() +``` + +```{r} +# Get only user defined models +# Other options are ony recommended models (subset = 'r') or all models (subset = 'a') +models = mdl_build$get_final_models(subset = 'u') +``` -models = list("VARS AIC No Trend A" = list(select = "aic", trend_type = "none", lag.max = lag.max, sliding_ase = FALSE), - "VARS AIC Trend A" = list(select = "aic", trend_type = "trend", lag.max = lag.max, sliding_ase = FALSE), - "VARS AIC Const + Trend A" = list(select = "aic", trend_type = "both", lag.max = lag.max, sliding_ase = FALSE), - "VARS BIC No Trend A" = list(select = "bic", trend_type = "none", lag.max = lag.max, sliding_ase = FALSE), - "VARS BIC Trend A" = list(select = "bic", trend_type = "trend", lag.max = lag.max, sliding_ase = FALSE), - "VARS BIC Const + Trend A" = list(select = "bic", trend_type = "both", lag.max = lag.max, sliding_ase = FALSE) - ) +# Setup Models to be compared +```{r} +#### With sliding ASE = TRUE +for (name in names(models)){ + models[[name]][['sliding_ase']] = TRUE +} + +batch_size = 38 +n.ahead = 2 ``` -# Initialize the ModelCompareUnivariate object +# Initialize the ModelCompareMultivariateVAR object ```{r} #### With n_step.ahead = TRUE (Default) mdl_compare = ModelCompareMultivariateVAR$new(data = data, var_interest = var_interest, - mdl_list = models, n.ahead = n.ahead, batch_size = batch_size) + mdl_list = models, n.ahead = n.ahead, batch_size = batch_size, verbose = 1) + ``` # Compare the models