diff --git a/.Rhistory b/.Rhistory index d7922db..8f09963 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,92 +1,3 @@ -abline(a = 0, b = 1) -plot(x = coverage[[3]], y = alpha_vec, pch = 20, ylim = c(0,1), xlim = c(0,1)) -abline(a = 0, b = 1) -library(rfinterval) -?rfinterval -library(devtools) -document() -setwd("~/piRF") -document() -setwd("..") -install("piRF") -library(piRF) -library(rcdk) -library(piRF) -#physical checmistry MoleculeNet datasets -lipo <- read.csv("C:/Users/thechanceyman/Documents/Research/Data/Drug Design/lipophilicity/Lipophilicity.csv") -esol <- read.csv("C:/Users/thechanceyman/Documents/Research/Data/Drug Design/ESOL/esol.csv") -freesolv <- read.csv("C:/Users/thechanceyman/Documents/Research/Data/Drug Design/FreeSolv/freesolv.csv") -#combine data -all_data <- list(lipo, freesolv, esol) -names(all_data[[1]])[2] <- "resp" -names(all_data[[3]])[9] <- "resp" -names(all_data[[2]])[4] <- "resp" -#type <- c("lower", "lower", "upper") -type <- c("two-sided", "two-sided", "two-sided") -res <- cov_color <- test <- list() -#par(mfrow = c(3,1)) -for(i in 1:length(all_data)){ -bit_mat <- convert_smiles(all_data[[i]]$smiles) -#data frame form -data <- cbind(all_data[[i]]$resp, bit_mat) -names(data)[1] <- "resp" -set.seed(2020) -n <- nrow(data) -ratio <- .975 -samp <- sample(1:n, ratio*n) -train <- data[samp,] -test[[i]] <- data[-samp,] -res[[i]] <- rfint(resp~., train_data = train, test_data = test[[i]], concise = FALSE, interval_type = type[i]) -cov_color[[i]] <- (test[[i]]$resp <= res[[i]]$int[[1]][,2])*(test[[i]]$resp>= res[[i]]$int[[1]][,1]) -} -for(i in 1:length(all_data)){ -#changing to color -range_pred <- range(res[[i]]$int) -range_true <- range(test[[i]]$resp) -lim <- .5 -cov_color[[i]][cov_color[[i]] == 1] <- "black" -cov_color[[i]][cov_color[[i]] == 0] <- "red" -p_color <- rep("black", times = nrow(test[[i]])) -p_color[res[[i]]$int[[1]][,1] > lim] <- "skyblue" -#segments -#plotting intervals vs true -plot(x = res[[i]]$preds[[1]], y = 
test[[i]]$resp, pch = 20, -ylab = "true", xlab = "predicted", ylim = range_true, xlim = range_pred) -abline(a = 0, b = 1) -segments(x0 = res[[i]]$int[[1]][,1], x1 = res[[i]]$int[[1]][,2], -y1 = test[[i]]$resp, y0 = test[[i]]$resp, -col = cov_color[[i]], lwd = .5) -} -#points(x = res$preds[[1]], y = test$lipo, pch = 20, col = p_color) -#one sided for freesolv data -alpha_vec <- seq(.01,.99,length.out = 33) -int <- rep(0, times = 2*length(alpha_vec)) -dim(int) <- c(length(alpha_vec), 2) -track <- 1 -k <- 5 -for(alpha in alpha_vec){ -test2 <- data[k,] -train2 <- data[-k,] -int[track,] <- unlist(rfint(resp~., train_data = train2, test_data = test2, -interval_type = "upper", alpha = alpha)$int$Zhang) -track <- track + 1 -} -int[,1] <- -100 -plot(x = int[,2], y = 1-alpha_vec, pch = 20, -ylab = expression((1-alpha)), xlab = "upper one-sided interval", ylim = c(0,1), xlim = c(-3,3)) -segments(x0 = int[,1], x1 = int[,2], -y1 = 1-alpha_vec, y0 = 1-alpha_vec, -lwd = .5) -abline(v = -.5, lty = 2, col = "red") -#checking calibration of probabilities... -#insert code here... 
-#one sided for freesolv data -alpha_vec <- seq(.01,.48,length.out = 33) -k <- 1:100 -method_vec <- c("Zhang", "quantile", "Romano") -int <- vector(mode = "list", length = length(method_vec)) -for(m in 1:length(int)){ -int[[m]] <- matrix(0, nrow = length(k), ncol = length(alpha_vec)) } track <- 1 for(alpha in alpha_vec){ @@ -510,3 +421,92 @@ abline(a = 0, b = 1) segments(x0 = res$int[[i]][,1], x1 = res$int[[i]][,2], y1 = test$pressure, y0 = test$pressure, lwd = 1, col = col) } +par.get() +o <- par() +par(o) +opar <- par() +opar +par(opar) +opar <- par()$mfrow +par(mfrow = opar) +opar <- par(mfrow = c(1,1)) +opar +par(opar) +opar <- par(mfrow = c(12,2)) +opar +par(opar) +par() +par()$mfrow +opar <- par(mfrow = c(12,2)) +par()$mfrow +library(devtools) +check() +opar <- par(mfrow = c(2,17)) +par +par()$mfrow +par(opar) +par()$mfrow +par(mfrow = c(1,1)) +opar <- par(mfrow = c(2,17)) +par()$mfrow +par(opar) +par()$mfrow +release() +shiny::runApp('~/Trivia') +runApp('~/Trivia') +runApp('~/Trivia') +runApp('~/Trivia') +runApp('~/Trivia') +library(rsconnect) +deployApp() +getwd() +setwd("~/") +getwd() +setwd("~/Trivia") +getwd() +ls +ls(0) +getwd() +deployApp() +runApp() +deployApp() +key <- "1oDgk4uSTEJgNQu9HCkrmIltZOk_Bi1eQFdceH2qmU8Y" +#trivia_sheet <- read_sheet(key) +fieldNames <- c("team_name", +"round", +"q1", +"q2", +"q3", +"q4", +"q5", +"q6", +"q7", +"q8", +"q9", +"q10") +runApp() +logical(1) +logical(0) +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +runApp() +deployApp() +library(devtools) +document() +document() +setwd("..") +install("piRF") +setwd("~/piRF") +check() +document() +?rfint +document() +setwd("..") +install("piRF") diff --git a/CRAN-RELEASE b/CRAN-RELEASE index 0a4451d..c607396 100644 --- a/CRAN-RELEASE +++ b/CRAN-RELEASE @@ -1,2 +1,2 @@ -This package was submitted to CRAN on 2020-04-28. -Once it is accepted, delete this file and tag the release (commit a409810de7). 
+This package was submitted to CRAN on 2020-05-02. +Once it is accepted, delete this file and tag the release (commit 213beee10d). diff --git a/DESCRIPTION b/DESCRIPTION index da0d9cf..0e39617 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,7 +26,6 @@ Description: Implements multiple state-of-the-art prediction interval methodolog original random forest methodology and novel prediction interval methodologies. All of these methodologies can be utilized using solely this package, rather than a collection of separate packages. Currently, only regression trees are supported. Also capable of handling high dimensional data. - Breiman, Leo (2001) . Roy, Marie-Helene and Larocque, Denis (2019) . Ghosal, Indrayudh and Hooker, Giles (2018) . Zhu, Lin and Lu, Jiaxin and Chen, Yihong (2019) . @@ -34,7 +33,6 @@ Description: Implements multiple state-of-the-art prediction interval methodolog Meinshausen, Nicolai (2006) . Romano, Yaniv and Patterson, Evan and Candes, Emmanuel (2019) . Tung, Nguyen Thanh and Huang, Joshua Zhexue and Nguyen, Thuy Thi and Khan, Imran (2014) . - Lopez, Roberto and Balsa-Canto, E. and Onate, E. (2008) . License: GPL-3 Encoding: UTF-8 Depends: diff --git a/R/Ghosal_Hooker_2018.R b/R/Ghosal_Hooker_2018.R index dade536..a7fd423 100644 --- a/R/Ghosal_Hooker_2018.R +++ b/R/Ghosal_Hooker_2018.R @@ -12,15 +12,10 @@ ## Email: cjohnsto@iastate.edu ## ## --------------------------- -## -## Notes: -## multiple boosts functional; testing needs to occur; seems to be an issue with variance estimates -## variant 2 implemented -## -------------------------- -#' implements RF prediction interval method in Ghosal, Hooker 2018 +#' Implements RF prediction interval method in Ghosal, Hooker 2018. Helper function. #' -#' This function implements variant one of the prediction interval methods in Ghosal, Hooker 2018. +#' This function implements variant one and two of the prediction interval methods in Ghosal, Hooker 2018. Used inside rfint(). 
#' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables. #' @param train_data Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements. #' @param pred_data Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data. @@ -34,14 +29,9 @@ #' @param variant Choose which variant to use. Currently variant 2 not implemented. #' @param num_stages Number of boosting stages. Functional for >= 2; variance estimates need adjustment for variant 2. #' @param num_threads The number of threads to use in parallel. Default is the current number of cores. +#' @param interval_type Type of prediction interval to generate. +#' Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}. #' @keywords internal -#' @examples -#' GhosalBoostRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = 500, -#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, -#' intervals = FALSE, alpha = NULL, forest_type = "RF", -#' replace = TRUE, prop = 1, variant = 1, -#' num_threads = num_threads) -#' @noRd GhosalBoostRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = FALSE, alpha = NULL, prop = NULL, variant = 1, @@ -76,17 +66,10 @@ GhosalBoostRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, n interval_type = interval_type) } -#' generate stage 1 RF for Ghosal, Hooker RF implementation +#' Generates stage 1 RF for Ghosal, Hooker RF implementation. Helper function. #' -#' This function is primarily meant to be used within the GhosalBoostRF function. All parameters are same as in GhosalBoostRF(). +#' This function is primarily meant to be used within GhosalBoostRF(). 
#' @keywords internal -#' @examples -#' genCombRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees, -#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, -#' intervals = TRUE, -#' alpha = alpha, forest_type = "RF", importance = "none" , weights = NULL, -#' replace = replace, prop = prop, inbag = NULL, num_threads = num_threads) -#' @noRd genCombRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, @@ -103,12 +86,10 @@ genCombRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_t } -#' generate stage 2 (and more) RF for Ghosal, Hooker RF implementation +#' Generates stage 2 (and more) RF for Ghosal, Hooker RF implementation. Helper function. #' -#' This function is primarily meant to be used within the GhosalBoostRF() function. All parameters are same as in GhosalBoostRF(). +#' Used within GhosalBoostRF(). #' @keywords internal -#' @noRd -#boosting function boostStage <- function(rf, formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, alpha = alpha, weights = NULL, num_stages = 2, @@ -187,24 +168,11 @@ boostStage <- function(rf, formula = NULL, train_data = NULL, pred_data = NULL, inbag = rf$inbag.counts)) } -#' generate prediction intervals for Ghosal, Hooker 2018 implementation. +#' Generate prediction intervals for Ghosal, Hooker 2018 implementation. Helper function. #' -#' This function is primarily meant to be used within the GhosalBoostRF() function. All parameters are same as in GhosalBoostRF(). -#' @param love Do you love cats? Defaults to TRUE. +#' This function is primarily meant to be used within GhosalBoostRF(). 
#' @keywords internal -#' @examples -#' GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threads = num_threads) -#' @noRd -#get variance estimate GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threads = num_threads, interval_type = interval_type){ - #add variance estimate procedure for variant 2; requires estimates, and inbag for each stage... - - #one sided intervals - #if(interval_type == "two-sided"){ - # alpha <- alpha - #} else { - # alpha <- alpha*2 - #} #one sided intervals if(interval_type == "two-sided"){ @@ -218,8 +186,6 @@ GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threa alpha2 <- 1 } - - #includes original rf num_stages <- length(boostRF$boostrf) @@ -232,11 +198,10 @@ GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threa cov_est <- rep(0, times = pred_n) #needs to get predictions from boostRF - #maybe call this something different tree_preds <- boostRF$tree_preds num_trees <- ncol(tree_preds) - #test this; dont know which one is correct... + #further testing needed in_bag <- unlist(boostRF$inbag) dim(in_bag) <- c(dim(train_data)[1], num_trees) in_bag <- in_bag >= 1 diff --git a/R/HDI_quantregforest.R b/R/HDI_quantregforest.R index 24fe021..c34c77c 100644 --- a/R/HDI_quantregforest.R +++ b/R/HDI_quantregforest.R @@ -5,7 +5,7 @@ # Date First Created: 2019-09-13 # Reference: Zhu, Lin, Jiaxin Lu, and Yihong Chen. "HDI-Forest: Highest Density Interval Regression Forest." arXiv preprint arXiv:1905.10101 (2019). -#' implements HDI RF prediction interval method in ... +#' Implements HDI RF prediction interval method in Zhu 2019. Helper function. #' #' This function implements an HDI RF prediction interval method. #' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables. @@ -19,11 +19,6 @@ #' @param replace Sample with replacement, or not. 
Utilized for the two different variants outlined in Ghosal, Hooker 2018. Currently variant 2 not implemented. #' @param num_threads The number of threads to use in parallel. Default is the current number of cores. #' @keywords internal -#' @examples -#' HDI_quantregforest <- function(formula = NULL, train_data = NULL, test_data = NULL, alpha = NULL, -#' num_tree = NULL, mtry = NULL, min_node_size = NULL, max_depth = NULL, replace = TRUE, verbose = FALSE, -#' num_threads = NULL) -#' @noRd HDI_quantregforest <- function(formula = NULL, train_data = NULL, test_data = NULL, diff --git a/R/Romano_Patterson_Candes_2018.R b/R/Romano_Patterson_Candes_2018.R index 92c1fba..ccf8dba 100644 --- a/R/Romano_Patterson_Candes_2018.R +++ b/R/Romano_Patterson_Candes_2018.R @@ -18,10 +18,9 @@ ## ## -------------------------- -#' implements RF prediction interval using split conformal prediction as outlined in Romano, Patterson, Candes 2018. +#' implements RF prediction interval using split conformal prediction as outlined in Romano, Patterson, Candes 2018. Helper function. #' -#' This function implements split conformal prediction intervals for RFs. -#' @param love Do you love cats? Defaults to TRUE. +#' This function implements split conformal prediction intervals for RFs. Currently used in rfint(). #' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables. #' @param train_data Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements. #' @param pred_data Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data. @@ -32,17 +31,11 @@ #' @param intervals Generate prediction intervals or not. #' @param alpha Significance level for prediction intervals. #' @param forest_type Determines what type of forest: regression forest vs. quantile regression forest. 
*Should not be an option... -#' @param replace Sample with replacement, or not. Utilized for the two different variants outlined in Ghosal, Hooker 2018. Currently variant 2 not implemented. -#' @param prop Proportion of training data to sample for each tree. Currently variant 2 not implemented. -#' @param variant Choose which variant to use. Currently variant 2 not implemented. #' @param num_threads The number of threads to use in parallel. Default is the current number of cores. +#' @param interval_type Type of prediction interval to generate. +#' Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}. #' @keywords internal #' @import stats -#' @examples -#' CQRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, -#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, -#' intervals = TRUE, alpha = NULL, forest_type = "RF", num_threads = NULL) -#' @noRd CQRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, alpha = NULL, forest_type = "RF", num_threads = NULL, diff --git a/R/Roy_Larocque_2019.R b/R/Roy_Larocque_2019.R index d48e062..39dc462 100644 --- a/R/Roy_Larocque_2019.R +++ b/R/Roy_Larocque_2019.R @@ -20,8 +20,9 @@ ## ## -------------------------- -#' implements RF prediction interval method in Roy, Larocque 2019. -#' Currently implemented is the quantile method with BOP intervals. +#' implements RF prediction interval method in Roy, Larocque 2019. Helper function. +#' +#' Currently implemented is the quantile method with BOP intervals. Used inside rfint(). #' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables. #' @param train_data Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements. 
#' @param pred_data Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data. @@ -34,12 +35,9 @@ #' @param calibrate calibrate prediction intervals based on out-of-bag performance. Adjusts alpha to get nominal coverage. #' @param alpha Significance level for prediction intervals. #' @param num_threads The number of threads to use in parallel. Default is the current number of cores. +#' @param interval_type Type of prediction interval to generate. +#' Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}. #' @keywords internal -#' @examples -#' RoyRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, -#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, -#' intervals = TRUE, interval_method = "quantile", calibrate = FALSE, alpha = NULL, num_threads = num_threads) -#' @noRd RoyRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, interval_method = "quantile", calibrate = FALSE, alpha = NULL, num_threads = NULL, @@ -95,12 +93,10 @@ RoyRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees return(list(preds = rf_preds, pred_intervals = rf$int, alpha = alpha)) } -#' generate BOP sets from Roy, Larocque 2019 +#' Generate BOP sets from Roy, Larocque 2019 #' #' This function is primarily meant to be used within the RoyRF() function. All parameters are same as in RoyRF(). 
#' @keywords internal -#' @noRd -#generates the BOP values for each prediction value genBOP <- function(rf, inbag = rf$inbag.counts, alpha = alpha, pred_data, train_data, num_threads = num_threads, calibrate = calibrate){ @@ -189,15 +185,10 @@ genBOP <- function(rf, inbag = rf$inbag.counts, alpha = alpha, return(list(BOP = BOP, oobBOP = oobBOP, dep = dep)) } -#' generates BOP quantile prediction intervals from Roy, Larocque 2019. +#' Generates BOP quantile prediction intervals from Roy, Larocque 2019. #' #' This function is primarily meant to be used within the RoyRF() function. -#' @param BOP BOP object generated from genBOP() function. #' @keywords internal -#' @examples -#' genqInt <- function(BOP, alpha = alpha) -#' @noRd -#quantile prediction using BOP genqInt <- function(BOP, alpha = alpha, interval_type = interval_type){ #one sided intervals @@ -230,10 +221,7 @@ genqInt <- function(BOP, alpha = alpha, interval_type = interval_type){ #' generates BOP HDI prediction intervals from Roy, Larocque 2019 #' #' This function is primarily meant to be used within the RoyRF() function. Could ptentially result in non-contiguous intervals. -#' @param BOP BOP object generated from genBOP() function. #' @keywords internal -#' @noRd -#HDI intervals using density estimation of BOP; outputs a list due to potential for HDI to be non-contiguous genHDInt <- function(BOP, alpha = alpha){ #getting hdr function from hdrcde package @@ -253,10 +241,7 @@ genHDInt <- function(BOP, alpha = alpha){ #' generates BOP contiguous HDI prediction intervals from Roy, Larocque 2019 #' #' This function is primarily meant to be used within the RoyRF() function. -#' @param BOP BOP object generated from genBOP() function. #' @keywords internal -#' @noRd -#connects the noninterval HDI genCHDInt <- function(BOP, alpha = alpha){ #prediction intervals based on BOP HDI; connects HDI; uses density estimation... 
hdi <- genHDInt(BOP, alpha = alpha) diff --git a/R/Tung_Huang_Nyugen_Khan_2014.R b/R/Tung_Huang_Nyugen_Khan_2014.R index 4ee4f49..f8bcb33 100644 --- a/R/Tung_Huang_Nyugen_Khan_2014.R +++ b/R/Tung_Huang_Nyugen_Khan_2014.R @@ -20,7 +20,7 @@ ## make parallel; optimize when parallel is used vs not, based on R value... ## -------------------------- -#' implements RF prediction interval method in Tung, Huang, Nyugen, Khan 2014. +#' Implements RF prediction interval method in Tung, Huang, Nyugen, Khan 2014. #' #' This function implements the feature bias and prediction bias methods outlined in Tung 2014. #' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables. @@ -37,15 +37,9 @@ #' @param R number of RFs generated in feature bias stage of Tung 2014 prediction interval. Defualt is 10. #' @param alpha Significance level for prediction intervals. #' @param num_threads The number of threads to use in parallel. Default is the current number of cores. +#' @param interval_type Type of prediction interval to generate. +#' Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}. 
#' @keywords internal -#' @examples -#' TungUbRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, -#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, -#' intervals = TRUE, feature_num_trees = NULL, -#' alpha = NULL, forest_type = "QRF", featureBias = TRUE, predictionBias = TRUE, R = NULL, -#' num_threads = NULL) -#' @noRd -#bias reduction; choice for feature and/or prediction bias correction TungUbRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, feature_num_trees = NULL, @@ -88,12 +82,10 @@ TungUbRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_tr return(list(preds = rf$preds[,2], pred_intervals = rf$preds[,c(1,3)], weights = featureWeights)) } -#' generate quantile RF +#' Generate quantile RF #' -#' This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRf(). +#' This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRF(). #' @keywords internal -#' @noRd -#changes made to genRF; add to previous versions to maintain one function? genRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, @@ -122,12 +114,10 @@ genRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees } -#' generate weights for RF through feature bias reduction method outlined in Tung 2014. +#' Generate weights for RF through feature bias reduction method outlined in Tung 2014. #' -#' This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRf(). +#' This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRF(). 
#' @keywords internal -#' @noRd -#call genRF in this function after sampling training data genWeights <- function(formula = NULL, train_data = NULL, pred_data = NULL, feature_num_trees = feature_num_trees, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, alpha = alpha, forest_type = "RF", importance = "permutation", @@ -176,26 +166,16 @@ genWeights <- function(formula = NULL, train_data = NULL, pred_data = NULL, feat return(weights) } -#' performs prediction debiasing from Tung 2014 +#' Performs prediction debiasing from Tung 2014 #' -#' This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRf(). +#' This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRF(). #' @keywords internal -#' @noRd -#prediction bias correction; two stage random forest; takes first stage rf object as input predictionUbRF <- function(rf, formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL, min_node_size = NULL, m_try = NULL, keep_inbag = TRUE, intervals = TRUE, alpha = alpha, forest_type = "QRF", weights = NULL, num_threads = num_threads, interval_type = NULL){ - - #one sided intervals - #if(interval_type == "two-sided"){ - # alpha <- alpha - #} else { - # alpha <- alpha*2 - #} - #one sided intervals if(interval_type == "two-sided"){ alpha1 <- alpha/2 @@ -219,7 +199,6 @@ predictionUbRF <- function(rf, formula = NULL, train_data = NULL, pred_data = NU #get dependent variable dep <- names(train_data)[1] - #print(dep) #keeping inbag by default; get oob for each tree #getting oob index for each training data point @@ -263,8 +242,6 @@ predictionUbRF <- function(rf, formula = NULL, train_data = NULL, pred_data = NU #bis corrected quantile predictions bias_correct_preds <- stage1_preds$predictions - mat_pred_bias - #print(bias_correct_preds) - return(list(stage1rf = rf, stage2rf = rf2, bias = bias, preds = bias_correct_preds)) } diff --git 
a/R/calibrate.R b/R/calibrate.R index f41f34e..c899580 100644 --- a/R/calibrate.R +++ b/R/calibrate.R @@ -27,14 +27,12 @@ #' @param alpha nominal significance level. Defaults to 0.01. #' @param response_data response data of class data.frame. Must have names() attribute. #' @param tolerance tolerance allowed around nominal alpha. Default is 0.25. -#' @param step_ratio ratio absolute difference between empirical oob coverage and nominal coverage to adjust when calibrating. Defaults to 0.618. +#' @param step_percent ratio absolute difference between empirical oob coverage and nominal coverage to adjust when calibrating. Defaults to 0.618. #' @param undercoverage Allow undercoverage. Defaults to TRUE. Not currently implemented. #' @param method Method to calibrate prediction intervals with. Defaults to "quantile"). Current only "quantile" implemented. #' @param max_iter Maximum number of iterations. Defaults to 10. #' @keywords random forest, calibration, internal -#' @examples #' calibrate <- function(oob, alpha = alpha, response_data, dep, tolerance = .025) -#' @noRd calibrate <- function(oob, alpha = .1, response_data, tolerance = .025, step_percent = .618, undercoverage = FALSE, method = "quantile", max_iter = 10) { diff --git a/R/combined.R b/R/combined.R index d7b422f..d923c79 100644 --- a/R/combined.R +++ b/R/combined.R @@ -117,7 +117,7 @@ #' segments(x0 = res$int[[i]][,1], x1 = res$int[[i]][,2], #' y1 = test$pressure, y0 = test$pressure, lwd = 1, col = col) #' } -#' par(par) +#' par(opar) #' } #' @references #' \insertRef{breiman2001random}{piRF} diff --git a/R/formula.R b/R/formula.R index 1f41557..6ff887d 100644 --- a/R/formula.R +++ b/R/formula.R @@ -10,7 +10,6 @@ #' @param data Training data of class \code{data.frame}. #' @param env The environment in which the left hand side of \code{formula} is evaluated. #' @return Dataset including selected columns and interactions. 
-#' @noRd parse.formula <- function(formula, data, env = parent.frame()) { f <- as.formula(formula) t <- terms(f, data = data) diff --git a/cran-comments.md b/cran-comments.md index 20a6e10..ca7ece3 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -16,3 +16,8 @@ Additional license not added because their code holds the same license (GPL-3). 05-02-2020 -Added C Johnstone, and H Zhang as cph in DESCRIPTION -Correctly reverted par() setting back to original after example + +05-08-2020 +-Corrected par() mistake; reverted par() setting back to original after example +-Reduced references in documentation to seven; all methods explicitly used in package +-Added documentation for each function in package diff --git a/man/CQRF.Rd b/man/CQRF.Rd new file mode 100644 index 0000000..bdf30ba --- /dev/null +++ b/man/CQRF.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Romano_Patterson_Candes_2018.R +\name{CQRF} +\alias{CQRF} +\title{implements RF prediction interval using split conformal prediction as outlined in Romano, Patterson, Candes 2018. Helper function.} +\usage{ +CQRF( + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = NULL, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + alpha = NULL, + forest_type = "RF", + num_threads = NULL, + interval_type = NULL +) +} +\arguments{ +\item{formula}{Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.} + +\item{train_data}{Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.} + +\item{pred_data}{Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). 
Utilizes ranger::predict() to get prediction intervals for test data.} + +\item{num_trees}{Number of trees.} + +\item{min_node_size}{Minimum number of observations before split at a node.} + +\item{m_try}{Number of variables to randomly select from at each split.} + +\item{keep_inbag}{Saves matrix of observations and which tree(s) they occur in. Required to be true to generate variance estimates for Ghosal, Hooker 2018 method. *Should not be an option...} + +\item{intervals}{Generate prediction intervals or not.} + +\item{alpha}{Significance level for prediction intervals.} + +\item{forest_type}{Determines what type of forest: regression forest vs. quantile regression forest. *Should not be an option...} + +\item{num_threads}{The number of threads to use in parallel. Default is the current number of cores.} + +\item{interval_type}{Type of prediction interval to generate. +Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}.} +} +\description{ +This function implements split conformal prediction intervals for RFs. Currently used in rfint(). +} +\keyword{internal} diff --git a/man/GHVar.Rd b/man/GHVar.Rd new file mode 100644 index 0000000..8c3674b --- /dev/null +++ b/man/GHVar.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Ghosal_Hooker_2018.R +\name{GHVar} +\alias{GHVar} +\title{Generate prediction intervals for Ghosal, Hooker 2018 implementation. Helper function.} +\usage{ +GHVar( + boostRF, + train_data, + pred_data, + variant, + dep, + alpha, + num_threads = num_threads, + interval_type = interval_type +) +} +\description{ +This function is primarily meant to be used within GhosalBoostRF(). 
+} +\keyword{internal} diff --git a/man/GhosalBoostRF.Rd b/man/GhosalBoostRF.Rd new file mode 100644 index 0000000..27779a5 --- /dev/null +++ b/man/GhosalBoostRF.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Ghosal_Hooker_2018.R +\name{GhosalBoostRF} +\alias{GhosalBoostRF} +\title{Implements RF prediction interval method in Ghosal, Hooker 2018. Helper function.} +\usage{ +GhosalBoostRF( + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = NULL, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = FALSE, + alpha = NULL, + prop = NULL, + variant = 1, + num_threads = NULL, + num_stages = NULL, + interval_type = NULL +) +} +\arguments{ +\item{formula}{Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.} + +\item{train_data}{Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.} + +\item{pred_data}{Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data.} + +\item{num_trees}{Number of trees.} + +\item{min_node_size}{Minimum number of observations before split at a node.} + +\item{m_try}{Number of variables to randomly select from at each split.} + +\item{keep_inbag}{Saves matrix of observations and which tree(s) they occur in. Required to be true to generate variance estimates for Ghosal, Hooker 2018 method.} + +\item{intervals}{Generate prediction intervals or not. Defaults to FALSE.} + +\item{alpha}{Significance level for prediction intervals.} + +\item{prop}{Proportion of training data to sample for each tree. Currently variant 2 not implemented.} + +\item{variant}{Choose which variant to use. Currently variant 2 not implemented.} + +\item{num_threads}{The number of threads to use in parallel. 
Default is the current number of cores.} + +\item{num_stages}{Number of boosting stages. Functional for >= 2; variance estimates need adjustment for variant 2.} + +\item{interval_type}{Type of prediction interval to generate. +Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}.} +} +\description{ +This function implements variant one and two of the prediction interval methods in Ghosal, Hooker 2018. Used inside rfint(). +} +\keyword{internal} diff --git a/man/HDI_quantregforest.Rd b/man/HDI_quantregforest.Rd new file mode 100644 index 0000000..2c0788a --- /dev/null +++ b/man/HDI_quantregforest.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/HDI_quantregforest.R +\name{HDI_quantregforest} +\alias{HDI_quantregforest} +\title{Implements HDI RF prediction interval method in Zhu 2019. Helper function.} +\usage{ +HDI_quantregforest( + formula = NULL, + train_data = NULL, + test_data = NULL, + alpha = NULL, + num_tree = NULL, + mtry = NULL, + min_node_size = NULL, + max_depth = NULL, + replace = TRUE, + verbose = FALSE, + num_threads = NULL +) +} +\arguments{ +\item{formula}{Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.} + +\item{train_data}{Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.} + +\item{test_data}{Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data.} + +\item{alpha}{Significance level for prediction intervals.} + +\item{num_tree}{Number of trees.} + +\item{mtry}{Number of variables to randomly select from at each split.} + +\item{min_node_size}{Minimum number of observations before split at a node.} + +\item{max_depth}{maximum depth of each tree in RF. 
ranger parameter.} + +\item{replace}{Sample with replacement, or not. Utilized for the two different variants outlined in Ghosal, Hooker 2018. Currently variant 2 not implemented.} + +\item{num_threads}{The number of threads to use in parallel. Default is the current number of cores.} +} +\description{ +This function implements an HDI RF prediction interval method. +} +\keyword{internal} diff --git a/man/RoyRF.Rd b/man/RoyRF.Rd new file mode 100644 index 0000000..4d91499 --- /dev/null +++ b/man/RoyRF.Rd @@ -0,0 +1,59 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Roy_Larocque_2019.R +\name{RoyRF} +\alias{RoyRF} +\title{implements RF prediction interval method in Roy, Larocque 2019. Helper function.} +\usage{ +RoyRF( + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = NULL, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + interval_method = "quantile", + calibrate = FALSE, + alpha = NULL, + num_threads = NULL, + tolerance = NULL, + step_percent = NULL, + under = NULL, + method = NULL, + max_iter = NULL, + interval_type = NULL +) +} +\arguments{ +\item{formula}{Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.} + +\item{train_data}{Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.} + +\item{pred_data}{Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data.} + +\item{num_trees}{Number of trees.} + +\item{min_node_size}{Minimum number of observations before split at a node.} + +\item{m_try}{Number of variables to randomly select from at each split.} + +\item{keep_inbag}{Saves matrix of observations and which tree(s) they occur in. Required to be true to generate variance estimates for Ghosal, Hooker 2018 method. 
*Should not be an option...} + +\item{intervals}{Generate prediction intervals or not.} + +\item{interval_method}{which prediction interval type to generate. Several outlined in paper; currently only one method implemented.} + +\item{calibrate}{calibrate prediction intervals based on out-of-bag performance. Adjusts alpha to get nominal coverage.} + +\item{alpha}{Significance level for prediction intervals.} + +\item{num_threads}{The number of threads to use in parallel. Default is the current number of cores.} + +\item{interval_type}{Type of prediction interval to generate. +Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}.} +} +\description{ +Currently implemented is the quantile method with BOP intervals. Used inside rfint(). +} +\keyword{internal} diff --git a/man/TungUbRF.Rd b/man/TungUbRF.Rd new file mode 100644 index 0000000..cff38ae --- /dev/null +++ b/man/TungUbRF.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Tung_Huang_Nyugen_Khan_2014.R +\name{TungUbRF} +\alias{TungUbRF} +\title{Implements RF prediction interval method in Tung, Huang, Nyugen, Khan 2014.} +\usage{ +TungUbRF( + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = NULL, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + feature_num_trees = NULL, + alpha = NULL, + forest_type = "QRF", + featureBias = TRUE, + predictionBias = TRUE, + R = NULL, + num_threads = NULL, + interval_type = NULL +) +} +\arguments{ +\item{formula}{Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.} + +\item{train_data}{Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.} + +\item{pred_data}{Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). 
Utilizes ranger::predict() to get prediction intervals for test data.} + +\item{num_trees}{Number of trees.} + +\item{min_node_size}{Minimum number of observations before split at a node.} + +\item{m_try}{Number of variables to randomly select from at each split.} + +\item{keep_inbag}{Saves matrix of observations and which tree(s) they occur in. Required to be true to generate variance estimates for Ghosal, Hooker 2018 method. *Should not be an option...} + +\item{intervals}{Generate prediction intervals or not.} + +\item{alpha}{Significance level for prediction intervals.} + +\item{featureBias}{perform feature bias step.} + +\item{predictionBias}{perform prediction bias.} + +\item{R}{number of RFs generated in feature bias stage of Tung 2014 prediction interval. Default is 10.} + +\item{num_threads}{The number of threads to use in parallel. Default is the current number of cores.} + +\item{interval_type}{Type of prediction interval to generate. +Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}.} + +\item{feature_num_trees}{number of trees to be used in each random forest generated for feature bias step.} +} +\description{ +This function implements the feature bias and prediction bias methods outlined in Tung 2014. +} +\keyword{internal} diff --git a/man/boostStage.Rd b/man/boostStage.Rd new file mode 100644 index 0000000..19feb18 --- /dev/null +++ b/man/boostStage.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Ghosal_Hooker_2018.R +\name{boostStage} +\alias{boostStage} +\title{Generates stage 2 (and more) RF for Ghosal, Hooker RF implementation. 
Helper function.} +\usage{ +boostStage( + rf, + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = num_trees, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + alpha = alpha, + weights = NULL, + num_stages = 2, + prop = prop, + num_threads = num_threads, + variant = NULL +) +} +\description{ +Used within GhosalBoostRF(). +} +\keyword{internal} diff --git a/man/calibrate.Rd b/man/calibrate.Rd new file mode 100644 index 0000000..0f02411 --- /dev/null +++ b/man/calibrate.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/calibrate.R +\name{calibrate} +\alias{calibrate} +\title{calibrate()} +\usage{ +calibrate( + oob, + alpha = 0.1, + response_data, + tolerance = 0.025, + step_percent = 0.618, + undercoverage = FALSE, + method = "quantile", + max_iter = 10 +) +} +\arguments{ +\item{oob}{collection of oob predictions for training data (in list form).} + +\item{alpha}{nominal significance level. Defaults to 0.1.} + +\item{response_data}{response data of class data.frame. Must have names() attribute.} + +\item{tolerance}{tolerance allowed around nominal alpha. Default is 0.025.} + +\item{step_percent}{ratio of the absolute difference between empirical oob coverage and nominal coverage to adjust by when calibrating. Defaults to 0.618.} + +\item{undercoverage}{Allow undercoverage. Defaults to FALSE. Not currently implemented.} + +\item{method}{Method to calibrate prediction intervals with. Defaults to "quantile". Currently only "quantile" implemented.} + +\item{max_iter}{Maximum number of iterations. Defaults to 10.} +} +\description{ +This function outputs a calibrated significance level based on coverage of prediction intervals generated using oob collections. Primarily for use in RoyRF(). Attempting to see which other methods could utilize this procedure. 
+} +\keyword{.025)} +\keyword{<-} +\keyword{=} +\keyword{alpha} +\keyword{alpha,} +\keyword{calibrate} +\keyword{calibration,} +\keyword{dep,} +\keyword{forest,} +\keyword{function(oob,} +\keyword{internal} +\keyword{random} +\keyword{response_data,} +\keyword{tolerance} diff --git a/man/genBOP.Rd b/man/genBOP.Rd new file mode 100644 index 0000000..d89adb8 --- /dev/null +++ b/man/genBOP.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Roy_Larocque_2019.R +\name{genBOP} +\alias{genBOP} +\title{Generate BOP sets from Roy, Larocque 2019} +\usage{ +genBOP( + rf, + inbag = rf$inbag.counts, + alpha = alpha, + pred_data, + train_data, + num_threads = num_threads, + calibrate = calibrate +) +} +\description{ +This function is primarily meant to be used within the RoyRF() function. All parameters are same as in RoyRF(). +} +\keyword{internal} diff --git a/man/genCHDInt.Rd b/man/genCHDInt.Rd new file mode 100644 index 0000000..fe635c4 --- /dev/null +++ b/man/genCHDInt.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Roy_Larocque_2019.R +\name{genCHDInt} +\alias{genCHDInt} +\title{generates BOP contiguous HDI prediction intervals from Roy, Larocque 2019} +\usage{ +genCHDInt(BOP, alpha = alpha) +} +\description{ +This function is primarily meant to be used within the RoyRF() function. +} +\keyword{internal} diff --git a/man/genCombRF.Rd b/man/genCombRF.Rd new file mode 100644 index 0000000..c87a29a --- /dev/null +++ b/man/genCombRF.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Ghosal_Hooker_2018.R +\name{genCombRF} +\alias{genCombRF} +\title{Generates stage 1 RF for Ghosal, Hooker RF implementation. 
Helper function.} +\usage{ +genCombRF( + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = num_trees, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + alpha = NULL, + importance = "none", + weights = NULL, + prop = NULL, + inbag = NULL, + num_threads = NULL +) +} +\description{ +This function is primarily meant to be used within GhosalBoostRF(). +} +\keyword{internal} diff --git a/man/genHDInt.Rd b/man/genHDInt.Rd new file mode 100644 index 0000000..462859a --- /dev/null +++ b/man/genHDInt.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Roy_Larocque_2019.R +\name{genHDInt} +\alias{genHDInt} +\title{generates BOP HDI prediction intervals from Roy, Larocque 2019} +\usage{ +genHDInt(BOP, alpha = alpha) +} +\description{ +This function is primarily meant to be used within the RoyRF() function. Could potentially result in non-contiguous intervals. +} +\keyword{internal} diff --git a/man/genRF.Rd b/man/genRF.Rd new file mode 100644 index 0000000..2e303d3 --- /dev/null +++ b/man/genRF.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Tung_Huang_Nyugen_Khan_2014.R +\name{genRF} +\alias{genRF} +\title{Generate quantile RF} +\usage{ +genRF( + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = num_trees, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + alpha = alpha, + forest_type = "RF", + importance = "none", + weights = NULL, + num_threads = num_threads +) +} +\description{ +This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRF(). 
+} +\keyword{internal} diff --git a/man/genWeights.Rd b/man/genWeights.Rd new file mode 100644 index 0000000..820283e --- /dev/null +++ b/man/genWeights.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Tung_Huang_Nyugen_Khan_2014.R +\name{genWeights} +\alias{genWeights} +\title{Generate weights for RF through feature bias reduction method outlined in Tung 2014.} +\usage{ +genWeights( + formula = NULL, + train_data = NULL, + pred_data = NULL, + feature_num_trees = feature_num_trees, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + alpha = alpha, + forest_type = "RF", + importance = "permutation", + R = R, + num_threads = num_threads +) +} +\description{ +This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRF(). +} +\keyword{internal} diff --git a/man/genqInt.Rd b/man/genqInt.Rd new file mode 100644 index 0000000..16724f7 --- /dev/null +++ b/man/genqInt.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Roy_Larocque_2019.R +\name{genqInt} +\alias{genqInt} +\title{Generates BOP quantile prediction intervals from Roy, Larocque 2019.} +\usage{ +genqInt(BOP, alpha = alpha, interval_type = interval_type) +} +\description{ +This function is primarily meant to be used within the RoyRF() function. +} +\keyword{internal} diff --git a/man/parse.formula.Rd b/man/parse.formula.Rd new file mode 100644 index 0000000..3fbd337 --- /dev/null +++ b/man/parse.formula.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/formula.R +\name{parse.formula} +\alias{parse.formula} +\title{Authors: Marvin N. 
Wright, Gregor DeCillia} +\usage{ +parse.formula(formula, data, env = parent.frame()) +} +\arguments{ +\item{formula}{Object of class \code{formula} or \code{character} describing the model to fit.} + +\item{data}{Training data of class \code{data.frame}.} + +\item{env}{The environment in which the left hand side of \code{formula} is evaluated.} +} +\value{ +Dataset including selected columns and interactions. +} +\description{ +taken from source code for ranger package; not exported with package +} +\details{ +Parse formula and return dataset containing selected columns. +Interactions are supported for numerical columns only. +An interaction column is the product of all interacting columns. +} diff --git a/man/predictionUbRF.Rd b/man/predictionUbRF.Rd new file mode 100644 index 0000000..406a9ff --- /dev/null +++ b/man/predictionUbRF.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Tung_Huang_Nyugen_Khan_2014.R +\name{predictionUbRF} +\alias{predictionUbRF} +\title{Performs prediction debiasing from Tung 2014} +\usage{ +predictionUbRF( + rf, + formula = NULL, + train_data = NULL, + pred_data = NULL, + num_trees = NULL, + min_node_size = NULL, + m_try = NULL, + keep_inbag = TRUE, + intervals = TRUE, + alpha = alpha, + forest_type = "QRF", + weights = NULL, + num_threads = num_threads, + interval_type = NULL +) +} +\description{ +This function is primarily meant to be used within the TungUbRF() function. All parameters are same as in TungUbRF(). 
+} +\keyword{internal} diff --git a/man/rfint.Rd b/man/rfint.Rd index f83d2bb..857ff87 100644 --- a/man/rfint.Rd +++ b/man/rfint.Rd @@ -141,7 +141,6 @@ length opar <- par(mfrow = c(2,2)) #plotting intervals and predictions -par(mfrow = c(2,2)) for(i in 1:7){ col <- ((test$pressure >= res$int[[i]][,1]) * (test$pressure <= res$int[[i]][,2])-1)*(-1)+1 @@ -151,7 +150,7 @@ for(i in 1:7){ segments(x0 = res$int[[i]][,1], x1 = res$int[[i]][,2], y1 = test$pressure, y0 = test$pressure, lwd = 1, col = col) } -par(par) +par(opar) } } \references{