Skip to content

Commit

Permalink
new documentation for helper functions
Browse files Browse the repository at this point in the history
  • Loading branch information
chancejohnstone committed May 8, 2020
1 parent 213beee commit 89b7124
Show file tree
Hide file tree
Showing 30 changed files with 692 additions and 217 deletions.
178 changes: 89 additions & 89 deletions .Rhistory
Original file line number Diff line number Diff line change
@@ -1,92 +1,3 @@
abline(a = 0, b = 1)
plot(x = coverage[[3]], y = alpha_vec, pch = 20, ylim = c(0,1), xlim = c(0,1))
abline(a = 0, b = 1)
library(rfinterval)
?rfinterval
library(devtools)
document()
setwd("~/piRF")
document()
setwd("..")
install("piRF")
library(piRF)
library(rcdk)
library(piRF)
#physical checmistry MoleculeNet datasets
lipo <- read.csv("C:/Users/thechanceyman/Documents/Research/Data/Drug Design/lipophilicity/Lipophilicity.csv")
esol <- read.csv("C:/Users/thechanceyman/Documents/Research/Data/Drug Design/ESOL/esol.csv")
freesolv <- read.csv("C:/Users/thechanceyman/Documents/Research/Data/Drug Design/FreeSolv/freesolv.csv")
#combine data
all_data <- list(lipo, freesolv, esol)
names(all_data[[1]])[2] <- "resp"
names(all_data[[3]])[9] <- "resp"
names(all_data[[2]])[4] <- "resp"
#type <- c("lower", "lower", "upper")
type <- c("two-sided", "two-sided", "two-sided")
res <- cov_color <- test <- list()
#par(mfrow = c(3,1))
for(i in 1:length(all_data)){
bit_mat <- convert_smiles(all_data[[i]]$smiles)
#data frame form
data <- cbind(all_data[[i]]$resp, bit_mat)
names(data)[1] <- "resp"
set.seed(2020)
n <- nrow(data)
ratio <- .975
samp <- sample(1:n, ratio*n)
train <- data[samp,]
test[[i]] <- data[-samp,]
res[[i]] <- rfint(resp~., train_data = train, test_data = test[[i]], concise = FALSE, interval_type = type[i])
cov_color[[i]] <- (test[[i]]$resp <= res[[i]]$int[[1]][,2])*(test[[i]]$resp>= res[[i]]$int[[1]][,1])
}
for(i in 1:length(all_data)){
#changing to color
range_pred <- range(res[[i]]$int)
range_true <- range(test[[i]]$resp)
lim <- .5
cov_color[[i]][cov_color[[i]] == 1] <- "black"
cov_color[[i]][cov_color[[i]] == 0] <- "red"
p_color <- rep("black", times = nrow(test[[i]]))
p_color[res[[i]]$int[[1]][,1] > lim] <- "skyblue"
#segments
#plotting intervals vs true
plot(x = res[[i]]$preds[[1]], y = test[[i]]$resp, pch = 20,
ylab = "true", xlab = "predicted", ylim = range_true, xlim = range_pred)
abline(a = 0, b = 1)
segments(x0 = res[[i]]$int[[1]][,1], x1 = res[[i]]$int[[1]][,2],
y1 = test[[i]]$resp, y0 = test[[i]]$resp,
col = cov_color[[i]], lwd = .5)
}
#points(x = res$preds[[1]], y = test$lipo, pch = 20, col = p_color)
#one sided for freesolv data
alpha_vec <- seq(.01,.99,length.out = 33)
int <- rep(0, times = 2*length(alpha_vec))
dim(int) <- c(length(alpha_vec), 2)
track <- 1
k <- 5
for(alpha in alpha_vec){
test2 <- data[k,]
train2 <- data[-k,]
int[track,] <- unlist(rfint(resp~., train_data = train2, test_data = test2,
interval_type = "upper", alpha = alpha)$int$Zhang)
track <- track + 1
}
int[,1] <- -100
plot(x = int[,2], y = 1-alpha_vec, pch = 20,
ylab = expression((1-alpha)), xlab = "upper one-sided interval", ylim = c(0,1), xlim = c(-3,3))
segments(x0 = int[,1], x1 = int[,2],
y1 = 1-alpha_vec, y0 = 1-alpha_vec,
lwd = .5)
abline(v = -.5, lty = 2, col = "red")
#checking calibration of probabilities...
#insert code here...
#one sided for freesolv data
alpha_vec <- seq(.01,.48,length.out = 33)
k <- 1:100
method_vec <- c("Zhang", "quantile", "Romano")
int <- vector(mode = "list", length = length(method_vec))
for(m in 1:length(int)){
int[[m]] <- matrix(0, nrow = length(k), ncol = length(alpha_vec))
}
track <- 1
for(alpha in alpha_vec){
Expand Down Expand Up @@ -510,3 +421,92 @@ abline(a = 0, b = 1)
segments(x0 = res$int[[i]][,1], x1 = res$int[[i]][,2],
y1 = test$pressure, y0 = test$pressure, lwd = 1, col = col)
}
par.get()
o <- par()
par(o)
opar <- par()
opar
par(opar)
opar <- par()$mfrow
par(mfrow = opar)
opar <- par(mfrow = c(1,1))
opar
par(opar)
opar <- par(mfrow = c(12,2))
opar
par(opar)
par()
par()$mfrow
opar <- par(mfrow = c(12,2))
par()$mfrow
library(devtools)
check()
opar <- par(mfrow = c(2,17))
par
par()$mfrow
par(opar)
par()$mfrow
par(mfrow = c(1,1))
opar <- par(mfrow = c(2,17))
par()$mfrow
par(opar)
par()$mfrow
release()
shiny::runApp('~/Trivia')
runApp('~/Trivia')
runApp('~/Trivia')
runApp('~/Trivia')
runApp('~/Trivia')
library(rsconnect)
deployApp()
getwd()
setwd("~/")
getwd()
setwd("~/Trivia")
getwd()
ls
ls(0)
getwd()
deployApp()
runApp()
deployApp()
key <- "1oDgk4uSTEJgNQu9HCkrmIltZOk_Bi1eQFdceH2qmU8Y"
#trivia_sheet <- read_sheet(key)
fieldNames <- c("team_name",
"round",
"q1",
"q2",
"q3",
"q4",
"q5",
"q6",
"q7",
"q8",
"q9",
"q10")
runApp()
logical(1)
logical(0)
runApp()
runApp()
runApp()
runApp()
runApp()
runApp()
runApp()
runApp()
runApp()
runApp()
deployApp()
library(devtools)
document()
document()
setwd("..")
install("piRF")
setwd("~/piRF")
check()
document()
?rfint
document()
setwd("..")
install("piRF")
4 changes: 2 additions & 2 deletions CRAN-RELEASE
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
This package was submitted to CRAN on 2020-04-28.
Once it is accepted, delete this file and tag the release (commit a409810de7).
This package was submitted to CRAN on 2020-05-02.
Once it is accepted, delete this file and tag the release (commit 213beee10d).
2 changes: 0 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,13 @@ Description: Implements multiple state-of-the-art prediction interval methodolog
original random forest methodology and novel prediction interval methodologies. All of these
methodologies can be utilized using solely this package, rather than a collection of separate
packages. Currently, only regression trees are supported. Also capable of handling high dimensional data.
Breiman, Leo (2001) <doi:10.1023/A:1010933404324>.
Roy, Marie-Helene and Larocque, Denis (2019) <doi:10.1177/0962280219829885>.
Ghosal, Indrayudh and Hooker, Giles (2018) <arXiv:1803.08000>.
Zhu, Lin and Lu, Jiaxin and Chen, Yihong (2019) <arXiv:1905.10101>.
Zhang, Haozhe and Zimmerman, Joshua and Nettleton, Dan and Nordman, Daniel J. (2019) <doi:10.1080/00031305.2019.1585288>.
Meinshausen, Nicolai (2006) <http://www.jmlr.org/papers/volume7/meinshausen06a/meinshausen06a.pdf>.
Romano, Yaniv and Patterson, Evan and Candes, Emmanuel (2019) <arXiv:1905.03222>.
Tung, Nguyen Thanh and Huang, Joshua Zhexue and Nguyen, Thuy Thi and Khan, Imran (2014) <doi:10.13140/2.1.2500.8002>.
Lopez, Roberto and Balsa-Canto, E. and Onate, E. (2008) <doi:10.1002/nme.2304>.
License: GPL-3
Encoding: UTF-8
Depends:
Expand Down
57 changes: 11 additions & 46 deletions R/Ghosal_Hooker_2018.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,10 @@
## Email: cjohnsto@iastate.edu
##
## ---------------------------
##
## Notes:
## multiple boosts functional; testing needs to occur; seems to be an issue with variance estimates
## variant 2 implemented
## --------------------------

#' implements RF prediction interval method in Ghosal, Hooker 2018
#' Implements RF prediction interval method in Ghosal, Hooker 2018. Helper function.
#'
#' This function implements variant one of the prediction interval methods in Ghosal, Hooker 2018.
#' This function implements variant one and two of the prediction interval methods in Ghosal, Hooker 2018. Used inside rfint().
#' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.
#' @param train_data Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.
#' @param pred_data Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data.
Expand All @@ -34,14 +29,9 @@
#' @param variant Choose which variant to use. Currently variant 2 not implemented.
#' @param num_stages Number of boosting stages. Functional for >= 2; variance estimates need adjustment for variant 2.
#' @param num_threads The number of threads to use in parallel. Default is the current number of cores.
#' @param interval_type Type of prediction interval to generate.
#' Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}.
#' @keywords internal
#' @examples
#' GhosalBoostRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = 500,
#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
#' intervals = FALSE, alpha = NULL, forest_type = "RF",
#' replace = TRUE, prop = 1, variant = 1,
#' num_threads = num_threads)
#' @noRd
GhosalBoostRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL,
min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
intervals = FALSE, alpha = NULL, prop = NULL, variant = 1,
Expand Down Expand Up @@ -76,17 +66,10 @@ GhosalBoostRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, n
interval_type = interval_type)
}

#' generate stage 1 RF for Ghosal, Hooker RF implementation
#' Generates stage 1 RF for Ghosal, Hooker RF implementation. Helper function.
#'
#' This function is primarily meant to be used within the GhosalBoostRF function. All parameters are same as in GhosalBoostRF().
#' This function is primarily meant to be used within GhosalBoostRF().
#' @keywords internal
#' @examples
#' genCombRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees,
#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
#' intervals = TRUE,
#' alpha = alpha, forest_type = "RF", importance = "none" , weights = NULL,
#' replace = replace, prop = prop, inbag = NULL, num_threads = num_threads)
#' @noRd
genCombRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees,
min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
intervals = TRUE,
Expand All @@ -103,12 +86,10 @@ genCombRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_t

}

#' generate stage 2 (and more) RF for Ghosal, Hooker RF implementation
#' Generates stage 2 (and more) RF for Ghosal, Hooker RF implementation. Helper function.
#'
#' This function is primarily meant to be used within the GhosalBoostRF() function. All parameters are same as in GhosalBoostRF().
#' Used within GhosalBoostRF().
#' @keywords internal
#' @noRd
#boosting function
boostStage <- function(rf, formula = NULL, train_data = NULL, pred_data = NULL, num_trees = num_trees,
min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
intervals = TRUE, alpha = alpha, weights = NULL, num_stages = 2,
Expand Down Expand Up @@ -187,24 +168,11 @@ boostStage <- function(rf, formula = NULL, train_data = NULL, pred_data = NULL,
inbag = rf$inbag.counts))
}

#' generate prediction intervals for Ghosal, Hooker 2018 implementation.
#' Generate prediction intervals for Ghosal, Hooker 2018 implementation. Helper function.
#'
#' This function is primarily meant to be used within the GhosalBoostRF() function. All parameters are same as in GhosalBoostRF().
#' @param love Do you love cats? Defaults to TRUE.
#' This function is primarily meant to be used within GhosalBoostRF().
#' @keywords internal
#' @examples
#' GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threads = num_threads)
#' @noRd
#get variance estimate
GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threads = num_threads, interval_type = interval_type){
#add variance estimate procedure for variant 2; requires estimates, and inbag for each stage...

#one sided intervals
#if(interval_type == "two-sided"){
# alpha <- alpha
#} else {
# alpha <- alpha*2
#}

#one sided intervals
if(interval_type == "two-sided"){
Expand All @@ -218,8 +186,6 @@ GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threa
alpha2 <- 1
}



#includes original rf
num_stages <- length(boostRF$boostrf)

Expand All @@ -232,11 +198,10 @@ GHVar <- function(boostRF, train_data, pred_data, variant, dep, alpha, num_threa
cov_est <- rep(0, times = pred_n)

#needs to get predictions from boostRF
#maybe call this something different
tree_preds <- boostRF$tree_preds
num_trees <- ncol(tree_preds)

#test this; dont know which one is correct...
#further testing needed
in_bag <- unlist(boostRF$inbag)
dim(in_bag) <- c(dim(train_data)[1], num_trees)
in_bag <- in_bag >= 1
Expand Down
7 changes: 1 addition & 6 deletions R/HDI_quantregforest.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
# Date First Created: 2019-09-13
# Reference: Zhu, Lin, Jiaxin Lu, and Yihong Chen. "HDI-Forest: Highest Density Interval Regression Forest." arXiv preprint arXiv:1905.10101 (2019).

#' implements HDI RF prediction interval method in ...
#' Implements HDI RF prediction interval method in Zhu 2019. Helper function.
#'
#' This function implements an HDI RF prediction interval method.
#' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.
Expand All @@ -19,11 +19,6 @@
#' @param replace Sample with replacement, or not. Utilized for the two different variants outlined in Ghosal, Hooker 2018. Currently variant 2 not implemented.
#' @param num_threads The number of threads to use in parallel. Default is the current number of cores.
#' @keywords internal
#' @examples
#' HDI_quantregforest <- function(formula = NULL, train_data = NULL, test_data = NULL, alpha = NULL,
#' num_tree = NULL, mtry = NULL, min_node_size = NULL, max_depth = NULL, replace = TRUE, verbose = FALSE,
#' num_threads = NULL)
#' @noRd
HDI_quantregforest <- function(formula = NULL,
train_data = NULL,
test_data = NULL,
Expand Down
15 changes: 4 additions & 11 deletions R/Romano_Patterson_Candes_2018.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,9 @@
##
## --------------------------

#' implements RF prediction interval using split conformal prediction as outlined in Romano, Patterson, Candes 2018.
#' implements RF prediction interval using split conformal prediction as outlined in Romano, Patterson, Candes 2018. Helper function.
#'
#' This function implements split conformal prediction intervals for RFs.
#' @param love Do you love cats? Defaults to TRUE.
#' This function implements split conformal prediction intervals for RFs. Currently used in rfint().
#' @param formula Object of class formula or character describing the model to fit. Interaction terms supported only for numerical variables.
#' @param train_data Training data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Matches ranger() requirements.
#' @param pred_data Test data of class data.frame, matrix, dgCMatrix (Matrix) or gwaa.data (GenABEL). Utilizes ranger::predict() to get prediction intervals for test data.
Expand All @@ -32,17 +31,11 @@
#' @param intervals Generate prediction intervals or not.
#' @param alpha Significance level for prediction intervals.
#' @param forest_type Determines what type of forest: regression forest vs. quantile regression forest. *Should not be an option...
#' @param replace Sample with replacement, or not. Utilized for the two different variants outlined in Ghosal, Hooker 2018. Currently variant 2 not implemented.
#' @param prop Proportion of training data to sample for each tree. Currently variant 2 not implemented.
#' @param variant Choose which variant to use. Currently variant 2 not implemented.
#' @param num_threads The number of threads to use in parallel. Default is the current number of cores.
#' @param interval_type Type of prediction interval to generate.
#' Options are \code{method = c("two-sided", "lower", "upper")}. Default is \code{method = "two-sided"}.
#' @keywords internal
#' @import stats
#' @examples
#' CQRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL,
#' min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
#' intervals = TRUE, alpha = NULL, forest_type = "RF", num_threads = NULL)
#' @noRd
CQRF <- function(formula = NULL, train_data = NULL, pred_data = NULL, num_trees = NULL,
min_node_size = NULL, m_try = NULL, keep_inbag = TRUE,
intervals = TRUE, alpha = NULL, forest_type = "RF", num_threads = NULL,
Expand Down
Loading

0 comments on commit 89b7124

Please sign in to comment.