Merge pull request #4 from s3alfisc/dev

v0.5 -address multiple comments from the first cran submission
s3alfisc · Feb 4, 2023 · a8e2d68 · a8e2d68
2 parents 454bdbf + 202b33d
commit a8e2d68
Show file tree

Hide file tree

Showing 15 changed files with 154 additions and 142 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -12,3 +12,4 @@
 ^\.data
 
 ^cran-comments\.md$
+^CRAN-SUBMISSION$
diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md
diff --git a/CRAN-SUBMISSION b/CRAN-SUBMISSION
@@ -0,0 +1,3 @@
+Version: 0.4
+Date: 2023-01-31 21:17:12 UTC
+SHA: 330a5c75091bdc8379beee2152514daaa21838fa
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -2,12 +2,20 @@ Package: wildrwolf
 Type: Package
 Title: Fast Computation of Romano-Wolf Corrected p-Values for
        Linear Regression Models
-Version: 0.3.3
-Author: Alexander Fischer
+Version: 0.5
+Authors@R: 
+    c(
+    person(given = "Alexander",
+           family = "Fischer",
+           role = c("aut", "cre"),
+           email = "alexander-fischer1801@t-online.de")
+    )
 Maintainer: Alexander Fischer <alexander-fischer1801@t-online.de>
-Description: Fast Routines to Compute Romano-Wolf corrected p-Values for 
-             objects of type "fixest" and "fixest_multi" via a wild (cluster) 
-             bootstrap.
+Description: Fast Routines to Compute Romano-Wolf corrected p-Values 
+             (Romano and Wolf (2005a) <DOI:10.1198/016214504000000539>, 
+             Romano and Wolf (2005b) <DOI:10.1111/j.1468-0262.2005.00615.x>) 
+             for objects of type "fixest" and "fixest_multi" from the "fixest" 
+             package via a wild (cluster) bootstrap.
 License: GPL (>= 3)
 Encoding: UTF-8
 LazyData: true

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,6 +1,5 @@
 # Generated by roxygen2: do not edit by hand
 
-S3method(summary,rwolf)
 export(run_fwer_sim)
 export(rwolf)
 importFrom(MASS,mvrnorm)

diff --git a/R/rwolf.R b/R/rwolf.R
@@ -46,7 +46,13 @@
 #' 
 #' @return 
 #' 
-#' An object of type `rwolf`
+#' A data.frame containing the following columns: 
+#' \item{model}{Index of Models}
+#' \item{Estimate}{The estimated coefficient of `param` in the respective model.}
+#' \item{Std. Error}{The estimated standard error of `param` in the respective model.}
+#' \item{t value}{The t statistic of `param` in the respective model.}
+#' \item{Pr(>|t|)}{The uncorrected pvalue for `param` in the respective model.}
+#' \item{RW Pr(>|t|)}{The Romano-Wolf corrected pvalue of hypothesis test for `param` in the respective model.}
 #' 
 #' @examples
 #'  
@@ -75,7 +81,7 @@
 #' 
 #' res <- feols(c(Y1, Y2, Y3) ~ X1, data = data, cluster = ~ cluster)
 #' res_rwolf <- rwolf(models = res, param = "X1", B = B)
-#' summary(res_rwolf)
+#' res_rwolf
 #' 
 #' @references 
 #' Clarke, Romano & Wolf (2019), STATA Journal. 
@@ -149,6 +155,7 @@ rwolf <- function(
   # absolute value for t-stats
   t_stats <- abs(
     unlist(lapply(1:S, function(x) get_stats_fixest(x, stat = "t value"))))
+
   # repeat line: for multiway clustering, it is not clear how many bootstrap 
   # test statistics will be invalied - for oneway, 
   # all vectors of length(boot_coefs) \leq B
@@ -239,33 +246,6 @@ rwolf <- function(
   rownames(models_info) <- NULL
   models_info[, "RW Pr(>|t|)"] <- pval
 
-  res <- list(
-    call = call,
-    models_info = models_info,
-    coefs = coefs,
-    t_stats = t_stats,
-    boot_coefs = boot_coefs,
-    boot_ses = boot_ses,
-    boot_t_stats = boot_t_stats,
-    pval = pval
-  )
-
-  # create class of type rwolf
-  class(res) <- "rwolf"
-
-  invisible(res)
-
+  models_info
 
 }
-
-summary.rwolf <- function(object, digits, ...){
-  #' Summary method for objects of type rwolf
-  #' @param object An object of type rwolf
-  #' @param digits Rounding of digits
-  #' @param ... misc. function arguments
-  #' @export
-  as.data.frame(object$models_info)
-}
-
-
-
diff --git a/R/rwolf_sim.R b/R/rwolf_sim.R
@@ -15,14 +15,15 @@ fwer_sim <- function(rho, N, s, B, G = 20){
   #' 
   #' @return
   #' A 'data.frame' containing unadjusted p-values & p-values adjusted using the
-  #' methods by Holm and Romano & Wolf (2005)
+  #' methods by Holm and Romano & Wolf (2005), with the following columns
+  #' 
 
 
   dreamerr::check_arg(N, "integer scalar")
   dreamerr::check_arg(B, "integer scalar")
   dreamerr::check_arg(s, "integer scalar")
   dreamerr::check_arg(rho, "numeric scalar")
-  dreamerr::check_arg(G, "integer scalar")
+  dreamerr::check_arg(G, "integer scalar | NULL")
 
 
   if(s %% 2 != 0){
@@ -98,7 +99,9 @@ fwer_sim <- function(rho, N, s, B, G = 20){
       B = B,
       nthreads = 1,
       bootstrap_type = "11"
-    )$pval
+    )
+
+    fit_padjust_rw <- fit_padjust_rw[, "RW Pr(>|t|)"]
 
     # fit_padjust_wy <- wildwyoung::wyoung(
     #   fit,
@@ -130,6 +133,8 @@ run_fwer_sim <- function(
     s = 6, 
     G = 20){
   #' 
+  #' Family Wise Error Rate Simulations
+  #' 
   #' Run a MC simulation study on family-wise error rates (FWERs)
   #' for the Holm and Romano & Wolf Methods multiple
   #' hypothesis adjustment methods given true null effects
@@ -145,7 +150,15 @@ run_fwer_sim <- function(
   #'
   #' @export
   #'
-  #'
+  #' @return 
+  #' A data frame containing familiy wise rejection rates for uncorrected 
+  #' pvalues and corrected pvalues using Holm's and the Romano-Wolf method.
+  #' 
+  #' \item{reject_5}{The family wise rejection rate at a 5\%  level}
+  #' \item{reject_10}{The family wise rejection rate at a 10\%  level}
+  #' \item{rho}{The correlation between the outcome variables. See function
+  #'  argument`rho` for more information.}
+  #'  
   #' @examples
   #' \dontrun{
   #'
@@ -157,6 +170,11 @@ run_fwer_sim <- function(
   #'   s = 10
   #' )
   #' res
+  #' 
+  #' # >                 reject_5 reject_10 rho
+  #' # > fit_pvalue         0.282     0.502 0.5
+  #' # > fit_pvalue_holm    0.061     0.104 0.5
+  #' # > fit_padjust_rw     0.059     0.105 0.5
   #'
   #' }
 
@@ -168,7 +186,6 @@ run_fwer_sim <- function(
 
     all_rho_i <- list()
     for(i in 1:n_sims){
-      cat("n_sims: ", i, "\n")
       all_rho_i[[i]] <- fwer_sim(rho = rho[x], B = B, N = N, s = s, G = G)
     }
     all_rho[[x]] <- all_rho_i

diff --git a/README.Rmd b/README.Rmd
@@ -96,10 +96,8 @@ res_rwolf1 <- wildrwolf::rwolf(
 pvals <- lapply(fit, function(x) pvalue(x)["X1"]) |> unlist()
 
 # Romano-Wolf Corrected P-values
-summary(res_rwolf1)
+res_rwolf1
 
-# Holm Corrected P-values
-p.adjust(pvals, method = "holm") |> round(4)
 ```
 
 ## Example II
@@ -115,14 +113,14 @@ res_rwolf2 <- rwolf(
   param = "X1",  
   B = 9999
 )
-summary(res_rwolf2)
+res_rwolf2
 ```
 
 ## Performance
 
 The above procedure with `S=8` hypotheses, `N=1000` observations and `k %in% (1,2)` parameters finishes in around 5 seconds.
 
-```{r, warning = FALSE, message = FALSE, eval = FALSE}
+```{r, warning = FALSE, message = FALSE}
 if(requireNamespace("microbenchmark")){
   
   microbenchmark::microbenchmark(
@@ -135,11 +133,6 @@ if(requireNamespace("microbenchmark")){
     times = 1
   )
  
-# t: seconds
-#         expr      min       lq     mean   median       uq
-#  Romano-Wolf 4.834184 4.834184 4.834184 4.834184 4.834184
-#       max neval
-#  4.834184     1   
 }
 ```
 
@@ -240,7 +233,7 @@ models <- feols(c(Y1, Y2, Y3, Y4) ~ X1 + X2
 ```
 
 ```{r}
-summary(rwolf(models, param = "X1", B = 9999, seed = 123))
+rwolf(models, param = "X1", B = 9999, seed = 123)
 
 ```
 

diff --git a/README.md b/README.md
@@ -95,7 +95,7 @@ res_rwolf1 <- wildrwolf::rwolf(
 pvals <- lapply(fit, function(x) pvalue(x)["X1"]) |> unlist()
 
 # Romano-Wolf Corrected P-values
-summary(res_rwolf1)
+res_rwolf1
 #>   model     Estimate Std. Error    t value      Pr(>|t|) RW Pr(>|t|)
 #> 1     1    0.9896609 0.04204902   23.53588  8.811393e-98      0.0001
 #> 2     2    0.9713667 0.03201663   30.33945 9.318861e-144      0.0001
@@ -105,15 +105,6 @@ summary(res_rwolf1)
 #> 6     6    0.3925661 0.03096423   12.67805  2.946569e-34      0.0001
 #> 7     7    0.0206361 0.04405654  0.4684003     0.6396006      0.9542
 #> 8     8  0.001657765 0.03337464 0.04967138     0.9603942      0.9798
-
-# Holm Corrected P-values
-p.adjust(pvals, method = "holm") |> round(4)
-#>      lhs: Y1; rhs: X1.X1 lhs: Y1; rhs: X1 + X2.X1      lhs: Y2; rhs: X1.X1 
-#>                        0                        0                        1 
-#> lhs: Y2; rhs: X1 + X2.X1      lhs: Y3; rhs: X1.X1 lhs: Y3; rhs: X1 + X2.X1 
-#>                        1                        0                        0 
-#>      lhs: Y4; rhs: X1.X1 lhs: Y4; rhs: X1 + X2.X1 
-#>                        1                        1
 ```
 
 ## Example II
@@ -130,7 +121,7 @@ res_rwolf2 <- rwolf(
   B = 9999
 )
 #>   |                                                                              |                                                                      |   0%  |                                                                              |==================                                                    |  25%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================================                  |  75%  |                                                                              |======================================================================| 100%
-summary(res_rwolf2)
+res_rwolf2
 #>   model     Estimate Std. Error    t value      Pr(>|t|) RW Pr(>|t|)
 #> 1     1    0.9896609 0.04341633   22.79467  6.356963e-93      0.0001
 #> 2     2    0.9713667 0.03186495   30.48386 9.523796e-145      0.0001
@@ -156,12 +147,11 @@ if(requireNamespace("microbenchmark")){
     times = 1
   )
 
-# t: seconds
-#         expr      min       lq     mean   median       uq
-#  Romano-Wolf 4.834184 4.834184 4.834184 4.834184 4.834184
-#       max neval
-#  4.834184     1   
 }
+#>   |                                                                              |                                                                      |   0%  |                                                                              |=========                                                             |  12%  |                                                                              |==================                                                    |  25%  |                                                                              |==========================                                            |  38%  |                                                                              |===================================                                   |  50%  |                                                                              |============================================                          |  62%  |                                                                              |====================================================                  |  75%  |                                                                              |=============================================================         |  88%  |                                                                              |======================================================================| 100%
+#> Unit: seconds
+#>         expr      min       lq     mean   median       uq      max neval
+#>  Romano-Wolf 3.621236 3.621236 3.621236 3.621236 3.621236 3.621236     1
 ```
 
 ## But does it work? Monte Carlo Experiments
@@ -270,7 +260,7 @@ models <- feols(c(Y1, Y2, Y3, Y4) ~ X1 + X2
 ```
 
 ``` r
-summary(rwolf(models, param = "X1", B = 9999, seed = 123))
+rwolf(models, param = "X1", B = 9999, seed = 123)
 #>   |                                                                              |                                                                      |   0%  |                                                                              |==================                                                    |  25%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================================                  |  75%  |                                                                              |======================================================================| 100%
 #>   model    Estimate Std. Error    t value      Pr(>|t|) RW Pr(>|t|)
 #> 1     1   0.9713667 0.03201663   30.33945 9.318861e-144      0.0001