Merge pull request #20 from ocbe-uio/issue-5
Finishing issue #5
Theo-qua authored Jan 12, 2024
2 parents c8a4566 + 062fd9e commit ce752c7
Showing 16 changed files with 220 additions and 204 deletions.
72 changes: 32 additions & 40 deletions R/MADMMplasso.R
@@ -6,51 +6,51 @@
#' Categorical variables should be coded by 0-1 dummy variables: for a k-level variable, one can use either k or k-1 dummy variables.
#' @param y N by D matrix of responses. The X and Z variables are centered in the function. We recommend that X and Z also be standardized before the call
#' @param maxgrid number of lambda_3 values desired (default 50)
#' @param nlambda number of lambda_3 values desired (default 50). Similar to maxgrid but can have a value less than or equal to maxgrid.
#' @param alpha mixing parameter (default 0.5). When the goal is to include more interactions, alpha should be very small, and vice versa.
#' @param max_it maximum number of iterations in the ADMM algorithm for one lambda. Default 50000
#' @param rho the Lagrange variable for the ADMM (default 5). This value is updated during the ADMM call based on a certain condition.
#' @param e.abs absolute error for the ADMM. Default is 1E-3
#' @param e.rel relative error for the ADMM. Default is 1E-3
#' @param gg penalty term for the tree structure. This is a 2x2 matrix: the first row holds the maximum to the minimum values for lambda_1 and the second row the maximum to the minimum values for lambda_2. In the current setting, we set the maximum and the minimum to the same value because cross-validation is not carried out across lambda_1 and lambda_2. However, setting different values will work during the model fit.
#' @param my_lambda user-specified lambda_3 values. Default NULL
#' @param lambda_min the smallest value for lambda_3, as a fraction of max(lambda_3), the data-derived (lammax) entry value (i.e. the smallest value for which all coefficients are zero). Default is 0.001 if N > p, and 0.01 if N < p.
#' @param my_print Should information from each ADMM iteration be printed along the way? Default FALSE. This prints the dual and primal residuals
-#' @param alph an overrelaxation parameter in [1,1.8]. Default 1. The implementation is borrowed from Stephen Boyd's \href{https://stanford.edu/~boyd/papers/admm/lasso/lasso.html}{MATLAB code}
+#' @param alph an overrelaxation parameter in \[1, 1.8\]. Default 1. The implementation is borrowed from Stephen Boyd's \href{https://stanford.edu/~boyd/papers/admm/lasso/lasso.html}{MATLAB code}
#' @param tree The results from the hierarchical clustering of the response matrix. The easy way to obtain this is by using the function (tree_parms), which gives a default clustering. However, the user can decide on a specific structure and then input a tree that follows it.
#' @param parallel should parallel processing be used or not? Default TRUE. If set to TRUE, pal should be set to 0.
#' @param pal Should the lapply function be applied for an alternative, quicker optimization when no parallel package is available? Default is 0.
#' @param tol threshold for the non-zero coefficients. Default 1E-4
#' @param cl The number of CPUs to be used for parallel processing. Default 4
#' @param legacy If \code{TRUE}, use the R version of the algorithm. Defaults to
#' C++.
#' @return predicted values for the MADMMplasso object with the following components:
#' path: a table containing the summary of the model for each lambda_3.
#'
#' beta0: a list (length = nlambda) of estimated beta_0 coefficients, each of size 1 by ncol(y)
#'
#' beta: a list (length = nlambda) of estimated beta coefficients, each a matrix ncol(X) by ncol(y)
#'
#' BETA_hat: a list (length = nlambda) of estimated beta and theta coefficients, each a matrix (ncol(X)+ncol(X) by ncol(Z)) by ncol(y)
#'
#' theta0: a list (length = nlambda) of estimated theta_0 coefficients, each a matrix ncol(Z) by ncol(y)
#'
#' theta: a list (length = nlambda) of estimated theta coefficients, each an array ncol(X) by ncol(Z) by ncol(y)
#'
#' Lambdas: values of lambda_3 used
#'
#' non_zero: number of nonzero betas for each model in path
#'
#' LOSS: sum of squared errors for each model in path
#'
#' Y_HAT: a list (length = nlambda) of predicted responses, each nrow(X) by ncol(y)
#'
#' gg: penalty terms for the tree structure (lambda_1 and lambda_2) for each lambda_3, nlambda by 2
#'
#' @example inst/examples/MADMMplasso_example.R
#' @export
-MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = F, alph = 1.8, tree, cv = F, parallel = T, pal = 0, gg = NULL, tol = 1E-4, cl = 4, legacy = FALSE) {
+MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = F, alph = 1.8, tree, parallel = T, pal = 0, gg = NULL, tol = 1E-4, cl = 4, legacy = FALSE) {
N <- nrow(X)

p <- ncol(X)
@@ -135,7 +135,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max
non_zero_theta <- c()
my_obj <- list()

-  my_W_hat <- generate_my_w(X = X, Z = Z, quad = TRUE)
+  my_W_hat <- generate_my_w(X = X, Z = Z)

svd.w <- svd(my_W_hat)
svd.w$tu <- t(svd.w$u)
@@ -211,7 +211,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max
admm_MADMMplasso(
beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd.w, tree, my_print,
-        invmat, cv, gg[i, ],legacy
+        invmat, gg[i, ],legacy
)
}
parallel::stopCluster(cl)
@@ -222,7 +222,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max
admm_MADMMplasso(
beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat,
XtY, y, N, e.abs, e.rel, alpha, lam[g, ], alph, svd.w, tree, my_print,
-          invmat, cv, gg[g, ],legacy
+          invmat, gg[g, ],legacy
)
}
)
@@ -241,7 +241,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max
my_values <- admm_MADMMplasso(
beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print, invmat,
-        cv, gg[hh, ],legacy
+        gg[hh, ],legacy
)

beta <- my_values$beta
@@ -297,18 +297,10 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = .001, max
Y_HAT[[hh]] <- y_hat
THETA[[hh]] <- as.sparse3Darray(theta1)

-    if (cv == F) {
-      if (hh == 1) {
-        print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj1))
-      } else {
-        print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj[hh - 1], obj1))
-      }
-    } else {
-      if (hh == 1) {
-        print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj1))
-      } else {
-        print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj[hh - 1], obj1))
-      }
-    }
+    if (hh == 1) {
+      print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj1))
+    } else {
+      print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj[hh - 1], obj1))
+    }

hh <- hh + 1
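The example referenced above lives in inst/examples/MADMMplasso_example.R and is not part of this diff. For orientation, here is a minimal sketch of a call against the updated signature, which no longer takes a `cv` argument. The data sizes, the `tree_parms(y)` call, and the `gg` values are illustrative assumptions rather than the contents of the package's example file.

```r
library(MADMMplasso)

set.seed(1)
N <- 50; p <- 10; nz <- 4; D <- 6
X <- matrix(rnorm(N * p), N, p)    # main predictors
Z <- matrix(rnorm(N * nz), N, nz)  # modifying variables
y <- matrix(rnorm(N * D), N, D)    # multivariate responses

TT <- tree_parms(y)        # assumed: default hierarchical clustering of the responses
gg1 <- matrix(0.02, 2, 2)  # same max and min for lambda_1 (row 1) and lambda_2 (row 2)

fit <- MADMMplasso(
  X, Z, y,
  alpha = 0.5, maxgrid = 50, nlambda = 50, rho = 5,
  tree = TT, parallel = FALSE, pal = 1,  # pal = 1: serial lapply path, no cluster needed
  gg = gg1, tol = 1E-4, legacy = FALSE
)
fit$path  # summary table with one row per lambda_3
```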
47 changes: 23 additions & 24 deletions R/admm_MADMMplasso.R
@@ -7,49 +7,48 @@
#' variable, one can use either k or k-1 dummy variables.
#' @param y N by D matrix of responses. The X and Z variables are centered in the function. We recommend that X and Z also be standardized before the call
#' @param beta0 a vector of length ncol(y) of estimated beta_0 coefficients
#' @param theta0 matrix of the initial theta_0 coefficients ncol(Z) by ncol(y)
#' @param beta a matrix of the initial beta coefficients ncol(X) by ncol(y)
#' @param beta_hat a matrix of the initial beta and theta coefficients (ncol(X)+ncol(X) by ncol(Z)) by ncol(y)
#' @param theta an array of initial theta coefficients ncol(X) by ncol(Z) by ncol(y)
#' @param rho1 the Lagrange variable for the ADMM, which is usually included as rho in the MADMMplasso call.
#' @param max_it maximum number of iterations in loop for one lambda during the ADMM optimization. This is usually included in the MADMMplasso call
#' @param W_hat N by (p+(p by nz)) matrix of the main and interaction predictors. This is generated internally when MADMMplasso is called, or by using the function generate_my_w.
#' @param XtY a matrix formed by multiplying the transpose of X by y.
#' @param N nrow(X)
#' @param e.abs absolute error for the ADMM. This is included in the call of MADMMplasso.
#' @param e.rel relative error for the ADMM. This is included in the call of MADMMplasso.
#' @param alpha mixing parameter, usually obtained from the MADMMplasso call. When the goal is to include more interactions, alpha should be very small, and vice versa.
#' @param lambda a vector of lambda_3 values for the ADMM call, with length ncol(y). This is usually calculated in the MADMMplasso call. In our current setting, we use the same lambda_3 value for all responses.
-#' @param alph an overrelaxation parameter in [1,1.8], usually obtained from the MADMMplasso call.
+#' @param alph an overrelaxation parameter in \[1, 1.8\], usually obtained from the MADMMplasso call.
#' @param svd.w singular value decomposition of W
#' @param tree The results from the hierarchical clustering of the response matrix.
#' The easy way to obtain this is by using the function (tree_parms), which gives a default clustering.
#' However, the user can decide on a specific structure and then input a tree that follows it.
#' @param my_print Should information from each ADMM iteration be printed along the way? Default TRUE. This prints the dual and primal residuals
#' @param invmat A list of length ncol(y), each containing the C_d part of equation 32 in the paper
-#' @param cv TODO: fill paramater description
#' @param gg penalty terms for the tree structure for lambda_1 and lambda_2 for the ADMM call.
#' @param legacy If \code{TRUE}, use the R version of the algorithm. Defaults to
#' C++.
#' @return predicted values for the ADMM part
#'
#' beta0: estimated beta_0 coefficients, of size 1 by ncol(y)
#'
#' beta: estimated beta coefficients, a matrix ncol(X) by ncol(y)
#'
#' BETA_hat: estimated beta and theta coefficients, a matrix (ncol(X)+ncol(X) by ncol(Z)) by ncol(y)
#'
#' theta0: estimated theta_0 coefficients, a matrix ncol(Z) by ncol(y)
#'
#' theta: estimated theta coefficients, an array ncol(X) by ncol(Z) by ncol(y)
#'
#' converge: did the algorithm converge?
#'
#' Y_HAT: predicted response nrow(X) by ncol(y)
#' @export
-admm_MADMMplasso <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print = T, invmat, cv = cv, gg = 0.2, legacy = FALSE) {
+admm_MADMMplasso <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print = T, invmat, gg = 0.2, legacy = FALSE) {
if (!legacy) {
out <- admm_MADMMplasso_cpp(
beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y,
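The `alph` argument is the standard over-relaxation device from Boyd's ADMM material referenced in the parameter docs. As a point of reference only, here is a generic lasso-style ADMM step in that notation; it is a sketch of the technique, not the package's internal update, and `x_solve` is a placeholder for the cached linear solve.

```r
soft_threshold <- function(a, k) sign(a) * pmax(abs(a) - k, 0)

# One generic scaled-form ADMM iteration for the lasso with over-relaxation.
admm_step <- function(z, u, x_solve, Atb, lambda, rho, alph) {
  x <- x_solve(Atb + rho * (z - u))                 # x-update
  x_hat <- alph * x + (1 - alph) * z                # over-relaxation; alph = 1 is plain ADMM
  z_new <- soft_threshold(x_hat + u, lambda / rho)  # z-update (soft thresholding)
  u_new <- u + x_hat - z_new                        # scaled dual update
  list(x = x, z = z_new, u = u_new)
}
```

With alph in (1, 1.8], the relaxed iterate x_hat mixes the fresh x-update with the previous z, which often speeds convergence.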
22 changes: 11 additions & 11 deletions R/cv_MADMMplasso.R
@@ -10,21 +10,21 @@
#' the function. We recommend that x and z also be standardized before the call
#' @param nfolds number of cross-validation folds
#' @param foldid vector with values in 1:K, indicating folds for K-fold CV. Default NULL
#' @param alpha mixing parameter (default 0.5). This value should be the same as the one used for the MADMMplasso call.
#' @param lambda user-specified lambda_3 values. Default fit$Lambdas.
#' @param max_it maximum number of iterations in loop for one lambda during the ADMM optimization. Default 50000
#' @param e.abs absolute error for the ADMM. Default is 1E-3
#' @param e.rel relative error for the ADMM. Default is 1E-3
#' @param nlambda number of lambda_3 values desired (default 50). Similar to maxgrid but can have a value less than or equal to maxgrid.
#' @param rho the Lagrange variable for the ADMM (default 5). This value is updated during the ADMM call based on a certain condition.
#' @param my_print Should information from each ADMM iteration be printed along the way? Default FALSE. This prints the dual and primal residuals
-#' @param alph an overelaxation parameter in [1,1.8]. Default 1. The implementation is borrowed from Stephen Boyd's \href{https://stanford.edu/~boyd/papers/admm/lasso/lasso.html}{MATLAB code}
+#' @param alph an overrelaxation parameter in \[1, 1.8\]. Default 1. The implementation is borrowed from Stephen Boyd's \href{https://stanford.edu/~boyd/papers/admm/lasso/lasso.html}{MATLAB code}
#' @param parallel should parallel processing be used during the ADMM call or not? Default TRUE. If set to TRUE, pal should be set to 0.
#' @param pal Should the lapply function be applied for an alternative, quicker optimization when no parallel package is available? Default is 0.
#' @param gg penalty term for the tree structure obtained from the fit.
#' @param TT The results from the hierarchical clustering of the response matrix.
#' This should be the same as the parameter tree used during the MADMMplasso call.
#' @param tol threshold for the non-zero coefficients. Default 1E-4
#' @param cl The number of CPUs to be used for parallel processing. Default 2
#' @return results containing the CV values
#' @example inst/examples/cv_MADMMplasso_example.R
@@ -52,7 +52,7 @@ cv_MADMMplasso <- function(fit, nfolds, X, Z, y, alpha = 0.5, lambda = fit$Lambd
print(c("fold,", ii))
oo <- foldid == ii

-    ggg[[ii]] <- MADMMplasso(X = X[!oo, , drop = F], Z = Z[!oo, , drop = F], y = y[!oo, , drop = F], alpha = alpha, my_lambda = lambda, lambda_min = .01, max_it = max_it, e.abs = e.abs, e.rel = e.rel, nlambda = length(lambda[, 1]), rho = rho, tree = TT, my_print = my_print, alph = alph, cv = T, parallel = parallel, pal = pal, gg = gg, tol = tol, cl = cl)
+    ggg[[ii]] <- MADMMplasso(X = X[!oo, , drop = F], Z = Z[!oo, , drop = F], y = y[!oo, , drop = F], alpha = alpha, my_lambda = lambda, lambda_min = .01, max_it = max_it, e.abs = e.abs, e.rel = e.rel, nlambda = length(lambda[, 1]), rho = rho, tree = TT, my_print = my_print, alph = alph, parallel = parallel, pal = pal, gg = gg, tol = tol, cl = cl)

cv_p <- predict.MADMMplasso(ggg[[ii]], X = X[oo, , drop = F], Z = Z[oo, ], y = y[oo, ])
ggg[[ii]] <- 0
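For orientation, a sketch of how the fold loop above is typically driven: fit once, then cross-validate with the same tree and the penalty terms returned by the fit. The objects `X`, `Z`, `y`, `TT`, and `gg1` are the illustrative ones from the MADMMplasso sketch earlier; `lambda = fit$Lambdas` is the documented default, and `fit$gg` is the gg component listed among the MADMMplasso return values.

```r
fit <- MADMMplasso(
  X, Z, y, alpha = 0.5, maxgrid = 50, nlambda = 50,
  tree = TT, gg = gg1, parallel = FALSE, pal = 1
)

cv_fit <- cv_MADMMplasso(
  fit, nfolds = 5, X, Z, y,
  alpha = 0.5, lambda = fit$Lambdas,  # reuse the fitted lambda_3 sequence
  TT = TT, gg = fit$gg,               # same tree; penalty terms from the fit
  parallel = FALSE, pal = 1
)
```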
14 changes: 2 additions & 12 deletions R/generate_my_w.R
@@ -6,20 +6,13 @@
#' may represent quantitative or categorical variables, or a mixture of the two.
#' Categorical variables should be coded by 0-1 dummy variables: for a k-level
#' variable, one can use either k or k-1 dummy variables.
-#' @param quad TODO: fill in description
#'
#' @return Generated W matrix nrow(X) by (ncol(X)+ncol(X) by ncol(Z))
#' @export
-generate_my_w <- function(X = matrix(), Z = matrix(), quad = TRUE) {
+generate_my_w <- function(X = matrix(), Z = matrix()) {
p1 <- ncol(X)
p2 <- ncol(Z)

-  if (quad == FALSE) {
-    p <- p1
-    if (p1 != p2) stop("To remove quadtratic terms p1 must be equal to p2")
-    ind <- (1:p) * p + (2 * (1:p) + 1)
-  }

  # Just in case we have only one observation? Not sure why I did this
if (is.vector(X)) p1 <- length(X)
if (is.vector(Z)) p2 <- length(Z)
@@ -29,9 +22,6 @@ generate_my_w <- function(X = matrix(), Z = matrix(), quad = TRUE) {
z <- cbind(1, Z)

W <- t(apply(cbind(x, z), 1, quick_func, xn = p1))
-  if (quad == FALSE) {
-    W <- W[, -ind]
-  }

return(W)
}
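A quick sanity check of the documented dimensions, nrow(X) by (ncol(X) + ncol(X) * ncol(Z)), against the simplified signature now that `quad` is gone. The toy sizes are arbitrary.

```r
X <- matrix(rnorm(4 * 3), 4, 3)
Z <- matrix(rnorm(4 * 2), 4, 2)

W <- generate_my_w(X = X, Z = Z)
dim(W)  # expect 4 rows and 3 * (1 + 2) = 9 columns: main effects first, then X-Z interactions
```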