diff --git a/.Rbuildignore b/.Rbuildignore
index fd6fe61..91a3227 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -4,3 +4,4 @@
 CITATION.cff
 aux/
 ^\.github$
+^.*\.out
diff --git a/.gitignore b/.gitignore
index c9dc225..be9a66a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@ src/*.o
 src/*.so
 src/*.dll
 aux/
+*.out
diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R
index 68082b4..b7e6f40 100644
--- a/R/MADMMplasso.R
+++ b/R/MADMMplasso.R
@@ -49,7 +49,10 @@
 
 #' @example inst/examples/MADMMplasso_example.R
 #' @export
-MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, parallel = TRUE, pal = FALSE, gg = NULL, tol = 1E-4, cl = 4, legacy = FALSE) {
+MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, parallel = TRUE, pal = !parallel, gg = NULL, tol = 1E-4, cl = 4, legacy = FALSE) {
+  if (parallel && pal) {
+    stop("parallel and pal cannot be TRUE at the same time")
+  }
   N <- nrow(X)
 
   p <- ncol(X)
@@ -181,8 +184,8 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma
 
   r_current <- y
   b <- reg(r_current, Z)
-  beta0 <- b$beta0
-  theta0 <- b$theta0
+  beta0 <- b[1, ]
+  theta0 <- b[-1, ]
 
   new_y <- y - (matrix(1, N) %*% beta0 + Z %*% ((theta0)))
 
@@ -190,51 +193,100 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma
 
 
   cl1 <- cl
+
+  # Adjusting objects for C++
+  if (!legacy) {
+    C <- TT$Tree
+    CW <- TT$Tw
+    svd_w_tu <- t(svd.w$u)
+    svd_w_tv <- t(svd.w$v)
+    svd_w_d <- svd.w$d
+    BETA <- array(0, c(p, D, nlambda))
+    BETA_hat <- array(0, c(p + p * K, D, nlambda))
+  }
+
+  # Pre-calculating my_values through my_values_matrix
   if (parallel) {
     cl <- makeCluster(cl1, type = "FORK")
-
     doParallel::registerDoParallel(cl = cl)
     foreach::getDoParRegistered()
-
-    my_values_matrix <- foreach(i = 1:nlambda, .packages = "MADMMplasso", .combine = rbind) %dopar% {
-      admm_MADMMplasso(
-        beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
-        y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd.w, tree, my_print,
-        invmat, gg[i, ], legacy
-      )
+    if (legacy) {
+      my_values_matrix <- foreach(i = 1:nlambda, .packages = "MADMMplasso", .combine = rbind) %dopar% {
+        admm_MADMMplasso(
+          beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
+          y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd.w, tree, my_print,
+          invmat, gg[i, ]
+        )
+      }
+    } else {
+      my_values_matrix <- foreach(i = 1:nlambda, .packages = "MADMMplasso", .combine = rbind) %dopar% {
+        admm_MADMMplasso_cpp(
+          beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
+          y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd_w_tu, svd_w_tv, svd_w_d,
+          C, CW, gg[i, ], my_print
+        )
+      }
     }
     parallel::stopCluster(cl)
 
     # Converting to list so hh_nlambda_loop_cpp can handle it
-    for (hh in seq_len(nrow(my_values_matrix))) {
-      my_values[[hh]] <- my_values_matrix[hh, ]
+    if (nlambda == 1) {
+      my_values <- list(my_values_matrix)
+    } else {
+      my_values <- list()
+      for (hh in seq_len(nlambda)) {
+        my_values[[hh]] <- my_values_matrix[hh, ]
+      }
     }
   } else if (!parallel && !pal) {
-    my_values <- lapply(
-      seq_len(nlambda),
-      function(g) {
-        admm_MADMMplasso(
-          beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat,
-          XtY, y, N, e.abs, e.rel, alpha, lam[g, ], alph, svd.w, tree, my_print,
-          invmat, gg[g, ], legacy
-        )
-      }
-    )
+    if (legacy) {
+      my_values <- lapply(
+        seq_len(nlambda),
+        function(g) {
+          admm_MADMMplasso(
+            beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat,
+            XtY, y, N, e.abs, e.rel, alpha, lam[g, ], alph, svd.w, tree, my_print,
+            invmat, gg[g, ]
+          )
+        }
+      )
+    } else {
+      my_values <- lapply(
+        seq_len(nlambda),
+        function(i) {
+          admm_MADMMplasso_cpp(
+            beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
+            y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd_w_tu, svd_w_tv, svd_w_d,
+            C, CW, gg[i, ], my_print
+          )
+        }
+      )
+    }
   } else {
-    # This is triggered when parallel is FALSE and pal is 1
+    # This is triggered when parallel is FALSE and pal is TRUE
     my_values <- list()
   }
 
-  loop_output <- hh_nlambda_loop(
-    lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it,
-    my_W_hat, XtY, y, N, e.abs, e.rel, alpha, alph, svd.w, tree, my_print,
-    invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA,
-    BETA_hat, Y_HAT, THETA, D, my_values, legacy
-  )
-
-  remove(invmat)
-  remove(my_W_hat)
+  # Big calculations
+  if (legacy) {
+    loop_output <- hh_nlambda_loop(
+      lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it,
+      my_W_hat, XtY, y, N, e.abs, e.rel, alpha, alph, svd.w, tree, my_print,
+      invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA,
+      BETA_hat, Y_HAT, THETA, D, my_values
+    )
+  } else {
+    loop_output <- hh_nlambda_loop_cpp(
+      lam, as.integer(nlambda), beta0, theta0, beta, beta_hat, theta, rho1, X, Z, as.integer(max_it),
+      my_W_hat, XtY, y, as.integer(N), e.abs, e.rel, alpha, alph, my_print,
+      gg, tol, parallel, pal, simplify2array(BETA0), simplify2array(THETA0),
+      BETA, BETA_hat, simplify2array(Y_HAT),
+      as.integer(D), C, CW, svd_w_tu, svd_w_tv, svd_w_d, my_values
+    )
+    loop_output <- post_process_cpp(loop_output)
+  }
 
+  # Final adjustments in output
   loop_output$obj[1] <- loop_output$obj[2]
 
   pred <- data.frame(
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 8f073e3..9b0b51d 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -23,18 +23,20 @@
 #' @param alpha mixing parameter, usually obtained from the MADMMplasso call. When the goal is to include more interactions, alpha should be very small and vice versa.
 #' @param lambda a vector  lambda_3 values for the admm call with length ncol(y). This is usually calculated in the MADMMplasso call.   In our current setting, we use the same the lambda_3 value for all responses.
 #' @param alph an overrelaxation parameter in \[1, 1.8\], usually obtained from the MADMMplasso call.
-#' @param svd_w singular value decomposition of W
-#' @param tree The results from the hierarchical clustering of the response matrix.
+#' @param svd_w_tu the transpose of the U matrix from the SVD of W_hat
+#' @param svd_w_tv the transpose of the V matrix from the SVD of W_hat
+#' @param svd_w_d the D matrix from the SVD of W_hat
+#' @param C the trained tree
+#' @param CW weights for the trained tree
 #' The easy way to obtain this is by using the function (tree_parms) which gives a default clustering.
 #' However, user decide on a specific structure and then input a tree that follows such structure.
 #' @param my_print Should information form each ADMM iteration be printed along the way? Default TRUE. This prints  the dual and primal residuals
-#' @param invmat A list of length ncol(y), each containing the C_d part of equation 32 in the paper
 #' @param gg penalty terms for the tree structure for lambda_1 and  lambda_2 for the admm call.
 #' @return  predicted values for the ADMM part
-#' @description TODO: add description
+#' @description This function fits a multi-response pliable lasso model over a path of regularization values.
 #' @export
-admm_MADMMplasso_cpp <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e_abs, e_rel, alpha, lambda, alph, svd_w, tree, invmat, gg, my_print = TRUE) {
-    .Call(`_MADMMplasso_admm_MADMMplasso_cpp`, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e_abs, e_rel, alpha, lambda, alph, svd_w, tree, invmat, gg, my_print)
+admm_MADMMplasso_cpp <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e_abs, e_rel, alpha, lambda, alph, svd_w_tu, svd_w_tv, svd_w_d, C, CW, gg, my_print = TRUE) {
+    .Call(`_MADMMplasso_admm_MADMMplasso_cpp`, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e_abs, e_rel, alpha, lambda, alph, svd_w_tu, svd_w_tv, svd_w_d, C, CW, gg, my_print)
 }
 
 count_nonzero_a_cpp <- function(x) {
@@ -49,16 +51,20 @@ count_nonzero_a_cube <- function(x) {
     .Call(`_MADMMplasso_count_nonzero_a_cube`, x)
 }
 
-hh_nlambda_loop_cpp <- function(lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e_abs, e_rel, alpha, alph, svd_w, tree, my_print, invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA, BETA_hat, Y_HAT, THETA, D, my_values) {
-    .Call(`_MADMMplasso_hh_nlambda_loop_cpp`, lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e_abs, e_rel, alpha, alph, svd_w, tree, my_print, invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA, BETA_hat, Y_HAT, THETA, D, my_values)
+count_nonzero_a_mat <- function(x) {
+    .Call(`_MADMMplasso_count_nonzero_a_mat`, x)
 }
 
-model_intercept <- function(beta0, theta0, beta, theta, X, Z) {
-    .Call(`_MADMMplasso_model_intercept`, beta0, theta0, beta, theta, X, Z)
+hh_nlambda_loop_cpp <- function(lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e_abs, e_rel, alpha, alph, my_print, gg, tol, parallel, pal, BETA0, THETA0, BETA, BETA_hat, Y_HAT, D, C, CW, svd_w_tu, svd_w_tv, svd_w_d, my_values) {
+    .Call(`_MADMMplasso_hh_nlambda_loop_cpp`, lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e_abs, e_rel, alpha, alph, my_print, gg, tol, parallel, pal, BETA0, THETA0, BETA, BETA_hat, Y_HAT, D, C, CW, svd_w_tu, svd_w_tv, svd_w_d, my_values)
 }
 
-model_p <- function(beta0, theta0, beta, theta, X, Z) {
-    .Call(`_MADMMplasso_model_p`, beta0, theta0, beta, theta, X, Z)
+model_intercept <- function(beta, X) {
+    .Call(`_MADMMplasso_model_intercept`, beta, X)
+}
+
+model_p <- function(beta0, theta0, beta, X, Z) {
+    .Call(`_MADMMplasso_model_p`, beta0, theta0, beta, X, Z)
 }
 
 modulo <- function(x, n) {
diff --git a/R/admm_MADMMplasso.R b/R/admm_MADMMplasso.R
index 0b5b54c..37f135f 100644
--- a/R/admm_MADMMplasso.R
+++ b/R/admm_MADMMplasso.R
@@ -32,19 +32,7 @@
 
 
 #' @export
-admm_MADMMplasso <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print, invmat, gg = 0.2, legacy = FALSE) {
-  if (!legacy) {
-    out <- admm_MADMMplasso_cpp(
-      beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y,
-      N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, invmat, gg, my_print
-    )
-    return(out)
-  }
-  warning(
-    "Using legacy R code for MADMMplasso. ",
-    "This functionality will be removed in a future release. ",
-    "Please consider using legacy = FALSE instead."
-  )
+admm_MADMMplasso <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print, invmat, gg = 0.2) {
   TT <- tree
 
   C <- TT$Tree
@@ -115,10 +103,10 @@ admm_MADMMplasso <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, m
   rho <- rho1
   Big_beta11 <- V
   for (i in 2:max_it) {
-    r_current <- (y - model_intercept(beta0, theta0, beta = beta_hat, theta, X = W_hat, Z))
+    r_current <- (y - model_intercept(beta_hat, W_hat))
     b <- reg(r_current, Z) # Analytic solution how no sample lower bound (Z.T @ Z + cI)^-1 @ (Z.T @ r)
-    beta0 <- b$beta0
-    theta0 <- b$theta0
+    beta0 <- b[1, ]
+    theta0 <- b[-1, ]
 
     new_y <- y - (matrix(1, N) %*% beta0 + Z %*% ((theta0)))
 
@@ -368,7 +356,7 @@ admm_MADMMplasso <- function(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, m
     theta[, , jj] <- (beta_hat1[, -1])
     beta_hat[, jj] <- c(c(beta_hat1[, 1], as.vector(theta[, , jj])))
   }
-  y_hat <- model_p(beta0, theta0, beta = beta_hat, theta, X = W_hat, Z)
+  y_hat <- model_p(beta0, theta0, beta = beta_hat, X = W_hat, Z)
 
   out <- list(beta0 = beta0, theta0 = theta0, beta = beta, theta = theta, converge = converge, obj = obj, beta_hat = beta_hat, y_hat = y_hat)
 
diff --git a/R/hh_nlambda_loop.R b/R/hh_nlambda_loop.R
index b067bed..991f14f 100644
--- a/R/hh_nlambda_loop.R
+++ b/R/hh_nlambda_loop.R
@@ -2,101 +2,91 @@ hh_nlambda_loop <- function(
   lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it,
   my_W_hat, XtY, y, N, e.abs, e.rel, alpha, alph, svd.w, tree, my_print,
   invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA,
-  BETA_hat, Y_HAT, THETA, D, my_values, legacy = TRUE
+  BETA_hat, Y_HAT, THETA, D, my_values
 ) {
-  if (legacy) {
-    obj <- NULL
-    non_zero_theta <- NULL
-    my_obj <- list()
-    n_main_terms <- NULL
-    lam_list <- list()
-    hh <- 1
-    while (hh <= nlambda) {
-      lambda <- lam[hh, ]
+  obj <- NULL
+  non_zero_theta <- NULL
+  my_obj <- list()
+  n_main_terms <- NULL
+  lam_list <- list()
+  hh <- 1
+  while (hh <= nlambda) {
+    lambda <- lam[hh, ]
 
-      start_time <- Sys.time()
-      if (pal) {
-        my_values <- admm_MADMMplasso(
-          beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
-          y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print, invmat,
-          gg[hh, ], legacy
-        )
+    start_time <- Sys.time()
+    if (pal) {
+      my_values <- admm_MADMMplasso(
+        beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
+        y, N, e.abs, e.rel, alpha, lambda, alph, svd.w, tree, my_print, invmat,
+        gg[hh, ]
+      )
+      beta <- my_values$beta
+      theta <- my_values$theta
+      my_obj[[hh]] <- list(my_values$obj)
+      beta0 <- my_values$beta0
+      theta0 <- my_values$theta0 ### iteration
+      beta_hat <- my_values$beta_hat
+      y_hat <- my_values$y_hat
+    }
+    cost_time <- Sys.time() - start_time
+    if (my_print) {
+      print(cost_time)
+    }
+    if (parallel && !pal) {
+      beta <- my_values[[hh]]$beta
+      theta <- my_values[[hh]]$theta
+      my_obj[[hh]] <- list(my_values[[hh]]$obj)
+      beta0 <- my_values[[hh]]$beta0
+      theta0 <- my_values[[hh]]$theta0 ### iteration
+      beta_hat <- my_values[[hh]]$beta_hat
+      y_hat <- my_values[[hh]]$y_hat
+    } else if (!parallel && !pal) {
+      beta <- my_values[[hh]]$beta
+      theta <- my_values[[hh]]$theta
+      my_obj[[hh]] <- list(my_values[[hh]]$obj)
+      beta0 <- my_values[[hh]]$beta0
+      theta0 <- my_values[[hh]]$theta0 ### iteration
+      beta_hat <- my_values[[hh]]$beta_hat
+      y_hat <- my_values[[hh]]$y_hat
+    }
+    # Executed if par == TRUE, independent of parallel
 
-        beta <- my_values$beta
-        theta <- my_values$theta
-        my_obj[[hh]] <- list(my_values$obj)
-        beta0 <- my_values$beta0
-        theta0 <- my_values$theta0 ### iteration
-        beta_hat <- my_values$beta_hat
-        y_hat <- my_values$y_hat
-      }
-      cost_time <- Sys.time() - start_time
-      if (my_print) {
-        print(cost_time)
-      }
-      if (parallel && !pal) {
-        beta <- my_values[hh, ]$beta
-        theta <- my_values[hh, ]$theta
-        my_obj[[hh]] <- list(my_values[hh, ]$obj)
-        beta0 <- my_values[hh, ]$beta0
-        theta0 <- my_values[hh, ]$theta0 ### iteration
-        beta_hat <- my_values[hh, ]$beta_hat
-        y_hat <- my_values[hh, ]$y_hat
-      } else if (!parallel && !pal) {
-        beta <- my_values[[hh]]$beta
-        theta <- my_values[[hh]]$theta
-        my_obj[[hh]] <- list(my_values[[hh]]$obj)
-        beta0 <- my_values[[hh]]$beta0
-        theta0 <- my_values[[hh]]$theta0 ### iteration
-        beta_hat <- my_values[[hh]]$beta_hat
-        y_hat <- my_values[[hh]]$y_hat
-      }
-      # Executed if par == TRUE, independent of parallel
-
-      beta1 <- as(beta * (abs(beta) > tol), "sparseMatrix")
-      theta1 <- as.sparse3Darray(theta * (abs(theta) > tol))
-      beta_hat1 <- as(beta_hat * (abs(beta_hat) > tol), "sparseMatrix")
+    beta1 <- as(beta * (abs(beta) > tol), "sparseMatrix")
+    theta1 <- as.sparse3Darray(theta * (abs(theta) > tol))
+    beta_hat1 <- as(beta_hat * (abs(beta_hat) > tol), "sparseMatrix")
 
-      n_interaction_terms <- count_nonzero_a((theta1))
+    n_interaction_terms <- count_nonzero_a((theta1))
 
-      n_main_terms <- (c(n_main_terms, count_nonzero_a((beta1))))
+    n_main_terms <- (c(n_main_terms, count_nonzero_a((beta1))))
 
-      obj1 <- (sum(as.vector((y - y_hat)^2))) / (D * N)
-      obj <- c(obj, obj1)
+    obj1 <- (sum(as.vector((y - y_hat)^2))) / (D * N)
+    obj <- c(obj, obj1)
 
-      non_zero_theta <- (c(non_zero_theta, n_interaction_terms))
-      lam_list <- (c(lam_list, lambda))
+    non_zero_theta <- (c(non_zero_theta, n_interaction_terms))
+    lam_list <- (c(lam_list, lambda))
 
-      BETA0[[hh]] <- beta0
-      THETA0[[hh]] <- theta0
-      BETA[[hh]] <- as(beta1, "sparseMatrix")
-      BETA_hat[[hh]] <- as(beta_hat1, "sparseMatrix")
+    BETA0[[hh]] <- beta0
+    THETA0[[hh]] <- theta0
+    BETA[[hh]] <- as(beta1, "sparseMatrix")
+    BETA_hat[[hh]] <- as(beta_hat1, "sparseMatrix")
 
-      Y_HAT[[hh]] <- y_hat
-      THETA[[hh]] <- as.sparse3Darray(theta1)
+    Y_HAT[[hh]] <- y_hat
+    THETA[[hh]] <- as.sparse3Darray(theta1)
 
-      if (my_print) {
-        if (hh == 1) {
-          print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj1))
-        } else {
-          print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj[hh - 1], obj1))
-        }
+    if (my_print) {
+      if (hh == 1) {
+        print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj1))
+      } else {
+        print(c(hh, (n_main_terms[hh]), non_zero_theta[hh], obj[hh - 1], obj1))
       }
+    }
 
-      hh <- hh + 1
-    } ### lambda
-    out <- list(
-      obj = obj, n_main_terms = n_main_terms, non_zero_theta = non_zero_theta,
-      BETA0 = BETA0, THETA0 = THETA0, BETA = BETA, BETA_hat = BETA_hat,
-      Y_HAT = Y_HAT, THETA = THETA
-    )
-  } else {
-    out <- hh_nlambda_loop_cpp(
-      lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it,
-      my_W_hat, XtY, y, N, e.abs, e.rel, alpha, alph, svd.w, tree, my_print,
-      invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA,
-      BETA_hat, Y_HAT, THETA, D, my_values
-    )
-  }
+    hh <- hh + 1
+  } ### lambda
+  out <- list(
+    obj = obj, n_main_terms = n_main_terms, non_zero_theta = non_zero_theta,
+    BETA0 = BETA0, THETA0 = THETA0, BETA = BETA, BETA_hat = BETA_hat,
+    Y_HAT = Y_HAT, THETA = THETA
+  )
   return(out)
 }
diff --git a/R/objective.R b/R/objective.R
index fc65411..0a99ce5 100644
--- a/R/objective.R
+++ b/R/objective.R
@@ -1,5 +1,5 @@
 objective <- function(beta0, theta0, beta, theta, X, Z, y, alpha, lambda, p, N, IB, W, beta1) {
-  loss <- (norm(y - model_p(beta0, theta0, beta = beta1, theta, X = W, Z), type = "F")^2)
+  loss <- (norm(y - model_p(beta0, theta0, beta = beta1, X = W, Z), type = "F")^2)
   mse <- (1 / (2 * N)) * loss
 
   l_1 <- sum(abs(beta))
diff --git a/R/post_process_cpp.R b/R/post_process_cpp.R
new file mode 100644
index 0000000..84ce1fa
--- /dev/null
+++ b/R/post_process_cpp.R
@@ -0,0 +1,11 @@
+post_process_cpp <- function(lst) {
+  array2list <- function(ra) {
+    return(apply(ra, 3, function(x) x, simplify = FALSE))
+  }
+  lst$BETA0 <- array2list(lst$BETA0)
+  lst$THETA0 <- array2list(lst$THETA0)
+  lst$BETA <- array2list(lst$BETA)
+  lst$BETA_hat <- array2list(lst$BETA_hat)
+  lst$Y_HAT <- array2list(lst$Y_HAT)
+  return(lst)
+}
diff --git a/R/predict.MADMMplasso.R b/R/predict.MADMMplasso.R
index 858ba11..46fb99d 100644
--- a/R/predict.MADMMplasso.R
+++ b/R/predict.MADMMplasso.R
@@ -78,7 +78,7 @@ predict.MADMMplasso <- function(object, X, Z, y, lambda = NULL, ...) {
     pTHETA0[[ii]] <- theta0
 
 
-    n_i <- (model_p(beta0, theta0, beta = beta_hat, theta, X = my_W_hat, Z))
+    n_i <- (model_p(beta0, theta0, beta = beta_hat, X = my_W_hat, Z))
     Dev <- (sum(as.vector((y - (n_i))^2))) / (D * N)
     DEV[ii] <- (Dev)
     yh[, , ii] <- as.matrix(n_i)
diff --git a/man/MADMMplasso.Rd b/man/MADMMplasso.Rd
index d49ffb3..b30a406 100644
--- a/man/MADMMplasso.Rd
+++ b/man/MADMMplasso.Rd
@@ -23,7 +23,7 @@ MADMMplasso(
   alph = 1.8,
   tree,
   parallel = TRUE,
-  pal = FALSE,
+  pal = !parallel,
   gg = NULL,
   tol = 1e-04,
   cl = 4,
diff --git a/man/admm_MADMMplasso.Rd b/man/admm_MADMMplasso.Rd
index 67c5c1a..e2a50a2 100644
--- a/man/admm_MADMMplasso.Rd
+++ b/man/admm_MADMMplasso.Rd
@@ -27,8 +27,7 @@ admm_MADMMplasso(
   tree,
   my_print,
   invmat,
-  gg = 0.2,
-  legacy = FALSE
+  gg = 0.2
 )
 }
 \arguments{
@@ -78,8 +77,6 @@ Categorical variables should be coded by 0-1 dummy variables: for a k-level vari
 \item{invmat}{A list of length ncol(y), each containing the C_d part of equation 32 in the paper}
 
 \item{gg}{penalty terms for the tree structure for lambda_1 and  lambda_2 for the admm call.}
-
-\item{legacy}{If \code{TRUE}, use the R version of the algorithm}
 }
 \value{
 predicted values for the ADMM part
diff --git a/man/admm_MADMMplasso_cpp.Rd b/man/admm_MADMMplasso_cpp.Rd
index 5e7a1b2..d266146 100644
--- a/man/admm_MADMMplasso_cpp.Rd
+++ b/man/admm_MADMMplasso_cpp.Rd
@@ -23,9 +23,11 @@ admm_MADMMplasso_cpp(
   alpha,
   lambda,
   alph,
-  svd_w,
-  tree,
-  invmat,
+  svd_w_tu,
+  svd_w_tv,
+  svd_w_d,
+  C,
+  CW,
   gg,
   my_print = TRUE
 )
@@ -70,14 +72,18 @@ variable, one can use either k or k-1  dummy variables.}
 
 \item{alph}{an overrelaxation parameter in [1, 1.8], usually obtained from the MADMMplasso call.}
 
-\item{svd_w}{singular value decomposition of W}
+\item{svd_w_tu}{the transpose of the U matrix from the SVD of W_hat}
 
-\item{tree}{The results from the hierarchical clustering of the response matrix.
+\item{svd_w_tv}{the transpose of the V matrix from the SVD of W_hat}
+
+\item{svd_w_d}{the D matrix from the SVD of W_hat}
+
+\item{C}{the trained tree}
+
+\item{CW}{weights for the trained tree
 The easy way to obtain this is by using the function (tree_parms) which gives a default clustering.
 However, user decide on a specific structure and then input a tree that follows such structure.}
 
-\item{invmat}{A list of length ncol(y), each containing the C_d part of equation 32 in the paper}
-
 \item{gg}{penalty terms for the tree structure for lambda_1 and  lambda_2 for the admm call.}
 
 \item{my_print}{Should information form each ADMM iteration be printed along the way? Default TRUE. This prints  the dual and primal residuals}
@@ -86,5 +92,5 @@ However, user decide on a specific structure and then input a tree that follows
 predicted values for the ADMM part
 }
 \description{
-TODO: add description
+This function fits a multi-response pliable lasso model over a path of regularization values.
 }
diff --git a/src/MADMMplasso.h b/src/MADMMplasso.h
index 6c05ed2..f93a684 100644
--- a/src/MADMMplasso.h
+++ b/src/MADMMplasso.h
@@ -1,5 +1,5 @@
 #include <RcppArmadillo.h>
-Rcpp::List admm_MADMMplasso_cpp(
+arma::field<arma::cube> admm_MADMMplasso_cpp(
   const arma::vec beta0,
   const arma::mat theta0,
   arma::mat beta,
@@ -18,9 +18,11 @@ Rcpp::List admm_MADMMplasso_cpp(
   const double alpha,
   const arma::vec lambda,
   const double alph,
-  const Rcpp::List svd_w,
-  const Rcpp::List tree,
-  const Rcpp::List invmat,
-  const arma::vec gg,
+  const arma::mat svd_w_tu,
+  const arma::mat svd_w_tv,
+  const arma::vec svd_w_d,
+  const arma::sp_mat C,
+  const arma::vec CW,
+  const arma::rowvec gg,
   const bool my_print = true
 );
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index f98ce3d..2bda25d 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -12,13 +12,13 @@ Rcpp::Rostream<false>& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get();
 #endif
 
 // admm_MADMMplasso_cpp
-Rcpp::List admm_MADMMplasso_cpp(const arma::vec beta0, const arma::mat theta0, arma::mat beta, arma::mat beta_hat, arma::cube theta, const double rho1, const arma::mat X, const arma::mat Z, const int max_it, const arma::mat W_hat, const arma::mat XtY, const arma::mat y, const int N, const double e_abs, const double e_rel, const double alpha, const arma::vec lambda, const double alph, const Rcpp::List svd_w, const Rcpp::List tree, const Rcpp::List invmat, const arma::vec gg, const bool my_print);
-RcppExport SEXP _MADMMplasso_admm_MADMMplasso_cpp(SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP beta_hatSEXP, SEXP thetaSEXP, SEXP rho1SEXP, SEXP XSEXP, SEXP ZSEXP, SEXP max_itSEXP, SEXP W_hatSEXP, SEXP XtYSEXP, SEXP ySEXP, SEXP NSEXP, SEXP e_absSEXP, SEXP e_relSEXP, SEXP alphaSEXP, SEXP lambdaSEXP, SEXP alphSEXP, SEXP svd_wSEXP, SEXP treeSEXP, SEXP invmatSEXP, SEXP ggSEXP, SEXP my_printSEXP) {
+arma::field<arma::cube> admm_MADMMplasso_cpp(arma::vec beta0, arma::mat theta0, arma::mat beta, arma::mat beta_hat, arma::cube theta, const double rho1, const arma::mat X, const arma::mat Z, const int max_it, const arma::mat W_hat, arma::mat XtY, const arma::mat y, const int N, const double e_abs, const double e_rel, const double alpha, const arma::vec lambda, const double alph, const arma::mat svd_w_tu, const arma::mat svd_w_tv, const arma::vec svd_w_d, const arma::sp_mat C, const arma::vec CW, const arma::rowvec gg, const bool my_print);
+RcppExport SEXP _MADMMplasso_admm_MADMMplasso_cpp(SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP beta_hatSEXP, SEXP thetaSEXP, SEXP rho1SEXP, SEXP XSEXP, SEXP ZSEXP, SEXP max_itSEXP, SEXP W_hatSEXP, SEXP XtYSEXP, SEXP ySEXP, SEXP NSEXP, SEXP e_absSEXP, SEXP e_relSEXP, SEXP alphaSEXP, SEXP lambdaSEXP, SEXP alphSEXP, SEXP svd_w_tuSEXP, SEXP svd_w_tvSEXP, SEXP svd_w_dSEXP, SEXP CSEXP, SEXP CWSEXP, SEXP ggSEXP, SEXP my_printSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const arma::vec >::type beta0(beta0SEXP);
-    Rcpp::traits::input_parameter< const arma::mat >::type theta0(theta0SEXP);
+    Rcpp::traits::input_parameter< arma::vec >::type beta0(beta0SEXP);
+    Rcpp::traits::input_parameter< arma::mat >::type theta0(theta0SEXP);
     Rcpp::traits::input_parameter< arma::mat >::type beta(betaSEXP);
     Rcpp::traits::input_parameter< arma::mat >::type beta_hat(beta_hatSEXP);
     Rcpp::traits::input_parameter< arma::cube >::type theta(thetaSEXP);
@@ -27,7 +27,7 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const arma::mat >::type Z(ZSEXP);
     Rcpp::traits::input_parameter< const int >::type max_it(max_itSEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type W_hat(W_hatSEXP);
-    Rcpp::traits::input_parameter< const arma::mat >::type XtY(XtYSEXP);
+    Rcpp::traits::input_parameter< arma::mat >::type XtY(XtYSEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type y(ySEXP);
     Rcpp::traits::input_parameter< const int >::type N(NSEXP);
     Rcpp::traits::input_parameter< const double >::type e_abs(e_absSEXP);
@@ -35,12 +35,14 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const double >::type alpha(alphaSEXP);
     Rcpp::traits::input_parameter< const arma::vec >::type lambda(lambdaSEXP);
     Rcpp::traits::input_parameter< const double >::type alph(alphSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::List >::type svd_w(svd_wSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::List >::type tree(treeSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::List >::type invmat(invmatSEXP);
-    Rcpp::traits::input_parameter< const arma::vec >::type gg(ggSEXP);
+    Rcpp::traits::input_parameter< const arma::mat >::type svd_w_tu(svd_w_tuSEXP);
+    Rcpp::traits::input_parameter< const arma::mat >::type svd_w_tv(svd_w_tvSEXP);
+    Rcpp::traits::input_parameter< const arma::vec >::type svd_w_d(svd_w_dSEXP);
+    Rcpp::traits::input_parameter< const arma::sp_mat >::type C(CSEXP);
+    Rcpp::traits::input_parameter< const arma::vec >::type CW(CWSEXP);
+    Rcpp::traits::input_parameter< const arma::rowvec >::type gg(ggSEXP);
     Rcpp::traits::input_parameter< const bool >::type my_print(my_printSEXP);
-    rcpp_result_gen = Rcpp::wrap(admm_MADMMplasso_cpp(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e_abs, e_rel, alpha, lambda, alph, svd_w, tree, invmat, gg, my_print));
+    rcpp_result_gen = Rcpp::wrap(admm_MADMMplasso_cpp(beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, W_hat, XtY, y, N, e_abs, e_rel, alpha, lambda, alph, svd_w_tu, svd_w_tv, svd_w_d, C, CW, gg, my_print));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -77,9 +79,20 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// count_nonzero_a_mat
+int count_nonzero_a_mat(arma::mat x);
+RcppExport SEXP _MADMMplasso_count_nonzero_a_mat(SEXP xSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< arma::mat >::type x(xSEXP);
+    rcpp_result_gen = Rcpp::wrap(count_nonzero_a_mat(x));
+    return rcpp_result_gen;
+END_RCPP
+}
 // hh_nlambda_loop_cpp
-Rcpp::List hh_nlambda_loop_cpp(const arma::mat lam, const unsigned int nlambda, arma::vec beta0, arma::mat theta0, arma::mat beta, arma::mat beta_hat, arma::cube theta, const double rho1, const arma::mat X, const arma::mat Z, const int max_it, const arma::mat my_W_hat, const arma::mat XtY, const arma::mat y, const int N, const double e_abs, const double e_rel, const double alpha, const double alph, const Rcpp::List svd_w, const Rcpp::List tree, const bool my_print, const Rcpp::List invmat, const arma::mat gg, const double tol, const bool parallel, const bool pal, Rcpp::List BETA0, Rcpp::List THETA0, Rcpp::List BETA, Rcpp::List BETA_hat, Rcpp::List Y_HAT, Rcpp::List THETA, const unsigned int D, Rcpp::List my_values);
-RcppExport SEXP _MADMMplasso_hh_nlambda_loop_cpp(SEXP lamSEXP, SEXP nlambdaSEXP, SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP beta_hatSEXP, SEXP thetaSEXP, SEXP rho1SEXP, SEXP XSEXP, SEXP ZSEXP, SEXP max_itSEXP, SEXP my_W_hatSEXP, SEXP XtYSEXP, SEXP ySEXP, SEXP NSEXP, SEXP e_absSEXP, SEXP e_relSEXP, SEXP alphaSEXP, SEXP alphSEXP, SEXP svd_wSEXP, SEXP treeSEXP, SEXP my_printSEXP, SEXP invmatSEXP, SEXP ggSEXP, SEXP tolSEXP, SEXP parallelSEXP, SEXP palSEXP, SEXP BETA0SEXP, SEXP THETA0SEXP, SEXP BETASEXP, SEXP BETA_hatSEXP, SEXP Y_HATSEXP, SEXP THETASEXP, SEXP DSEXP, SEXP my_valuesSEXP) {
+Rcpp::List hh_nlambda_loop_cpp(const arma::mat lam, const unsigned int nlambda, arma::vec beta0, arma::mat theta0, arma::mat beta, arma::mat beta_hat, arma::cube theta, const double rho1, const arma::mat X, const arma::mat Z, const int max_it, const arma::mat my_W_hat, const arma::mat XtY, const arma::mat y, const int N, const double e_abs, const double e_rel, const double alpha, const double alph, const bool my_print, const arma::mat gg, const double tol, const bool parallel, const bool pal, arma::cube BETA0, arma::cube THETA0, arma::cube BETA, arma::cube BETA_hat, arma::cube Y_HAT, const unsigned int D, const arma::sp_mat C, const arma::vec CW, const arma::mat svd_w_tu, const arma::mat svd_w_tv, const arma::vec svd_w_d, Rcpp::List my_values);
+RcppExport SEXP _MADMMplasso_hh_nlambda_loop_cpp(SEXP lamSEXP, SEXP nlambdaSEXP, SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP beta_hatSEXP, SEXP thetaSEXP, SEXP rho1SEXP, SEXP XSEXP, SEXP ZSEXP, SEXP max_itSEXP, SEXP my_W_hatSEXP, SEXP XtYSEXP, SEXP ySEXP, SEXP NSEXP, SEXP e_absSEXP, SEXP e_relSEXP, SEXP alphaSEXP, SEXP alphSEXP, SEXP my_printSEXP, SEXP ggSEXP, SEXP tolSEXP, SEXP parallelSEXP, SEXP palSEXP, SEXP BETA0SEXP, SEXP THETA0SEXP, SEXP BETASEXP, SEXP BETA_hatSEXP, SEXP Y_HATSEXP, SEXP DSEXP, SEXP CSEXP, SEXP CWSEXP, SEXP svd_w_tuSEXP, SEXP svd_w_tvSEXP, SEXP svd_w_dSEXP, SEXP my_valuesSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
@@ -102,55 +115,51 @@ BEGIN_RCPP
     Rcpp::traits::input_parameter< const double >::type e_rel(e_relSEXP);
     Rcpp::traits::input_parameter< const double >::type alpha(alphaSEXP);
     Rcpp::traits::input_parameter< const double >::type alph(alphSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::List >::type svd_w(svd_wSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::List >::type tree(treeSEXP);
     Rcpp::traits::input_parameter< const bool >::type my_print(my_printSEXP);
-    Rcpp::traits::input_parameter< const Rcpp::List >::type invmat(invmatSEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type gg(ggSEXP);
     Rcpp::traits::input_parameter< const double >::type tol(tolSEXP);
     Rcpp::traits::input_parameter< const bool >::type parallel(parallelSEXP);
     Rcpp::traits::input_parameter< const bool >::type pal(palSEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type BETA0(BETA0SEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type THETA0(THETA0SEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type BETA(BETASEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type BETA_hat(BETA_hatSEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type Y_HAT(Y_HATSEXP);
-    Rcpp::traits::input_parameter< Rcpp::List >::type THETA(THETASEXP);
+    Rcpp::traits::input_parameter< arma::cube >::type BETA0(BETA0SEXP);
+    Rcpp::traits::input_parameter< arma::cube >::type THETA0(THETA0SEXP);
+    Rcpp::traits::input_parameter< arma::cube >::type BETA(BETASEXP);
+    Rcpp::traits::input_parameter< arma::cube >::type BETA_hat(BETA_hatSEXP);
+    Rcpp::traits::input_parameter< arma::cube >::type Y_HAT(Y_HATSEXP);
     Rcpp::traits::input_parameter< const unsigned int >::type D(DSEXP);
+    Rcpp::traits::input_parameter< const arma::sp_mat >::type C(CSEXP);
+    Rcpp::traits::input_parameter< const arma::vec >::type CW(CWSEXP);
+    Rcpp::traits::input_parameter< const arma::mat >::type svd_w_tu(svd_w_tuSEXP);
+    Rcpp::traits::input_parameter< const arma::mat >::type svd_w_tv(svd_w_tvSEXP);
+    Rcpp::traits::input_parameter< const arma::vec >::type svd_w_d(svd_w_dSEXP);
     Rcpp::traits::input_parameter< Rcpp::List >::type my_values(my_valuesSEXP);
-    rcpp_result_gen = Rcpp::wrap(hh_nlambda_loop_cpp(lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e_abs, e_rel, alpha, alph, svd_w, tree, my_print, invmat, gg, tol, parallel, pal, BETA0, THETA0, BETA, BETA_hat, Y_HAT, THETA, D, my_values));
+    rcpp_result_gen = Rcpp::wrap(hh_nlambda_loop_cpp(lam, nlambda, beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e_abs, e_rel, alpha, alph, my_print, gg, tol, parallel, pal, BETA0, THETA0, BETA, BETA_hat, Y_HAT, D, C, CW, svd_w_tu, svd_w_tv, svd_w_d, my_values));
     return rcpp_result_gen;
 END_RCPP
 }
 // model_intercept
-arma::mat model_intercept(const arma::vec beta0, const arma::mat theta0, const arma::mat beta, const arma::cube theta, const arma::mat X, const arma::mat Z);
-RcppExport SEXP _MADMMplasso_model_intercept(SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP thetaSEXP, SEXP XSEXP, SEXP ZSEXP) {
+arma::mat model_intercept(const arma::mat beta, const arma::mat X);
+RcppExport SEXP _MADMMplasso_model_intercept(SEXP betaSEXP, SEXP XSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
-    Rcpp::traits::input_parameter< const arma::vec >::type beta0(beta0SEXP);
-    Rcpp::traits::input_parameter< const arma::mat >::type theta0(theta0SEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type beta(betaSEXP);
-    Rcpp::traits::input_parameter< const arma::cube >::type theta(thetaSEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type X(XSEXP);
-    Rcpp::traits::input_parameter< const arma::mat >::type Z(ZSEXP);
-    rcpp_result_gen = Rcpp::wrap(model_intercept(beta0, theta0, beta, theta, X, Z));
+    rcpp_result_gen = Rcpp::wrap(model_intercept(beta, X));
     return rcpp_result_gen;
 END_RCPP
 }
 // model_p
-arma::mat model_p(const arma::vec beta0, const arma::mat theta0, const arma::mat beta, const arma::cube theta, const arma::mat X, const arma::mat Z);
-RcppExport SEXP _MADMMplasso_model_p(SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP thetaSEXP, SEXP XSEXP, SEXP ZSEXP) {
+arma::mat model_p(const arma::vec beta0, const arma::mat theta0, const arma::mat beta, const arma::mat X, const arma::mat Z);
+RcppExport SEXP _MADMMplasso_model_p(SEXP beta0SEXP, SEXP theta0SEXP, SEXP betaSEXP, SEXP XSEXP, SEXP ZSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< const arma::vec >::type beta0(beta0SEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type theta0(theta0SEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type beta(betaSEXP);
-    Rcpp::traits::input_parameter< const arma::cube >::type theta(thetaSEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type X(XSEXP);
     Rcpp::traits::input_parameter< const arma::mat >::type Z(ZSEXP);
-    rcpp_result_gen = Rcpp::wrap(model_p(beta0, theta0, beta, theta, X, Z));
+    rcpp_result_gen = Rcpp::wrap(model_p(beta0, theta0, beta, X, Z));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -192,7 +201,7 @@ BEGIN_RCPP
 END_RCPP
 }
 // reg
-Rcpp::List reg(const arma::mat r, const arma::mat Z);
+arma::mat reg(const arma::mat r, const arma::mat Z);
 RcppExport SEXP _MADMMplasso_reg(SEXP rSEXP, SEXP ZSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
@@ -228,13 +237,14 @@ END_RCPP
 }
 
 static const R_CallMethodDef CallEntries[] = {
-    {"_MADMMplasso_admm_MADMMplasso_cpp", (DL_FUNC) &_MADMMplasso_admm_MADMMplasso_cpp, 23},
+    {"_MADMMplasso_admm_MADMMplasso_cpp", (DL_FUNC) &_MADMMplasso_admm_MADMMplasso_cpp, 25},
     {"_MADMMplasso_count_nonzero_a_cpp", (DL_FUNC) &_MADMMplasso_count_nonzero_a_cpp, 1},
     {"_MADMMplasso_count_nonzero_a_sp_mat", (DL_FUNC) &_MADMMplasso_count_nonzero_a_sp_mat, 1},
     {"_MADMMplasso_count_nonzero_a_cube", (DL_FUNC) &_MADMMplasso_count_nonzero_a_cube, 1},
-    {"_MADMMplasso_hh_nlambda_loop_cpp", (DL_FUNC) &_MADMMplasso_hh_nlambda_loop_cpp, 35},
-    {"_MADMMplasso_model_intercept", (DL_FUNC) &_MADMMplasso_model_intercept, 6},
-    {"_MADMMplasso_model_p", (DL_FUNC) &_MADMMplasso_model_p, 6},
+    {"_MADMMplasso_count_nonzero_a_mat", (DL_FUNC) &_MADMMplasso_count_nonzero_a_mat, 1},
+    {"_MADMMplasso_hh_nlambda_loop_cpp", (DL_FUNC) &_MADMMplasso_hh_nlambda_loop_cpp, 36},
+    {"_MADMMplasso_model_intercept", (DL_FUNC) &_MADMMplasso_model_intercept, 2},
+    {"_MADMMplasso_model_p", (DL_FUNC) &_MADMMplasso_model_p, 5},
     {"_MADMMplasso_modulo", (DL_FUNC) &_MADMMplasso_modulo, 2},
     {"_MADMMplasso_multiples_of", (DL_FUNC) &_MADMMplasso_multiples_of, 3},
     {"_MADMMplasso_lm_arma", (DL_FUNC) &_MADMMplasso_lm_arma, 2},
diff --git a/src/admm.MADMMplasso.cpp b/src/admm.MADMMplasso.cpp
deleted file mode 100644
index e69de29..0000000
diff --git a/src/admm_MADMMplasso.cpp b/src/admm_MADMMplasso.cpp
index 23d2533..7d752f9 100644
--- a/src/admm_MADMMplasso.cpp
+++ b/src/admm_MADMMplasso.cpp
@@ -24,20 +24,22 @@
 //' @param alpha mixing parameter, usually obtained from the MADMMplasso call. When the goal is to include more interactions, alpha should be very small and vice versa.
 //' @param lambda a vector  lambda_3 values for the admm call with length ncol(y). This is usually calculated in the MADMMplasso call.   In our current setting, we use the same the lambda_3 value for all responses.
 //' @param alph an overrelaxation parameter in \[1, 1.8\], usually obtained from the MADMMplasso call.
-//' @param svd_w singular value decomposition of W
-//' @param tree The results from the hierarchical clustering of the response matrix.
+//' @param svd_w_tu the transpose of the U matrix from the SVD of W_hat
+//' @param svd_w_tv the transpose of the V matrix from the SVD of W_hat
+//' @param svd_w_d the D matrix from the SVD of W_hat
+//' @param C the trained tree
+//' @param CW weights for the trained tree
 //' The easy way to obtain this is by using the function (tree_parms) which gives a default clustering.
 //' However, user decide on a specific structure and then input a tree that follows such structure.
 //' @param my_print Should information form each ADMM iteration be printed along the way? Default TRUE. This prints  the dual and primal residuals
-//' @param invmat A list of length ncol(y), each containing the C_d part of equation 32 in the paper
 //' @param gg penalty terms for the tree structure for lambda_1 and  lambda_2 for the admm call.
 //' @return  predicted values for the ADMM part
-//' @description TODO: add description
+//' @description This function fits a multi-response pliable lasso model over a path of regularization values.
 //' @export
 // [[Rcpp::export]]
-Rcpp::List admm_MADMMplasso_cpp(
-  const arma::vec beta0,
-  const arma::mat theta0,
+arma::field<arma::cube> admm_MADMMplasso_cpp(
+  arma::vec beta0,
+  arma::mat theta0,
   arma::mat beta,
   arma::mat beta_hat,
   arma::cube theta,
@@ -46,7 +48,7 @@ Rcpp::List admm_MADMMplasso_cpp(
   const arma::mat Z,
   const int max_it,
   const arma::mat W_hat,
-  const arma::mat XtY,
+  arma::mat XtY,
   const arma::mat y,
   const int N,
   const double e_abs,
@@ -54,17 +56,14 @@ Rcpp::List admm_MADMMplasso_cpp(
   const double alpha,
   const arma::vec lambda,
   const double alph,
-  const Rcpp::List svd_w,
-  const Rcpp::List tree,
-  const Rcpp::List invmat,
-  const arma::vec gg,
+  const arma::mat svd_w_tu,
+  const arma::mat svd_w_tv,
+  const arma::vec svd_w_d,
+  const arma::sp_mat C,
+  const arma::vec CW,
+  const arma::rowvec gg,
   const bool my_print = true
 ) {
-  const Rcpp::List TT = tree;
-  const arma::sp_mat C = TT["Tree"];
-  const arma::vec CW = TT["Tw"];
-  const arma::mat svd_w_tu = Rcpp::as<arma::mat>(svd_w["u"]).t();
-  const arma::mat svd_w_tv = Rcpp::as<arma::mat>(svd_w["v"]).t();
   const int D = y.n_cols;
   const int p = X.n_cols;
   const unsigned int K = Z.n_cols;
@@ -79,7 +78,6 @@ Rcpp::List admm_MADMMplasso_cpp(
   arma::mat H(y.n_cols * C.n_rows, p + p * K);
   arma::cube HH(p, 1 + K, D, arma::fill::zeros);
 
-
   // for response groups =======================================================
   const arma::ivec input = Rcpp::seq_len(D * C.n_rows);
   arma::mat I = arma::zeros<arma::mat>(C.n_rows * D, D);
@@ -111,8 +109,8 @@ Rcpp::List admm_MADMMplasso_cpp(
   arma::cube EE_old = EE;
   double res_pri = 0.;
   double res_dual = 0.;
-  const arma::mat SVD_D = arma::diagmat(Rcpp::as<arma::vec>(svd_w["d"]));
-  const arma::mat R_svd = (svd_w_tu.t() * SVD_D) / N;
+  const arma::mat SVD_D = arma::diagmat(svd_w_d);
+  const arma::mat R_svd_inv = arma::inv((svd_w_tu.t() * SVD_D) / N);
   double rho = rho1;
   arma::cube Big_beta11 = V;
   arma::mat res_val;  // declared here because it's also needed outside the loop
@@ -120,58 +118,54 @@ Rcpp::List admm_MADMMplasso_cpp(
   if (my_print) {
     Rcpp::Rcout << "\ni\tres_dual\te_dual\t\tres_pri\t\te_primal" << std::endl;
   }
+  arma::mat r_current = y;
+  arma::vec v_diff1(D, arma::fill::zeros);
+  arma::vec q_diff1(D, arma::fill::zeros);
+  arma::vec ee_diff1(D, arma::fill::zeros);
+  arma::vec new_G(p + p * K, arma::fill::zeros);
+  arma::mat new_group(p, K + 1);
+  arma::mat invmat(new_G.n_rows, D);  // denominator of the beta estimates
+  arma::mat b;
+  const arma::mat W_hat_t = W_hat.t();
+  arma::vec DD3_diag(W_hat_t.n_rows);
+  arma::mat part_z(W_hat_t.n_rows, W_hat_t.n_cols);
+  arma::vec part_y(W_hat_t.n_rows);
+  arma::vec my_beta_jj(W_hat_t.n_rows);
+  arma::mat beta_hat1(p, 1 + K);
+  arma::mat b_hat(p, 1 + K);
   for (int i = 1; i < max_it + 1; i++) {
-    arma::mat shared_model = model_intercept(beta0, theta0, beta_hat, theta, W_hat, Z);
-    arma::mat r_current = y - shared_model;
-    Rcpp::List b = reg(r_current, Z);
-    arma::mat beta0 = b["beta0"];
-    arma::mat theta0 = b["theta0"];
-    arma::mat new_y = y - (arma::ones(N) * beta0 + Z * theta0);
-    arma::mat XtY = W_hat.t() * new_y;
-    arma::cube main_beta(p, K + 1, D, arma::fill::zeros);
+    r_current = y - model_intercept(beta_hat, W_hat);
+    b = reg(r_current, Z);
+    beta0 = b.row(0).t();
+    theta0 = b.tail_rows(b.n_rows - 1);
+    XtY = W_hat.t() * (y - (arma::ones(N) * beta0.t() + Z * theta0));
     res_val = rho * (I.t() * E - (I.t() * H));
-    arma::vec v_diff1(D, arma::fill::zeros);
-    arma::vec q_diff1(D, arma::fill::zeros);
-    arma::vec ee_diff1(D, arma::fill::zeros);
-
-    arma::vec new_G(p + p * K, arma::fill::zeros);
     new_G.rows(0, p - 1).fill(1);
     new_G.rows(p, p + p * K - 1).fill(2);
     new_G = rho * (1 + new_G);
-    arma::cube invmat(new_G.n_rows, 1, D);  // denominator of the beta estimates
     for (arma::uword slc = 0; slc < D; slc++) {
-      invmat.slice(slc) = new_G + rho * (new_I(slc) + 1);
-    }
-
-    for (int rr = 0; rr < D; rr++) {
-      double DD1 = rho * (new_I(rr) + 1);
-      arma::vec DD2 = new_G + DD1;
-      invmat.slice(rr) = DD2;  // Matrix::chol2inv( Matrix::chol(new_sparse) )
+      invmat.col(slc) = new_G + rho * (new_I(slc) + 1);
     }
-
+    arma::mat DD3 = 1 / invmat;
     for (int jj = 0; jj < D; jj++) {
       arma::mat group = rho * (G.t() * V.slice(jj).t() - G.t() * O.slice(jj).t());
-      arma::vec group1 = group.row(0).t();
-      arma::mat group2 = group.tail_rows(group.n_rows - 1).t();
-      arma::mat new_group(p, K + 1, arma::fill::zeros);
-      new_group.col(0) = group1;
-      new_group.tail_cols(new_group.n_cols - 1) = group2;
-      arma::vec my_beta_jj = XtY.col(jj) / N +\
+      new_group *= 0;
+      new_group.col(0) = group.row(0).t();
+      new_group.tail_cols(new_group.n_cols - 1) = group.tail_rows(group.n_rows - 1).t();
+      my_beta_jj = XtY.col(jj) / N +\
         arma::vectorise(new_group) + res_val.row(jj).t() +\
         arma::vectorise(rho * (Q.slice(jj) - P.slice(jj))) +\
         arma::vectorise(rho * (EE.slice(jj) - HH.slice(jj)));
-      arma::mat DD3 = arma::diagmat(1 / invmat.slice(jj));
-      arma::mat part_z = DD3 * W_hat.t();
-      arma::mat part_y = DD3 * my_beta_jj;
-
-      arma::mat beta_hat_j = arma::inv(arma::inv(R_svd) + svd_w_tv * part_z);
-      beta_hat_j = beta_hat_j * (svd_w_tv * part_y);
-      beta_hat_j = part_z * beta_hat_j;
-
-      arma::vec beta_hat_JJ = arma::vectorise(part_y - beta_hat_j);
-      beta_hat.col(jj) = beta_hat_JJ;
-      arma::mat beta_hat1 = arma::reshape(beta_hat_JJ, p, 1 + K);
-      arma::mat b_hat = alph * beta_hat1 + (1 - alph) * Q.slice(jj);
+
+      for (arma::uword j = 0; j < W_hat_t.n_cols; ++j) {
+          part_z.col(j) = DD3.col(jj) % W_hat_t.col(j);
+      }
+      part_y = DD3.col(jj) % my_beta_jj;
+
+      part_y -= part_z * arma::solve(R_svd_inv + svd_w_tv * part_z, svd_w_tv * part_y, arma::solve_opts::fast);
+      beta_hat.col(jj) = part_y;
+      beta_hat1 = arma::reshape(part_y, p, 1 + K);
+      b_hat = alph * beta_hat1 + (1 - alph) * Q.slice(jj);
       Q.slice(jj).col(0) = b_hat.col(0) + P.slice(jj).col(0);
       arma::mat new_mat = b_hat.tail_cols(b_hat.n_cols - 1) + P.slice(jj).tail_cols(P.slice(jj).n_cols - 1);
       Q.slice(jj).tail_cols(Q.n_cols - 1) = arma::sign(new_mat) % arma::max(arma::abs(new_mat) - ((alpha * lambda(jj)) / rho), arma::zeros(arma::size(new_mat)));
@@ -207,7 +201,6 @@ Rcpp::List admm_MADMMplasso_cpp(
       q_diff1(jj) = arma::accu(arma::pow(beta_hat1 - Q.slice(jj), 2));
       ee_diff1(jj) = arma::accu(arma::pow(beta_hat1 - EE.slice(jj), 2));
     }
-
     arma::mat Big_beta_response = I * beta_hat.t();
     arma::mat b_hat_response = alph * Big_beta_response + (1 - alph) * E;
     arma::mat new_mat = b_hat_response + H;
@@ -283,87 +276,86 @@ Rcpp::List admm_MADMMplasso_cpp(
       e = II(c_count);
     }
 
-      E.rows(0, C.n_cols - 1) = N_E.slice(0);
-      e = II(0);
+    E.rows(0, C.n_cols - 1) = N_E.slice(0);
+    e = II(0);
 
-      for (arma::uword c_count = 1; c_count < C.n_rows; c_count++) {
-        E.rows(e, ((c_count + 1) * y.n_cols) - 1) = N_E.slice(c_count);
-        e = II(c_count);
-      }
+    for (arma::uword c_count = 1; c_count < C.n_rows; c_count++) {
+      E.rows(e, ((c_count + 1) * y.n_cols) - 1) = N_E.slice(c_count);
+      e = II(c_count);
+    }
 
-      H += Big_beta_response - E;
-
-      double v_diff = arma::accu(arma::pow(-rho * (V - V_old), 2));
-      double q_diff = arma::accu(arma::pow(-rho * (Q - Q_old), 2));
-      double e_diff = arma::accu(arma::pow(-rho * (E - E_old), 2));
-      double ee_diff = arma::accu(arma::pow(-rho * (EE - EE_old), 2));
-      double s = sqrt(v_diff + q_diff + e_diff + ee_diff);
-
-      double v_diff1_sum = arma::accu(v_diff1);
-      double q_diff1_sum = arma::accu(q_diff1);
-      double e_diff1 = arma::accu(arma::pow(Big_beta_response - E, 2));
-      double ee_diff1_sum = arma::accu(ee_diff1);
-      double r = sqrt(v_diff1_sum + q_diff1_sum + e_diff1 + ee_diff1_sum);
-
-      res_dual = s;
-      res_pri = r;
-
-      double part_1 = sqrt(Big_beta11.n_elem + 2 * beta_hat.n_elem + Big_beta_response.n_elem);
-      arma::vec Big_beta11_vec = arma::vectorise(Big_beta11);
-      arma::vec beta_hat_vec = arma::vectorise(beta_hat);
-      arma::vec Big_beta_response_vec = arma::vectorise(Big_beta_response);
-      arma::vec part_2 = arma::join_vert(Big_beta11_vec, beta_hat_vec, beta_hat_vec, Big_beta_response_vec);
-      double part_2_norm = arma::norm(part_2);
-
-      arma::vec V_vec = arma::vectorise(V);
-      arma::vec Q_vec = arma::vectorise(Q);
-      arma::vec E_vec = arma::vectorise(E);
-      arma::vec EE_vec = arma::vectorise(EE);
-      arma::vec part_3 = -1 * arma::join_vert(V_vec, Q_vec, E_vec, EE_vec);
-      double part_3_norm = arma::norm(part_3);
-
-      double part_2_3 = std::max(part_2_norm, part_3_norm);
-
-      double e_primal = part_1 * e_abs + e_rel * part_2_3;
-
-      arma::vec O_vec = arma::vectorise(O);
-      arma::vec P_vec = arma::vectorise(P);
-      arma::vec H_vec = arma::vectorise(H);
-      arma::vec HH_vec = arma::vectorise(HH);
-      arma::vec part_4 = arma::join_vert(O_vec, P_vec, H_vec, HH_vec);
-      double e_dual = part_1 * e_abs + e_rel * arma::max(part_4);
-
-      V_old = V,
-      Q_old = Q,
-      E_old = E,
-      EE_old = EE;
-
-      if (res_pri > 10 * res_dual) {
-        rho *= 2;
-      } else if (res_pri * 10 < res_dual ) {
-        rho /= 2;
-      }
+    H += Big_beta_response - E;
+
+    double v_diff = arma::accu(arma::pow(-rho * (V - V_old), 2));
+    double q_diff = arma::accu(arma::pow(-rho * (Q - Q_old), 2));
+    double e_diff = arma::accu(arma::pow(-rho * (E - E_old), 2));
+    double ee_diff = arma::accu(arma::pow(-rho * (EE - EE_old), 2));
+    double s = sqrt(v_diff + q_diff + e_diff + ee_diff);
+
+    double v_diff1_sum = arma::accu(v_diff1);
+    double q_diff1_sum = arma::accu(q_diff1);
+    double e_diff1 = arma::accu(arma::pow(Big_beta_response - E, 2));
+    double ee_diff1_sum = arma::accu(ee_diff1);
+    double r = sqrt(v_diff1_sum + q_diff1_sum + e_diff1 + ee_diff1_sum);
+
+    res_dual = s;
+    res_pri = r;
+
+    double part_1 = sqrt(Big_beta11.n_elem + 2 * beta_hat.n_elem + Big_beta_response.n_elem);
+    arma::vec Big_beta11_vec = arma::vectorise(Big_beta11);
+    arma::vec beta_hat_vec = arma::vectorise(beta_hat);
+    arma::vec Big_beta_response_vec = arma::vectorise(Big_beta_response);
+    arma::vec part_2 = arma::join_vert(Big_beta11_vec, beta_hat_vec, beta_hat_vec, Big_beta_response_vec);
+    double part_2_norm = arma::norm(part_2);
+
+    arma::vec V_vec = arma::vectorise(V);
+    arma::vec Q_vec = arma::vectorise(Q);
+    arma::vec E_vec = arma::vectorise(E);
+    arma::vec EE_vec = arma::vectorise(EE);
+    arma::vec part_3 = -1 * arma::join_vert(V_vec, Q_vec, E_vec, EE_vec);
+    double part_3_norm = arma::norm(part_3);
+
+    double part_2_3 = std::max(part_2_norm, part_3_norm);
+
+    double e_primal = part_1 * e_abs + e_rel * part_2_3;
+
+    arma::vec O_vec = arma::vectorise(O);
+    arma::vec P_vec = arma::vectorise(P);
+    arma::vec H_vec = arma::vectorise(H);
+    arma::vec HH_vec = arma::vectorise(HH);
+    arma::vec part_4 = arma::join_vert(O_vec, P_vec, H_vec, HH_vec);
+    double e_dual = part_1 * e_abs + e_rel * arma::max(part_4);
+
+    V_old = V,
+    Q_old = Q,
+    E_old = E,
+    EE_old = EE;
+
+    if (res_pri > 10 * res_dual) {
+      rho *= 2;
+    } else if (res_pri * 10 < res_dual ) {
+      rho /= 2;
+    }
 
-      if (my_print) {
-        Rprintf("%u\t%e\t%e\t%e\t%e\n", i, res_dual, e_dual, res_pri, e_primal);
-      }
+    if (my_print) {
+      Rprintf("%u\t%e\t%e\t%e\t%e\n", i, res_dual, e_dual, res_pri, e_primal);
+    }
 
-      if (res_pri <= e_primal && res_dual <= e_dual) {
+    if (res_pri <= e_primal && res_dual <= e_dual) {
+      if (my_print) {
         Rprintf("Convergence reached after %u iterations\n", i);
-        converge = true;
-        break;
       }
+      converge = true;
+      break;
+    }
   }
 
   res_val = I.t() * E;
-
   for (arma::uword jj = 0; jj < y.n_cols; jj++) {
     arma::mat group = G.t() * V.slice(jj).t();
-    arma::vec group1 = group.row(0).t();
-    arma::mat group2 = group.tail_rows(group.n_rows - 1).t();
     arma::mat new_group = arma::zeros<arma::mat>(p, K + 1);
-    new_group.col(0) = group1;
-    new_group.tail_cols(new_group.n_cols - 1) = group2;
+    new_group.col(0) = group.row(0).t();
+    new_group.tail_cols(new_group.n_cols - 1) = group.tail_rows(group.n_rows - 1).t();
     arma::vec new_g_theta = arma::vectorise(new_group);
 
     arma::mat beta_hat_B1 = beta_hat.submat(0, jj, p - 1, jj);
@@ -381,18 +373,29 @@ Rcpp::List admm_MADMMplasso_cpp(
     theta.slice(jj) = beta_hat1.tail_cols(beta_hat1.n_cols - 1);
     beta_hat.col(jj) = arma::join_vert(beta_hat1.col(0), arma::vectorise(theta.slice(jj)));
   }
+  arma::mat y_hat = model_p(beta0, theta0, beta_hat, W_hat, Z);
+
+  // Return important values
+  arma::field<arma::cube> out(7);
+  out(0) = arma::cube(beta0.n_elem, 1, 1);
+  out(0).slice(0) = beta0;
+
+  out(1) = arma::cube(theta0.n_rows, theta0.n_cols, 1);
+  out(1).slice(0) = theta0;
+
+  out(2) = arma::cube(beta.n_rows, beta.n_cols, 1);
+  out(2).slice(0) = beta;
+
+  out(3) = theta;
+
+  out(4) = arma::cube(1, 1, 1);
+  out(4).slice(0) = converge;
+
+  out(5) = arma::cube(beta_hat.n_rows, beta_hat.n_cols, 1);
+  out(5).slice(0) = beta_hat;
+
+  out(6) = arma::cube(y_hat.n_rows, y_hat.n_cols, 1);
+  out(6).slice(0) = y_hat;
 
-  arma::mat y_hat = model_p(beta0, theta0, beta_hat, theta, W_hat, Z);
-
-  Rcpp::List out = Rcpp::List::create(
-    Rcpp::Named("beta0") = beta0,
-    Rcpp::Named("theta0") = theta0,
-    Rcpp::Named("beta") = beta,
-    Rcpp::Named("theta") = theta,
-    Rcpp::Named("converge") = converge,
-    Rcpp::Named("obj") = NULL,
-    Rcpp::Named("beta_hat") = beta_hat,
-    Rcpp::Named("y_hat") = y_hat
-  );
   return out;
 }
diff --git a/src/count_nonzero_a.cpp b/src/count_nonzero_a.cpp
index 3f8d0ad..ac49c0a 100644
--- a/src/count_nonzero_a.cpp
+++ b/src/count_nonzero_a.cpp
@@ -46,3 +46,12 @@ int count_nonzero_a_cube(arma::cube x) {
   }
   return arma::max(count1);
 }
+
+// [[Rcpp::export]]
+int count_nonzero_a_mat(arma::mat x) {
+  arma::vec count1(x.n_cols, arma::fill::zeros);
+  for (unsigned int ww = 0; ww < x.n_cols; ++ww) {
+    count1(ww) = arma::accu(x.col(ww) != 0);
+  }
+  return arma::max(count1);
+}
diff --git a/src/hh_nlambda_loop_cpp.cpp b/src/hh_nlambda_loop_cpp.cpp
index d827708..7c734b8 100644
--- a/src/hh_nlambda_loop_cpp.cpp
+++ b/src/hh_nlambda_loop_cpp.cpp
@@ -23,63 +23,70 @@ Rcpp::List hh_nlambda_loop_cpp(
   const double e_rel,
   const double alpha,
   const double alph,
-  const Rcpp::List svd_w,
-  const Rcpp::List tree,
   const bool my_print,
-  const Rcpp::List invmat,
   const arma::mat gg,
   const double tol,
   const bool parallel,
   const bool pal,
-  Rcpp::List BETA0,
-  Rcpp::List THETA0,
-  Rcpp::List BETA,
-  Rcpp::List BETA_hat,
-  Rcpp::List Y_HAT,
-  Rcpp::List THETA,
+  arma::cube BETA0,
+  arma::cube THETA0,
+  arma::cube BETA,
+  arma::cube BETA_hat,
+  arma::cube Y_HAT,
   const unsigned int D,
+  const arma::sp_mat C,
+  const arma::vec CW,
+  const arma::mat svd_w_tu,
+  const arma::mat svd_w_tv,
+  const arma::vec svd_w_d,
   Rcpp::List my_values
 ) {
   arma::vec obj;
   arma::vec non_zero_theta;
-  Rcpp::List my_obj;
   arma::vec n_main_terms;
   arma::vec lam_list;
   arma::mat y_hat = y;
   unsigned int hh = 0;
-
+  arma::field<arma::cube> THETA(nlambda);
   while (hh <= nlambda - 1) {
     arma::vec lambda = lam.row(hh).t();
 
-    Rcpp::List my_values_hh;
-    if (parallel) { // TODO: recheck all conditions (all parallel-pal combinations)
-      // my_values is already a list of length hh
-      my_values_hh = my_values[hh];
-    } else if (pal) {
+    if (pal) {
       // In this case, my_values is an empty list to be created now
-      my_values_hh = admm_MADMMplasso_cpp(
+      arma::field<arma::cube> my_values_hh = admm_MADMMplasso_cpp(
         beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY,
-        y, N, e_abs, e_rel, alpha, lambda, alph, svd_w, tree, invmat,
-        gg.row(hh).t(), my_print
+        y, N, e_abs, e_rel, alpha, lambda, alph, svd_w_tu, svd_w_tv, svd_w_d, C, CW,
+        gg.row(hh), my_print
       );
+      beta0 = my_values_hh(0).slice(0);
+      theta0 = my_values_hh(1).slice(0);
+      beta = my_values_hh(2).slice(0);
+      theta = my_values_hh(3);
+      beta_hat = my_values_hh(5).slice(0);
+      y_hat = my_values_hh(6).slice(0);
+    } else {
+      // Gets triggered regardless of parallel. Whatever the case,
+      // my_values is already a list of length hh
+      arma::field<arma::cube> my_values_hh = my_values[hh];
+      beta0 = my_values_hh(0).slice(0);
+      theta0 = my_values_hh(1).slice(0);
+      beta = my_values_hh(2).slice(0);
+      theta = my_values_hh(3);
+      beta_hat = my_values_hh(5).slice(0);
+      y_hat = my_values_hh(6).slice(0);
     }
 
-    beta = Rcpp::as<arma::mat>(my_values_hh["beta"]);
-    theta = Rcpp::as<arma::cube>(my_values_hh["theta"]);
-    beta0 = Rcpp::as<arma::vec>(my_values_hh["beta0"]);
-    theta0 = Rcpp::as<arma::mat>(my_values_hh["theta0"]);
-    beta_hat = Rcpp::as<arma::mat>(my_values_hh["beta_hat"]);
-    y_hat = Rcpp::as<arma::mat>(my_values_hh["y_hat"]);
-
-    arma::sp_mat beta1(beta % (abs(beta) > tol));
-    arma::cube theta1(theta % (abs(theta) > tol)); // should be sparse, but Arma doesn't have sp_cube
-    arma::sp_mat beta_hat1(beta_hat % (abs(beta_hat) > tol));
+    // should be sparse, but Arma doesn't have sp_cube; beta1 and beta_hat1
+    // are going into a cube, so they need to be dense as well
+    arma::mat beta1(beta % (abs(beta) > tol));
+    arma::cube theta1(theta % (abs(theta) > tol));
+    arma::mat beta_hat1(beta_hat % (abs(beta_hat) > tol));
 
     // TODO: messy! Simplify!
     arma::vec n_interaction_terms(1);
     n_interaction_terms = count_nonzero_a_cube(theta1);
     arma::vec n_beta_terms(1);
-    n_beta_terms = count_nonzero_a_sp_mat(beta1);
+    n_beta_terms = count_nonzero_a_mat(beta1);
     n_main_terms = arma::join_vert(n_main_terms, n_beta_terms);
 
     double obj1 = arma::accu(arma::pow(y - y_hat, 2)) / (D * N);
@@ -88,12 +95,12 @@ Rcpp::List hh_nlambda_loop_cpp(
     non_zero_theta = arma::join_vert(non_zero_theta, n_interaction_terms);
     lam_list = arma::join_vert(lam_list, lambda);
 
-    BETA0[hh] = beta0;
-    THETA0[hh] = theta0;
-    BETA[hh] = arma::conv_to<arma::sp_mat>::from(beta1);
-    BETA_hat[hh] = arma::conv_to<arma::sp_mat>::from(beta_hat1);
-    Y_HAT[hh] = y_hat;
-    THETA[hh] = theta1;
+    BETA0.slice(hh) = beta0;
+    THETA0.slice(hh) = theta0;
+    BETA.slice(hh) = beta1;
+    BETA_hat.slice(hh) = beta_hat1;
+    Y_HAT.slice(hh) = y_hat;
+    THETA(hh) = theta1;
 
     if (my_print) {
       if (hh == 0) {
diff --git a/src/misc.h b/src/misc.h
index e07d559..e2c096a 100644
--- a/src/misc.h
+++ b/src/misc.h
@@ -3,23 +3,16 @@ arma::ivec multiples_of(arma::ivec, int, bool = false);
 arma::mat scale_cpp(arma::mat, arma::vec);
 arma::vec sqrt_sum_squared_rows(arma::mat);
 arma::uvec modulo(arma::uvec, int);
-arma::mat model_intercept(
-  const arma::vec,
-  const arma::mat,
-  const arma::mat,
-  const arma::cube,
-  const arma::mat,
-  const arma::mat
-);
+arma::mat model_intercept(const arma::mat beta, const arma::mat X);
 arma::mat model_p(
-  const arma::vec,
-  const arma::mat,
-  const arma::mat,
-  const arma::cube,
-  const arma::mat,
-  const arma::mat
+  const arma::vec beta0,
+  const arma::mat theta0,
+  const arma::mat beta,
+  const arma::mat X,
+  const arma::mat Z
 );
-Rcpp::List reg(const arma::mat, const arma::mat);
+arma::mat reg(const arma::mat, const arma::mat);
 int count_nonzero_a_cpp(SEXP);
 int count_nonzero_a_sp_mat(arma::sp_mat);
 int count_nonzero_a_cube(arma::cube);
+int count_nonzero_a_mat(arma::mat);
diff --git a/src/model_intercept.cpp b/src/model_intercept.cpp
index af92586..60b9325 100644
--- a/src/model_intercept.cpp
+++ b/src/model_intercept.cpp
@@ -1,14 +1,7 @@
 #include <RcppArmadillo.h>
 // [[Rcpp::depends(RcppArmadillo)]]
 // [[Rcpp::export]]
-arma::mat model_intercept(
-  const arma::vec beta0,
-  const arma::mat theta0,
-  const arma::mat beta,
-  const arma::cube theta,
-  const arma::mat X,
-  const arma::mat Z
-) {
+arma::mat model_intercept(const arma::mat beta, const arma::mat X) {
   // The pliable lasso model described in the paper
   // y ~ f(X)
   // formulated as
diff --git a/src/model_p.cpp b/src/model_p.cpp
index be284e3..858839f 100644
--- a/src/model_p.cpp
+++ b/src/model_p.cpp
@@ -5,7 +5,6 @@ arma::mat model_p(
   const arma::vec beta0,
   const arma::mat theta0,
   const arma::mat beta,
-  const arma::cube theta,
   const arma::mat X,
   const arma::mat Z
 ){
diff --git a/src/reg.cpp b/src/reg.cpp
index 27efc6c..ced67b6 100644
--- a/src/reg.cpp
+++ b/src/reg.cpp
@@ -9,14 +9,18 @@ arma::vec lm_arma(const arma::vec &R, const arma::mat &Z) {
     // Solve the system of linear equations
     arma::vec coefficients = arma::solve(Z_intercept, R);
 
+    // Replace 0 with NA (arma::datum::nan)
+    for (arma::uword i = 0; i < coefficients.n_elem; i++) {
+        if (coefficients(i) == 0) {
+            coefficients(i) = arma::datum::nan;
+        }
+    }
+
     return coefficients;
 }
 
 // [[Rcpp::export]]
-Rcpp::List reg(
-  const arma::mat r,
-  const arma::mat Z
-){
+arma::mat reg(const arma::mat r, const arma::mat Z) {
 
   arma::rowvec beta01(r.n_cols, arma::fill::zeros);
   arma::mat theta01(Z.n_cols, r.n_cols, arma::fill::zeros);
@@ -27,9 +31,6 @@ Rcpp::List reg(
     theta01.col(e) = new1.tail(new1.n_elem - 1);
   }
 
-  Rcpp::List out = Rcpp::List::create(
-    Rcpp::Named("beta0") = beta01,
-    Rcpp::Named("theta0") = theta01
-  );
+  arma::mat out = arma::join_vert(beta01, theta01);
   return out;
 }
diff --git a/tests/testthat/test-MADMMplasso.R b/tests/testthat/test-MADMMplasso.R
index 185fadd..33df6ba 100644
--- a/tests/testthat/test-MADMMplasso.R
+++ b/tests/testthat/test-MADMMplasso.R
@@ -68,7 +68,6 @@ y <- y_train
 
 colnames(y) <- c(paste0("y", seq_len(ncol(y))))
 TT <- tree_parms(y)
-plot(TT$h_clust)
 gg1 <- matrix(0, 2, 2)
 gg1[1, ] <- c(0.02, 0.02)
 gg1[2, ] <- c(0.02, 0.02)
@@ -104,17 +103,21 @@ fit_R <- suppressWarnings(
 test_that("C++ and R versions basically output the same thing", {
   expect_named(fit_C$beta, names(fit_R$beta))
   tl <- 1e1
-  expect_equal(fit_C$beta0[[1]], t(fit_R$beta0[[1]]), tolerance = tl)
+  expect_equal(fit_C$beta0[[1]], as.matrix(fit_R$beta0[[1]]), tolerance = tl)
   expect_equal(as.vector(fit_C$beta[[1]]), as.vector(fit_R$beta[[1]]), tolerance = tl)
   expect_equal(as.vector(fit_C$BETA_hat[[1]]), as.vector(fit_R$BETA_hat[[1]]), tolerance = tl)
-  expect_equal(fit_C$theta0, fit_R$theta0, tolerance = tl)
+  expect_equal(fit_C$theta0[[1]], fit_R$theta0[[1]], tolerance = tl)
   for (i in 1:6) {
-    expect_equal(as.vector(fit_C$theta[[1]][, , i]), as.vector(fit_R$theta[[1]][, , i]), tolerance = tl)
+    expect_equal(
+      as.vector(fit_C$theta[[1]][, , i]),
+      as.vector(fit_R$theta[[1]][, , i]),
+      tolerance = tl
+    )
   }
   expect_equal(fit_C$path, fit_R$path, tolerance = tl)
   expect_identical(fit_C$Lambdas, fit_R$Lambdas)
   expect_identical(fit_C$non_zero[1], fit_R$non_zero)
   expect_identical(fit_C$LOSS[1], fit_R$LOSS)
-  expect_equal(fit_C$Y_HAT, fit_R$Y_HAT, tolerance = tl)
+  expect_equal(fit_C$Y_HAT[[1]], fit_R$Y_HAT[[1]], tolerance = tl)
   expect_identical(fit_C$gg, fit_R$gg)
 })
diff --git a/tests/testthat/test-admm_MADMMplasso_cpp.R b/tests/testthat/test-admm_MADMMplasso_cpp.R
index e19b144..108a099 100644
--- a/tests/testthat/test-admm_MADMMplasso_cpp.R
+++ b/tests/testthat/test-admm_MADMMplasso_cpp.R
@@ -160,7 +160,7 @@ my_values <- suppressWarnings(suppressMessages(admm_MADMMplasso(
   beta0 = beta0, theta0 = theta0, beta = beta, beta_hat = beta_hat,
   theta = theta, rho, X, Z, max_it, W_hat = my_W_hat, XtY, y, N, e.abs,
   e.rel, alpha, lambda = lambda, alph, svd.w = svd.w, tree = TT,
-  my_print = mprt, invmat = invmat, gg = gg, legacy = TRUE
+  my_print = mprt, invmat = invmat, gg = gg
 )))
 beta <- my_values$beta
 theta <- my_values$theta
@@ -171,7 +171,7 @@ beta_hat <- my_values$beta_hat
 y_hat <- my_values$y_hat
 
 test_that("final objects have correct dimensions", {
-  expect_identical(dim(beta0), c(1L, 6L))
+  expect_identical(length(beta0), 6L)
   expect_identical(dim(theta0), c(4L, 6L))
   expect_identical(dim(beta), c(50L, 6L))
   expect_identical(dim(theta), c(50L, 4L, 6L))
@@ -192,25 +192,22 @@ test_that("mean values of final objects are expected", {
 })
 
 # Testing the C++ function =====================================================
-my_values_cpp <- admm_MADMMplasso(
-  beta0 = beta0, theta0 = theta0, beta = beta, beta_hat = beta_hat,
-  theta = theta, rho, X, Z, max_it, W_hat = my_W_hat, XtY, y, N, e.abs,
-  e.rel, alpha, lambda = lambda, alph, svd.w = svd.w, tree = TT,
-  my_print = mprt, invmat = invmat, gg = gg
+my_values_cpp <- admm_MADMMplasso_cpp(
+  beta0, theta0, beta, beta_hat, theta, rho, X, Z, max_it, my_W_hat, XtY, y, N,
+  e.abs, e.rel, alpha, lambda, alph, t(svd.w$u), t(svd.w$v), svd.w$d, TT$Tree,
+  TT$Tw, gg, mprt
 )
 
 test_that("C++ function output structure", {
-  expect_identical(length(my_values_cpp), length(my_values))
-  expect_identical(names(my_values_cpp), names(my_values))
+  expect_identical(length(my_values_cpp), length(my_values) - 1L)
 })
 
 test_that("Values are the same", {
   tl <- 1e-1
-  expect_equal(my_values$beta0, t(my_values_cpp$beta0), tolerance = tl)
-  expect_equal(my_values$theta0, my_values_cpp$theta0, tolerance = tl)
-  expect_equal(my_values$beta, my_values_cpp$beta, tolerance = tl)
-  expect_equal(my_values$theta, my_values_cpp$theta, tolerance = tl)
-  expect_identical(my_values$converge, my_values_cpp$converge)
-  expect_equal(my_values$beta_hat, my_values_cpp$beta_hat, tolerance = tl)
-  expect_equal(my_values$y_hat, my_values_cpp$y_hat, tolerance = tl)
+  expect_equal(my_values$beta0, my_values_cpp[[1]][, 1, 1], tolerance = tl)
+  expect_equal(my_values$theta0, my_values_cpp[[2]][, , 1], tolerance = tl)
+  expect_equal(my_values$beta, my_values_cpp[[3]][, , 1], tolerance = tl)
+  expect_equal(my_values$theta, my_values_cpp[[4]], tolerance = tl)
+  expect_equal(my_values$beta_hat, my_values_cpp[[6]][, , 1], tolerance = tl)
+  expect_equal(my_values$y_hat, my_values_cpp[[7]][, , 1], tolerance = tl)
 })
diff --git a/tests/testthat/test-parallel.R b/tests/testthat/test-parallel.R
new file mode 100644
index 0000000..7decb79
--- /dev/null
+++ b/tests/testthat/test-parallel.R
@@ -0,0 +1,114 @@
+# Setting up objects =========================================================
+set.seed(1235)
+N <- 100
+p <- 50
+nz <- 4
+K <- nz
+X <- matrix(rnorm(n = N * p), nrow = N, ncol = p)
+mx <- colMeans(X)
+sx <- sqrt(apply(X, 2, var))
+X <- scale(X, mx, sx)
+X <- matrix(as.numeric(X), N, p)
+Z <- matrix(rnorm(N * nz), N, nz)
+mz <- colMeans(Z)
+sz <- sqrt(apply(Z, 2, var))
+Z <- scale(Z, mz, sz)
+beta_1 <- rep(x = 0, times = p)
+beta_2 <- rep(x = 0, times = p)
+beta_3 <- rep(x = 0, times = p)
+beta_4 <- rep(x = 0, times = p)
+beta_5 <- rep(x = 0, times = p)
+beta_6 <- rep(x = 0, times = p)
+
+beta_1[1:5] <- c(2, 2, 2, 2, 2)
+beta_2[1:5] <- c(2, 2, 2, 2, 2)
+beta_3[6:10] <- c(2, 2, 2, -2, -2)
+beta_4[6:10] <- c(2, 2, 2, -2, -2)
+beta_5[11:15] <- c(-2, -2, -2, -2, -2)
+beta_6[11:15] <- c(-2, -2, -2, -2, -2)
+
+Beta <- cbind(beta_1, beta_2, beta_3, beta_4, beta_5, beta_6)
+colnames(Beta) <- 1:6
+
+theta <- array(0, c(p, K, 6))
+theta[1, 1, 1] <- 2
+theta[3, 2, 1] <- 2
+theta[4, 3, 1] <- -2
+theta[5, 4, 1] <- -2
+theta[1, 1, 2] <- 2
+theta[3, 2, 2] <- 2
+theta[4, 3, 2] <- -2
+theta[5, 4, 2] <- -2
+theta[6, 1, 3] <- 2
+theta[8, 2, 3] <- 2
+theta[9, 3, 3] <- -2
+theta[10, 4, 3] <- -2
+theta[6, 1, 4] <- 2
+theta[8, 2, 4] <- 2
+theta[9, 3, 4] <- -2
+theta[10, 4, 4] <- -2
+theta[11, 1, 5] <- 2
+theta[13, 2, 5] <- 2
+theta[14, 3, 5] <- -2
+theta[15, 4, 5] <- -2
+theta[11, 1, 6] <- 2
+theta[13, 2, 6] <- 2
+theta[14, 3, 6] <- -2
+theta[15, 4, 6] <- -2
+
+pliable <- matrix(0, N, 6)
+for (e in 1:6) {
+  pliable[, e] <- compute_pliable(X, Z, theta[, , e])
+}
+
+esd <- diag(6)
+e <- MASS::mvrnorm(N, mu = rep(0, 6), Sigma = esd)
+y_train <- X %*% Beta + pliable + e
+y <- y_train
+
+colnames(y) <- c(paste0("y", seq_len(ncol(y))))
+TT <- tree_parms(y)
+gg1 <- matrix(0, 2, 2)
+gg1[1, ] <- c(0.02, 0.02)
+gg1[2, ] <- c(0.02, 0.02)
+
+# Running MADMMplasso ========================================================
+mad_wrap <- function(seed = 3398, ...) {
+  set.seed(seed)
+  suppressMessages(
+    MADMMplasso(
+      X, Z, y,
+      alpha = 0.2, my_lambda = matrix(rep(0.2, dim(y)[2]), 1),
+      lambda_min = 0.001, max_it = 5000, e.abs = 1e-4, e.rel = 1e-2, maxgrid = 1L,
+      nlambda = 1L, rho = 5, tree = TT, my_print = FALSE, alph = 1, gg = gg1,
+      tol = 1e-3, cl = 2, ...
+    )
+  )
+}
+fit_R <- mad_wrap(legacy = TRUE, parallel = FALSE, pal = FALSE)
+fit_C <- mad_wrap(legacy = FALSE, parallel = FALSE, pal = FALSE)
+fit_R_pal <- mad_wrap(legacy = TRUE, parallel = FALSE, pal = TRUE)
+fit_C_pal <- mad_wrap(legacy = FALSE, parallel = FALSE, pal = TRUE)
+
+# Restrict to *nix machines
+if (.Platform$OS.type == "unix") {
+  fit_R_parallel <- mad_wrap(legacy = TRUE, parallel = TRUE, pal = FALSE)
+  fit_C_parallel <- mad_wrap(legacy = FALSE, parallel = TRUE, pal = FALSE)
+}
+
+test_that("results are identical after parallelization", {
+  expect_identical(fit_R_pal, fit_R)
+  expect_identical(fit_C_pal, fit_C)
+  if (.Platform$OS.type == "unix") {
+    expect_identical(fit_R_parallel, fit_R)
+    expect_identical(fit_R_pal, fit_R_parallel)
+    expect_identical(fit_C_parallel, fit_C)
+    expect_identical(fit_C_pal, fit_C_parallel)
+  }
+})
+
+test_that("parallel and pal cannot be both true", {
+  msg <- "parallel and pal cannot be TRUE at the same time"
+  expect_error(mad_wrap(legacy = TRUE, parallel = TRUE, pal = TRUE), msg)
+  expect_error(mad_wrap(legacy = FALSE, parallel = TRUE, pal = TRUE), msg)
+})
diff --git a/tests/testthat/test-reg.R b/tests/testthat/test-reg.R
index 0d78536..8a47ee5 100644
--- a/tests/testthat/test-reg.R
+++ b/tests/testthat/test-reg.R
@@ -18,6 +18,7 @@ test_that("reg() produces the correct output", {
   for (rp in seq_len(reps)) {
     r <- matrix(rnorm(n_obs[rp] * n_vars[rp]), n_obs[rp], n_vars[rp])
     z <- as.matrix(sample(0:1, n_obs[rp], replace = TRUE))
-    expect_identical(reg(r, z), reg_R(r, z), tolerance = 1e-10)
+    expect_identical(reg(r, z)[1, ], reg_R(r, z)[[1]][1, ], tolerance = 1e-10)
+    expect_identical(reg(r, z)[2, ], reg_R(r, z)[[2]][1, ], tolerance = 1e-10)
   }
 })