From 3fc7a7541c8ff2d26f5b47fcda260d3c6b2deed4 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 11:05:14 +0200 Subject: [PATCH 01/12] Added test for #53 --- tests/testthat/test-issue_53.R | 102 +++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 tests/testthat/test-issue_53.R diff --git a/tests/testthat/test-issue_53.R b/tests/testthat/test-issue_53.R new file mode 100644 index 0000000..14e00fc --- /dev/null +++ b/tests/testthat/test-issue_53.R @@ -0,0 +1,102 @@ +# Install and load the package, set the seed +remotes::install_github("ocbe-uio/MADMMplasso") +library(MADMMplasso) +set.seed(1235) + +# Generate the data +N <- 100 +p <- 50 +nz <- 4 +K <- nz +X <- matrix(rnorm(n = N * p), nrow = N, ncol = p) +mx <- colMeans(X) +sx <- sqrt(apply(X, 2, var)) +X <- scale(X, mx, sx) +X <- matrix(as.numeric(X), N, p) +Z <- matrix(rnorm(N * nz), N, nz) +mz <- colMeans(Z) +sz <- sqrt(apply(Z, 2, var)) +Z <- scale(Z, mz, sz) +beta_1 <- rep(x = 0, times = p) +beta_2 <- rep(x = 0, times = p) +beta_3 <- rep(x = 0, times = p) +beta_4 <- rep(x = 0, times = p) +beta_5 <- rep(x = 0, times = p) +beta_6 <- rep(x = 0, times = p) + +beta_1[1:5] <- c(2, 2, 2, 2, 2) +beta_2[1:5] <- c(2, 2, 2, 2, 2) +beta_3[6:10] <- c(2, 2, 2, -2, -2) +beta_4[6:10] <- c(2, 2, 2, -2, -2) +beta_5[11:15] <- c(-2, -2, -2, -2, -2) +beta_6[11:15] <- c(-2, -2, -2, -2, -2) + +Beta <- cbind(beta_1, beta_2, beta_3, beta_4, beta_5, beta_6) + +colnames(Beta) <- c(1:6) + +theta <- array(0, c(p, K, 6)) +theta[1, 1, 1] <- 2 +theta[3, 2, 1] <- 2 +theta[4, 3, 1] <- -2 +theta[5, 4, 1] <- -2 +theta[1, 1, 2] <- 2 +theta[3, 2, 2] <- 2 +theta[4, 3, 2] <- -2 +theta[5, 4, 2] <- -2 +theta[6, 1, 3] <- 2 +theta[8, 2, 3] <- 2 +theta[9, 3, 3] <- -2 +theta[10, 4, 3] <- -2 +theta[6, 1, 4] <- 2 +theta[8, 2, 4] <- 2 +theta[9, 3, 4] <- -2 +theta[10, 4, 4] <- -2 +theta[11, 1, 5] <- 2 +theta[13, 2, 5] <- 2 +theta[14, 3, 5] <- -2 +theta[15, 4, 5] <- -2 +theta[11, 1, 6] <- 2 +theta[13, 2, 6] <- 2 +theta[14, 3, 6] <- -2 +theta[15, 4, 6] <- -2 + +library(MASS) +pliable <- matrix(0, N, 6) +for (e in 1:6) { + pliable[, e] <- compute_pliable(X, Z, theta[, , e]) +} +esd <- diag(6) +e <- MASS::mvrnorm(N, mu = rep(0, 6), Sigma = esd) +y_train <- X %*% Beta + pliable + e +y <- y_train +colnames(y) <- c(paste("y", 1:(ncol(y)), sep = "")) +TT <- tree_parms(y) + +gg1 <- matrix(0, 2, 2) +gg1[1, ] <- c(0.02, 0.02) +gg1[2, ] <- c(0.2, 0.2) +nlambda <- 50 +e.abs <- 1E-4 +e.rel <- 1E-2 +alpha <- .5 +tol <- 1E-3 + +# Fitting models +message("fit_C") +fit_C <- MADMMplasso( + X, Z, y, + alpha = alpha, my_lambda = NULL, + lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, + pal = FALSE, gg = gg1, tol = tol, legacy = FALSE, cl = 3L +) + +message("fit_R") +fit_R <- MADMMplasso( + X, Z, y, + alpha = alpha, my_lambda = NULL, + lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, + pal = FALSE, gg = gg1, tol = tol, legacy = TRUE, cl = 3L +) From 85822959b82b89994063af39598f5e89232660a2 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 14:35:40 +0200 Subject: [PATCH 02/12] Improved warning messages regarding `pal`, `cl` and `parallel` --- R/MADMMplasso.R | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R index aab30ab..d5c1fe6 100644 --- a/R/MADMMplasso.R +++ b/R/MADMMplasso.R @@ -50,9 +50,19 @@ #' @export MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 50000, e.abs = 1E-3, e.rel = 1E-3, maxgrid, nlambda, rho = 5, my_print = FALSE, alph = 1.8, tree, pal = cl == 1L, gg = NULL, tol = 1E-4, cl = 1L, legacy = FALSE) { # Recalculating the number of CPUs - cl <- ifelse(pal, 1L, cl) # cl is irrelevant if pal = TRUE + if (pal && cl > 1L) { + cl <- 1L + warning("pal is TRUE, resetting cl to 1") + } parallel <- cl > 1L - + if (my_print) { + message( + "Parallelization is ", ifelse(parallel, "enabled ", "disabled "), + "(", cl, " CPUs)" + ) + message("pal is ", ifelse(pal, "TRUE", "FALSE")) + message("Using ", ifelse(legacy, "R", "C++"), " engine") + } N <- nrow(X) p <- ncol(X) From d1651173f5451878e5346505eaacf3c5b38bdcdb Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 15:22:15 +0200 Subject: [PATCH 03/12] Fixed calculation of `my_values` on `parallel` (closes #53) --- R/MADMMplasso.R | 14 ++------------ tests/testthat/test-issue_53.R | 15 +-------------- 2 files changed, 3 insertions(+), 26 deletions(-) diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R index d5c1fe6..5277c92 100644 --- a/R/MADMMplasso.R +++ b/R/MADMMplasso.R @@ -218,7 +218,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma doParallel::registerDoParallel(cl = cl) foreach::getDoParRegistered() if (legacy) { - my_values_matrix <- foreach(i = 1:nlambda, .packages = "MADMMplasso", .combine = rbind) %dopar% { + my_values <- foreach(i = 1:nlambda, .packages = "MADMMplasso") %dopar% { admm_MADMMplasso( beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd.w, tree, my_print, @@ -226,7 +226,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma ) } } else { - my_values_matrix <- foreach(i = 1:nlambda, .packages = "MADMMplasso", .combine = rbind) %dopar% { + my_values <- foreach(i = 1:nlambda, .packages = "MADMMplasso") %dopar% { admm_MADMMplasso_cpp( beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd_w_tu, svd_w_tv, svd_w_d, @@ -235,16 +235,6 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma } } parallel::stopCluster(cl) - - # Converting to list so hh_nlambda_loop_cpp can handle it - if (nlambda == 1) { - my_values <- list(my_values_matrix) - } else { - my_values <- list() - for (hh in seq_len(nlambda)) { - my_values[[hh]] <- my_values_matrix[hh, ] - } - } } else if (!parallel && !pal) { if (legacy) { my_values <- lapply( diff --git a/tests/testthat/test-issue_53.R b/tests/testthat/test-issue_53.R index 14e00fc..843dd87 100644 --- a/tests/testthat/test-issue_53.R +++ b/tests/testthat/test-issue_53.R @@ -1,6 +1,4 @@ # Install and load the package, set the seed -remotes::install_github("ocbe-uio/MADMMplasso") -library(MADMMplasso) set.seed(1235) # Generate the data @@ -61,7 +59,6 @@ theta[13, 2, 6] <- 2 theta[14, 3, 6] <- -2 theta[15, 4, 6] <- -2 -library(MASS) pliable <- matrix(0, N, 6) for (e in 1:6) { pliable[, e] <- compute_pliable(X, Z, theta[, , e]) @@ -76,24 +73,14 @@ TT <- tree_parms(y) gg1 <- matrix(0, 2, 2) gg1[1, ] <- c(0.02, 0.02) gg1[2, ] <- c(0.2, 0.2) -nlambda <- 50 +nlambda <- 2 e.abs <- 1E-4 e.rel <- 1E-2 alpha <- .5 tol <- 1E-3 # Fitting models -message("fit_C") fit_C <- MADMMplasso( - X, Z, y, - alpha = alpha, my_lambda = NULL, - lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, - nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, - pal = FALSE, gg = gg1, tol = tol, legacy = FALSE, cl = 3L -) - -message("fit_R") -fit_R <- MADMMplasso( X, Z, y, alpha = alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, From 41cff7dd2db9b3300f62e54e68be729e1fc8d409 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 15:22:41 +0200 Subject: [PATCH 04/12] Adjusted unit tests (#53) --- tests/testthat/test-MADMMplasso.R | 4 ++-- tests/testthat/test-issue_53.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-MADMMplasso.R b/tests/testthat/test-MADMMplasso.R index 8143294..a2c151d 100644 --- a/tests/testthat/test-MADMMplasso.R +++ b/tests/testthat/test-MADMMplasso.R @@ -85,7 +85,7 @@ fit_C <- MADMMplasso( alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, - pal = TRUE, gg = gg1, tol = tol, cl = 6 + pal = TRUE, gg = gg1, tol = tol, cl = 1 ) set.seed(9356219) fit_R <- suppressWarnings( @@ -95,7 +95,7 @@ fit_R <- suppressWarnings( alpha = alpha, my_lambda = matrix(rep(0.2, ncol(y)), 1), lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, - pal = TRUE, gg = gg1, tol = tol, cl = 6, legacy = TRUE + pal = TRUE, gg = gg1, tol = tol, cl = 1, legacy = TRUE ) ) ) diff --git a/tests/testthat/test-issue_53.R b/tests/testthat/test-issue_53.R index 843dd87..7cb35e5 100644 --- a/tests/testthat/test-issue_53.R +++ b/tests/testthat/test-issue_53.R @@ -85,5 +85,5 @@ fit_C <- MADMMplasso( alpha = alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, - pal = FALSE, gg = gg1, tol = tol, legacy = TRUE, cl = 3L + pal = FALSE, gg = gg1, tol = tol, legacy = FALSE, cl = 3L ) From ead9b234b9e7edf5a9ad991be3ad4ad5cf24393a Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 15:40:26 +0200 Subject: [PATCH 05/12] Removed unnecessary code --- R/MADMMplasso.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R index 5277c92..9e2b71d 100644 --- a/R/MADMMplasso.R +++ b/R/MADMMplasso.R @@ -218,7 +218,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma doParallel::registerDoParallel(cl = cl) foreach::getDoParRegistered() if (legacy) { - my_values <- foreach(i = 1:nlambda, .packages = "MADMMplasso") %dopar% { + my_values <- foreach(i = 1:nlambda) %dopar% { admm_MADMMplasso( beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd.w, tree, my_print, @@ -226,7 +226,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma ) } } else { - my_values <- foreach(i = 1:nlambda, .packages = "MADMMplasso") %dopar% { + my_values <- foreach(i = 1:nlambda) %dopar% { admm_MADMMplasso_cpp( beta0, theta0, beta, beta_hat, theta, rho1, X, Z, max_it, my_W_hat, XtY, y, N, e.abs, e.rel, alpha, lam[i, ], alph, svd_w_tu, svd_w_tv, svd_w_d, From 2c8af41c0b18d66d986ff48a19d0a61af976f870 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 15:42:09 +0200 Subject: [PATCH 06/12] Improved unit tests for #53 --- tests/testthat/test-issue_53.R | 35 ++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-issue_53.R b/tests/testthat/test-issue_53.R index 7cb35e5..7caa985 100644 --- a/tests/testthat/test-issue_53.R +++ b/tests/testthat/test-issue_53.R @@ -78,12 +78,43 @@ e.abs <- 1E-4 e.rel <- 1E-2 alpha <- .5 tol <- 1E-3 - # Fitting models +set.seed(1235) fit_C <- MADMMplasso( X, Z, y, alpha = alpha, my_lambda = NULL, lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, - pal = FALSE, gg = gg1, tol = tol, legacy = FALSE, cl = 3L + pal = FALSE, gg = gg1, tol = tol, legacy = FALSE, cl = 2L +) + +set.seed(1235) +fit_R <- MADMMplasso( + X, Z, y, + alpha = alpha, my_lambda = NULL, + lambda_min = 0.001, max_it = 5000, e.abs = e.abs, e.rel = e.rel, maxgrid = nlambda, + nlambda = nlambda, rho = 5, tree = TT, my_print = FALSE, alph = 1, + pal = FALSE, gg = gg1, tol = tol, legacy = TRUE, cl = 2L ) + +test_that("C++ and R versions basically output the same thing", { + expect_named(fit_C$beta, names(fit_R$beta)) + tl <- 1e1 + expect_equal(fit_C$beta0[[1]], as.matrix(fit_R$beta0[[1]]), tolerance = tl) + expect_equal(as.vector(fit_C$beta[[1]]), as.vector(fit_R$beta[[1]]), tolerance = tl) + expect_equal(as.vector(fit_C$BETA_hat[[1]]), as.vector(fit_R$BETA_hat[[1]]), tolerance = tl) + expect_equal(fit_C$theta0[[1]], fit_R$theta0[[1]], tolerance = tl) + for (i in 1:6) { + expect_equal( + as.vector(fit_C$theta[[1]][, , i]), + as.vector(fit_R$theta[[1]][, , i]), + tolerance = tl + ) + } + expect_equal(fit_C$path, fit_R$path, tolerance = tl) + expect_identical(fit_C$Lambdas, fit_R$Lambdas) + expect_equal(fit_C$non_zero, as.matrix(fit_R$non_zero), tolerance = tl) + expect_equal(fit_C$LOSS, as.matrix(fit_R$LOSS), tolerance = tl) + expect_equal(fit_C$Y_HAT[[1]], fit_R$Y_HAT[[1]], tolerance = tl) + expect_identical(fit_C$gg, fit_R$gg) +}) From df656ded91d70ce11a124421a38f769f962afd13 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 16:03:00 +0200 Subject: [PATCH 07/12] Resolved lints --- tests/testthat/test-issue_53.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-issue_53.R b/tests/testthat/test-issue_53.R index 7caa985..4e68a8d 100644 --- a/tests/testthat/test-issue_53.R +++ b/tests/testthat/test-issue_53.R @@ -31,7 +31,7 @@ beta_6[11:15] <- c(-2, -2, -2, -2, -2) Beta <- cbind(beta_1, beta_2, beta_3, beta_4, beta_5, beta_6) -colnames(Beta) <- c(1:6) +colnames(Beta) <- 1:6 theta <- array(0, c(p, K, 6)) theta[1, 1, 1] <- 2 @@ -67,7 +67,7 @@ esd <- diag(6) e <- MASS::mvrnorm(N, mu = rep(0, 6), Sigma = esd) y_train <- X %*% Beta + pliable + e y <- y_train -colnames(y) <- c(paste("y", 1:(ncol(y)), sep = "")) +colnames(y) <- paste0("y", 1:(ncol(y))) TT <- tree_parms(y) gg1 <- matrix(0, 2, 2) @@ -76,7 +76,7 @@ gg1[2, ] <- c(0.2, 0.2) nlambda <- 2 e.abs <- 1E-4 e.rel <- 1E-2 -alpha <- .5 +alpha <- 0.5 tol <- 1E-3 # Fitting models set.seed(1235) From d3d5c6eff07737d7581c3a348fb373bcb90a7873 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 16:03:58 +0200 Subject: [PATCH 08/12] Updated actions/checkout on linter --- .github/workflows/lint.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index d3e57f1..34eb7f6 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -14,7 +14,7 @@ jobs: env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - uses: r-lib/actions/setup-r@v2 with: From 945cdff6779d8adb6f4d92f9ba40c93005655ed9 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 16:05:31 +0200 Subject: [PATCH 09/12] Refactoring parallelization on Unix `makeForkCluster()` is a stub on Windows, so this does not help with --- R/MADMMplasso.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R index 9e2b71d..38f4d2d 100644 --- a/R/MADMMplasso.R +++ b/R/MADMMplasso.R @@ -214,7 +214,7 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma # Pre-calculating my_values through my_values_matrix if (parallel) { - cl <- makeCluster(cl1, type = "FORK") + cl <- parallel::makeForkCluster(cl1) doParallel::registerDoParallel(cl = cl) foreach::getDoParRegistered() if (legacy) { From 22be22e849843fc0a81925052071ae23a786cb8d Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 16:13:56 +0200 Subject: [PATCH 10/12] Fixed code smell --- tests/testthat/test-issue_53.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-issue_53.R b/tests/testthat/test-issue_53.R index 4e68a8d..6253bbc 100644 --- a/tests/testthat/test-issue_53.R +++ b/tests/testthat/test-issue_53.R @@ -67,7 +67,7 @@ esd <- diag(6) e <- MASS::mvrnorm(N, mu = rep(0, 6), Sigma = esd) y_train <- X %*% Beta + pliable + e y <- y_train -colnames(y) <- paste0("y", 1:(ncol(y))) +colnames(y) <- paste0("y", seq_len(ncol(y))) TT <- tree_parms(y) gg1 <- matrix(0, 2, 2) From 8406510cbd35b3b2bff6223a1191bd5411e78267 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 16:25:28 +0200 Subject: [PATCH 11/12] Adding workaround for parallelization on Windows (#46) Not as elegant as simply using `makeCluster()`, but this avoids rewriting some unit tests. May be improved in the future. --- R/MADMMplasso.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/R/MADMMplasso.R b/R/MADMMplasso.R index 38f4d2d..970ab59 100644 --- a/R/MADMMplasso.R +++ b/R/MADMMplasso.R @@ -214,7 +214,11 @@ MADMMplasso <- function(X, Z, y, alpha, my_lambda = NULL, lambda_min = 0.001, ma # Pre-calculating my_values through my_values_matrix if (parallel) { - cl <- parallel::makeForkCluster(cl1) + if (.Platform$OS.type == "unix") { + cl <- parallel::makeForkCluster(cl1) + } else { + cl <- parallel::makeCluster(cl1) + } doParallel::registerDoParallel(cl = cl) foreach::getDoParRegistered() if (legacy) { From fac98b1211ff972c3a2a8b63287f98af4afa9313 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Tue, 20 Aug 2024 16:30:39 +0200 Subject: [PATCH 12/12] Increment version number to 0.0.0.9022 --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 85bf55d..81c007a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: MADMMplasso Title: Multi Variate Multi Response 'ADMM' with Interaction Effects -Version: 0.0.0.9021 +Version: 0.0.0.9022 Authors@R: c( person(