Merge branch 'issue-17' into develop

ocbe-uio · Aug 2, 2023 · 6d417af · 6d417af
2 parents 14cf14f + de61fa4
commit 6d417af
Show file tree

Hide file tree

Showing 8 changed files with 81 additions and 68 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: EMJMCMC
 Type: Package
 Title: Flexible Bayesian Nonlinear Model Configuration
-Version: 1.4.4.9029
+Version: 1.4.4.9030
 Date: 2022-03-06
 Authors@R:
   c(
@@ -15,6 +15,6 @@ Depends: R (>= 3.4.1), bigmemory
 Imports: glmnet, biglm, hash, BAS, stringi, parallel, methods, speedglm, stats
 RoxygenNote: 7.2.3
 Suggests:
-    testthat (>= 3.0.0)
+    testthat (>= 3.0.0), bindata, clusterGeneration, reshape2
 Config/testthat/edition: 3
 Encoding: UTF-8
diff --git a/R/EMJMCMC2016-method-forecast.matrix.R b/R/EMJMCMC2016-method-forecast.matrix.R
@@ -36,16 +36,16 @@ EMJMCMC2016$methods(
         betas[which(is.na(betas))] <- 0
         xyz <- which(unlist(mliks) != -10000)
         g.results[4, 2] <<- lHash
-        moddee <- which(unlist(mliks) == max(unlist(mliks), na.rm = TRUE))[1]
+        moddee <- calc_moddee(mliks)
         zyx <- array(data = NA, dim = lHash)
-        nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+        nconsum <- calc_nconsum(mliks, moddee, xyz)
 
         if (nconsum > 0) {
           zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
         } else {
           diff <- 0 - mliks[moddee]
           mliks <- mliks + diff
-          nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+          nconsum <- calc_nconsum(mliks, moddee, xyz)
           zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
         }
 

diff --git a/R/EMJMCMC2016-method-forecast.matrix.na.fast.R b/R/EMJMCMC2016-method-forecast.matrix.na.fast.R
@@ -22,17 +22,17 @@ EMJMCMC2016$methods(
       ids <- which(na.br == 0)
       mliks <- mliks.in
       xyz <- which(unlist(mliks) != -10000)
-      moddee <- which(unlist(mliks) == max(unlist(mliks), na.rm = TRUE))[1]
+      moddee <- calc_moddee(mliks)
       zyx <- vector(mode = "double", length = nrow(betas))
-      nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+      nconsum <- calc_nconsum(mliks, moddee, xyz)
       betas1 <- betas
       betas1[which(is.na(betas1))] <- 0
       if (nconsum > 0) {
         zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
       } else {
         diff <- 0 - mliks[moddee]
         mliks <- mliks + diff
-        nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+        nconsum <- calc_nconsum(mliks, moddee, xyz)
         zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
       }
       res <- t(zyx) %*% g(betas1 %*% t(stats::model.matrix(object = formula.cur, data = covariates)))
@@ -67,16 +67,16 @@ EMJMCMC2016$methods(
       return(-1)
     }
     xyz <- which(unlist(mliks) != -10000)
-    moddee <- which(unlist(mliks) == max(unlist(mliks), na.rm = TRUE))[1]
+    moddee <- calc_moddee(mliks)
     zyx <- array(data = NA, dim = length(mliks))
-    nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+    nconsum <- calc_nconsum(mliks, moddee, xyz)
     betas1[which(is.na(betas1))] <- 0
     if (nconsum > 0) {
       zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
     } else {
       diff <- 0 - mliks[moddee]
       mliks <- mliks + diff
-      nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+      nconsum <- calc_nconsum(mliks, moddee, xyz)
       zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
     }
     if (length(ids) > 0) {

diff --git a/R/EMJMCMC2016-method-post_proceed_results_hash.R b/R/EMJMCMC2016-method-post_proceed_results_hash.R
@@ -19,26 +19,30 @@ EMJMCMC2016$methods(
     }
 
     lHash <- length(hashStat)
-    mliks <- hash::values(hashStat)[which((1:(lHash * linx)) %% linx == 1)]
+    if (length(hashStat) > 0) {
+      mliks <- hash::values(hashStat)[which((1:(lHash * linx)) %% linx == 1)]
+    } else {
+      mliks <- array(data = NA, dim = lHash)
+    }
     xyz <- which(unlist(mliks) != -10000)
     g.results[4, 2] <<- lHash
-    moddee <- which(unlist(mliks) == max(unlist(mliks), na.rm = TRUE))[1]
+    moddee <- calc_moddee(mliks)
     zyx <- array(data = NA, dim = lHash)
-    nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+    nconsum <- calc_nconsum(mliks, moddee, xyz)
 
     if (nconsum > 0) {
       zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
     } else {
       diff <- 0 - mliks[moddee]
       mliks <- mliks + diff
-      nconsum <- sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+      nconsum <- calc_nconsum(mliks, moddee, xyz)
       zyx[xyz] <- exp(mliks[xyz] - mliks[moddee]) / nconsum
     }
 
 
     keysarr <- as.array(hash::keys(hashStat))
     p.post <- array(data = 0, dim = Nvars)
-    for (i in 1:lHash)
+    for (i in seq_len(lHash))
     {
       if (is.na(zyx[i])) {
         del(x = keysarr[i], hash = hashStat)

diff --git a/R/LogicRegr.R b/R/LogicRegr.R
@@ -68,7 +68,7 @@ LogicRegr = function(
   advanced$formula = formula
   advanced$data = data
   advanced$interact.param$Nvars.max = d
-  advanced$interact.param$ max.tree.size= cmax - 1
+  advanced$interact.param$max.tree.size = cmax - 1
   advanced$interact.param$p.and = p.and
   advanced$interact.param$p.nor = p.not
   advanced$interact.param$p.allow.tree = p.surv

diff --git a/R/calc_moddee.R b/R/calc_moddee.R
@@ -0,0 +1,6 @@
+calc_moddee <- function(mliks) {
+  if (length(mliks) == 0) {
+    return(0)
+  }
+  which(unlist(mliks) == max(unlist(mliks), na.rm = TRUE))[1]
+}
diff --git a/R/calc_nconsum.R b/R/calc_nconsum.R
@@ -0,0 +1,3 @@
+calc_nconsum <- function(mliks, moddee, xyz) {
+  sum(exp(-mliks[moddee] + mliks[xyz]), na.rm = TRUE)
+}
diff --git a/tests/testthat/test-BLR-tutorial.R b/tests/testthat/test-BLR-tutorial.R
@@ -1,10 +1,3 @@
-# #check the tutorial
-
-# library(EMJMCMC)
-# library(clusterGeneration)
-# library(bindata)
-# library(ggplot2)
-
 # #***********************IMPORTANT******************************************************
 # # if a multithreaded backend openBLAS for matrix multiplications
 # # is installed on your machine, please force it to use 1 thread explicitly
@@ -15,47 +8,54 @@
 # omp_set_num_threads(1)
 # #***********************IMPORTANT******************************************************
 
-# TODO: create n_threads so ncores is not fixed at 32 in the LogicRegr() call below
-# # set the seed
-# set.seed(040590)
-# # construct a correlation matrix for M = 50 variables
-# M = 50
-# m = clusterGeneration::rcorrmatrix(M,alphad=2.5)
-# # simulate 1000 binary variables with this correlation matrix
-# X = bindata::rmvbin(1000, margprob = rep(0.5,M), bincorr = m)
-
-# # prepare the correlation matrix in the melted format
-# melted_cormat = reshape2::melt(cor(X))
-# # plot the heat-map of the correlations
-# ggplot2::ggplot(data = melted_cormat,
-#                 ggplot2::aes(x=Var1, y=Var2, fill=value)) +
-#   ggplot2::geom_tile() +
-#   ggplot2::theme(axis.title.x = ggplot2::element_blank(),
-#                  axis.title.y =  ggplot2::element_blank(),
-#                  axis.text.x = ggplot2::element_blank())
-
-# # simulate Gaussian responses from a model with two-way interactions
-# # which is considered in S.4 of the paper
-# df = data.frame(X)
-# df$Y=rnorm(n = 1000,mean = 1+1.43*(df$X5*df$X9)+
-#              0.89*(df$X8*df$X11)+0.7*(df$X1*df$X4),sd = 1)
-
-# help("LogicRegr")
-
-
-# # specify the initial formula
-# formula1 = as.formula(paste(colnames(df)[M+1],"~ 1 + ",
-#                             paste0(colnames(df)[-c(M+1)],collapse = "+")))
-# # Bayesian logic regression with the robust-g-prior
-# res4G = LogicRegr(formula = formula1, data = df,
-#                   family = "Gaussian", prior = "G", report.level = 0.5,
-#                   d = 15,cmax = 2,kmax = 15, p.and = 0.9, p.not = 0.1, p.surv = 0.2,
-#                   ncores = 32) # FIXME: returns NULL objects even if ran at full power
-# # Bayesian logic regression with the Jeffreys prior
-# res4J = LogicRegr(formula = formula1, data = df,
-#                   family = "Gaussian", prior = "J", report.level = 0.5,
-#                   d = 15, cmax = 2,kmax = 15, p.and = 0.9, p.not = 0.1, p.surv = 0.2,
-#                   ncores = 32)
+n_threads <- min(parallel::detectCores() - 1, 7L)
+set.seed(040590)
+
+# construct a correlation matrix for M = 50 variables
+M <- 50
+m <- clusterGeneration::rcorrmatrix(M, alphad = 2.5)
+
+# simulate 1000 binary variables with this correlation matrix
+sample_size <- 100L
+X <- suppressWarnings(bindata::rmvbin(sample_size, margprob = rep(0.5, M), bincorr = m))
+
+# prepare the correlation matrix in the melted format
+melted_cormat <- reshape2::melt(cor(X))
+
+# simulate Gaussian responses from a model with two-way interactions
+# which is considered in S.4 of the paper
+df <- data.frame(X)
+df$Y <- rnorm(
+  n = sample_size,
+  mean = 1 + 1.43 * (df$X5 * df$X9) + 0.89 * (df$X8 * df$X11) + 0.7 * (df$X1 * df$X4),
+  sd = 1
+)
+
+# specify the initial formula
+formula1 <- as.formula(
+  paste(
+    colnames(df)[M + 1],
+    "~ 1 + ",
+    paste0(colnames(df)[-c(M + 1)], collapse = "+")
+  )
+)
+
+# Bayesian logic regression with the robust-g-prior
+# FIXME: returns NULL objects even if ran at full power
+res4G <- LogicRegr(
+  formula = formula1, data = df, family = "Gaussian", prior = "G",
+  report.level = 0.5, d = 15, cmax = 2, kmax = 15, p.and = 0.9, p.not = 0.1,
+  p.surv = 0.2,
+  ncores = n_threads,
+  n.mods = 10L
+)
+
+# Bayesian logic regression with the Jeffreys prior
+# res4J <- LogicRegr(
+#   formula = formula1, data = df, family = "Gaussian", prior = "J",
+#   report.level = 0.5, d = 15, cmax = 2, kmax = 15, p.and = 0.9, p.not = 0.1,
+#   p.surv = 0.2, ncores = n_threads
+# )
 
 # # print the results for the robust g-prior
 # print(base::rbind(c("expressions","probabilities"),res4G$feat.stat))
@@ -67,12 +67,12 @@
 # # simulate Gaussian responses from a model with two-way interactions
 # # and an age effect which is an extension of S.4 of the paper
 # Xp = data.frame(X)
-# Xp$age = rpois(1000,lambda = 34)
-# Xp$Y=rnorm(n = 1000,mean = 1+0.7*(Xp$X1*Xp$X4) +
+# Xp$age = rpois(sample_size,lambda = 34)
+# Xp$Y=rnorm(n = sample_size,mean = 1+0.7*(Xp$X1*Xp$X4) +
 #              0.89*(Xp$X8*Xp$X11)+1.43*(Xp$X5*Xp$X9) + 2*Xp$age, sd = 1)
 
 
-# teid  = sample.int(size =100,n = 1000,replace = F)
+# teid  = sample.int(size =100,n = sample_size,replace = F)
 # test  = Xp[teid,]
 # train = Xp[-teid,]