Commit

Merge branch 'issue-17' into develop
wleoncio committed May 23, 2023
2 parents f17f29b + e45016f commit 14cf14f
Showing 5 changed files with 168 additions and 16 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,7 +1,7 @@
Package: EMJMCMC
Type: Package
Title: Flexible Bayesian Nonlinear Model Configuration
Version: 1.4.4.9028
Version: 1.4.4.9029
Date: 2022-03-06
Authors@R:
c(
31 changes: 24 additions & 7 deletions R/LogicRegr.R
@@ -40,13 +40,30 @@
#' @keywords methods models
#' @example /inst/examples/LogicRegr_example.R
#' @export
LogicRegr = function(formula, data, family = "Gaussian",prior = "J",report.level = 0.5, d = 20, cmax = 5, kmax = 20, p.and = 0.9, p.not = 0.05, p.surv = 0.1,ncores = -1, n.mods = 1000 ,advanced = list(presearch = T,locstop = F ,estimator = estimate.logic.bern.tCCH,estimator.args = list(data = data.example,n = 1000, m = 50,r=1),recalc_margin = 250, save.beta = F,interact = T,relations = c("","lgx2","cos","sigmoid","tanh","atan","erf"),relations.prob =c(0.4,0.0,0.0,0.0,0.0,0.0,0.0),interact.param=list(allow_offsprings=1,mutation_rate = 300,last.mutation = 5000, max.tree.size = 1, Nvars.max = 100,p.allow.replace=0.9,p.allow.tree=0.2,p.nor=0.2,p.and = 1),n.models = 10000,unique = T,max.cpu = 4,max.cpu.glob = 4,create.table = F,create.hash = T,pseudo.paral = T,burn.in = 50,outgraphs=F,print.freq = 1000,advanced.param = list(
max.N.glob=as.integer(10),
min.N.glob=as.integer(5),
max.N=as.integer(3),
min.N=as.integer(1),
printable = F)))
{
LogicRegr = function(
formula, data, family = "Gaussian",prior = "J",report.level = 0.5, d = 20,
cmax = 5, kmax = 20, p.and = 0.9, p.not = 0.05, p.surv = 0.1,ncores = -1,
n.mods = 1000 ,advanced = list(presearch = T,locstop = F ,
estimator = estimate.logic.bern.tCCH,
estimator.args = list(data = data.example,n = 1000, m = 50,r=1),
recalc_margin = 250, save.beta = FALSE, interact = TRUE,
relations = c("","lgx2","cos","sigmoid","tanh","atan","erf"),
relations.prob =c(0.4,0.0,0.0,0.0,0.0,0.0,0.0),
interact.param = list(
allow_offsprings=1,mutation_rate = 300,last.mutation = 5000,
max.tree.size = 1, Nvars.max = 100,p.allow.replace=0.9,p.allow.tree=0.2,
p.nor=0.2,p.and = 1),n.models = 10000,unique = TRUE,max.cpu = 4,
max.cpu.glob = 4,create.table = FALSE, create.hash = TRUE,
pseudo.paral = TRUE, burn.in = 50, outgraphs = FALSE, print.freq = 1000,
advanced.param = list(
max.N.glob=as.integer(10),
min.N.glob=as.integer(5),
max.N=as.integer(3),
min.N=as.integer(1),
printable = FALSE
)
)
) {
data.example = data
advanced$formula = formula
advanced$data = data
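For reference, a minimal sketch of the reformatted LogicRegr() signature in use. This is an illustration only: the data frame df (binary covariates plus a numeric response Y) is assumed, and the argument values mirror the tutorial in the new test file further down.

# Illustrative LogicRegr() call; df is an assumed data frame whose column Y is
# the response and whose remaining columns are binary covariates.
formula1 <- as.formula(paste("Y ~ 1 +", paste0(setdiff(colnames(df), "Y"), collapse = "+")))
res <- LogicRegr(
  formula = formula1, data = df, family = "Gaussian", prior = "J",
  report.level = 0.5, d = 15, cmax = 2, kmax = 15,
  p.and = 0.9, p.not = 0.1, p.surv = 0.2, ncores = 4
)
# detected logic expressions and their posterior probabilities
print(res$feat.stat)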
7 changes: 5 additions & 2 deletions R/pinferunemjmcmc.R
@@ -31,8 +31,11 @@
#' @example inst/examples/pinferunemjmcmc_example.R
#' @keywords methods models
#' @export
pinferunemjmcmc = function(n.cores = 4, mcgmj = mcgmjpse, report.level = 0.5, simplify = FALSE, num.mod.best = 1000, predict = FALSE, test.data = 1, link.function = function(z)z, runemjmcmc.params)
{
pinferunemjmcmc = function(
n.cores = 4, mcgmj = mcgmjpse, report.level = 0.5, simplify = FALSE,
num.mod.best = 1000, predict = FALSE, test.data = 1,
link.function = function(z)z, runemjmcmc.params
) {

if(predict)
{
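Similarly, a sketch of the reformatted pinferunemjmcmc() signature in use. Here runemjmcmc.params is assumed to be a previously prepared list of arguments for runemjmcmc(); the new test file below builds such a list in full.

# Illustrative pinferunemjmcmc() call; runemjmcmc.params is assumed to be a
# complete argument list for runemjmcmc(), as constructed in the test below.
res <- pinferunemjmcmc(
  n.cores = 4,                    # number of parallel chains
  report.level = 0.5,             # posterior threshold for reported features
  simplify = TRUE,                # simplify the reported logic expressions
  num.mod.best = 1000,            # number of best models to keep
  predict = FALSE,                # set TRUE (with test.data) to produce predictions
  link.function = function(z) z,  # identity link for the Gaussian case
  runemjmcmc.params = runemjmcmc.params
)
print(res$feat.stat)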
13 changes: 7 additions & 6 deletions man/LogicRegr.Rd

Some generated files are not rendered by default.

131 changes: 131 additions & 0 deletions tests/testthat/test-BLR-tutorial.R
@@ -0,0 +1,131 @@
# # check the tutorial

# library(EMJMCMC)
# library(clusterGeneration)
# library(bindata)
# library(ggplot2)

# #***********************IMPORTANT******************************************************
# # if a multithreaded BLAS backend (e.g. OpenBLAS) for matrix multiplications
# # is installed on your machine, please force it to use 1 thread explicitly
# # unless ncores in LogicRegr is reasonably small; in the latter case
# # you might want to experiment with combinations of blas_set_num_threads and ncores
# library(RhpcBLASctl)
# blas_set_num_threads(1)
# omp_set_num_threads(1)
# #***********************IMPORTANT******************************************************

# TODO: create n_threads so ncores is not fixed at 32 in the LogicRegr() call below
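# One possible approach (an illustration, not part of this commit): derive the
# thread count from the machine instead of hard-coding it, e.g.
# n_threads <- max(1L, parallel::detectCores() - 1L)
# and then pass ncores = n_threads to the LogicRegr() calls below.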
# # set the seed
# set.seed(040590)
# # construct a correlation matrix for M = 50 variables
# M = 50
# m = clusterGeneration::rcorrmatrix(M,alphad=2.5)
# # simulate 1000 binary variables with this correlation matrix
# X = bindata::rmvbin(1000, margprob = rep(0.5,M), bincorr = m)

# # prepare the correlation matrix in the melted format
# melted_cormat = reshape2::melt(cor(X))
# # plot the heat-map of the correlations
# ggplot2::ggplot(data = melted_cormat,
# ggplot2::aes(x=Var1, y=Var2, fill=value)) +
# ggplot2::geom_tile() +
# ggplot2::theme(axis.title.x = ggplot2::element_blank(),
# axis.title.y = ggplot2::element_blank(),
# axis.text.x = ggplot2::element_blank())

# # simulate Gaussian responses from a model with two-way interactions
# # which is considered in S.4 of the paper
# df = data.frame(X)
# df$Y=rnorm(n = 1000,mean = 1+1.43*(df$X5*df$X9)+
# 0.89*(df$X8*df$X11)+0.7*(df$X1*df$X4),sd = 1)

# help("LogicRegr")


# # specify the initial formula
# formula1 = as.formula(paste(colnames(df)[M+1],"~ 1 + ",
# paste0(colnames(df)[-c(M+1)],collapse = "+")))
# # Bayesian logic regression with the robust g-prior
# res4G = LogicRegr(formula = formula1, data = df,
# family = "Gaussian", prior = "G", report.level = 0.5,
# d = 15,cmax = 2,kmax = 15, p.and = 0.9, p.not = 0.1, p.surv = 0.2,
# ncores = 32) # FIXME: returns NULL objects even when run at full power
# # Bayesian logic regression with the Jeffreys prior
# res4J = LogicRegr(formula = formula1, data = df,
# family = "Gaussian", prior = "J", report.level = 0.5,
# d = 15, cmax = 2,kmax = 15, p.and = 0.9, p.not = 0.1, p.surv = 0.2,
# ncores = 32)

# # print the results for the robust g-prior
# print(base::rbind(c("expressions","probabilities"),res4G$feat.stat))

# # print the results for the Jeffreys prior
# print(base::rbind(c("expressions","probabilities"),res4J$feat.stat))


# # simulate Gaussian responses from a model with two-way interactions
# # and an age effect which is an extension of S.4 of the paper
# Xp = data.frame(X)
# Xp$age = rpois(1000,lambda = 34)
# Xp$Y=rnorm(n = 1000,mean = 1+0.7*(Xp$X1*Xp$X4) +
# 0.89*(Xp$X8*Xp$X11)+1.43*(Xp$X5*Xp$X9) + 2*Xp$age, sd = 1)


# teid = sample.int(size =100,n = 1000,replace = F)
# test = Xp[teid,]
# train = Xp[-teid,]



# # specify the initial formula
# formula1 = as.formula(paste("Y ~ 1 +",
# paste0(colnames(test)[-c(51,52)],collapse = "+")))
# # specify the link function
# g = function(x) x
# # specify the parameters of the custom estimator function
# estimator.args = list(data = train, n = dim(train)[1],
# m = stringi::stri_count_fixed(as.character(formula1)[3], "+"), k.max = 15)
# # specify the parameters of gmjmcmc algorithm
# gmjmcmc.params = list(allow_offsprings=1,mutation_rate = 250,
# last.mutation=10000, max.tree.size = 5, Nvars.max =15,
# p.allow.replace=0.9,p.allow.tree=0.01,p.nor=0.01,p.and = 0.9)
# # specify some advanced parameters of mjmcmc
# mjmcmc.params = list(max.N.glob=10, min.N.glob=5, max.N=3, min.N=1,
# printable = F)
# # run the inference of BLR with a non-binary covariate and predictions
# res.alt = pinferunemjmcmc(n.cores = 30, report.level = 0.2,
# num.mod.best = 100,simplify = T,predict = T,test.data = test,
# link.function = g,
# runemjmcmc.params = list(formula = formula1,latnames = c("I(age)"),
# data = train,estimator = estimate.logic.lm.jef,
# estimator.args =estimator.args,
# recalc_margin = 249, save.beta = T,interact = T,outgraphs=F,
# interact.param = gmjmcmc.params,
# n.models = 10000,unique = F,max.cpu = 4,max.cpu.glob = 4,
# create.table = F,create.hash = T,pseudo.paral = T,burn.in = 100,
# print.freq = 1000,
# advanced.param = mjmcmc.params))


# print(base::rbind(c("expressions","probabilities"),res.alt$feat.stat))



# print(sqrt(mean((res.alt$predictions-test$Y)^2)))
# print(mean(abs((res.alt$predictions-test$Y))))


# library(HDeconometrics)
# ridge = ic.glmnet(x = train[,-51],y=train$Y,family = "gaussian",
# alpha = 0)
# predict.ridge = predict(ridge$glmnet,newx = as.matrix(test[,-51]),
# type = "response")[,which(ridge$glmnet$lambda == ridge$lambda)]
# print(sqrt(mean((predict.ridge-test$Y)^2)))
# print(mean(abs((predict.ridge-test$Y))))


# tmean = 1+2*test$age+0.7*(test$X1*test$X4) +
# 0.89*(test$X8*test$X11)+1.43*(test$X5*test$X9)
# print(sqrt(mean((tmean -test$Y)^2)))
# print(mean(abs((tmean -test$Y))))
