diff --git a/DESCRIPTION b/DESCRIPTION
index c856918..07cf344 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: EMJMCMC
 Type: Package
 Title: Flexible Bayesian Nonlinear Model Configuration
-Version: 1.4.4.9028
+Version: 1.4.4.9029
 Date: 2022-03-06
 Authors@R: c(
diff --git a/R/LogicRegr.R b/R/LogicRegr.R
index 61d50fe..ff75099 100644
--- a/R/LogicRegr.R
+++ b/R/LogicRegr.R
@@ -40,13 +40,30 @@
 #' @keywords methods models
 #' @example /inst/examples/LogicRegr_example.R
 #' @export
-LogicRegr = function(formula, data, family = "Gaussian",prior = "J",report.level = 0.5, d = 20, cmax = 5, kmax = 20, p.and = 0.9, p.not = 0.05, p.surv = 0.1,ncores = -1, n.mods = 1000 ,advanced = list(presearch = T,locstop = F ,estimator = estimate.logic.bern.tCCH,estimator.args = list(data = data.example,n = 1000, m = 50,r=1),recalc_margin = 250, save.beta = F,interact = T,relations = c("","lgx2","cos","sigmoid","tanh","atan","erf"),relations.prob =c(0.4,0.0,0.0,0.0,0.0,0.0,0.0),interact.param=list(allow_offsprings=1,mutation_rate = 300,last.mutation = 5000, max.tree.size = 1, Nvars.max = 100,p.allow.replace=0.9,p.allow.tree=0.2,p.nor=0.2,p.and = 1),n.models = 10000,unique = T,max.cpu = 4,max.cpu.glob = 4,create.table = F,create.hash = T,pseudo.paral = T,burn.in = 50,outgraphs=F,print.freq = 1000,advanced.param = list(
-  max.N.glob=as.integer(10),
-  min.N.glob=as.integer(5),
-  max.N=as.integer(3),
-  min.N=as.integer(1),
-  printable = F)))
-{
+LogicRegr = function(
+  formula, data, family = "Gaussian",prior = "J",report.level = 0.5, d = 20,
+  cmax = 5, kmax = 20, p.and = 0.9, p.not = 0.05, p.surv = 0.1,ncores = -1,
+  n.mods = 1000 ,advanced = list(presearch = T,locstop = F ,
+    estimator = estimate.logic.bern.tCCH,
+    estimator.args = list(data = data.example,n = 1000, m = 50,r=1),
+    recalc_margin = 250, save.beta = FALSE, interact = TRUE,
+    relations = c("","lgx2","cos","sigmoid","tanh","atan","erf"),
+    relations.prob =c(0.4,0.0,0.0,0.0,0.0,0.0,0.0),
+    interact.param = list(
+      allow_offsprings=1,mutation_rate = 300,last.mutation = 5000,
+      max.tree.size = 1, Nvars.max = 100,p.allow.replace=0.9,p.allow.tree=0.2,
+      p.nor=0.2,p.and = 1),n.models = 10000,unique = TRUE,max.cpu = 4,
+    max.cpu.glob = 4,create.table = FALSE, create.hash = TRUE,
+    pseudo.paral = TRUE, burn.in = 50, outgraphs = FALSE, print.freq = 1000,
+    advanced.param = list(
+      max.N.glob=as.integer(10),
+      min.N.glob=as.integer(5),
+      max.N=as.integer(3),
+      min.N=as.integer(1),
+      printable = FALSE
+    )
+  )
+) {
   data.example = data
   advanced$formula = formula
   advanced$data = data
diff --git a/R/pinferunemjmcmc.R b/R/pinferunemjmcmc.R
index 3dd951f..74b08b4 100644
--- a/R/pinferunemjmcmc.R
+++ b/R/pinferunemjmcmc.R
@@ -31,8 +31,11 @@
 #' @example inst/examples/pinferunemjmcmc_example.R
 #' @keywords methods models
 #' @export
-pinferunemjmcmc = function(n.cores = 4, mcgmj = mcgmjpse, report.level = 0.5, simplify = FALSE, num.mod.best = 1000, predict = FALSE, test.data = 1, link.function = function(z)z, runemjmcmc.params)
-{
+pinferunemjmcmc = function(
+  n.cores = 4, mcgmj = mcgmjpse, report.level = 0.5, simplify = FALSE,
+  num.mod.best = 1000, predict = FALSE, test.data = 1,
+  link.function = function(z)z, runemjmcmc.params
+) {
   if(predict)
   {
diff --git a/man/LogicRegr.Rd b/man/LogicRegr.Rd
index f2f57c1..c0bd88b 100644
--- a/man/LogicRegr.Rd
+++ b/man/LogicRegr.Rd
@@ -21,15 +21,16 @@ LogicRegr(
   n.mods = 1000,
   advanced = list(presearch = T, locstop = F, estimator = estimate.logic.bern.tCCH,
     estimator.args = list(data = data.example, n = 1000, m = 50, r = 1), recalc_margin =
-    250, save.beta = F, interact = T, relations = c("", "lgx2", "cos", "sigmoid", "tanh",
-    "atan", "erf"), relations.prob = c(0.4, 0, 0, 0, 0, 0, 0), interact.param =
+    250, save.beta = FALSE, interact = TRUE, relations = c("", "lgx2", "cos", "sigmoid",
+    "tanh", "atan", "erf"), relations.prob = c(0.4, 0, 0, 0, 0, 0, 0), interact.param =
     list(allow_offsprings = 1, mutation_rate = 300, last.mutation = 5000, max.tree.size =
     1, Nvars.max = 100, p.allow.replace = 0.9, p.allow.tree = 0.2, p.nor = 0.2, p.and =
     1), n.models = 10000,
-    unique = T, max.cpu = 4, max.cpu.glob = 4, create.table =
-    F, create.hash = T, pseudo.paral = T, burn.in = 50, outgraphs = F, print.freq = 1000,
-    advanced.param = list(max.N.glob = as.integer(10), min.N.glob = as.integer(5), max.N
-    = as.integer(3), min.N = as.integer(1), printable = F))
+    unique = TRUE, max.cpu = 4, max.cpu.glob = 4,
+    create.table = FALSE, create.hash = TRUE, pseudo.paral = TRUE, burn.in = 50,
+    outgraphs = FALSE, print.freq = 1000, advanced.param = list(max.N.glob =
+    as.integer(10), min.N.glob = as.integer(5), max.N = as.integer(3), min.N =
+    as.integer(1), printable = FALSE))
 )
 }
 \arguments{
diff --git a/tests/testthat/test-BLR-tutorial.R b/tests/testthat/test-BLR-tutorial.R
new file mode 100644
index 0000000..311b98b
--- /dev/null
+++ b/tests/testthat/test-BLR-tutorial.R
@@ -0,0 +1,131 @@
+# #check the tutorial
+
+# library(EMJMCMC)
+# library(clusterGeneration)
+# library(bindata)
+# library(ggplot2)
+
+# #***********************IMPORTANT******************************************************
+# # if a multithreaded openBLAS backend for matrix multiplications
+# # is installed on your machine, please force it to use 1 thread explicitly
+# # unless ncores in LogicRegr is reasonably small; in the latter case
+# # you might want to experiment with the combinations of blas_set_num_threads and ncores
+# library(RhpcBLASctl)
+# blas_set_num_threads(1)
+# omp_set_num_threads(1)
+# #***********************IMPORTANT******************************************************
+
+# TODO: create n_threads so ncores is not fixed at 32 in the LogicRegr() call below
+# # set the seed
+# set.seed(040590)
+# # construct a correlation matrix for M = 50 variables
+# M = 50
+# m = clusterGeneration::rcorrmatrix(M,alphad=2.5)
+# # simulate 1000 binary variables with this correlation matrix
+# X = bindata::rmvbin(1000, margprob = rep(0.5,M), bincorr = m)
+
+# # prepare the correlation matrix in the melted format
+# melted_cormat = reshape2::melt(cor(X))
+# # plot the heat-map of the correlations
+# ggplot2::ggplot(data = melted_cormat,
+#   ggplot2::aes(x=Var1, y=Var2, fill=value)) +
+#   ggplot2::geom_tile() +
+#   ggplot2::theme(axis.title.x = ggplot2::element_blank(),
+#     axis.title.y = ggplot2::element_blank(),
+#     axis.text.x = ggplot2::element_blank())
+
+# # simulate Gaussian responses from a model with two-way interactions
+# # which is considered in S.4 of the paper
+# df = data.frame(X)
+# df$Y=rnorm(n = 1000,mean = 1+1.43*(df$X5*df$X9)+
+#   0.89*(df$X8*df$X11)+0.7*(df$X1*df$X4),sd = 1)
+
+# help("LogicRegr")
+
+
+# # specify the initial formula
+# formula1 = as.formula(paste(colnames(df)[M+1],"~ 1 + ",
+#   paste0(colnames(df)[-c(M+1)],collapse = "+")))
+# # Bayesian logic regression with the robust-g-prior
+# res4G = LogicRegr(formula = formula1, data = df,
+#   family = "Gaussian", prior = "G", report.level = 0.5,
+#   d = 15,cmax = 2,kmax = 15, p.and = 0.9, p.not = 0.1, p.surv = 0.2,
+#   ncores = 32) # FIXME: returns NULL objects even if ran at full power
+# # Bayesian logic regression with the Jeffreys prior
+# res4J = LogicRegr(formula = formula1, data = df,
+#   family = "Gaussian", prior = "J", report.level = 0.5,
+#   d = 15, cmax = 2,kmax = 15, p.and = 0.9, p.not = 0.1, p.surv = 0.2,
+#   ncores = 32)
+
+# # print the results for the robust g-prior
+# print(base::rbind(c("expressions","probabilities"),res4G$feat.stat))
+
+# #print the results for the Jeffreys prior
+# print(base::rbind(c("expressions","probabilities"),res4J$feat.stat))
+
+
+# # simulate Gaussian responses from a model with two-way interactions
+# # and an age effect which is an extension of S.4 of the paper
+# Xp = data.frame(X)
+# Xp$age = rpois(1000,lambda = 34)
+# Xp$Y=rnorm(n = 1000,mean = 1+0.7*(Xp$X1*Xp$X4) +
+#   0.89*(Xp$X8*Xp$X11)+1.43*(Xp$X5*Xp$X9) + 2*Xp$age, sd = 1)
+
+
+# teid = sample.int(size =100,n = 1000,replace = F)
+# test = Xp[teid,]
+# train = Xp[-teid,]
+
+
+
+# # specify the initial formula
+# formula1 = as.formula(paste("Y ~ 1 +",
+#   paste0(colnames(test)[-c(51,52)],collapse = "+")))
+# # specify the link function
+# g = function(x) x
+# # specify the parameters of the custom estimator function
+# estimator.args = list(data = train, n = dim(train)[1],
+#   m =stri_count_fixed(as.character(formula1)[3],"+"),k.max = 15)
+# # specify the parameters of gmjmcmc algorithm
+# gmjmcmc.params = list(allow_offsprings=1,mutation_rate = 250,
+#   last.mutation=10000, max.tree.size = 5, Nvars.max =15,
+#   p.allow.replace=0.9,p.allow.tree=0.01,p.nor=0.01,p.and = 0.9)
+# # specify some advanced parameters of mjmcmc
+# mjmcmc.params = list(max.N.glob=10, min.N.glob=5, max.N=3, min.N=1,
+#   printable = F)
+# # run the inference of BLR with a non-binary covariate and predictions
+# res.alt = pinferunemjmcmc(n.cores = 30, report.level = 0.2,
+#   num.mod.best = 100,simplify = T,predict = T,test.data = test,
+#   link.function = g,
+#   runemjmcmc.params = list(formula = formula1,latnames = c("I(age)"),
+#   data = train,estimator = estimate.logic.lm.jef,
+#   estimator.args =estimator.args,
+#   recalc_margin = 249, save.beta = T,interact = T,outgraphs=F,
+#   interact.param = gmjmcmc.params,
+#   n.models = 10000,unique = F,max.cpu = 4,max.cpu.glob = 4,
+#   create.table = F,create.hash = T,pseudo.paral = T,burn.in = 100,
+#   print.freq = 1000,
+#   advanced.param = mjmcmc.params))
+
+
+# print(base::rbind(c("expressions","probabilities"),res.alt$feat.stat))
+
+
+
+# print(sqrt(mean((res.alt$predictions-test$Y)^2)))
+# print(mean(abs((res.alt$predictions-test$Y))))
+
+
+# library(HDeconometrics)
+# ridge = ic.glmnet(x = train[,-51],y=train$Y,family = "gaussian",
+#   alpha = 0)
+# predict.ridge = predict(ridge$glmnet,newx = as.matrix(test[,-51]),
+#   type = "response")[,which(ridge$glmnet$lambda == ridge$lambda)]
+# print(sqrt(mean((predict.ridge-test$Y)^2)))
+# print(mean(abs((predict.ridge-test$Y))))
+
+
+# tmean = 1+2*test$age+0.7*(test$X1*test$X4) +
+#   0.89*(test$X8*test$X11)+1.43*(test$X5*test$X9)
+# print(sqrt(mean((tmean -test$Y)^2)))
+# print(mean(abs((tmean -test$Y))))
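A minimal sketch for the n_threads TODO in tests/testthat/test-BLR-tutorial.R, assuming the base-R parallel package is acceptable in the test; the n_threads name and the cap of 32 (mirroring the value currently hardcoded in the LogicRegr() calls) are illustrative only and not part of the patch:

# Derive a core count from the machine instead of hardcoding ncores = 32
# (illustrative sketch only; not part of this patch).
cores <- parallel::detectCores(logical = FALSE)
if (is.na(cores)) cores <- 1L            # detectCores() can return NA on some platforms
n_threads <- max(1L, min(32L, cores))    # keep the tutorial's current upper bound of 32
res4G <- LogicRegr(
  formula = formula1, data = df, family = "Gaussian", prior = "G",
  report.level = 0.5, d = 15, cmax = 2, kmax = 15,
  p.and = 0.9, p.not = 0.1, p.surv = 0.2, ncores = n_threads
)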