From 45a9a3bcd1392582b10bed3dd965f3cd7d240d0f Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 25 Apr 2024 06:24:10 +0200 Subject: [PATCH 1/2] Minor reformatting of DESCRIPTION, updated date --- DESCRIPTION | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 493cdc4..70f6a6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,8 +1,8 @@ Package: EMJMCMC Type: Package -Title: Evolutionary Mode Jumping Markov Chain Monte Carlo Expert Toolbox +Title: Evolutionary Mode Jumping Markov Chain Monte Carlo Expert Toolbox Version: 1.5.0 -Date: 2024-04-23 +Date: 2024-04-25 Authors@R: c( person("Aliaksandr", "Hubin", email = "aliaksah@math.uio.no", role = c("aut")), @@ -11,14 +11,11 @@ Authors@R: person("Florian", "Frommlet", role = c("ctb")) ) Maintainer: Waldir Leoncio -Description: Implementation of the Mode Jumping Markov Chain Monte Carlo algorithm from Hubin, A., Storvik, G. (2018) , Genetically Modified Mode Jumping Markov Chain Monte Carlo from Hubin, A., Storvik, G., & Frommlet, F. (2020) , Hubin, A., Storvik, G., & Frommlet, F. (2021) , and Hubin, A., Heinze, G., & De Bin, R. (2023) , and Reversible Genetically Modified Mode Jumping Markov Chain Monte Carlo from Hubin, A., Frommlet, F., & Storvik, G. (2021) . - which allow for estimating posterior model - probabilities and Bayesian model averaging across a wide set of Bayesian models including linear, generalized linear, generalized linear mixed, generalized nonlinear, generalized nonlinear mixed, and logic regression models. +Description: Implementation of the Mode Jumping Markov Chain Monte Carlo algorithm from Hubin, A., Storvik, G. (2018) , Genetically Modified Mode Jumping Markov Chain Monte Carlo from Hubin, A., Storvik, G., & Frommlet, F. (2020) , Hubin, A., Storvik, G., & Frommlet, F. (2021) , and Hubin, A., Heinze, G., & De Bin, R. (2023) , and Reversible Genetically Modified Mode Jumping Markov Chain Monte Carlo from Hubin, A., Frommlet, F., & Storvik, G. (2021) , which allow for estimating posterior model probabilities and Bayesian model averaging across a wide set of Bayesian models including linear, generalized linear, generalized linear mixed, generalized nonlinear, generalized nonlinear mixed, and logic regression models. License: GPL Depends: R (>= 3.4.1), bigmemory Imports: glmnet, biglm, hash, BAS, stringi, parallel, methods, speedglm, stats RoxygenNote: 7.3.1 -Suggests: - testthat (>= 3.0.0), bindata, clusterGeneration, reshape2 +Suggests: testthat (>= 3.0.0), bindata, clusterGeneration, reshape2 Config/testthat/edition: 3 Encoding: UTF-8 From dc7815e949c33d838dbbd1b26d1c60864e1b1d06 Mon Sep 17 00:00:00 2001 From: Waldir Leoncio Date: Thu, 25 Apr 2024 10:34:54 +0200 Subject: [PATCH 2/2] Replaced `T/F` with `TRUE/FALSE` --- R/EMJMCMC2016-method-forecast.R | 2 +- R/EMJMCMC2016-method-modejumping_mcmc.R | 18 +++++----- R/EMJMCMC2016-method-post_proceed_results.R | 4 +-- ...CMC2016-method-post_proceed_results_hash.R | 2 +- R/estimate.bigm.R | 2 +- R/estimate.gamma.cpen.R | 4 +-- R/estimate.logic.bern.R | 2 +- R/estimate.logic.bern.tCCH.R | 4 +-- R/estimate.logic.glm.R | 2 +- R/estimate.logic.lm.R | 2 +- R/estimate.logic.lm.jef.R | 2 +- R/estimate.logic.lm.tCCH.R | 4 +-- R/estimate.speedglm.R | 2 +- R/mcgmj.R | 4 +-- R/parall.gmj.R | 4 +-- R/pinferunemjmcmc.R | 2 +- R/runemjmcmc.R | 30 ++++++++-------- R/runpar.infer.R | 4 +-- R/simplify.formula.R | 2 +- R/simplifyposteriors.R | 2 +- R/simplifyposteriors.infer.R | 2 +- README.md | 4 +-- inst/examples/pinferunemjmcmc_example.R | 2 +- man/pinferunemjmcmc.Rd | 2 +- man/runemjmcmc.Rd | 36 +++++++++---------- tests/testthat/test-abalone-BGNLM.R | 4 +-- 26 files changed, 74 insertions(+), 74 deletions(-) diff --git a/R/EMJMCMC2016-method-forecast.R b/R/EMJMCMC2016-method-forecast.R index 484b18f..850dced 100644 --- a/R/EMJMCMC2016-method-forecast.R +++ b/R/EMJMCMC2016-method-forecast.R @@ -4,7 +4,7 @@ EMJMCMC2016$methods( res <- 0 for (i in ids) { - res <- res + statistics1[i, 15] * link.g(sum(statistics1[i, 16:nvars] * covariates, na.rm = T)) + res <- res + statistics1[i, 15] * link.g(sum(statistics1[i, 16:nvars] * covariates, na.rm = TRUE)) } return(list(forecast = res)) } diff --git a/R/EMJMCMC2016-method-modejumping_mcmc.R b/R/EMJMCMC2016-method-modejumping_mcmc.R index 4a43b66..ca89f24 100644 --- a/R/EMJMCMC2016-method-modejumping_mcmc.R +++ b/R/EMJMCMC2016-method-modejumping_mcmc.R @@ -124,7 +124,7 @@ EMJMCMC2016$methods( to.del <- to.del[-sample(x = Nvars, size = sample(x = Nvars - 1, size = 1), prob = p.add + p.epsilon)] } if (length(to.del) < Nvars - Nvars.max) { - tdl.id <- order(p.add, decreasing = T) + tdl.id <- order(p.add, decreasing = TRUE) to.del <- to.del[-tdl.id[1:Nvars.max]] } if (glob.model$print.freq > 0L) { @@ -186,7 +186,7 @@ EMJMCMC2016$methods( if (sjm + sjf + 1 <= max.tree.size) { if (allow_offsprings == 1) { - if (!grepl(father, mother, fixed = T) && !grepl(mother, father, fixed = T)) { + if (!grepl(father, mother, fixed = TRUE) && !grepl(mother, father, fixed = TRUE)) { proposal <- stri_paste(paste(ifelse(stats::runif(n = 1, min = 0, max = 1) < p.nor, "I((1-", "I(("), mother, sep = ""), paste(ifelse(stats::runif(n = 1, min = 0, max = 1) < p.nor, "(1-", "("), father, "))", sep = ""), sep = ifelse(stats::runif(n = 1, min = 0, max = 1) < p.and, ")&", ")|")) } else { if (max(sjm, sjf) > 1) { @@ -288,7 +288,7 @@ EMJMCMC2016$methods( # perform preliminary filtration here if (Nvars > Nvars.max || j == mutation_rate) { - # pool.cor.prob = T + # pool.cor.prob = TRUE # do the stuff here if (j == mutation_rate) { fparam.pool <<- unique(c(fparam.pool, filtered)) @@ -311,7 +311,7 @@ EMJMCMC2016$methods( to.del <- to.del[-sample(x = Nvars, size = sample(x = Nvars - 1, size = 1), prob = p.add + p.epsilon)] } if (length(to.del) < Nvars - Nvars.max) { - tdl.id <- order(p.add, decreasing = T) + tdl.id <- order(p.add, decreasing = TRUE) to.del <- to.del[-tdl.id[1:Nvars.max]] } if (glob.model$print.freq > 0L) { @@ -688,7 +688,7 @@ EMJMCMC2016$methods( to.del <- to.del[-sample(x = Nvars, size = sample(x = Nvars - 1, size = 1), prob = p.add + p.epsilon)] } if (length(to.del) < Nvars - Nvars.max) { - tdl.id <- order(p.add, decreasing = T) + tdl.id <- order(p.add, decreasing = TRUE) to.del <- to.del[-tdl.id[1:Nvars.max]] } if (glob.model$print.freq > 0L) { @@ -1410,7 +1410,7 @@ EMJMCMC2016$methods( } if (LocImprove == as.array(0)) { - thact <- sum(ratcand, -ratcur, -SA.forw$log.prob.cur, SA.forw$log.prob.fix, SA.back$log.prob.cur, -SA.back$log.prob.fix, na.rm = T) + thact <- sum(ratcand, -ratcur, -SA.forw$log.prob.cur, SA.forw$log.prob.fix, SA.back$log.prob.cur, -SA.back$log.prob.fix, na.rm = TRUE) if (log(stats::runif(n = 1, min = 0, max = 1)) <= thact) { ratcur <- ratcand mlikcur <- ratcand @@ -1439,7 +1439,7 @@ EMJMCMC2016$methods( modglob <- SA.back$modglob } } else { - thact <- sum(ratcand, -ratcur, -SA.forw$log.prob.cur, SA.forw$log.prob.fix, vect[[mod_id]]$log.mod.switchback.prob, -vect[[mod_id]]$log.mod.switch.prob, na.rm = T) + thact <- sum(ratcand, -ratcur, -SA.forw$log.prob.cur, SA.forw$log.prob.fix, vect[[mod_id]]$log.mod.switchback.prob, -vect[[mod_id]]$log.mod.switch.prob, na.rm = TRUE) if (log(stats::runif(n = 1, min = 0, max = 1)) <= thact) { ratcur <- ratcand mlikcur <- ratcand @@ -1503,7 +1503,7 @@ EMJMCMC2016$methods( # if(log(stats::runif(n = 1,min = 0,max = 1))<=(ratcand - ratcur - MTMCMC.forw$log.prob.cur + MTMCMC.forw$log.prob.fix + MTMCMC.back$log.prob.cur - MTMCMC.back$log.prob.fix)) - thact <- sum(ratcand, -ratcur, -MTMCMC.forw$log.prob.cur, MTMCMC.forw$log.prob.fix, MTMCMC.back$log.prob.cur, -MTMCMC.back$log.prob.fix, na.rm = T) + thact <- sum(ratcand, -ratcur, -MTMCMC.forw$log.prob.cur, MTMCMC.forw$log.prob.fix, MTMCMC.back$log.prob.cur, -MTMCMC.back$log.prob.fix, na.rm = TRUE) if (log(stats::runif(n = 1, min = 0, max = 1)) <= thact) { ratcur <- ratcand mlikcur <- ratcand @@ -1588,7 +1588,7 @@ EMJMCMC2016$methods( # if(log(stats::runif(n = 1,min = 0,max = 1))<=(ratcand - ratcur - GREEDY.forw$log.prob.cur + GREEDY.forw$log.prob.fix + GREEDY.back$log.prob.cur - GREEDY.back$log.prob.fix)) - thact <- sum(ratcand, -ratcur, -GREEDY.forw$log.prob.cur, GREEDY.forw$log.prob.fix, GREEDY.back$log.prob.cur, -GREEDY.back$log.prob.fix, na.rm = T) + thact <- sum(ratcand, -ratcur, -GREEDY.forw$log.prob.cur, GREEDY.forw$log.prob.fix, GREEDY.back$log.prob.cur, -GREEDY.back$log.prob.fix, na.rm = TRUE) if (log(stats::runif(n = 1, min = 0, max = 1)) <= thact) { ratcur <- ratcand mlikcur <- ratcand diff --git a/R/EMJMCMC2016-method-post_proceed_results.R b/R/EMJMCMC2016-method-post_proceed_results.R index 9265dfa..2524756 100644 --- a/R/EMJMCMC2016-method-post_proceed_results.R +++ b/R/EMJMCMC2016-method-post_proceed_results.R @@ -11,7 +11,7 @@ EMJMCMC2016$methods( if (nconsum > 0) { zyx[xyz] <- exp(statistics1[xyz, 1] - statistics1[moddee, 1]) / nconsum } else { - nnnorm <- sum(statistics1[xyz, 4], na.rm = T) + nnnorm <- sum(statistics1[xyz, 4], na.rm = TRUE) if (nnnorm == 0) { nnnorm <- 1 } @@ -28,7 +28,7 @@ EMJMCMC2016$methods( p.post <- (p.post + varcur * statistics1[i, 15]) } - if (!exists("p.post") || is.null(p.post) || sum(p.post, na.rm = T) == 0 || sum(p.post, na.rm = T) > Nvars) { + if (!exists("p.post") || is.null(p.post) || sum(p.post, na.rm = TRUE) == 0 || sum(p.post, na.rm = TRUE) > Nvars) { p.post <- array(data = 0.5, dim = Nvars) } diff --git a/R/EMJMCMC2016-method-post_proceed_results_hash.R b/R/EMJMCMC2016-method-post_proceed_results_hash.R index a037aed..31781e1 100644 --- a/R/EMJMCMC2016-method-post_proceed_results_hash.R +++ b/R/EMJMCMC2016-method-post_proceed_results_hash.R @@ -60,7 +60,7 @@ EMJMCMC2016$methods( p.post <- (p.post + varcur * zyx[i]) } - if (!exists("p.post") || is.null(p.post) || sum(p.post, na.rm = T) == 0 || sum(p.post, na.rm = T) > Nvars) { + if (!exists("p.post") || is.null(p.post) || sum(p.post, na.rm = TRUE) == 0 || sum(p.post, na.rm = TRUE) > Nvars) { p.post <- array(data = 0.5, dim = Nvars) } # hash::values(hashStat)[which((1:(lHash * linx)) %%linx == 4)]<-zyx diff --git a/R/estimate.bigm.R b/R/estimate.bigm.R index 36d5c21..a907564 100644 --- a/R/estimate.bigm.R +++ b/R/estimate.bigm.R @@ -21,7 +21,7 @@ estimate.bigm <- function(formula, data, family, prior, n, maxit = 2, chunksize = 1000000) # nice behaviour { out <- biglm::bigglm( - data = data, family = family, formula = formula, sandwich = F, + data = data, family = family, formula = formula, sandwich = FALSE, maxit = maxit, chunksize = chunksize ) if (prior == "AIC") { diff --git a/R/estimate.gamma.cpen.R b/R/estimate.gamma.cpen.R index 9ed058b..ed280ab 100644 --- a/R/estimate.gamma.cpen.R +++ b/R/estimate.gamma.cpen.R @@ -11,7 +11,7 @@ estimate.gamma.cpen <- function(formula, data, r = 1.0 / 1000.0, logn = log(1000 fobserved <- fmla.proc[1] fmla.proc[2] <- stringi::stri_replace_all(str = fmla.proc[2], fixed = " ", replacement = "") fmla.proc[2] <- stringi::stri_replace_all(str = fmla.proc[2], fixed = "\n", replacement = "") - fparam <- stringi::stri_split_fixed(str = fmla.proc[2], pattern = "+I", omit_empty = F)[[1]] + fparam <- stringi::stri_split_fixed(str = fmla.proc[2], pattern = "+I", omit_empty = FALSE)[[1]] sj <- (stringi::stri_count_fixed(str = fparam, pattern = "*")) sj <- sj + (stringi::stri_count_fixed(str = fparam, pattern = "+")) for (rel in relat) { @@ -48,7 +48,7 @@ estimate.gamma.cpen_2 = function(formula, data,r = 1.0/223.0,logn=log(223.0),rel fobserved = fmla.proc[1] fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") - fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+I",omit_empty = F)[[1]] + fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+I",omit_empty = FALSE)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "*")) sj=sj+(stri_count_fixed(str = fparam, pattern = "+")) for(rel in relat) diff --git a/R/estimate.logic.bern.R b/R/estimate.logic.bern.R index c2f2eb6..e465d8e 100644 --- a/R/estimate.logic.bern.R +++ b/R/estimate.logic.bern.R @@ -14,7 +14,7 @@ estimate.logic.bern = function(formula, data, family = stats::binomial(), n=1000 fobserved = fmla.proc[1] fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") - fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] + fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] sj=(stringi::stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stringi::stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 diff --git a/R/estimate.logic.bern.tCCH.R b/R/estimate.logic.bern.tCCH.R index 61a5ac6..180d63a 100644 --- a/R/estimate.logic.bern.tCCH.R +++ b/R/estimate.logic.bern.tCCH.R @@ -3,7 +3,7 @@ estimate.logic.bern.tCCH = function(formula = NULL,y.id = 51, data, n=1000, m=50 #define the function estimating parameters of a given Bernoulli logic regression with robust g prior if(is.null(formula)) return(list(mlik = -10000 + stats::rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) - X = scale(stats::model.matrix(object = formula,data = data),center = T,scale = F) + X = scale(stats::model.matrix(object = formula,data = data),center = TRUE,scale = FALSE) X[,1] = 1 fmla.proc=as.character(formula)[2:3] out = stats::glm(formula = stats::as.formula(paste0(fmla.proc[1],"~X+0")),data=data,family = stats::binomial()) @@ -21,7 +21,7 @@ estimate.logic.bern.tCCH = function(formula = NULL,y.id = 51, data, n=1000, m=50 fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") - fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] + fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] sj=(stringi::stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stringi::stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 diff --git a/R/estimate.logic.glm.R b/R/estimate.logic.glm.R index 86642bd..5b1849b 100644 --- a/R/estimate.logic.glm.R +++ b/R/estimate.logic.glm.R @@ -26,7 +26,7 @@ fmla.proc<-as.character(formula)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") -fparam <-stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] +fparam <-stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] sj<-(stringi::stri_count_fixed(str = fparam, pattern = "&")) sj<-sj+(stringi::stri_count_fixed(str = fparam, pattern = "|")) sj<-sj+1 diff --git a/R/estimate.logic.lm.R b/R/estimate.logic.lm.R index 4942867..a98e3d0 100644 --- a/R/estimate.logic.lm.R +++ b/R/estimate.logic.lm.R @@ -25,7 +25,7 @@ fmla.proc<-as.character(formula)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") -fparam <-stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] +fparam <-stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] sj<-(stringi::stri_count_fixed(str = fparam, pattern = "&")) sj<-sj+(stringi::stri_count_fixed(str = fparam, pattern = "|")) sj<-sj+1 diff --git a/R/estimate.logic.lm.jef.R b/R/estimate.logic.lm.jef.R index e6f0422..4fbcbd4 100644 --- a/R/estimate.logic.lm.jef.R +++ b/R/estimate.logic.lm.jef.R @@ -13,7 +13,7 @@ estimate.logic.lm.jef = function(formula= NULL, data, n, m, r = 1,k.max=21) fobserved = fmla.proc[1] fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") - fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] + fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] sj=(stringi::stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stringi::stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 diff --git a/R/estimate.logic.lm.tCCH.R b/R/estimate.logic.lm.tCCH.R index d7cbdb2..90af8ed 100644 --- a/R/estimate.logic.lm.tCCH.R +++ b/R/estimate.logic.lm.tCCH.R @@ -15,7 +15,7 @@ estimate.logic.lm.tCCH = function(formula = NULL, data, n=1000, m=50, r = 1, p.a } fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") - fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] + fparam =stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] sj=(stringi::stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stringi::stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 @@ -23,7 +23,7 @@ estimate.logic.lm.tCCH = function(formula = NULL, data, n=1000, m=50, r = 1, p.a p.v = (n+1)/(p+1) R.2 = summary(out)$r.squared - mlik = (-0.5*p*log(p.v) -0.5*(n-1)*log(1-(1-1/p.v)*R.2) + log(beta((p.a+p)/2,p.b/2)) - log(beta(p.a/2,p.b/2)) + log(BAS::phi1(p.b/2,(n-1)/2,(p.a+p.b+p)/2,p.s/2/p.v,R.2/(p.v-(p.v-1)*R.2))) - BAS::hypergeometric1F1(p.b/2,(p.a+p.b)/2,p.s/2/p.v,log = T)+log(Jprior) + p*log(r)+n) + mlik = (-0.5*p*log(p.v) -0.5*(n-1)*log(1-(1-1/p.v)*R.2) + log(beta((p.a+p)/2,p.b/2)) - log(beta(p.a/2,p.b/2)) + log(BAS::phi1(p.b/2,(n-1)/2,(p.a+p.b+p)/2,p.s/2/p.v,R.2/(p.v-(p.v-1)*R.2))) - BAS::hypergeometric1F1(p.b/2,(p.a+p.b)/2,p.s/2/p.v,log = TRUE)+log(Jprior) + p*log(r)+n) if(mlik==-Inf||is.na(mlik)||is.nan(mlik)) mlik = -10000 return(list(mlik = mlik,waic = stats::AIC(out)-n , dic = stats::BIC(out)-n,summary.fixed =list(mean = stats::coef(out)))) diff --git a/R/estimate.speedglm.R b/R/estimate.speedglm.R index f823a16..f023410 100644 --- a/R/estimate.speedglm.R +++ b/R/estimate.speedglm.R @@ -20,7 +20,7 @@ estimate.speedglm <- function(formula, data, family, prior, logn) # weird behavi { # use dic and aic as bic and aic correspondingly X <- stats::model.matrix(object = formula,data = data) -out <- speedglm::speedglm.wfit(y = data[,1], X = X, intercept=FALSE, family=family,eigendec = T, method = "Cholesky") +out <- speedglm::speedglm.wfit(y = data[,1], X = X, intercept=FALSE, family=family,eigendec = TRUE, method = "Cholesky") if(prior == "AIC") return(list(mlik = -out$aic ,waic = -(out$deviance + 2*out$rank) , dic = -(out$RSS),summary.fixed =list(mean = out$coefficients))) if(prior=="BIC") diff --git a/R/mcgmj.R b/R/mcgmj.R index a4acda8..f13e1cf 100644 --- a/R/mcgmj.R +++ b/R/mcgmj.R @@ -1,6 +1,6 @@ # This file hosts small, internal functions that are used as auxiliary to other, # user-level functions. -mcgmjpar = function(X,FUN,mc.cores) parallel::mclapply(X= X,FUN = FUN,mc.preschedule = T,mc.cores = mc.cores) +mcgmjpar = function(X,FUN,mc.cores) parallel::mclapply(X= X,FUN = FUN,mc.preschedule = TRUE,mc.cores = mc.cores) -mcgmjpse = function(X,FUN,mc.cores) if(.Platform[[1]]=="unix" & length(mc.cores)>0 & mc.cores >=2) parallel::mclapply(X= X,FUN = FUN,mc.preschedule = T,mc.cores = mc.cores) else lapply(X,FUN) +mcgmjpse = function(X,FUN,mc.cores) if(.Platform[[1]]=="unix" & length(mc.cores)>0 & mc.cores >=2) parallel::mclapply(X= X,FUN = FUN,mc.preschedule = TRUE,mc.cores = mc.cores) else lapply(X,FUN) diff --git a/R/parall.gmj.R b/R/parall.gmj.R index fafd9c1..2ef338d 100644 --- a/R/parall.gmj.R +++ b/R/parall.gmj.R @@ -69,9 +69,9 @@ set.seed(as.integer(vect$cpu)) do.call(runemjmcmc, vect[1:vect$simlen]) vals<-hash::values(hashStat) fparam<-mySearch$fparam -cterm<-max(vals[1,],na.rm = T) +cterm<-max(vals[1,],na.rm = TRUE) ppp<-mySearch$post_proceed_results_hash(hashStat = hashStat) -post.populi<-sum(exp(hash::values(hashStat)[1,][1:vect$NM]-cterm),na.rm = T) +post.populi<-sum(exp(hash::values(hashStat)[1,][1:vect$NM]-cterm),na.rm = TRUE) hash::clear(hashStat) rm(vals) return(list(post.populi = post.populi, p.post = ppp$p.post, cterm = cterm, fparam = fparam)) diff --git a/R/pinferunemjmcmc.R b/R/pinferunemjmcmc.R index fa2b24d..ea59f97 100644 --- a/R/pinferunemjmcmc.R +++ b/R/pinferunemjmcmc.R @@ -39,7 +39,7 @@ pinferunemjmcmc = function( if(predict) { - runemjmcmc.params$save.beta = T + runemjmcmc.params$save.beta = TRUE if(length(test.data)==0) { diff --git a/R/runemjmcmc.R b/R/runemjmcmc.R index 9e85389..732b2b7 100644 --- a/R/runemjmcmc.R +++ b/R/runemjmcmc.R @@ -19,14 +19,14 @@ #' @param locstop.nd Defines whether local greedy optimizers stop at the first local optima found (locstop.nd=TRUE) or not (locstop.nd=FALSE) #' @param latent a latent random field to be addressed (to be specifically used when estimator = INLA, currently unsupported) #' @param create.table a Boolean variable defining if a big.memory based hash table (only available for MJMCMC with no feature engineering, allows data sharing between CPUs) or the original R hash data structure (available for all algorithm, does not allow data sharing between CPUs) is used for storing of the results -#' @param hash.length a parameter defining hash size for the big.memory based hash table as 2^hash.length (only relevant when create.table = T) -#' @param pseudo.paral defines if lapply or mclapply is used for local vectorized computations within the chain (can only be TRUE if create.table=T) -#' @param max.cpu maximal number of cpus in MJMCMC when within chain parallelization is allowed pseudo.paral = F -#' @param max.cpu.glob maximal number of cpus in global moves in MJMCMC when within chain parallelization is allowed pseudo.paral = F +#' @param hash.length a parameter defining hash size for the big.memory based hash table as 2^hash.length (only relevant when create.table = TRUE) +#' @param pseudo.paral defines if lapply or mclapply is used for local vectorized computations within the chain (can only be TRUE if create.table= TRUE) +#' @param max.cpu maximal number of cpus in MJMCMC when within chain parallelization is allowed pseudo.paral = FALSE +#' @param max.cpu.glob maximal number of cpus in global moves in MJMCMC when within chain parallelization is allowed pseudo.paral = FALSE #' @param presearch a boolean parameter defining if greedy forward and backward regression steps are used for initialization of initial approximations of marginal inclusion probabilities #' @param locstop a boolean parameter defining if the presearch is stopped at the first local extremum visited #' @param interact a boolean parameter defining if feature engineering is allowed in the search -#' @param relations a vector of allowed modification functions (only relevant when feature engineering is enabled by means of interact = T) +#' @param relations a vector of allowed modification functions (only relevant when feature engineering is enabled by means of interact = TRUE) #' @param relations.prob probability distribution of addressing modifications defined in relations parameter (both vectors must be of the same length) #' @param gen.prob a vector of probabilities for different operators in GMJMCMC or RGMJMCMC in the deep regression context (hence only relevant if \code{interact.param$allow_offsprings} is either 3 or 4) #' @param pool.cross a parameter defining the probability of addressing covariates from the current pool of covariates in GMJMCMC (covariates from the set of filtered covariates can be addressed with probability 1-pool.cross) (only relevant when interact = TRUE) @@ -65,25 +65,25 @@ #' } #' @references Hubin & Storvik (2016),Hubin, Storvik & Frommlet (2017), Hubin & Storvik (2017) #' @author Aliaksandr Hubin -#' @seealso global objects statistics1 (if create.table==T) or hashStat (if create.table==F) contain all marginal likelihoods and two other model selection criteria as well as all of the beta coefficients for the models (if save.beta==T) +#' @seealso global objects statistics1 (if create.table== TRUE) or hashStat (if create.table== FALSE) contain all marginal likelihoods and two other model selection criteria as well as all of the beta coefficients for the models (if save.beta== TRUE) #' @example /inst/examples/runemjmcmc_example.R #' @keywords methods models #' @export runemjmcmc<-function( formula, data, secondary = vector(mode="character", length=0), latnames="", estimator,estimator.args = "list",n.models,p.add.default = 1,p.add = 0.5, - unique = F,save.beta=F, locstop.nd = F, latent="",max.cpu=4,max.cpu.glob=2, - create.table=T, hash.length = 20, presearch=T, locstop =F ,pseudo.paral = F, - interact = F,deep.method =1, + unique = FALSE,save.beta= FALSE, locstop.nd = FALSE, latent="",max.cpu=4,max.cpu.glob=2, + create.table= TRUE, hash.length = 20, presearch= TRUE, locstop = FALSE ,pseudo.paral = FALSE, + interact = FALSE,deep.method =1, relations = c("","sin","cos","sigmoid","tanh","atan","erf"), relations.prob =c(0.4,0.1,0.1,0.1,0.1,0.1,0.1),gen.prob = c(1,10,5,1,0), - pool.cross = 0.9,p.epsilon = 0.0001, del.sigma = 0.5,pool.cor.prob = F, + pool.cross = 0.9,p.epsilon = 0.0001, del.sigma = 0.5,pool.cor.prob = FALSE, interact.param=list(allow_offsprings=2,mutation_rate = 100,last.mutation=2000, max.tree.size = 10000, Nvars.max = 100, p.allow.replace = 0.7, - p.allow.tree=0.1,p.nor=0.3,p.and = 0.7), prand = 0.01,keep.origin = T, - sup.large.n = 5000, recalc_margin = 2^10, create.hash=F,interact.order=1, + p.allow.tree=0.1,p.nor=0.3,p.and = 0.7), prand = 0.01,keep.origin = TRUE, + sup.large.n = 5000, recalc_margin = 2^10, create.hash= FALSE,interact.order=1, burn.in=1, eps = 10^6, max.time = 120,max.it = 25000, print.freq = 100, - outgraphs=F,advanced.param=NULL, + outgraphs= FALSE,advanced.param=NULL, distrib_of_neighbourhoods=t(array(data = c(7.6651604,16.773326,14.541629,12.839445,2.964227,13.048343,7.165434, 0.9936905,15.942490,11.040131,3.200394,15.349051,5.466632,14.676458, 1.5184551,9.285762,6.125034,3.627547,13.343413,2.923767,15.318774, 14.5295380,1.521960,11.804457,5.070282,6.934380,10.578945,12.455602, 6.0826035,2.453729,14.340435,14.863495,1.028312,12.685017,13.806295),dim = c(7,5))), distrib_of_proposals = c(76.91870,71.25264,87.68184,60.55921,15812.39852), quiet = TRUE) @@ -207,7 +207,7 @@ if(create.table) truth = ppp$p.post # make sure it is equal to Truth column from the article truth.m = ppp$m.post truth.prob = ppp$s.mass - ordering = sort(ppp$p.post,index.return=T) + ordering = sort(ppp$p.post,index.return= TRUE) } else if(create.hash) { @@ -215,7 +215,7 @@ else if(create.hash) truth = ppp$p.post # make sure it is equal to Truth column from the article truth.m = ppp$m.post truth.prob = ppp$s.mass - ordering = sort(ppp$p.post,index.return=T) + ordering = sort(ppp$p.post,index.return= TRUE) } if (!quiet) { message("Post Proceed Results") diff --git a/R/runpar.infer.R b/R/runpar.infer.R index e97d862..b893678 100644 --- a/R/runpar.infer.R +++ b/R/runpar.infer.R @@ -7,9 +7,9 @@ runpar.infer=function(vect) do.call(runemjmcmc, vect[1:(length(vect)-5)]) vals=hash::values(hashStat) fparam=mySearch$fparam - cterm=max(vals[1,],na.rm = T) + cterm=max(vals[1,],na.rm = TRUE) ppp=mySearch$post_proceed_results_hash(hashStat = hashStat) - post.populi=sum(exp(hash::values(hashStat)[1,][1:vect$NM]-cterm),na.rm = T) + post.populi=sum(exp(hash::values(hashStat)[1,][1:vect$NM]-cterm),na.rm = TRUE) betas = NULL mliks = NULL diff --git a/R/simplify.formula.R b/R/simplify.formula.R index f63c12f..f7e92c0 100644 --- a/R/simplify.formula.R +++ b/R/simplify.formula.R @@ -17,6 +17,6 @@ fmla.proc<-as.character(fmla)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stringi::stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stringi::stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") -fparam <- names[which(names %in% stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] )] +fparam <- names[which(names %in% stringi::stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = FALSE)[[1]] )] return(list(fparam = fparam,fobserved = fobserved)) } diff --git a/R/simplifyposteriors.R b/R/simplifyposteriors.R index 3870639..ac461a6 100644 --- a/R/simplifyposteriors.R +++ b/R/simplifyposteriors.R @@ -38,7 +38,7 @@ for(i in 1:length(posteriors[,1])) { res<-as.data.frame(t(hash::values(rhash)[c(3,4),])) res$V1<-as.numeric(as.character(res$V1)) res<-res[which(res$V1>thf),] -res<-res[order(res$V1, decreasing = T),] +res<-res[order(res$V1, decreasing = TRUE),] hash::clear(rhash) rm(rhash) res[which(res[,1]>1),1]<-1 diff --git a/R/simplifyposteriors.infer.R b/R/simplifyposteriors.infer.R index c78b8b1..09de862 100644 --- a/R/simplifyposteriors.infer.R +++ b/R/simplifyposteriors.infer.R @@ -39,7 +39,7 @@ simplifyposteriors.infer=function(X,posteriors,th=0.0000001,thf=0.5, resp) res=res[tokeep,] }else warning(paste0("No features with posteriors above ",thf,". Returning everything")) - res=res[order(res$V1, decreasing = T),] + res=res[order(res$V1, decreasing = TRUE),] hash::clear(rhash) rm(rhash) tokeep = which(res[,1]>1) diff --git a/README.md b/README.md index ba9fe7a..fba627e 100644 --- a/README.md +++ b/README.md @@ -42,11 +42,11 @@ omp_set_num_threads(1) * An expert one threaded call of (R)(G)MJMCMC is (see [runemjmcmc](https://rdrr.io/github/aliaksah/EMJMCMC2016/man/EMJMCMC.html) for details): ```R -runemjmcmc(formula = formula1,data = data.example,recalc_margin = 2^10,estimator =estimate.bas.lm,estimator.args = list(data = data.example,prior = 3, g = 96 ,n=96),save.beta = T,interact = T,relations = c("","sin","cos","sigmoid","tanh","atan","erf"),relations.prob =c(0.4,0.1,0.1,0.1,0.1,0.1,0.1),interact.param=list(allow_offsprings=2,mutation_rate = 100, max.tree.size = 200000, Nvars.max = 95,p.allow.replace=0.9,p.allow.tree=0.5,p.nor=0.3,p.and = 0.7),n.models = 50000,unique = T,max.cpu = 10,max.cpu.glob = 10,create.table = F,create.hash = T,pseudo.paral = F,burn.in = 100,print.freq = 100) +runemjmcmc(formula = formula1,data = data.example,recalc_margin = 2^10,estimator =estimate.bas.lm,estimator.args = list(data = data.example,prior = 3, g = 96 ,n=96),save.beta = TRUE,interact = TRUE,relations = c("","sin","cos","sigmoid","tanh","atan","erf"),relations.prob =c(0.4,0.1,0.1,0.1,0.1,0.1,0.1),interact.param=list(allow_offsprings=2,mutation_rate = 100, max.tree.size = 200000, Nvars.max = 95,p.allow.replace=0.9,p.allow.tree=0.5,p.nor=0.3,p.and = 0.7),n.models = 50000,unique = TRUE,max.cpu = 10,max.cpu.glob = 10,create.table = FALSE,create.hash = TRUE,pseudo.paral = FALSE,burn.in = 100,print.freq = 100) ``` * An expert parallel call of (R)(G)MCMC with predictions is (see [pinferunemjmcmc](https://rdrr.io/github/aliaksah/EMJMCMC2016/man/pinferunemjmcmc.html) for details): ```R -pinferunemjmcmc(n.cores =30, report.level = 0.8 , num.mod.best = NM,simplify = T, predict = T,test.data = as.data.frame(test),link.function = g, runemjmcmc.params =list(formula = formula1,data = data.example,gen.prob = c(1,1,1,1,0),estimator =estimate.bas.glm.cpen,estimator.args = list(data = data.example,prior = aic.prior(),family = binomial(),yid=31, logn = log(143),r=exp(-0.5)),recalc_margin = 95, save.beta = T,interact = T,relations = c("gauss","tanh","atan","sin"),relations.prob =c(0.1,0.1,0.1,0.1),interact.param=list(allow_offsprings=4,mutation_rate = 100,last.mutation=1000, max.tree.size = 6, Nvars.max = 20,p.allow.replace=0.5,p.allow.tree=0.4,p.nor=0.3,p.and = 0.9),n.models = 7000,unique =T,max.cpu = 4,max.cpu.glob = 4,create.table = F,create.hash = T,pseudo.paral = T,burn.in = 100,print.freq = 1000,advanced.param = list(max.N.glob=as.integer(10), min.N.glob=as.integer(5), max.N=as.integer(3), min.N=as.integer(1), printable = F))) +pinferunemjmcmc(n.cores =30, report.level = 0.8 , num.mod.best = NM,simplify = TRUE, predict = TRUE,test.data = as.data.frame(test),link.function = g, runemjmcmc.params =list(formula = formula1,data = data.example,gen.prob = c(1,1,1,1,0),estimator =estimate.bas.glm.cpen,estimator.args = list(data = data.example,prior = aic.prior(),family = binomial(),yid=31, logn = log(143),r=exp(-0.5)),recalc_margin = 95, save.beta = TRUE,interact = TRUE,relations = c("gauss","tanh","atan","sin"),relations.prob =c(0.1,0.1,0.1,0.1),interact.param=list(allow_offsprings=4,mutation_rate = 100,last.mutation=1000, max.tree.size = 6, Nvars.max = 20,p.allow.replace=0.5,p.allow.tree=0.4,p.nor=0.3,p.and = 0.9),n.models = 7000,unique = TRUE,max.cpu = 4,max.cpu.glob = 4,create.table = FALSE,create.hash = TRUE,pseudo.paral = TRUE,burn.in = 100,print.freq = 1000,advanced.param = list(max.N.glob=as.integer(10), min.N.glob=as.integer(5), max.N=as.integer(3), min.N=as.integer(1), printable = FALSE))) ``` * A simple call of parallel inference on Bayesian logic regression is (see [LogicRegr](https://rdrr.io/github/aliaksah/EMJMCMC2016/man/LogicRegr.html) for details): ```R diff --git a/inst/examples/pinferunemjmcmc_example.R b/inst/examples/pinferunemjmcmc_example.R index 2e4c14a..9f866b6 100644 --- a/inst/examples/pinferunemjmcmc_example.R +++ b/inst/examples/pinferunemjmcmc_example.R @@ -116,7 +116,7 @@ formula1 <- as.formula( for (jjjj in 1:10) { resw <- as.integer(res$predictions >= 0.1 * jjjj) - prec <- (1 - sum(abs(resw - test$X), na.rm = T) / length(resw)) + prec <- (1 - sum(abs(resw - test$X), na.rm = TRUE) / length(resw)) print(prec) # FNR ps <- which(test$X == 1) diff --git a/man/pinferunemjmcmc.Rd b/man/pinferunemjmcmc.Rd index ee6c51e..88040bb 100644 --- a/man/pinferunemjmcmc.Rd +++ b/man/pinferunemjmcmc.Rd @@ -178,7 +178,7 @@ formula1 <- as.formula( for (jjjj in 1:10) { resw <- as.integer(res$predictions >= 0.1 * jjjj) - prec <- (1 - sum(abs(resw - test$X), na.rm = T) / length(resw)) + prec <- (1 - sum(abs(resw - test$X), na.rm = TRUE) / length(resw)) print(prec) # FNR ps <- which(test$X == 1) diff --git a/man/runemjmcmc.Rd b/man/runemjmcmc.Rd index 2a54169..6699225 100644 --- a/man/runemjmcmc.Rd +++ b/man/runemjmcmc.Rd @@ -15,18 +15,18 @@ runemjmcmc( n.models, p.add.default = 1, p.add = 0.5, - unique = F, - save.beta = F, - locstop.nd = F, + unique = FALSE, + save.beta = FALSE, + locstop.nd = FALSE, latent = "", max.cpu = 4, max.cpu.glob = 2, - create.table = T, + create.table = TRUE, hash.length = 20, - presearch = T, - locstop = F, - pseudo.paral = F, - interact = F, + presearch = TRUE, + locstop = FALSE, + pseudo.paral = FALSE, + interact = FALSE, deep.method = 1, relations = c("", "sin", "cos", "sigmoid", "tanh", "atan", "erf"), relations.prob = c(0.4, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1), @@ -34,22 +34,22 @@ runemjmcmc( pool.cross = 0.9, p.epsilon = 1e-04, del.sigma = 0.5, - pool.cor.prob = F, + pool.cor.prob = FALSE, interact.param = list(allow_offsprings = 2, mutation_rate = 100, last.mutation = 2000, max.tree.size = 10000, Nvars.max = 100, p.allow.replace = 0.7, p.allow.tree = 0.1, p.nor = 0.3, p.and = 0.7), prand = 0.01, - keep.origin = T, + keep.origin = TRUE, sup.large.n = 5000, recalc_margin = 2^10, - create.hash = F, + create.hash = FALSE, interact.order = 1, burn.in = 1, eps = 10^6, max.time = 120, max.it = 25000, print.freq = 100, - outgraphs = F, + outgraphs = FALSE, advanced.param = NULL, distrib_of_neighbourhoods = t(array(data = c(7.6651604, 16.773326, 14.541629, 12.839445, 2.964227, 13.048343, 7.165434, 0.9936905, 15.94249, 11.040131, 3.200394, @@ -88,25 +88,25 @@ runemjmcmc( \item{latent}{a latent random field to be addressed (to be specifically used when estimator = INLA, currently unsupported)} -\item{max.cpu}{maximal number of cpus in MJMCMC when within chain parallelization is allowed pseudo.paral = F} +\item{max.cpu}{maximal number of cpus in MJMCMC when within chain parallelization is allowed pseudo.paral = FALSE} -\item{max.cpu.glob}{maximal number of cpus in global moves in MJMCMC when within chain parallelization is allowed pseudo.paral = F} +\item{max.cpu.glob}{maximal number of cpus in global moves in MJMCMC when within chain parallelization is allowed pseudo.paral = FALSE} \item{create.table}{a Boolean variable defining if a big.memory based hash table (only available for MJMCMC with no feature engineering, allows data sharing between CPUs) or the original R hash data structure (available for all algorithm, does not allow data sharing between CPUs) is used for storing of the results} -\item{hash.length}{a parameter defining hash size for the big.memory based hash table as 2^hash.length (only relevant when create.table = T)} +\item{hash.length}{a parameter defining hash size for the big.memory based hash table as 2^hash.length (only relevant when create.table = TRUE)} \item{presearch}{a boolean parameter defining if greedy forward and backward regression steps are used for initialization of initial approximations of marginal inclusion probabilities} \item{locstop}{a boolean parameter defining if the presearch is stopped at the first local extremum visited} -\item{pseudo.paral}{defines if lapply or mclapply is used for local vectorized computations within the chain (can only be TRUE if create.table=T)} +\item{pseudo.paral}{defines if lapply or mclapply is used for local vectorized computations within the chain (can only be TRUE if create.table= TRUE)} \item{interact}{a boolean parameter defining if feature engineering is allowed in the search} \item{deep.method}{an integer in \{1, 2, 3, 4\} defining the method of estimating the alpha parameters of BGNLM, details to be found in https://www.jair.org/index.php/jair/article/view/13047} -\item{relations}{a vector of allowed modification functions (only relevant when feature engineering is enabled by means of interact = T)} +\item{relations}{a vector of allowed modification functions (only relevant when feature engineering is enabled by means of interact = TRUE)} \item{relations.prob}{probability distribution of addressing modifications defined in relations parameter (both vectors must be of the same length)} @@ -238,7 +238,7 @@ formula1 <- as.formula( Hubin & Storvik (2016),Hubin, Storvik & Frommlet (2017), Hubin & Storvik (2017) } \seealso{ -global objects statistics1 (if create.table==T) or hashStat (if create.table==F) contain all marginal likelihoods and two other model selection criteria as well as all of the beta coefficients for the models (if save.beta==T) +global objects statistics1 (if create.table== TRUE) or hashStat (if create.table== FALSE) contain all marginal likelihoods and two other model selection criteria as well as all of the beta coefficients for the models (if save.beta== TRUE) } \author{ Aliaksandr Hubin diff --git a/tests/testthat/test-abalone-BGNLM.R b/tests/testthat/test-abalone-BGNLM.R index fa5d59c..2effeb9 100644 --- a/tests/testthat/test-abalone-BGNLM.R +++ b/tests/testthat/test-abalone-BGNLM.R @@ -7,7 +7,7 @@ #***********************IMPORTANT****************************************************** -data.example = read.csv("https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/master/supplementaries/BGNLM/abalone%20age/abalone.data",header = F) +data.example = read.csv("https://raw.githubusercontent.com/aliaksah/EMJMCMC2016/master/supplementaries/BGNLM/abalone%20age/abalone.data",header = FALSE) data.example$MS=as.integer(data.example$V1=="M") data.example$FS=as.integer(data.example$V1=="F") data.example$V1=data.example$V9 @@ -31,7 +31,7 @@ test_that("Input dataset is still roughly the same", { formula1 = as.formula(paste(colnames(test)[1],"~ 1 +",paste0(colnames(test)[-1],collapse = "+"))) #define the number or cpus -M = 2 +M = 2 #define the size of the simulated samples NM= 100 #define \k_{max} + 1 from the paper