diff --git a/.gitignore b/.gitignore
index c452bc5..78acf5a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,5 +10,8 @@ _pkgdown.yml
 src/*.o
 src/*.so
 vignettes/modelFitting_cache
+vignettes/modelFitting_files
 vignettes/randomEffects_cache
 vignettes/randomEffects_files
+vignettes/factorModels_files
+vignettes/factorModels_cache
diff --git a/NEWS.md b/NEWS.md
index 26c2e72..e56ffed 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -4,7 +4,7 @@ spOccupancy Version 0.3.0 contains numerous substantial updates that provide new
 
 + Additional functionality for fitting spatial and non-spatial multi-species occupancy models with residual species correlations (i.e., joint species distribution models with imperfect detection). See documentation for `lfMsPGOcc()` and `sfMsPGOcc()`. We also included the functions `lfJSDM()` and `sfJSDM()` which are more typical joint species distribution models that fail to explicitly account for imperfect detection.
 + All single-species and multi-species models allow for unstructured random intercepts in both the occurrence and detection portions of the occupancy model. Prior to this version, random intercepts were not supported in the occurrence portion of spatially-explicit models. 
-+ All `predict()` functions now include the argument `type`, which allows for prediction of detection probability (`type = 'detection'`) at a set of covariate values as well as predictions of occurrence (`type = 'occupancy'`). 
++ `predict()` functions for single-species and multi-species models now include the argument `type`, which allows for prediction of detection probability (`type = 'detection'`) at a set of covariate values as well as predictions of occurrence (`type = 'occupancy'`). 
 + All models are substantially faster than version 0.2.1. We improved performance by implementing a change in how we sample the latent Polya-Gamma variables in the detection component of the model. This results in substantial increases in speed for models where the number of replicates varies across sites. We additionally updated how non-spatial random effects were sampled, which also contributes to improved computational performance.
 + All model fitting functions now include the object `like.samples` in the resulting model object, which contains model likelihood values needed for calculation of WAIC. This leads to much shorter run times for `waicOcc()` compared to previous versions.
 + All `fitted.*()` functions now return both the fitted values and the estimated detection probability samples from a fitted `spOccupancy` model. 
diff --git a/R/PGOcc.R b/R/PGOcc.R
index 31619e5..3d4a2c4 100644
--- a/R/PGOcc.R
+++ b/R/PGOcc.R
@@ -396,10 +396,10 @@ PGOcc <- function(occ.formula, det.formula, data, inits, priors,
         }
     }   else {
         if (verbose) {	    
-          message("No prior specified for sigma.sq.psi.ig.\nSetting prior shape to 2 and prior scale to 1\n")
+          message("No prior specified for sigma.sq.psi.ig.\nSetting prior shape to 0.1 and prior scale to 0.1\n")
         }
-        sigma.sq.psi.a <- rep(2, p.occ.re)
-        sigma.sq.psi.b <- rep(1, p.occ.re)
+        sigma.sq.psi.a <- rep(0.1, p.occ.re)
+        sigma.sq.psi.b <- rep(0.1, p.occ.re)
       }
     } else {
       sigma.sq.psi.a <- 0
diff --git a/R/lfJSDM.R b/R/lfJSDM.R
index 7acd9e0..71ee4fc 100644
--- a/R/lfJSDM.R
+++ b/R/lfJSDM.R
@@ -572,7 +572,9 @@ lfJSDM <- function(formula, data, inits, priors,
     colnames(out$beta.star.samples) <- beta.star.names
     out$re.level.names <- re.level.names
   }
+  loadings.names <- paste(rep(sp.names, times = n.factors), rep(1:n.factors, each = N), sep = '-')
   out$lambda.samples <- mcmc(do.call(rbind, lapply(out.tmp, function(a) t(a$lambda.samples))))
+  colnames(out$lambda.samples) <- loadings.names
 
   # Return things back in the original order. 
   out$z.samples <- do.call(abind, lapply(out.tmp, function(a) array(a$z.samples, 
diff --git a/R/lfMsPGOcc.R b/R/lfMsPGOcc.R
index bfd1fca..34d32cf 100644
--- a/R/lfMsPGOcc.R
+++ b/R/lfMsPGOcc.R
@@ -1043,7 +1043,9 @@ lfMsPGOcc <- function(occ.formula, det.formula, data, inits, priors,
     colnames(out$beta.star.samples) <- beta.star.names
     out$re.level.names <- re.level.names
   }
+  loadings.names <- paste(rep(sp.names, times = n.factors), rep(1:n.factors, each = N), sep = '-')
   out$lambda.samples <- mcmc(do.call(rbind, lapply(out.tmp, function(a) t(a$lambda.samples))))
+  colnames(out$lambda.samples) <- loadings.names
 
   # Return things back in the original order. 
   out$z.samples <- do.call(abind, lapply(out.tmp, function(a) array(a$z.samples, 
diff --git a/R/sfJSDM.R b/R/sfJSDM.R
index 42d56a8..9b1a8a8 100644
--- a/R/sfJSDM.R
+++ b/R/sfJSDM.R
@@ -270,8 +270,6 @@ sfJSDM <- function(formula, data, inits, priors,
   }
   # phi -----------------------------
   coords.D <- iDist(coords)
-  lower.unif <- 3 / max(coords.D)
-  upper.unif <- 3 / sort(unique(c(coords.D)))[2]
   # Get distance matrix which is used if priors are not specified
   if ("phi.unif" %in% names(priors)) {
     if (!is.list(priors$phi.unif) | length(priors$phi.unif) != 2) {
@@ -811,7 +809,9 @@ sfJSDM <- function(formula, data, inits, priors,
       colnames(out$beta.star.samples) <- beta.star.names
       out$re.level.names <- re.level.names
     }
+    loadings.names <- paste(rep(sp.names, times = n.factors), rep(1:n.factors, each = N), sep = '-')
     out$lambda.samples <- mcmc(do.call(rbind, lapply(out.tmp, function(a) t(a$lambda.samples))))
+    colnames(out$lambda.samples) <- loadings.names
     out$theta.samples <- mcmc(do.call(rbind, lapply(out.tmp, function(a) t(a$theta.samples))))
     if (cov.model != 'matern') {
       theta.names <- paste(rep(c('phi'), each = q), 1:q, sep = '-')
diff --git a/R/sfMsPGOcc.R b/R/sfMsPGOcc.R
index 60252e6..0197504 100644
--- a/R/sfMsPGOcc.R
+++ b/R/sfMsPGOcc.R
@@ -496,8 +496,6 @@ sfMsPGOcc <- function(occ.formula, det.formula, data, inits, priors,
 
   # phi -----------------------------
   coords.D <- iDist(coords)
-  lower.unif <- 3 / max(coords.D)
-  upper.unif <- 3 / sort(unique(c(coords.D)))[2]
   # Get distance matrix which is used if priors are not specified
   if ("phi.unif" %in% names(priors)) {
     if (!is.list(priors$phi.unif) | length(priors$phi.unif) != 2) {
@@ -1298,7 +1296,9 @@ sfMsPGOcc <- function(occ.formula, det.formula, data, inits, priors,
       colnames(out$beta.star.samples) <- beta.star.names
       out$re.level.names <- re.level.names
     }
+    loadings.names <- paste(rep(sp.names, times = n.factors), rep(1:n.factors, each = N), sep = '-')
     out$lambda.samples <- mcmc(do.call(rbind, lapply(out.tmp, function(a) t(a$lambda.samples))))
+    colnames(out$lambda.samples) <- loadings.names
     out$theta.samples <- mcmc(do.call(rbind, lapply(out.tmp, function(a) t(a$theta.samples))))
     if (cov.model != 'matern') {
       theta.names <- paste(rep(c('phi'), each = q), 1:q, sep = '-')
diff --git a/R/simMsOcc.R b/R/simMsOcc.R
index e00c3e4..44ff32a 100644
--- a/R/simMsOcc.R
+++ b/R/simMsOcc.R
@@ -286,7 +286,7 @@ simMsOcc <- function(J.x, J.y, n.rep, N, beta, alpha, psi.RE = list(),
   psi <- matrix(NA, nrow = N, ncol = J)
   z <- matrix(NA, nrow = N, ncol = J)
   for (i in 1:N) {
-    if (sp) {
+    if (sp | factor.model) {
       if (length(psi.RE) > 0) {
         psi[i, ] <- logit.inv(X %*% as.matrix(beta[i, ]) + w.star[i, ] + beta.star.sites[i, ])
       } else {
diff --git a/R/spIntPGOcc.R b/R/spIntPGOcc.R
index 69a7863..dc8bfcf 100644
--- a/R/spIntPGOcc.R
+++ b/R/spIntPGOcc.R
@@ -405,10 +405,10 @@ spIntPGOcc <- function(occ.formula, det.formula, data, inits, priors,
       sigma.sq.b <- priors$sigma.sq.ig[2]
     } else {
       if (verbose) {
-        message("No prior specified for sigma.sq.ig.\nSetting the shape and scale parameters to 2.\n")
+        message("No prior specified for sigma.sq.ig.\nSetting the shape parameter to 2 and scale parameter to 1.\n")
       }
       sigma.sq.a <- 2
-      sigma.sq.b <- 2
+      sigma.sq.b <- 1
     }
     # nu -----------------------------
     if (cov.model == 'matern') {
diff --git a/R/spMsPGOcc.R b/R/spMsPGOcc.R
index f0d3104..6792ee3 100644
--- a/R/spMsPGOcc.R
+++ b/R/spMsPGOcc.R
@@ -491,8 +491,6 @@ spMsPGOcc <- function(occ.formula, det.formula, data, inits, priors,
 
   # phi -----------------------------
   coords.D <- iDist(coords)
-  lower.unif <- 3 / max(coords.D)
-  upper.unif <- 3 / sort(unique(c(coords.D)))[2]
   # Get distance matrix which is used if priors are not specified
   if ("phi.unif" %in% names(priors)) {
     if (!is.list(priors$phi.unif) | length(priors$phi.unif) != 2) {
@@ -545,10 +543,10 @@ spMsPGOcc <- function(occ.formula, det.formula, data, inits, priors,
     }
   } else {
     if (verbose) {
-      message("No prior specified for sigma.sq.ig.\nSetting the shape and scale parameters to 2.\n")
+      message("No prior specified for sigma.sq.ig.\nSetting the shape parameter to 2 and scale parameter to 1.\n")
     }
     sigma.sq.a <- rep(2, N)
-    sigma.sq.b <- rep(2, N)
+    sigma.sq.b <- rep(1, N)
   }
 
   # nu -----------------------------
diff --git a/R/spPGOcc.R b/R/spPGOcc.R
index 5ef7cc9..b204d79 100644
--- a/R/spPGOcc.R
+++ b/R/spPGOcc.R
@@ -400,8 +400,6 @@ spPGOcc <- function(occ.formula, det.formula, data, inits, priors,
   # phi -----------------------------
   # Get distance matrix which is used if priors are not specified
   coords.D <- iDist(coords)
-  lower.unif <- 3 / max(coords.D)
-  upper.unif <- 3 / sort(unique(c(coords.D)))[2]
   if ("phi.unif" %in% names(priors)) {
     if (!is.vector(priors$phi.unif) | !is.atomic(priors$phi.unif) | length(priors$phi.unif) != 2) {
       stop("error: phi.unif must be a vector of length 2 with elements corresponding to phi's lower and upper bounds")
@@ -424,10 +422,10 @@ spPGOcc <- function(occ.formula, det.formula, data, inits, priors,
     sigma.sq.b <- priors$sigma.sq.ig[2]
   } else {
     if (verbose) {
-      message("No prior specified for sigma.sq.ig.\nSetting the shape and scale parameters to 2.\n")
+      message("No prior specified for sigma.sq.ig.\nSetting the shape parameter to 2 and scale parameter to 1.\n")
     }
     sigma.sq.a <- 2
-    sigma.sq.b <- 2
+    sigma.sq.b <- 1
   }
   # nu -----------------------------
   if (cov.model == 'matern') {
@@ -478,10 +476,10 @@ spPGOcc <- function(occ.formula, det.formula, data, inits, priors,
       }
   }   else {
       if (verbose) {	    
-        message("No prior specified for sigma.sq.psi.ig.\nSetting prior shape to 2 and prior scale to 1\n")
+        message("No prior specified for sigma.sq.psi.ig.\nSetting prior shape to 0.1 and prior scale to 0.1\n")
       }
-      sigma.sq.psi.a <- rep(2, p.occ.re)
-      sigma.sq.psi.b <- rep(1, p.occ.re)
+      sigma.sq.psi.a <- rep(0.1, p.occ.re)
+      sigma.sq.psi.b <- rep(0.1, p.occ.re)
     }
   } else {
     sigma.sq.psi.a <- 0
diff --git a/README.Rmd b/README.Rmd
index 3f70c57..53ad50a 100644
--- a/README.Rmd
+++ b/README.Rmd
@@ -23,7 +23,7 @@ cat(
 )
 ```
 
-spOccupancy fits single-species, multi-species, and integrated spatial occupancy models using Markov Chain Monte Carlo (MCMC). Models are fit using P&oacute;ly-Gamma data augmentation. Spatial models are fit using either Gaussian processes or Nearest Neighbor Gaussian Processes (NNGP) for large spatial datasets. The package provides functionality for data integration of multiple single-species occupancy data sets using a joint likelihood framework. Below we provide a very brief introduction to some of the package's functionality, and illustrate just one of the model fitting funcitons. For more information, see the resources referenced at the bottom of this page. 
+spOccupancy fits single-species, multi-species, and integrated spatial occupancy models using Markov Chain Monte Carlo (MCMC). Models are fit using P&oacute;ly-Gamma data augmentation. Spatial models are fit using either Gaussian processes or Nearest Neighbor Gaussian Processes (NNGP) for large spatial datasets. The package provides functionality for data integration of multiple single-species occupancy data sets using a joint likelihood framework. For multi-species models, spOccupancy provides functions to account for residual species correlations in a joint species distribution model framework while accounting for imperfect detection. Below we provide a very brief introduction to some of the package's functionality, and illustrate just one of the model fitting funcitons. For more information, see the resources referenced at the bottom of this page. 
 
 ## Installation
 
@@ -35,19 +35,23 @@ install.packages("spOccupancy")
 
 ## Functionality
 
-|`spOccupancy` Function  | Description                                                   |
-|---------------- |----------------------------------------------------------------------|
-|`PGOcc`          | Single-species occupancy model                                       |
-|`spPGOcc`        | Single-species spatial occupancy model                               |
-|`intPGOcc`       | Single-species occupancy model with multiple data sources            |
-|`spIntPGOcc`     | Single-species spatial occupancy model with multiple data sources    |
-|`msPGOcc`        | Multi-species occupancy model                                        |
-|`spMsPGOcc`      | Multi-species spatial occupancy model                                |
-|`ppcOcc`         | Posterior predictive check using Bayesian p-values                   | 
-|`waicOcc`        | Compute Widely Applicable Information Criterion (WAIC)               |
-|`simOcc`         | Simulate single-species occupancy data                               |
-|`simMsOcc`       | Simulate multi-species occupancy data                                |
-|`simIntOcc`      | Simulate single-species occupancy data from multiple data sources
+|`spOccupancy` Function  | Description                                                     |
+|---------------- |------------------------------------------------------------------------|
+|`PGOcc()`          | Single-species occupancy model                                       |
+|`spPGOcc()`        | Single-species spatial occupancy model                               |
+|`intPGOcc()`       | Single-species occupancy model with multiple data sources            |
+|`spIntPGOcc()`     | Single-species spatial occupancy model with multiple data sources    |
+|`msPGOcc()`        | Multi-species occupancy model                                        |
+|`spMsPGOcc()`      | Multi-species spatial occupancy model                                |
+|`lfJSDM()`         | Joint species distribution model without imperfect detection         |
+|`sfJSDM()`         | Spatial joint species distribution model without imperfect detection |
+|`lfMsPGOcc()`      | Multi-species occupancy model with species correlations              |
+|`sfMsPGOcc()`      | Multi-species spatial occupancy model with species correlations      |
+|`ppcOcc()`         | Posterior predictive check using Bayesian p-values                   | 
+|`waicOcc()`        | Compute Widely Applicable Information Criterion (WAIC)               |
+|`simOcc()`         | Simulate single-species occupancy data                               |
+|`simMsOcc()`       | Simulate multi-species occupancy data                                |
+|`simIntOcc()`      | Simulate single-species occupancy data from multiple data sources    |
 
 ## Example usage
 
@@ -128,9 +132,11 @@ out.pred <- predict(out, X.0, coords.0, verbose = FALSE)
 
 ## Learn more
 
-The `vignette("modelFitting")` provides a more detailed description and tutorial of all functions in `spOccupancy`. For full statistical details on the MCMC samplers used in `spOccupancy`, see `vignette("mcmcSamplers")`. In addition, see [our recent paper](https://arxiv.org/abs/2111.12163) that describes the package in more detail (Doser et al. 2021). 
+The `vignette("modelFitting")` provides a more detailed description and tutorial of the core functions in `spOccupancy`. For full statistical details on the MCMC samplers for core functions in `spOccupancy`, see `vignette("mcmcSamplers")`. In addition, see [our recent paper](https://arxiv.org/abs/2111.12163) that describes the package in more detail (Doser et al. 2021). For a detailed description and tutorial of joint species distribution models in `spOccupancy` that account for residual species correlations, see `vignette("factorModels")`, as well as `vignette("mcmcFactorModels")` for full statistical details.
 
 ## References
 
-Doser, J. W., Finley, A. O., Kéry, M., and Zipkin, E. F. (2021a). spOccupancy: An R package for single-species, multi-species, and integrated spatial occupancy models. arXiv preprint arxiv:2111.12163.
+Doser, J. W., Finley, A. O., Kéry, M., and Zipkin, E. F. (2021). spOccupancy: An R package for single-species, multi-species, and integrated spatial occupancy models. arXiv preprint arxiv:2111.12163.
+
+Doser, J. W., Finley, A. O., Banerjee, S. (2022) Joint species distribution models with imperfect detection for high-dimensional spatial data. In prep.
 
diff --git a/README.md b/README.md
index f92b3c8..d6cbc34 100644
--- a/README.md
+++ b/README.md
@@ -13,10 +13,13 @@ using Póly-Gamma data augmentation. Spatial models are fit using either
 Gaussian processes or Nearest Neighbor Gaussian Processes (NNGP) for
 large spatial datasets. The package provides functionality for data
 integration of multiple single-species occupancy data sets using a joint
-likelihood framework. Below we provide a very brief introduction to some
-of the package’s functionality, and illustrate just one of the model
-fitting funcitons. For more information, see the resources referenced at
-the bottom of this page.
+likelihood framework. For multi-species models, spOccupancy provides
+functions to account for residual species correlations in a joint
+species distribution model framework while accounting for imperfect
+detection. Below we provide a very brief introduction to some of the
+package’s functionality, and illustrate just one of the model fitting
+funcitons. For more information, see the resources referenced at the
+bottom of this page.
 
 ## Installation
 
@@ -29,19 +32,23 @@ install.packages("spOccupancy")
 
 ## Functionality
 
-| `spOccupancy` Function | Description                                                       |
-| ---------------------- | ----------------------------------------------------------------- |
-| `PGOcc`                | Single-species occupancy model                                    |
-| `spPGOcc`              | Single-species spatial occupancy model                            |
-| `intPGOcc`             | Single-species occupancy model with multiple data sources         |
-| `spIntPGOcc`           | Single-species spatial occupancy model with multiple data sources |
-| `msPGOcc`              | Multi-species occupancy model                                     |
-| `spMsPGOcc`            | Multi-species spatial occupancy model                             |
-| `ppcOcc`               | Posterior predictive check using Bayesian p-values                |
-| `waicOcc`              | Compute Widely Applicable Information Criterion (WAIC)            |
-| `simOcc`               | Simulate single-species occupancy data                            |
-| `simMsOcc`             | Simulate multi-species occupancy data                             |
-| `simIntOcc`            | Simulate single-species occupancy data from multiple data sources |
+| `spOccupancy` Function | Description                                                          |
+| ---------------------- | -------------------------------------------------------------------- |
+| `PGOcc()`              | Single-species occupancy model                                       |
+| `spPGOcc()`            | Single-species spatial occupancy model                               |
+| `intPGOcc()`           | Single-species occupancy model with multiple data sources            |
+| `spIntPGOcc()`         | Single-species spatial occupancy model with multiple data sources    |
+| `msPGOcc()`            | Multi-species occupancy model                                        |
+| `spMsPGOcc()`          | Multi-species spatial occupancy model                                |
+| `lfJSDM()`             | Joint species distribution model without imperfect detection         |
+| `sfJSDM()`             | Spatial joint species distribution model without imperfect detection |
+| `lfMsPGOcc()`          | Multi-species occupancy model with species correlations              |
+| `sfMsPGOcc()`          | Multi-species spatial occupancy model with species correlations      |
+| `ppcOcc()`             | Posterior predictive check using Bayesian p-values                   |
+| `waicOcc()`            | Compute Widely Applicable Information Criterion (WAIC)               |
+| `simOcc()`             | Simulate single-species occupancy data                               |
+| `simMsOcc()`           | Simulate multi-species occupancy data                                |
+| `simIntOcc()`          | Simulate single-species occupancy data from multiple data sources    |
 
 ## Example usage
 
@@ -120,25 +127,25 @@ summary(out)
 #> Thinning Rate: 4
 #> Number of Chains: 3
 #> Total Posterior Samples: 6000
-#> Run Time (min): 1.9316
+#> Run Time (min): 1.4212
 #> 
 #> Occurrence (logit scale): 
-#>                          Mean     SD    2.5%     50%   97.5%   Rhat      ESS
-#> (Intercept)            4.1679 0.6201  3.0796  4.1129  5.5253 1.0281 247.0541
-#> scale(Elevation)      -0.5374 0.2535 -1.0717 -0.5319 -0.0500 1.0330 558.6439
-#> I(scale(Elevation)^2) -1.2247 0.2262 -1.7324 -1.2051 -0.8373 1.0363 251.1803
+#>                          Mean     SD    2.5%     50%   97.5%   Rhat  ESS
+#> (Intercept)            3.9332 0.5585  2.9850  3.8780  5.2357 1.0589  257
+#> scale(Elevation)      -0.5069 0.2110 -0.9323 -0.5057 -0.1032 1.0189 1119
+#> I(scale(Elevation)^2) -1.1406 0.2057 -1.6026 -1.1218 -0.7826 1.0402  386
 #> 
 #> Detection (logit scale): 
-#>                    Mean     SD    2.5%     50%  97.5%   Rhat      ESS
-#> (Intercept)      0.6628 0.1127  0.4479  0.6615 0.8826 1.0003 5010.936
-#> scale(day)       0.2900 0.0712  0.1511  0.2903 0.4301 1.0016 6000.000
-#> scale(tod)      -0.0312 0.0702 -0.1680 -0.0310 0.1059 1.0011 6000.000
-#> I(scale(day)^2) -0.0750 0.0858 -0.2424 -0.0762 0.0958 0.9999 6000.000
+#>                    Mean     SD    2.5%     50%  97.5%   Rhat  ESS
+#> (Intercept)      0.6624 0.1146  0.4389  0.6606 0.8859 1.0006 5572
+#> scale(day)       0.2922 0.0706  0.1564  0.2915 0.4327 0.9999 6000
+#> scale(tod)      -0.0326 0.0704 -0.1689 -0.0327 0.1038 1.0034 6000
+#> I(scale(day)^2) -0.0746 0.0863 -0.2430 -0.0769 0.0968 1.0007 6000
 #> 
 #> Spatial Covariance: 
-#>            Mean     SD   2.5%    50%  97.5%   Rhat     ESS
-#> sigma.sq 1.7061 1.2382 0.3851 1.3643 5.0950 1.0312 96.9992
-#> phi      0.0064 0.0071 0.0006 0.0032 0.0268 1.1779 43.3338
+#>            Mean     SD   2.5%    50%  97.5%   Rhat ESS
+#> sigma.sq 0.9695 0.8124 0.2045 0.7195 3.2131 1.1136 114
+#> phi      0.0067 0.0073 0.0006 0.0036 0.0263 1.4115  59
 ```
 
 ### Posterior predictive check
@@ -164,7 +171,7 @@ summary(ppc.out)
 #> Number of Chains: 3
 #> Total Posterior Samples: 6000
 #> 
-#> Bayesian p-value:  0.4026667 
+#> Bayesian p-value:  0.4913 
 #> Fit statistic:  freeman-tukey
 ```
 
@@ -177,7 +184,7 @@ due to Monte Carlo error your results will differ slightly).
 ``` r
 waicOcc(out)
 #>       elpd         pD       WAIC 
-#> -676.57521   25.20569 1403.56181
+#> -683.42274   19.50487 1405.85522
 ```
 
 Alternatively, we can perform k-fold cross-validation (CV) directly in
@@ -190,7 +197,7 @@ value of this CV score.
 
 ``` r
 out$k.fold.deviance
-#> [1] 1496.396
+#> [1] 1496.671
 ```
 
 ### Prediction
@@ -212,14 +219,21 @@ out.pred <- predict(out, X.0, coords.0, verbose = FALSE)
 ## Learn more
 
 The `vignette("modelFitting")` provides a more detailed description and
-tutorial of all functions in `spOccupancy`. For full statistical details
-on the MCMC samplers used in `spOccupancy`, see
+tutorial of the core functions in `spOccupancy`. For full statistical
+details on the MCMC samplers for core functions in `spOccupancy`, see
 `vignette("mcmcSamplers")`. In addition, see [our recent
 paper](https://arxiv.org/abs/2111.12163) that describes the package in
-more detail (Doser et al. 2021).
+more detail (Doser et al. 2021). For a detailed description and tutorial
+of joint species distribution models in `spOccupancy` that account for
+residual species correlations, see `vignette("factorModels")`, as well
+as `vignette("mcmcFactorModels")` for full statistical details.
 
 ## References
 
-Doser, J. W., Finley, A. O., Kéry, M., and Zipkin, E. F. (2021a).
+Doser, J. W., Finley, A. O., Kéry, M., and Zipkin, E. F. (2021).
 spOccupancy: An R package for single-species, multi-species, and
 integrated spatial occupancy models. arXiv preprint arxiv:2111.12163.
+
+Doser, J. W., Finley, A. O., Banerjee, S. (2022) Joint species
+distribution models with imperfect detection for high-dimensional
+spatial data. In prep.
diff --git a/man/fitted.lfJSDM.Rd b/man/fitted.lfJSDM.Rd
index 1ef363c..ab9d355 100644
--- a/man/fitted.lfJSDM.Rd
+++ b/man/fitted.lfJSDM.Rd
@@ -24,8 +24,8 @@
 \value{
   A list comprised of: 
 
-  \item{z.samples}{A four-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
-  \item{psi.samples}{A three-dimensional numeric array of probability values. Array dimensions correspond to MCMC samples, sites, and replicates.}
+  \item{z.samples}{A three-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, and sites.}
+  \item{psi.samples}{A three-dimensional numeric array of probability values. Array dimensions correspond to MCMC samples, species, and sites.}
 }
 
 \keyword{model}
diff --git a/man/fitted.lfMsPGOcc.Rd b/man/fitted.lfMsPGOcc.Rd
index 82af887..2e4d042 100644
--- a/man/fitted.lfMsPGOcc.Rd
+++ b/man/fitted.lfMsPGOcc.Rd
@@ -24,7 +24,7 @@
 \value{
   A list comprised of: 
   \item{y.rep.samples}{A four-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
-  \item{p.samples}{A three-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, sites, and replicates.}
+  \item{p.samples}{A four-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
 }
 
 \keyword{model}
diff --git a/man/fitted.msPGOcc.Rd b/man/fitted.msPGOcc.Rd
index 1174748..00e783f 100644
--- a/man/fitted.msPGOcc.Rd
+++ b/man/fitted.msPGOcc.Rd
@@ -25,7 +25,7 @@
   A list comprised of: 
 
   \item{y.rep.samples}{A four-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
-  \item{p.samples}{A three-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, sites, and replicates.}
+  \item{p.samples}{A four-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
 }
 
 \keyword{model}
diff --git a/man/fitted.sfJSDM.Rd b/man/fitted.sfJSDM.Rd
index 3d52f68..ae31e6c 100644
--- a/man/fitted.sfJSDM.Rd
+++ b/man/fitted.sfJSDM.Rd
@@ -24,8 +24,8 @@
 \value{
   A list comprised of: 
 
-  \item{z.samples}{A four-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
-  \item{psi.samples}{A three-dimensional numeric array of probability values. Array dimensions correspond to MCMC samples, sites, and replicates.}
+  \item{z.samples}{A three-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, and sites.}
+  \item{psi.samples}{A three-dimensional numeric array of probability values. Array dimensions correspond to MCMC samples, species, and sites.}
 }
 
 \keyword{model}
diff --git a/man/fitted.sfMsPGOcc.Rd b/man/fitted.sfMsPGOcc.Rd
index a4665eb..0268fde 100644
--- a/man/fitted.sfMsPGOcc.Rd
+++ b/man/fitted.sfMsPGOcc.Rd
@@ -26,7 +26,7 @@
 
   \item{y.rep.samples}{A four-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
 
-  \item{p.samples}{A three-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, sites, and replicates.}
+  \item{p.samples}{A four-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
 }
 
 \keyword{model}
diff --git a/man/fitted.spMsPGOcc.Rd b/man/fitted.spMsPGOcc.Rd
index 9697482..69bfe47 100644
--- a/man/fitted.spMsPGOcc.Rd
+++ b/man/fitted.spMsPGOcc.Rd
@@ -25,7 +25,7 @@
   A list comprised of: 
 
   \item{y.rep.samples}{A four-dimensional numeric array of fitted values for use in Goodness of Fit assessments. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
-  \item{p.samples}{A three-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, sites, and replicates.}
+  \item{p.samples}{A four-dimensional numeric array of detection probability values. Array dimensions correspond to MCMC samples, species, sites, and replicates.}
 }
 
 \keyword{model}
diff --git a/man/hbef2015.rda.Rd b/man/hbef2015.rda.Rd
index cef847f..4b64272 100644
--- a/man/hbef2015.rda.Rd
+++ b/man/hbef2015.rda.Rd
@@ -29,10 +29,9 @@ data(hbef2015)
 	(Accessed 2021-09-07)}
 
 \references{
-  Doser, J.W., Leuenberger, W., Sillett, T.S., Hallworth, M.T., Zipkin, E.F. 
-  Integrated community occupancy models:  A framework to assess occurrence 
-  and biodiversity dynamics using multiple data sources.
-  https://arxiv.org/abs/2109.01894
+ Doser, J. W., Leuenberger, W., Sillett, T. S., Hallworth, M. T. & Zipkin, E. F. (2022). 
+ Integrated community occupancy models: A framework to assess occurrence and biodiversity 
+ dynamics using multiple data sources. Methods in Ecology and Evolution, 00, 1– 14. \doi{10.1111/2041-210X.13811}
 }
 
 \format{
diff --git a/man/neon2015.rda.Rd b/man/neon2015.rda.Rd
index fcc39ac..7bc19f4 100644
--- a/man/neon2015.rda.Rd
+++ b/man/neon2015.rda.Rd
@@ -33,10 +33,9 @@ data(neon2015)
 	RELEASE-2021 (DP1.10003.001). https://doi.org/10.48443/s730-dy13. 
 	Dataset accessed from https://data.neonscience.org on October 10, 2021}
 \references{
-  Doser, J.W., Leuenberger, W., Sillett, T.S., Hallworth, M.T., Zipkin, E.F. 
-  Integrated community occupancy models:  A framework to assess occurrence 
-  and biodiversity dynamics usingmultiple data sources.
-  https://arxiv.org/abs/2109.01894
+  Doser, J. W., Leuenberger, W., Sillett, T. S., Hallworth, M. T. & Zipkin, E. F. (2022). 
+  Integrated community occupancy models: A framework to assess occurrence and biodiversity 
+  dynamics using multiple data sources. Methods in Ecology and Evolution, 00, 1– 14. \doi{10.1111/2041-210X.13811}
 
   Barnett, D. T., Duffy, P. A., Schimel, D. S., Krauss, R. E., Irvine, K. M., 
   Davis, F. W.,Gross, J. E., Azuaje, E. I., Thorpe, A. S., Gudex-Cross, D., et al. (2019). 
diff --git a/man/predict.PGOcc.Rd b/man/predict.PGOcc.Rd
index 96a8c18..68317be 100644
--- a/man/predict.PGOcc.Rd
+++ b/man/predict.PGOcc.Rd
@@ -23,6 +23,10 @@
   \item{...}{currently no additional arguments}
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects. 
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -58,7 +62,7 @@ p.occ <- length(beta)
 alpha <- c(0, 1)
 p.det <- length(alpha)
 dat <- simOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, beta = beta, alpha = alpha,
-	      sp = FALSE)
+              sp = FALSE)
 # Split into fitting and prediction data set
 pred.indx <- sample(1:J, round(J * .25), replace = FALSE)
 y <- dat$y[-pred.indx, ]
@@ -74,32 +78,32 @@ occ.covs <- X[, 2, drop = FALSE]
 colnames(occ.covs) <- c('occ.cov')
 det.covs <- list(det.cov = X.p[, , 2])
 data.list <- list(y = y, 
-		  occ.covs = occ.covs,
-		  det.covs = det.covs)
+                  occ.covs = occ.covs,
+                  det.covs = det.covs)
 # Priors
 prior.list <- list(beta.normal = list(mean = rep(0, p.occ),
-				      var = rep(2.72, p.occ)),
-		   alpha.normal = list(mean = rep(0, p.det),
-			               var = rep(2.72, p.det)))
+                                      var = rep(2.72, p.occ)),
+                   alpha.normal = list(mean = rep(0, p.det),
+                                       var = rep(2.72, p.det)))
 # Initial values
 inits.list <- list(alpha = rep(0, p.det),
-		      beta = rep(0, p.occ),
-		      z = apply(y, 1, max, na.rm = TRUE))
+                   beta = rep(0, p.occ),
+                   z = apply(y, 1, max, na.rm = TRUE))
 
 n.samples <- 5000
 n.report <- 1000
 
 out <- PGOcc(occ.formula = ~ occ.cov, 
-	     det.formula = ~ det.cov,
-	     data = data.list, 
-	     inits = inits.list,
-	     n.samples = n.samples,
-	     priors = prior.list,
-	     n.omp.threads = 1,
-	     verbose = TRUE,
-	     n.report = n.report, 
-	     n.burn = 4000, 
-	     n.thin = 1)
+             det.formula = ~ det.cov,
+             data = data.list, 
+             inits = inits.list,
+             n.samples = n.samples,
+             priors = prior.list,
+             n.omp.threads = 1,
+             verbose = TRUE,
+             n.report = n.report, 
+             n.burn = 4000, 
+             n.thin = 1)
 
 summary(out)
 
diff --git a/man/predict.intPGOcc.Rd b/man/predict.intPGOcc.Rd
index 666d802..4ad0e33 100644
--- a/man/predict.intPGOcc.Rd
+++ b/man/predict.intPGOcc.Rd
@@ -66,7 +66,7 @@ p.det <- sum(p.det.long)
 
 # Simulate occupancy data. 
 dat <- simIntOcc(n.data = n.data, J.x = J.x, J.y = J.y, J.obs = J.obs, 
-		 n.rep = n.rep, beta = beta, alpha = alpha, sp = FALSE)
+                 n.rep = n.rep, beta = beta, alpha = alpha, sp = FALSE)
 
 y <- dat$y
 X <- dat$X.obs
@@ -83,34 +83,34 @@ det.covs[[2]] <- list(det.cov.2.1 = X.p[[2]][, , 2])
 det.covs[[3]] <- list(det.cov.3.1 = X.p[[3]][, , 2]) 
 det.covs[[4]] <- list(det.cov.4.1 = X.p[[4]][, , 2]) 
 data.list <- list(y = y, 
-		  occ.covs = occ.covs,
-		  det.covs = det.covs, 
-		  sites = sites)
+                  occ.covs = occ.covs,
+                  det.covs = det.covs, 
+                  sites = sites)
 
 J <- length(dat$z.obs)
 # Initial values
 inits.list <- list(alpha = list(0, 0, 0, 0), 
-		      beta = 0, 
-		      z = rep(1, J))
+                   beta = 0, 
+                   z = rep(1, J))
 # Priors
 prior.list <- list(beta.normal = list(mean = 0, var = 2.72), 
-		   alpha.normal = list(mean = list(0, 0, 0, 0), 
-			               var = list(2.72, 2.72, 2.72, 2.72)))
+                   alpha.normal = list(mean = list(0, 0, 0, 0), 
+                                       var = list(2.72, 2.72, 2.72, 2.72)))
 n.samples <- 5000
 out <- intPGOcc(occ.formula = ~ occ.cov, 
                 det.formula = list(f.1 = ~ det.cov.1.1, 
-		                   f.2 = ~ det.cov.2.1, 
-		                   f.3 = ~ det.cov.3.1, 
-		                   f.4 = ~ det.cov.4.1), 
-		data = data.list,
-		inits = inits.list,
-		n.samples = n.samples, 
-		priors = prior.list, 
-		n.omp.threads = 1, 
-		verbose = TRUE, 
-		n.report = 1000, 
-		n.burn = 4000, 
-		n.thin = 1)
+                                   f.2 = ~ det.cov.2.1, 
+                                   f.3 = ~ det.cov.3.1, 
+                                   f.4 = ~ det.cov.4.1), 
+                data = data.list,
+                inits = inits.list,
+                n.samples = n.samples, 
+                priors = prior.list, 
+                n.omp.threads = 1, 
+                verbose = TRUE, 
+                n.report = 1000, 
+                n.burn = 4000, 
+                n.thin = 1)
 
 summary(out)
 
diff --git a/man/predict.lfJSDM.Rd b/man/predict.lfJSDM.Rd
index 0585b7f..0edb30b 100644
--- a/man/predict.lfJSDM.Rd
+++ b/man/predict.lfJSDM.Rd
@@ -8,7 +8,7 @@
 
 \usage{
 \method{predict}{lfJSDM}(object, X.0, coords.0, 
-	                    ignore.RE = FALSE, ...)
+        ignore.RE = FALSE, ...)
 }
 
 \arguments{
@@ -20,12 +20,16 @@
   \item{coords.0}{the spatial coordinates corresponding to \code{X.0}. Note that \code{spOccupancy} assumes coordinates are specified 
   in a projected coordinate system.}
 
-  \item{ignore.RE}{a logical value indicating whether to include unstructured random effects for prediction. If FALSE, random effects will be ignored and prediction will only use the fixed effects. If TRUE, random effects will be included in the prediction for both observed and unobserved levels of the random effect.}
+  \item{ignore.RE}{a logical value indicating whether to include unstructured random effects for prediction. If TRUE, random effects will be ignored and prediction will only use the fixed effects. If FALSE, random effects will be included in the prediction for both observed and unobserved levels of the random effect.}
 
   \item{...}{currently no additional arguments}
 
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -75,7 +79,7 @@ for (i in 1:p.det) {
 
 n.factors <- 3
 dat <- simMsOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, N = N, beta = beta, alpha = alpha,
-		sp = FALSE, factor.model = TRUE, n.factors = n.factors)
+                sp = FALSE, factor.model = TRUE, n.factors = n.factors)
 n.samples <- 5000
 # Split into fitting and prediction data set
 pred.indx <- sample(1:J, round(J * .25), replace = FALSE)
@@ -93,32 +97,32 @@ coords.0 <- dat$coords[pred.indx, ]
 covs <- X[, 2, drop = FALSE]
 colnames(covs) <- c('occ.cov')
 data.list <- list(y = y, 
-		  covs = covs,
+                  covs = covs,
                   coords = coords)
 
 # Occupancy initial values
 prior.list <- list(beta.comm.normal = list(mean = 0, var = 2.72), 
-		   tau.sq.beta.ig = list(a = 0.1, b = 0.1))
+                   tau.sq.beta.ig = list(a = 0.1, b = 0.1))
 # Initial values
 lambda.inits <- matrix(0, N, n.factors)
 diag(lambda.inits) <- 1
 lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
 inits.list <- list(alpha.comm = 0, 
-		      beta.comm = 0, 
-		      beta = 0, 
-		      tau.sq.beta = 1, 
-		      lambda = lambda.inits)
+                   beta.comm = 0, 
+                   beta = 0, 
+                   tau.sq.beta = 1, 
+                   lambda = lambda.inits)
 
 out <- lfJSDM(formula = ~ occ.cov, 
-	      data = data.list, 
-	      inits = inits.list, 
-	      n.samples = n.samples, 
-	      n.factors = 3, 
-	      priors = prior.list, 
+              data = data.list, 
+              inits = inits.list, 
+              n.samples = n.samples, 
+              n.factors = 3, 
+              priors = prior.list, 
               n.omp.threads = 1, 
-	      verbose = TRUE, 
-	      n.report = 1000, 
-	      n.burn = 4000)
+              verbose = TRUE, 
+              n.report = 1000, 
+              n.burn = 4000)
 
 summary(out)
 
diff --git a/man/predict.lfMsPGOcc.Rd b/man/predict.lfMsPGOcc.Rd
index ca056fa..4c90605 100644
--- a/man/predict.lfMsPGOcc.Rd
+++ b/man/predict.lfMsPGOcc.Rd
@@ -8,7 +8,7 @@
 
 \usage{
 \method{predict}{lfMsPGOcc}(object, X.0, coords.0, 
-	                    ignore.RE = FALSE, type = 'occupancy', ...)
+        ignore.RE = FALSE, type = 'occupancy', ...)
 }
 
 \arguments{
@@ -20,7 +20,7 @@
   \item{coords.0}{the spatial coordinates corresponding to \code{X.0}. Note that \code{spOccupancy} assumes coordinates are specified 
   in a projected coordinate system.}
 
-  \item{ignore.RE}{a logical value indicating whether to include unstructured random effects for prediction. If FALSE, random effects will be ignored and prediction will only use the fixed effects. If TRUE, random effects will be included in the prediction for both observed and unobserved levels of the random effect.}
+  \item{ignore.RE}{a logical value indicating whether to include unstructured random effects for prediction. If TRUE, random effects will be ignored and prediction will only use the fixed effects. If FALSE, random effects will be included in the prediction for both observed and unobserved levels of the random effect.}
 
   \item{...}{currently no additional arguments}
 
@@ -28,6 +28,10 @@
 
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -82,7 +86,7 @@ for (i in 1:p.det) {
 
 n.factors <- 3
 dat <- simMsOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, N = N, beta = beta, alpha = alpha,
-		sp = FALSE, factor.model = TRUE, n.factors = n.factors)
+                sp = FALSE, factor.model = TRUE, n.factors = n.factors)
 n.samples <- 5000
 # Split into fitting and prediction data set
 pred.indx <- sample(1:J, round(J * .25), replace = FALSE)
@@ -101,42 +105,41 @@ coords.0 <- dat$coords[pred.indx, ]
 occ.covs <- X[, 2, drop = FALSE]
 colnames(occ.covs) <- c('occ.cov')
 det.covs <- list(det.cov.1 = X.p[, , 2], 
-		 det.cov.2 = X.p[, , 3]
-		 )
+                 det.cov.2 = X.p[, , 3])
 data.list <- list(y = y, 
-		  occ.covs = occ.covs,
-		  det.covs = det.covs, 
+                  occ.covs = occ.covs,
+                  det.covs = det.covs, 
                   coords = coords)
 
 # Occupancy initial values
 prior.list <- list(beta.comm.normal = list(mean = 0, var = 2.72), 
-		   alpha.comm.normal = list(mean = 0, var = 2.72), 
-		   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
-		   tau.sq.alpha.ig = list(a = 0.1, b = 0.1))
+                   alpha.comm.normal = list(mean = 0, var = 2.72), 
+                   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
+                   tau.sq.alpha.ig = list(a = 0.1, b = 0.1))
 # Initial values
 lambda.inits <- matrix(0, N, n.factors)
 diag(lambda.inits) <- 1
 lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
 inits.list <- list(alpha.comm = 0, 
-		      beta.comm = 0, 
-		      beta = 0, 
-		      alpha = 0,
-		      tau.sq.beta = 1, 
-		      tau.sq.alpha = 1, 
-		      lambda = lambda.inits, 
-		      z = apply(y, c(1, 2), max, na.rm = TRUE))
+                   beta.comm = 0, 
+                   beta = 0, 
+                   alpha = 0,
+                   tau.sq.beta = 1, 
+                   tau.sq.alpha = 1, 
+                   lambda = lambda.inits, 
+                   z = apply(y, c(1, 2), max, na.rm = TRUE))
 
 out <- lfMsPGOcc(occ.formula = ~ occ.cov, 
-	         det.formula = ~ det.cov.1 + det.cov.2, 
-	         data = data.list, 
-	         inits = inits.list, 
-	         n.samples = n.samples, 
-		 n.factors = 3, 
-	         priors = prior.list, 
+                 det.formula = ~ det.cov.1 + det.cov.2, 
+                 data = data.list, 
+                 inits = inits.list, 
+                 n.samples = n.samples, 
+                 n.factors = 3, 
+                 priors = prior.list, 
                  n.omp.threads = 1, 
-	         verbose = TRUE, 
-	         n.report = 1000, 
-	         n.burn = 4000)
+                 verbose = TRUE, 
+                 n.report = 1000, 
+                 n.burn = 4000)
 
 summary(out, level = 'community')
 
diff --git a/man/predict.msPGOcc.Rd b/man/predict.msPGOcc.Rd
index bdefcf2..9c775c4 100644
--- a/man/predict.msPGOcc.Rd
+++ b/man/predict.msPGOcc.Rd
@@ -16,7 +16,7 @@
 
   \item{X.0}{the design matrix of covariates at the prediction locations. This should include a column of 1s for the intercept if an intercept is included in the model. If random effects are included in the occupancy (or detection if \code{type = 'detection'}) portion of the model, the levels of the random effects at the new locations should be included as a column in the design matrix. The ordering of the levels should match the ordering used to fit the data in \code{msPGOcc}. Columns should correspond to the order of how covariates were specified in the corresponding formula argument of \code{msPGOcc}. Column names of the random effects must match the name of the random effects, if specified in the corresponding formula argument of \code{msPGOcc}.}
 
-  \item{ignore.RE}{a logical value indicating whether to include unstructured random effects for prediction. If FALSE, random effects will be ignored and prediction will only use the fixed effects. If TRUE, random effects will be included in the prediction for both observed and unobserved levels of the random effect.}
+  \item{ignore.RE}{a logical value indicating whether to include unstructured random effects for prediction. If TRUE, random effects will be ignored and prediction will only use the fixed effects. If FALSE, random effects will be included in the prediction for both observed and unobserved levels of the random effect.}
 
   \item{...}{currently no additional arguments}
 
@@ -24,6 +24,10 @@
 
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -74,7 +78,7 @@ for (i in 1:p.det) {
 }
 
 dat <- simMsOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, N = N, beta = beta, alpha = alpha,
-		sp = FALSE)
+                sp = FALSE)
 n.samples <- 5000
 # Split into fitting and prediction data set
 pred.indx <- sample(1:J, round(J * .25), replace = FALSE)
@@ -90,36 +94,35 @@ psi.0 <- dat$psi[, pred.indx]
 occ.covs <- X[, 2, drop = FALSE]
 colnames(occ.covs) <- c('occ.cov')
 det.covs <- list(det.cov.1 = X.p[, , 2], 
-		 det.cov.2 = X.p[, , 3]
-		 )
+                 det.cov.2 = X.p[, , 3])
 data.list <- list(y = y, 
-		  occ.covs = occ.covs,
-		  det.covs = det.covs)
+                  occ.covs = occ.covs,
+                  det.covs = det.covs)
 
 # Occupancy initial values
 prior.list <- list(beta.comm.normal = list(mean = 0, var = 2.72), 
-		   alpha.comm.normal = list(mean = 0, var = 2.72), 
-		   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
-		   tau.sq.alpha.ig = list(a = 0.1, b = 0.1))
+                   alpha.comm.normal = list(mean = 0, var = 2.72), 
+                   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
+                   tau.sq.alpha.ig = list(a = 0.1, b = 0.1))
 # Initial values
 inits.list <- list(alpha.comm = 0, 
-		      beta.comm = 0, 
-		      beta = 0, 
-		      alpha = 0,
-		      tau.sq.beta = 1, 
-		      tau.sq.alpha = 1, 
-		      z = apply(y, c(1, 2), max, na.rm = TRUE))
+                   beta.comm = 0, 
+                   beta = 0, 
+                   alpha = 0,
+                   tau.sq.beta = 1, 
+                   tau.sq.alpha = 1, 
+                   z = apply(y, c(1, 2), max, na.rm = TRUE))
 
 out <- msPGOcc(occ.formula = ~ occ.cov, 
-	       det.formula = ~ det.cov.1 + det.cov.2, 
-	       data = data.list, 
-	       inits = inits.list, 
-	       n.samples = n.samples, 
-	       priors = prior.list, 
+               det.formula = ~ det.cov.1 + det.cov.2, 
+               data = data.list, 
+               inits = inits.list, 
+               n.samples = n.samples, 
+               priors = prior.list, 
                n.omp.threads = 1, 
-	       verbose = TRUE, 
-	       n.report = 1000, 
-	       n.burn = 4000)
+               verbose = TRUE, 
+               n.report = 1000, 
+               n.burn = 4000)
 
 summary(out, level = 'community')
 
diff --git a/man/predict.sfJSDM.Rd b/man/predict.sfJSDM.Rd
index 4e24b6c..1389f5f 100644
--- a/man/predict.sfJSDM.Rd
+++ b/man/predict.sfJSDM.Rd
@@ -8,7 +8,7 @@
 
 \usage{
 \method{predict}{sfJSDM}(object, X.0, coords.0, n.omp.threads = 1, verbose = TRUE, 
-                            n.report = 100, ignore.RE = FALSE, ...)
+        n.report = 100, ignore.RE = FALSE, ...)
 }
 
 \arguments{
@@ -33,14 +33,18 @@
   \item{n.report}{the interval to report sampling progress.}
 
   \item{ignore.RE}{a logical value indicating whether to include unstructured random 
-    effects for prediction. If FALSE, unstructured random effects will be ignored and 
-    prediction will only use the fixed effects and the spatial random effects. If TRUE, 
+    effects for prediction. If TRUE, unstructured random effects will be ignored and 
+    prediction will only use the fixed effects and the spatial random effects. If FALSE, 
     random effects will be included in the prediction for both observed and unobserved 
     levels of the unstructured random effects.}
 
   \item{...}{currently no additional arguments}
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -96,7 +100,7 @@ phi <- runif(n.factors, 3/1, 3/.4)
 sp <- TRUE
 
 dat <- simMsOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, N = N, beta = beta, alpha = alpha,
-		phi = phi, sigma.sq = sigma.sq, sp = TRUE, cov.model = 'exponential', 
+                phi = phi, sigma.sq = sigma.sq, sp = TRUE, cov.model = 'exponential', 
                 factor.model = TRUE, n.factors = n.factors)
 
 # Number of batches
@@ -122,24 +126,24 @@ psi.0 <- dat$psi[, pred.indx]
 covs <- X[, 2, drop = FALSE]
 colnames(covs) <- c('occ.cov')
 data.list <- list(y = y, 
-		  covs = covs,
-		  coords = coords)
+                  covs = covs,
+                  coords = coords)
 
 # Priors 
 prior.list <- list(beta.comm.normal = list(mean = 0, var = 2.72), 
-		   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
-		   phi.unif = list(a = 3/1, b = 3/.1), 
-		   sigma.sq.ig = list(a = 2, b = 2)) 
+                   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
+                   phi.unif = list(a = 3/1, b = 3/.1), 
+                   sigma.sq.ig = list(a = 2, b = 2)) 
 # Starting values
 lambda.inits <- matrix(0, N, n.factors)
 diag(lambda.inits) <- 1
 lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
 inits.list <- list(beta.comm = 0, 
-		   beta = 0, 
-		   tau.sq.beta = 1, 
-		   phi = 3 / .5, 
-		   sigma.sq = 2,
-		   lambda = lambda.inits)
+                   beta = 0, 
+                   tau.sq.beta = 1, 
+                   phi = 3 / .5, 
+                   sigma.sq = 2,
+                   lambda = lambda.inits)
 # Tuning
 tuning.list <- list(phi = 1) 
 
diff --git a/man/predict.sfMsPGOcc.Rd b/man/predict.sfMsPGOcc.Rd
index 923d813..ccd0f5f 100644
--- a/man/predict.sfMsPGOcc.Rd
+++ b/man/predict.sfMsPGOcc.Rd
@@ -8,7 +8,7 @@
 
 \usage{
 \method{predict}{sfMsPGOcc}(object, X.0, coords.0, n.omp.threads = 1, verbose = TRUE, 
-                            n.report = 100, ignore.RE = FALSE, type = 'occupancy', ...)
+        n.report = 100, ignore.RE = FALSE, type = 'occupancy', ...)
 }
 
 \arguments{
@@ -33,8 +33,8 @@
   \item{n.report}{the interval to report sampling progress.}
 
   \item{ignore.RE}{a logical value indicating whether to include unstructured random 
-    effects for prediction. If FALSE, unstructured random effects will be ignored and 
-    prediction will only use the fixed effects and the spatial random effects. If TRUE, 
+    effects for prediction. If TRUE, unstructured random effects will be ignored and 
+    prediction will only use the fixed effects and the spatial random effects. If FALSE, 
     random effects will be included in the prediction for both observed and unobserved 
     levels of the unstructured random effects.}
 
@@ -43,6 +43,10 @@
   \item{...}{currently no additional arguments}
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -105,7 +109,7 @@ phi <- runif(n.factors, 3/1, 3/.4)
 sp <- TRUE
 
 dat <- simMsOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, N = N, beta = beta, alpha = alpha,
-		phi = phi, sigma.sq = sigma.sq, sp = TRUE, cov.model = 'exponential', 
+                phi = phi, sigma.sq = sigma.sq, sp = TRUE, cov.model = 'exponential', 
                 factor.model = TRUE, n.factors = n.factors)
 
 # Number of batches
@@ -132,56 +136,55 @@ psi.0 <- dat$psi[, pred.indx]
 occ.covs <- X[, 2, drop = FALSE]
 colnames(occ.covs) <- c('occ.cov')
 det.covs <- list(det.cov.1 = X.p[, , 2], 
-		 det.cov.2 = X.p[, , 3]
-		 )
+                 det.cov.2 = X.p[, , 3])
 data.list <- list(y = y, 
-		  occ.covs = occ.covs,
-		  det.covs = det.covs, 
-		  coords = coords)
+                  occ.covs = occ.covs,
+                  det.covs = det.covs, 
+                  coords = coords)
 
 # Priors 
 prior.list <- list(beta.comm.normal = list(mean = 0, var = 2.72), 
-		   alpha.comm.normal = list(mean = 0, var = 2.72), 
-		   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
-		   tau.sq.alpha.ig = list(a = 0.1, b = 0.1),
-		   phi.unif = list(a = 3/1, b = 3/.1), 
-		   sigma.sq.ig = list(a = 2, b = 2)) 
+                   alpha.comm.normal = list(mean = 0, var = 2.72), 
+                   tau.sq.beta.ig = list(a = 0.1, b = 0.1), 
+                   tau.sq.alpha.ig = list(a = 0.1, b = 0.1),
+                   phi.unif = list(a = 3/1, b = 3/.1), 
+                   sigma.sq.ig = list(a = 2, b = 2)) 
 # Starting values
 lambda.inits <- matrix(0, N, n.factors)
 diag(lambda.inits) <- 1
 lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
 inits.list <- list(alpha.comm = 0, 
-		      beta.comm = 0, 
-		      beta = 0, 
-		      alpha = 0,
-		      tau.sq.beta = 1, 
-		      tau.sq.alpha = 1, 
-		      phi = 3 / .5, 
-		      sigma.sq = 2,
-		      lambda = lambda.inits,
-		      z = apply(y, c(1, 2), max, na.rm = TRUE))
+                   beta.comm = 0, 
+                   beta = 0, 
+                   alpha = 0,
+                   tau.sq.beta = 1, 
+                   tau.sq.alpha = 1, 
+                   phi = 3 / .5, 
+                   sigma.sq = 2,
+                   lambda = lambda.inits,
+                   z = apply(y, c(1, 2), max, na.rm = TRUE))
 # Tuning
 tuning.list <- list(phi = 1) 
 
 out <- sfMsPGOcc(occ.formula = ~ occ.cov, 
-		 det.formula = ~ det.cov.1 + det.cov.2, 
-		 data = data.list,
-		 inits = inits.list, 
-		 n.batch = n.batch, 
-		 batch.length = batch.length, 
-		 accept.rate = 0.43, 
-		 n.factors = 3,
-	         priors = prior.list, 
-		 cov.model = "exponential", 
-		 tuning = tuning.list, 
-	         n.omp.threads = 1, 
-	         verbose = TRUE, 
-		 NNGP = TRUE, 
-		 n.neighbors = 5, 
-		 search.type = 'cb', 
-	         n.report = 10, 
-		 n.burn = 100, 
-		 n.thin = 1)
+                 det.formula = ~ det.cov.1 + det.cov.2, 
+                 data = data.list,
+                 inits = inits.list, 
+                 n.batch = n.batch, 
+                 batch.length = batch.length, 
+                 accept.rate = 0.43, 
+                 n.factors = 3,
+                 priors = prior.list, 
+                 cov.model = "exponential", 
+                 tuning = tuning.list, 
+                 n.omp.threads = 1, 
+                 verbose = TRUE, 
+                 NNGP = TRUE, 
+                 n.neighbors = 5, 
+                 search.type = 'cb', 
+                 n.report = 10, 
+                 n.burn = 100, 
+                 n.thin = 1)
 
 summary(out, level = 'both')
 
diff --git a/man/predict.spIntPGOcc.Rd b/man/predict.spIntPGOcc.Rd
index 17aa03b..6e54285 100644
--- a/man/predict.spIntPGOcc.Rd
+++ b/man/predict.spIntPGOcc.Rd
@@ -8,7 +8,7 @@
 
 \usage{
 \method{predict}{spIntPGOcc}(object, X.0, coords.0, n.omp.threads = 1, verbose = TRUE, 
-                             n.report = 100, ...)
+        n.report = 100, ...)
 }
 
 \arguments{
@@ -92,8 +92,8 @@ sp <- TRUE
 
 # Simulate occupancy data. 
 dat <- simIntOcc(n.data = n.data, J.x = J.x, J.y = J.y, J.obs = J.obs, 
-		 n.rep = n.rep, beta = beta, alpha = alpha, sp = sp, 
-		 phi = phi, sigma.sq = sigma.sq, cov.model = 'spherical')
+                 n.rep = n.rep, beta = beta, alpha = alpha, sp = sp, 
+                 phi = phi, sigma.sq = sigma.sq, cov.model = 'spherical')
 
 y <- dat$y
 X <- dat$X.obs
@@ -111,32 +111,32 @@ det.covs <- list()
 # Add covariates one by one
 det.covs[[1]] <- list(det.cov.1.1 = X.p[[1]][, , 2])
 det.covs[[2]] <- list(det.cov.2.1 = X.p[[2]][, , 2], 
-		      det.cov.2.2 = X.p[[2]][, , 3])
+                      det.cov.2.2 = X.p[[2]][, , 3])
 det.covs[[3]] <- list(det.cov.3.1 = X.p[[3]][, , 2])
 det.covs[[4]] <- list(det.cov.4.1 = X.p[[4]][, , 2], 
-		      det.cov.4.2 = X.p[[4]][, , 3], 
-		      det.cov.4.3 = X.p[[4]][, , 4])
+                      det.cov.4.2 = X.p[[4]][, , 3], 
+                      det.cov.4.3 = X.p[[4]][, , 4])
 data.list <- list(y = y, 
-		  occ.covs = occ.covs,
-		  det.covs = det.covs, 
-		  sites = sites, 
-		  coords = coords)
+                  occ.covs = occ.covs,
+                  det.covs = det.covs, 
+                  sites = sites, 
+                  coords = coords)
 
 J <- length(dat$z.obs)
 
 # Initial values
 inits.list <- list(alpha = list(0, 0, 0, 0), 
-		      beta = 0, 
-		      phi = 3 / .5, 
-		      sigma.sq = 2, 
-		      w = rep(0, J), 
-		      z = rep(1, J))
+                   beta = 0, 
+                   phi = 3 / .5, 
+                   sigma.sq = 2, 
+                   w = rep(0, J), 
+                   z = rep(1, J))
 # Priors
 prior.list <- list(beta.normal = list(mean = 0, var = 2.72), 
-		   alpha.normal = list(mean = list(0, 0, 0, 0), 
-				       var = list(2.72, 2.72, 2.72, 2.72)),
-		   phi.unif = c(3/1, 3/.1), 
-		   sigma.sq.ig = c(2, 2))
+                   alpha.normal = list(mean = list(0, 0, 0, 0), 
+                                       var = list(2.72, 2.72, 2.72, 2.72)),
+                   phi.unif = c(3/1, 3/.1), 
+                   sigma.sq.ig = c(2, 2))
 # Tuning
 tuning.list <- list(phi = 1) 
 
@@ -146,26 +146,26 @@ n.batch <- 40
 batch.length <- 25
 
 out <- spIntPGOcc(occ.formula = ~ occ.cov, 
-		  det.formula = list(f.1 = ~ det.cov.1.1, 
-				     f.2 = ~ det.cov.2.1 + det.cov.2.2, 
-				     f.3 = ~ det.cov.3.1, 
-				     f.4 = ~ det.cov.4.1 + det.cov.4.2 + det.cov.4.3), 
-		  data = data.list,  
-		  inits = inits.list, 
-		  n.batch = n.batch, 
-		  batch.length = batch.length, 
-		  accept.rate = 0.43, 
-		  priors = prior.list, 
-		  cov.model = "spherical", 
-		  tuning = tuning.list, 
-		  n.omp.threads = 1, 
-		  verbose = TRUE, 
-		  NNGP = TRUE, 
-		  n.neighbors = 5, 
-		  search.type = 'cb', 
-		  n.report = 10, 
-		  n.burn = 500, 
-		  n.thin = 1)
+                  det.formula = list(f.1 = ~ det.cov.1.1, 
+                                     f.2 = ~ det.cov.2.1 + det.cov.2.2, 
+                                     f.3 = ~ det.cov.3.1, 
+                                     f.4 = ~ det.cov.4.1 + det.cov.4.2 + det.cov.4.3), 
+                  data = data.list,  
+                  inits = inits.list, 
+                  n.batch = n.batch, 
+                  batch.length = batch.length, 
+                  accept.rate = 0.43, 
+                  priors = prior.list, 
+                  cov.model = "spherical", 
+                  tuning = tuning.list, 
+                  n.omp.threads = 1, 
+                  verbose = TRUE, 
+                  NNGP = TRUE, 
+                  n.neighbors = 5, 
+                  search.type = 'cb', 
+                  n.report = 10, 
+                  n.burn = 500, 
+                  n.thin = 1)
 summary(out)
 
 # Predict at new locations ------------------------------------------------
diff --git a/man/predict.spMsPGOcc.Rd b/man/predict.spMsPGOcc.Rd
index e1b4b0d..ef9f23b 100644
--- a/man/predict.spMsPGOcc.Rd
+++ b/man/predict.spMsPGOcc.Rd
@@ -33,8 +33,8 @@
   \item{n.report}{the interval to report sampling progress.}
 
   \item{ignore.RE}{a logical value indicating whether to include unstructured random 
-    effects for prediction. If FALSE, unstructured random effects will be ignored and 
-    prediction will only use the fixed effects and the spatial random effects. If TRUE, 
+    effects for prediction. If TRUE, unstructured random effects will be ignored and 
+    prediction will only use the fixed effects and the spatial random effects. If FALSE, 
     random effects will be included in the prediction for both observed and unobserved 
     levels of the unstructured random effects.}
 
@@ -43,6 +43,10 @@
   \item{...}{currently no additional arguments}
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
diff --git a/man/predict.spPGOcc.Rd b/man/predict.spPGOcc.Rd
index 7421b0c..6f12178 100644
--- a/man/predict.spPGOcc.Rd
+++ b/man/predict.spPGOcc.Rd
@@ -10,7 +10,7 @@
 
 \usage{
 \method{predict}{spPGOcc}(object, X.0, coords.0, n.omp.threads = 1, verbose = TRUE, 
-                          n.report = 100, ignore.RE = FALSE, type = 'occupancy', ...)
+        n.report = 100, ignore.RE = FALSE, type = 'occupancy', ...)
 }
 
 \arguments{
@@ -33,8 +33,8 @@
     the screen.}
 
   \item{ignore.RE}{a logical value indicating whether to include unstructured random 
-    effects for prediction. If FALSE, unstructured random effects will be ignored and 
-    prediction will only use the fixed effects and the spatial random effects. If TRUE, 
+    effects for prediction. If TRUE, unstructured random effects will be ignored and 
+    prediction will only use the fixed effects and the spatial random effects. If FALSE, 
     random effects will be included in the prediction for both observed and unobserved 
     levels of the unstructured random effects.}
 
@@ -45,6 +45,10 @@
   \item{...}{currently no additional arguments}
 }
 
+\note{
+  When \code{ignore.RE = FALSE}, both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects.
+}
+
 \author{
   Jeffrey W. Doser \email{doserjef@msu.edu}, \cr
   Andrew O. Finley \email{finleya@msu.edu}
@@ -113,9 +117,9 @@ occ.covs <- X[, -1, drop = FALSE]
 colnames(occ.covs) <- c('occ.cov')
 det.covs <- list(det.cov.1 = X.p[, , 2])
 data.list <- list(y = y, 
-		  occ.covs = occ.covs, 
-		  det.covs = det.covs, 
-		  coords = coords)
+                  occ.covs = occ.covs, 
+                  det.covs = det.covs, 
+                  coords = coords)
 
 # Number of batches
 n.batch <- 10
@@ -124,36 +128,36 @@ batch.length <- 25
 n.iter <- n.batch * batch.length
 # Priors 
 prior.list <- list(beta.normal = list(mean = 0, var = 2.72), 
-		   alpha.normal = list(mean = 0, var = 2.72),
-		   sigma.sq.ig = c(2, 2), 
-		   phi.unif = c(3/1, 3/.1)) 
+                   alpha.normal = list(mean = 0, var = 2.72),
+                   sigma.sq.ig = c(2, 2), 
+                   phi.unif = c(3/1, 3/.1)) 
 # Initial values
 inits.list <- list(alpha = 0, beta = 0,
-		      phi = 3 / .5, 
-		      sigma.sq = 2,
-		      w = rep(0, nrow(X)),
-		      z = apply(y, 1, max, na.rm = TRUE))
+                   phi = 3 / .5, 
+                   sigma.sq = 2,
+                   w = rep(0, nrow(X)),
+                   z = apply(y, 1, max, na.rm = TRUE))
 # Tuning
 tuning.list <- list(phi = 1) 
 
 out <- spPGOcc(occ.formula = ~ occ.cov, 
-	       det.formula = ~ det.cov.1, 
-	       data = data.list, 
-	       inits = inits.list, 
-	       n.batch = n.batch, 
-	       batch.length = batch.length, 
-	       accept.rate = 0.43, 
-	       priors = prior.list,
-	       cov.model = 'exponential', 
-	       tuning = tuning.list, 
-	       n.omp.threads = 1, 
-	       verbose = TRUE, 
-	       NNGP = FALSE, 
-	       n.neighbors = 15, 
-	       search.type = 'cb', 
-	       n.report = 10, 
-	       n.burn = 50, 
-	       n.thin = 1)
+               det.formula = ~ det.cov.1, 
+               data = data.list, 
+               inits = inits.list, 
+               n.batch = n.batch, 
+               batch.length = batch.length, 
+               accept.rate = 0.43, 
+               priors = prior.list,
+               cov.model = 'exponential', 
+               tuning = tuning.list, 
+               n.omp.threads = 1, 
+               verbose = TRUE, 
+               NNGP = FALSE, 
+               n.neighbors = 15, 
+               search.type = 'cb', 
+               n.report = 10, 
+               n.burn = 50, 
+               n.thin = 1)
 
 summary(out) 
 
diff --git a/man/summary.PGOcc.Rd b/man/summary.PGOcc.Rd
index bceb6ca..de76d54 100644
--- a/man/summary.PGOcc.Rd
+++ b/man/summary.PGOcc.Rd
@@ -11,7 +11,7 @@
 
 \usage{
 \method{summary}{PGOcc}(object, quantiles = c(0.025, 0.5, 0.975), 
-	digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{PGOcc}(x, \dots)
 }
 
diff --git a/man/summary.intPGOcc.Rd b/man/summary.intPGOcc.Rd
index ee86d19..f2d65da 100644
--- a/man/summary.intPGOcc.Rd
+++ b/man/summary.intPGOcc.Rd
@@ -11,7 +11,7 @@
 
 \usage{
 \method{summary}{intPGOcc}(object, quantiles = c(0.025, 0.5, 0.975), 
-	digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{intPGOcc}(x, \dots)
 }
 
diff --git a/man/summary.lfJSDM.Rd b/man/summary.lfJSDM.Rd
index 6211c3b..7a7c5b8 100644
--- a/man/summary.lfJSDM.Rd
+++ b/man/summary.lfJSDM.Rd
@@ -11,14 +11,14 @@
 
 \usage{
 \method{summary}{lfJSDM}(object, level = 'both', quantiles = c(0.025, 0.5, 0.975), 
-	digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{lfJSDM}(x, \dots)
 }
 
 \arguments{
   \item{object, x}{object of class \code{lfJSDM}.}
   \item{level}{a quoted keyword that indicates the level to summarize the 
-    posterior predictive check. Valid key words are: \code{"community"}, 
+    model results. Valid key words are: \code{"community"}, 
     \code{"species"}, or \code{"both"}.}
   \item{quantiles}{for \code{summary}, posterior distribution quantiles to compute.}
   \item{digits}{for \code{summary}, number of digits to report.}
diff --git a/man/summary.lfMsPGOcc.Rd b/man/summary.lfMsPGOcc.Rd
index e165438..5513ff8 100644
--- a/man/summary.lfMsPGOcc.Rd
+++ b/man/summary.lfMsPGOcc.Rd
@@ -11,14 +11,14 @@
 
 \usage{
 \method{summary}{lfMsPGOcc}(object, level = 'both', quantiles = c(0.025, 0.5, 0.975), 
-	digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{lfMsPGOcc}(x, \dots)
 }
 
 \arguments{
   \item{object, x}{object of class \code{lfMsPGOcc}.}
   \item{level}{a quoted keyword that indicates the level to summarize the 
-    posterior predictive check. Valid key words are: \code{"community"}, 
+    model results. Valid key words are: \code{"community"}, 
     \code{"species"}, or \code{"both"}.}
   \item{quantiles}{for \code{summary}, posterior distribution quantiles to compute.}
   \item{digits}{for \code{summary}, number of digits to report.}
diff --git a/man/summary.msPGOcc.Rd b/man/summary.msPGOcc.Rd
index 69651b8..c13416f 100644
--- a/man/summary.msPGOcc.Rd
+++ b/man/summary.msPGOcc.Rd
@@ -11,14 +11,14 @@
 
 \usage{
 \method{summary}{msPGOcc}(object, level = 'both', quantiles = c(0.025, 0.5, 0.975), 
-	digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{msPGOcc}(x, \dots)
 }
 
 \arguments{
   \item{object, x}{object of class \code{msPGOcc}.}
   \item{level}{a quoted keyword that indicates the level to summarize the 
-    posterior predictive check. Valid key words are: \code{"community"}, 
+    model results. Valid key words are: \code{"community"}, 
     \code{"species"}, or \code{"both"}.}
   \item{quantiles}{for \code{summary}, posterior distribution quantiles to compute.}
   \item{digits}{for \code{summary}, number of digits to report.}
diff --git a/man/summary.ppcOcc.Rd b/man/summary.ppcOcc.Rd
index bf8cade..0c07f46 100644
--- a/man/summary.ppcOcc.Rd
+++ b/man/summary.ppcOcc.Rd
@@ -17,7 +17,7 @@
   \item{level}{a quoted keyword for multi-species models that indicates 
     the level to summarize the posterior predictive check. Valid key words
     are: \code{"community"}, \code{"species"}, or \code{"both"}.}
-  \item{digits}{for \code{summary}, number of digits to report.}
+  \item{digits}{number of digits to report.}
   \item{\dots}{currently no additional arguments}
 }
 
diff --git a/man/summary.sfJSDM.Rd b/man/summary.sfJSDM.Rd
index 4c3a62a..946073b 100644
--- a/man/summary.sfJSDM.Rd
+++ b/man/summary.sfJSDM.Rd
@@ -11,14 +11,14 @@
 
 \usage{
 \method{summary}{sfJSDM}(object, level, quantiles = c(0.025, 0.5, 0.975), 
-	 digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{sfJSDM}(x, \dots)
 }
 
 \arguments{
   \item{object, x}{object of class \code{sfJSDM}.}
   \item{level}{a quoted keyword that indicates the level to summarize the 
-    posterior predictive check. Valid key words are: \code{"community"}, 
+    model results. Valid key words are: \code{"community"}, 
     \code{"species"}, or \code{"both"}.}
   \item{quantiles}{for \code{summary}, posterior distribution quantiles to compute.}
   \item{digits}{for \code{summary}, number of digits to report.}
diff --git a/man/summary.sfMsPGOcc.Rd b/man/summary.sfMsPGOcc.Rd
index f78fcfc..6892ed9 100644
--- a/man/summary.sfMsPGOcc.Rd
+++ b/man/summary.sfMsPGOcc.Rd
@@ -11,14 +11,14 @@
 
 \usage{
 \method{summary}{sfMsPGOcc}(object, level, quantiles = c(0.025, 0.5, 0.975), 
-	 digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{sfMsPGOcc}(x, \dots)
 }
 
 \arguments{
   \item{object, x}{object of class \code{sfMsPGOcc}.}
   \item{level}{a quoted keyword that indicates the level to summarize the 
-    posterior predictive check. Valid key words are: \code{"community"}, 
+    model results. Valid key words are: \code{"community"}, 
     \code{"species"}, or \code{"both"}.}
   \item{quantiles}{for \code{summary}, posterior distribution quantiles to compute.}
   \item{digits}{for \code{summary}, number of digits to report.}
diff --git a/man/summary.spIntPGOcc.Rd b/man/summary.spIntPGOcc.Rd
index ba1e2d4..4f8f62e 100644
--- a/man/summary.spIntPGOcc.Rd
+++ b/man/summary.spIntPGOcc.Rd
@@ -6,12 +6,12 @@
 \title{Methods for spIntPGOcc Object}
 
 \description{
-  Methods for extracting information from fitted single-species integrated occupancy (\code{spIntPGOcc}) model.
+  Methods for extracting information from fitted single-species spatial integrated occupancy (\code{spIntPGOcc}) model.
 }
 
 \usage{
 \method{summary}{spIntPGOcc}(object, quantiles = c(0.025, 0.5, 0.975), 
-	digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{spIntPGOcc}(x, \dots)
 }
 
diff --git a/man/summary.spMsPGOcc.Rd b/man/summary.spMsPGOcc.Rd
index 917d950..d852e58 100644
--- a/man/summary.spMsPGOcc.Rd
+++ b/man/summary.spMsPGOcc.Rd
@@ -11,14 +11,14 @@
 
 \usage{
 \method{summary}{spMsPGOcc}(object, level, quantiles = c(0.025, 0.5, 0.975), 
-	 digits = max(3L, getOption("digits") - 3L), \dots)
+        digits = max(3L, getOption("digits") - 3L), \dots)
 \method{print}{spMsPGOcc}(x, \dots)
 }
 
 \arguments{
   \item{object, x}{object of class \code{spMsPGOcc}.}
   \item{level}{a quoted keyword that indicates the level to summarize the 
-    posterior predictive check. Valid key words are: \code{"community"}, 
+    model results. Valid key words are: \code{"community"}, 
     \code{"species"}, or \code{"both"}.}
   \item{quantiles}{for \code{summary}, posterior distribution quantiles to compute.}
   \item{digits}{for \code{summary}, number of digits to report.}
diff --git a/src/init.cpp b/src/init.cpp
index 966ac43..c1aaf4d 100644
--- a/src/init.cpp
+++ b/src/init.cpp
@@ -14,7 +14,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"spMsPGOcc", (DL_FUNC) &spMsPGOcc, 58},
     {"spMsPGOccNNGP", (DL_FUNC) &spMsPGOccNNGP, 64},
     {"spMsPGOccPredict", (DL_FUNC) &spMsPGOccPredict, 16},
-    {"spMsPGOccNNGPPredict", (DL_FUNC) &spMsPGOccNNGPPredict, 17},
+    {"spMsPGOccNNGPPredict", (DL_FUNC) &spMsPGOccNNGPPredict, 18},
     {"intPGOcc", (DL_FUNC) &intPGOcc, 31},
     {"spIntPGOcc", (DL_FUNC) &spIntPGOcc, 46},
     {"spIntPGOccNNGP", (DL_FUNC) &spIntPGOccNNGP, 52},
diff --git a/src/spOccupancy.h b/src/spOccupancy.h
index 0227597..d83d33f 100644
--- a/src/spOccupancy.h
+++ b/src/spOccupancy.h
@@ -54,7 +54,7 @@ extern "C" {
   SEXP spPGOccPredict(SEXP J_r, SEXP pOcc_r, SEXP X0_r, SEXP q_r, 
 		      SEXP obsD_r, SEXP obsPredD_r, SEXP betaSamples_r, 
 		      SEXP thetaSamples_r, SEXP wSamples_r, 
-		      SEXP betaStarSiteSamples_r, 
+		      SEXP betaStarSiteSamples_r,
 		      SEXP nSamples_r, SEXP covModel_r, SEXP nThreads_r, 
 		      SEXP verbose_r, SEXP nReport_r);
 
@@ -138,7 +138,8 @@ extern "C" {
   SEXP spMsPGOccNNGPPredict(SEXP coords_r, SEXP J_r, SEXP N_r, 
 		            SEXP pOcc_r, SEXP m_r, SEXP X0_r, SEXP coords0_r, 
 			    SEXP q_r, SEXP nnIndx0_r, SEXP betaSamples_r, 
-			    SEXP thetaSamples_r, SEXP wSamples_r, SEXP nSamples_r, 
+			    SEXP thetaSamples_r, SEXP wSamples_r, 
+			    SEXP betaStarSiteSamples_r, SEXP nSamples_r, 
 			    SEXP covModel_r, SEXP nThreads_r, SEXP verbose_r, 
 			    SEXP nReport_r);
 
diff --git a/tests/testthat/test-PGOcc.R b/tests/testthat/test-PGOcc.R
index 580c5b7..d24e90c 100644
--- a/tests/testthat/test-PGOcc.R
+++ b/tests/testthat/test-PGOcc.R
@@ -1,6 +1,6 @@
 # Test PGOcc.R  ----------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept Only ----------------------------------------------------------
 set.seed(100)
diff --git a/tests/testthat/test-intPGOcc.R b/tests/testthat/test-intPGOcc.R
index 0f702b4..9511ca5 100644
--- a/tests/testthat/test-intPGOcc.R
+++ b/tests/testthat/test-intPGOcc.R
@@ -1,6 +1,6 @@
 # Test intPGOcc.R  ----------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 J.x <- 15
diff --git a/tests/testthat/test-lfJSDM.R b/tests/testthat/test-lfJSDM.R
index 220513c..b63a09d 100644
--- a/tests/testthat/test-lfJSDM.R
+++ b/tests/testthat/test-lfJSDM.R
@@ -1,6 +1,6 @@
 # Test lfJSDM.R  -------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-lfMsPGOcc.R b/tests/testthat/test-lfMsPGOcc.R
index e0a0580..d0bfdca 100644
--- a/tests/testthat/test-lfMsPGOcc.R
+++ b/tests/testthat/test-lfMsPGOcc.R
@@ -1,6 +1,6 @@
 # Test lfMsPGOcc.R  -------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-msPGOcc.R b/tests/testthat/test-msPGOcc.R
index f28d451..1266ff3 100644
--- a/tests/testthat/test-msPGOcc.R
+++ b/tests/testthat/test-msPGOcc.R
@@ -1,6 +1,6 @@
 # Test msPGOcc.R  --------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-sfJSDM-NNGP.R b/tests/testthat/test-sfJSDM-NNGP.R
index 1b15e7a..1be406c 100644
--- a/tests/testthat/test-sfJSDM-NNGP.R
+++ b/tests/testthat/test-sfJSDM-NNGP.R
@@ -1,7 +1,7 @@
 # Test sfJSDM.R  -------------------------------------------------------
 # NNGP --------------------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept Only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-sfMsPGOcc-NNGP.R b/tests/testthat/test-sfMsPGOcc-NNGP.R
index 9cadacf..30b41fa 100644
--- a/tests/testthat/test-sfMsPGOcc-NNGP.R
+++ b/tests/testthat/test-sfMsPGOcc-NNGP.R
@@ -1,7 +1,7 @@
 # Test sfMsPGOcc.R  -------------------------------------------------------
 # NNGP --------------------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept Only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-spIntPGOcc-GP.R b/tests/testthat/test-spIntPGOcc-GP.R
index 17a92fc..40e5a83 100644
--- a/tests/testthat/test-spIntPGOcc-GP.R
+++ b/tests/testthat/test-spIntPGOcc-GP.R
@@ -1,6 +1,6 @@
 # Test spIntPGOcc.R  ------------------------------------------------------
 # GP ----------------------------------------------------------------------
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 set.seed(1010)
diff --git a/tests/testthat/test-spIntPGOcc-NNGP.R b/tests/testthat/test-spIntPGOcc-NNGP.R
index 8b20b18..3138791 100644
--- a/tests/testthat/test-spIntPGOcc-NNGP.R
+++ b/tests/testthat/test-spIntPGOcc-NNGP.R
@@ -1,6 +1,6 @@
 # Test spIntPGOcc.R  ------------------------------------------------------
 # NNGP --------------------------------------------------------------------
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 set.seed(1010)
diff --git a/tests/testthat/test-spMsPGOcc-GP.R b/tests/testthat/test-spMsPGOcc-GP.R
index d6f4712..e30df63 100644
--- a/tests/testthat/test-spMsPGOcc-GP.R
+++ b/tests/testthat/test-spMsPGOcc-GP.R
@@ -1,7 +1,7 @@
 # Test spMsPGOcc.R  -------------------------------------------------------
 # GP ----------------------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept Only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-spMsPGOcc-NNGP.R b/tests/testthat/test-spMsPGOcc-NNGP.R
index 23cdc24..9572ff0 100644
--- a/tests/testthat/test-spMsPGOcc-NNGP.R
+++ b/tests/testthat/test-spMsPGOcc-NNGP.R
@@ -1,7 +1,7 @@
 # Test spMsPGOcc.R  -------------------------------------------------------
 # NNGP --------------------------------------------------------------------
 
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept Only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-spPGOcc-GP.R b/tests/testthat/test-spPGOcc-GP.R
index 7855700..0d9b897 100644
--- a/tests/testthat/test-spPGOcc-GP.R
+++ b/tests/testthat/test-spPGOcc-GP.R
@@ -1,6 +1,6 @@
 # Test spPGOcc.R  ---------------------------------------------------------
 # GP ----------------------------------------------------------------------
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 J.x <- 8
diff --git a/tests/testthat/test-spPGOcc-NNGP.R b/tests/testthat/test-spPGOcc-NNGP.R
index 6ffd2b3..596e9d5 100644
--- a/tests/testthat/test-spPGOcc-NNGP.R
+++ b/tests/testthat/test-spPGOcc-NNGP.R
@@ -1,6 +1,6 @@
 # Test spPGOcc.R  ---------------------------------------------------------
 # NNGP --------------------------------------------------------------------
-# skip_on_cran()
+skip_on_cran()
 
 # Intercept only ----------------------------------------------------------
 J.x <- 8
diff --git a/vignettes/factorModels.Rmd b/vignettes/factorModels.Rmd
new file mode 100644
index 0000000..3e38874
--- /dev/null
+++ b/vignettes/factorModels.Rmd
@@ -0,0 +1,937 @@
+---
+title: "Joint species distribution models with imperfect detection in `spOccupancy`"
+author: "Jeffrey W. Doser, Andrew O. Finley, Sudipto Banerjee"
+date: "`r format(Sys.time(), '%B %d, %Y')`"
+output: 
+  rmarkdown::html_vignette:
+    toc: true
+    toc_depth: 3
+bibliography: [referencesJSDM.bib]
+biblio-style: apalike
+vignette: >
+  %\VignetteIndexEntry{factorModels}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(
+  comment = "", cache = TRUE
+)
+```
+
+\newcommand{\bm}{\boldsymbol} 
+
+# Introduction
+
+This vignette provides worked examples for fitting joint species distribution models in the `spOccupancy` R package [@doser2021spoccupancy]. Joint species distribution models (JSDMs) are a series of regression-based approaches that explicitly accommodate residual species correlations [@latimer2009hierarchical; @ovaskainen2010modeling]. `spOccupancy` provides a series of functions that account for various combinations of the three major complexities often encountered in multi-species detection-nondetection data: (1) residual species correlations [@ovaskainen2010modeling], (2) imperfect detection [@mackenzie2002], and (3) spatial autocorrelation [@finley2009aoas]. In this vignette, we will provide step by step examples on how to fit the following models: 
+
+1. A latent factor multi-species occupancy model using `lfMsPGOcc()` that accommodates residual species correlations and imperfect detection. 
+2. A spatial latent factor multi-species occupancy model using `sfMsPGOcc()` that accommodates residual species correlations, imperfect detection, and spatial autocorrelation. 
+4. A spatial latent factor joint species distribution model using `sfJSDM()` that accommodates residual species correlations and spatial autocorrelation.
+3. A latent factor joint species distribution model using `lfJSDM()` that accommodates residual species correlations. 
+
+For a detailed vignette on non-spatial and spatial multi-species occupancy models that do not account for residual species correlations, see [the introductory `spOccupancy` vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting#multi-species-occupancy-models). 
+
+As with all models implemented in `spOccupancy`, we use P&oacute;lya-Gamma data augmentation for computational efficiency [@polson2013]. Here we provide a brief description of each model, with full statistical details provided on the Gibbs sampler implementations of the models in Appendix S2. In addition to fitting each model, we will show how `spOccupancy` provides functionality for posterior predictive checks as a Goodness of Fit assessment, model comparison and assessment using the Widely Applicable Information Criterion (WAIC), k-fold cross-validation, and out-of-sample predictions using standard R helper functions (e.g., `predict()`). 
+
+Below, we first load the `spOccupancy` package, the `coda` package for some additional MCMC diagnostics, as well as the `stars` and `ggplot2` packages to create some basic plots of our results. We also set a seed so you can reproduce the same results we do. 
+
+```{r, message = FALSE}
+library(spOccupancy)
+library(stars)
+library(ggplot2)
+set.seed(100)
+```
+
+## Example data set: Foliage-gleaning birds at Hubbard Brook
+
+As an example data set throughout this vignette, we will use data from twelve foliage-gleaning bird species collected from point count surveys at Hubbard Brook Experimental Forest (HBEF) in New Hampshire, USA. Specific details on the data set, which is just a small subset from a long-term survey, are available on the [Hubbard Brook website](https://portal.edirepository.org/nis/mapbrowse?scope=knb-lter-hbr&identifier=178) and @doser2022integrated. The data are provided as part of the `spOccupancy` package and are loaded with `data(hbef2015)`. Point count surveys were conducted at 373 sites over three replicates, each of 10 minutes in length and with a detection radius of 100m. In the data set provided here, we converted these data to detection-nondetection data. Some sites were not visited for all three replicates. Additional information on the data set (including individual species in the data set) can be viewed in the man page using `help(hbef2015)`.
+
+```{r}
+data(hbef2015) # Load the data set.
+str(hbef2015) # Get an overview of what's in the data.
+# Species codes
+sp.names <- rownames(hbef2015$y)
+```
+
+The object `hbef2015` is a list comprised of the detection-nondetection data (`y`), covariates on the occurrence portion of the model (`occ.covs`), covariates on the detection portion of the model (`det.covs`), and the spatial coordinates of each site (`coords`) for use in spatial occupancy models and in plotting. Note that `spOccupancy` functions assume the spatial coordinates are specified in a projected coordinate system. This list is the format required for input to `spOccupancy` model functions. `hbef2015` contains data on 12 species in the three-dimensional array `y`, where the dimensions of `y` correspond to species (12), sites (373), and replicates (3). 
+
+# Latent factor multi-species occupancy models
+
+## Basic model description
+
+Let $z_{i, j}$ be the true presence (1) or absence (0) of some species $i$ at site $j$ for a total of $i = 1, \dots, N$ species and $j = 1, \dots, J$ sites. For our HBEF example, $N = 12$ and $J = 373$. We assume $z_{i, j}$ arises from a Bernoulli process following
+
+\begin{equation}
+\begin{split}
+&z_{i, j} \sim \text{Bernoulli}(\psi_{i, j}), \\
+&\text{logit}(\psi_{i, j}) = \bm{x}^{\top}_{j} \bm{\beta}_i + \text{w}^*_{i, j},
+\end{split}
+\end{equation}
+
+where $\psi_{i, j}$ is the probability of occurrence of species $i$ at site $j$, which is a function of site-specific covariates $\bm{X}$, a vector of species-specific regression coefficients ($\bm{\beta}_i$) for those covariates, and a latent process $\text{w}^*_{i, j}$. We incorporate residual species correlations through the formulation of the latent process $\text{w}^*_{i, j}$. We use a factor modeling approach, which is a dimension reduction technique that can account for correlations among a large number of species without a massive computational cost [@hogan2004bayesian]. Specifically, we decompose $\text{w}^*_{i, j}$ into a linear combination of $q$ latent variables (i.e., factors) and their associated species-specific coefficients (i.e., factor loadings). Thus, we have 
+
+\begin{equation}
+  \text{w}^*_{i, j} = \bm{\lambda}_i^\top\textbf{w}_j, 
+\end{equation}
+
+where $\bm{\lambda}_i$ is the $i$th row of factor loadings from an $N \times q$ matrix $\bm{\Lambda}$, and $\textbf{w}_j$ is a $q \times 1$ vector of independent latent factors at site $j$. We achieve computational improvements by setting $q << N$, where often a small number of factors (e.g., $q = 5$) is sufficient [@taylor2019spatial]. For our HBEF example, our community is relatively small ($N = 12$) and so we use $q = 2$ latent factors as our initial choice, and will discuss assessing this choice of the number of factors later in the example. We account for residual species correlations via their individual responses (i.e., loadings) to the $q$ latent spatial factors. We can envision the latent variables $\textbf{w}_j$ as unmeasured site-specific covariates that are treated as random variables in the model estimation procedure. For the non-spatial latent factor model, we assign a standard normal prior distribution to the latent factors (i.e., we assume each latent factor is independent and arises from a normal distribution with mean 0 and standard deviation 1).  
+
+We envision the species-specific regression coefficients ($\bm{\beta}_i$) as random effects arising from a common community-level distribution:
+
+\begin{equation}
+\bm{\beta}_i \sim \text{Normal}(\bm{\mu_{\beta}}, \bm{T}_{\beta}),
+\end{equation}
+
+where $\bm{\mu_{\beta}}$ is a vector of community-level mean effects for each occurrence covariate effect (including the intercept) and $\bm{T}_{\beta}$ is a diagonal matrix with diagonal elements $\bm{\tau}^2_{\beta}$ that represent the variance of each occurrence covariate effect among species in the community.
+
+We do not directly observe $z_{i, j}$, but rather we observe an imperfect representation of the latent occurrence process. Let $y_{i, j, k}$ be the observed detection (1) or nondetection (0) of a species $i$ of interest at site $j$ during replicate $k$ for each of $k = 1, \dots, K_j$ replicates at each site $j$. We envision the detection-nondetection data as arising from a Bernoulli process conditional on the true latent occurrence process:
+
+\begin{equation}
+\begin{split}
+&y_{i, j, k} \sim \text{Bernoulli}(p_{i, j, k}z_{i, j}), \\
+&\text{logit}(p_{i, j, k}) = \bm{v}^{\top}_{i, j, k}\bm{\alpha}_i,
+\end{split}
+\end{equation}
+
+where $p_{i, j, k}$ is the probability of detecting species $i$ at site $j$ during replicate $k$ (given it is present at site $j$), which is a function of site and replicate-specific covariates $\bm{V}$ and a vector of species-specific regression coefficients ($\bm{\alpha}_i$). Similarly to the occurrence regression coefficients, the species-specific detection coefficients are envisioned as random effects arising from a common community-level distribution:
+
+\begin{equation}
+\bm{\alpha}_i \sim \text{Normal}(\bm{\mu_{\alpha}}, \bm{T}_{\alpha}),
+\end{equation}
+
+where $\bm{\mu_{\alpha}}$ is a vector of community-level mean effects for each detection covariate effect (including the intercept) and $\bm{T}_{\alpha}$ is a diagonal matrix with diagonal elements $\bm{\tau}^2_{\alpha}$ that represent the variability of each detection covariate effect among species in the community.
+
+We assign multivariate normal priors for the community-level occurrence ($\bm{\mu_{\beta}}$) and detection ($\bm{\mu_{\alpha}}$) means, and assign independent inverse-Gamma priors on the community-level occurrence ($\tau^2_{\beta}$) and detection ($\tau^2_{\alpha}$) variance parameters. To ensure identifiability of the latent factors and factor loadings, we set all elements in the upper triangle of the factor loadings matrix $\bm{\Lambda}$ equal to 0 and its diagonal elements equal to 1. 
+
+## Fitting latent factor multi-species occupancy models with `lfMsPGOcc()`
+
+The `lfMsPGOcc()` function fits latent factor multi-species occupancy models. `lfMsPGOcc()` has the following arguments: 
+
+```{r, eval = FALSE}
+lfMsPGOcc(occ.formula, det.formula, data, inits, priors, n.factors, 
+          n.samples, n.omp.threads = 1, verbose = TRUE, n.report = 100, 
+          n.burn = round(.10 * n.samples), n.thin = 1, n.chains = 1,
+          k.fold, k.fold.threads = 1, k.fold.seed, ...)
+```
+
+The first two arguments, `occ.formula` and `det.formula`, use standard R model syntax to denote the covariates to be included in the occurrence and detection portions of the model, respectively. We only specify the right hand side of the formula. We can include random intercepts in both the occurrence and detection portions of the model using `lme4` syntax [@bates2015]. The names of variables given in the formulas should correspond to those found in `data`, which is a list consisting of the following tags: `y` (detection-nondetection data), `occ.covs` (occurrence covariates), `det.covs` (detection covariates), and `coords` (spatial coordinates of sites). `y` is a three-dimensional array with dimensions corresponding to species, sites, and replicates, `occ.covs` is a matrix or data frame with site-specific covariate values, and `det.covs` is a list with each list element corresponding to a covariate to include in the detection portion of the model. Covariates on detection can vary by site and/or survey, and so these covariates may be specified as a site by survey matrix for survey-level covariates or as a one-dimensional vector for survey level covariates. The `hbef2015` list is already in the required format. For our example, we will model species-specific occurrence as a function of linear and quadratic elevation, and will include three observational covariates (linear and quadratic day of survey, time of day of survey) on the detection portion of the model. We standardize all covariates by using the `scale()` function in our model specification, and use the `I()` function to specify quadratic effects. 
+
+```{r}
+occ.formula <- ~ scale(Elevation) + I(scale(Elevation)^2)
+det.formula <- ~ scale(day) + scale(tod) + I(scale(day)^2)
+```
+
+Next, we will specify the number of latent factors to use in our model. This is not an arbitrary decision, and it is difficult to determine the optimal number of latent factors in the model. While other approaches exist to estimate the "optimal" number of factors directly in the modeling framework [@tikhonov2020computationally; @ovaskainen2016uncovering], these approaches do not allow for interpretability of the latent factors and the latent factor loadings (see Figure 2 in Doser, Finley, Banerjee (2022)). The specific restraints and priors we place on the factor loadings matrix ($\bm{\Lambda}$) in our approach allows for interpretation of the latent factors and the factor loadings, but does not automatically determine the number of factors for optimal predictive performance. Thus, there is a tradeoff between interpretability of the latent factor and factor loadings and optimal predictive performance. In the `spOccupancy` implementation, we chose to allow for interpretability of the factor and factor loadings at risk of inferior predictive performance if too many or too few factors are specified by the user. 
+
+The number of latent factors can range from 1 to $N$ (the total number of species in the modeled community). Conceptually, choosing the number of factors is similar to performing a Principal Components Analysis and looking at which components explain a large amount of variation. We want to choose the number of factors that explains an adequate amount of variability among species in the community, but we want to keep this number as small as possible to avoid overfitting the model and large model run times. When initially specifying the number of factors, we suggest the following: 
+
+1. Consider the size of the community and how much variation you expect from species to species. If you expect large variation in occurrence patterns for all species in the community, you may require a larger number of factors. If your modeled community is comprised of certain groups of species that you expect to behave similarly (e.g., insectivores, frugivores, granivores), then a smaller number of factors may suffice. Further, as shown by @tikhonov2020computationally, as the number of species increases, you will likely need more factors to adequately represent the community. 
+2. Consider how much time you have to run the model. The more factors included in the model, the longer the model will take to run. Under certain circumstances, like when you are running a model across a large number of spatial locations, you may simply be restricted to a small number of factors in order to achieve reasonable run times. 
+3. Consider how rare your species in the community are, how many data locations you have (i.e., sites), and how many replicates you have at each site. Models with more latent factors have more parameters to estimate, and thus require more data. If you have a lot of rare species in the community, you will likely be limited to a very small number of factors, as models with more than a few factors may not be identifiable. The same can be said if you are working with a small number of spatial locations (e.g., 30 sites) or replicates (e.g., 1 or 2 replicates at each site). 
+
+In our HBEF example, the community is relatively small ($N = 12$) and the species are all quite similar (after all, they are all classified as foliage-gleaning birds). Let's take a look at the raw probabilities of occurrence from the detection-nondetection data (ignoring imperfect detection) to give an idea of how rare the species are 
+
+```{r}
+apply(hbef2015$y, 1, mean, na.rm = TRUE)
+```
+
+It looks like we have some really rare species (e.g., AMRE, NAWA, BAWW) and some pretty common species (e.g., OVEN, REVI, BTBW). Taking all of this in consideration, it makes sense to initially try the model with a small number of factors, and so we will work with $q = 2$ factors.
+
+```{r}
+# Number of latent factors (q in statistical notation)
+n.factors <- 2
+```
+
+Because of the restrictions we place on the factor loadings matrix (diagonal elements equal to 1 and upper triangle elements equal to 0), another important modeling decision we need to make is how to order the species in our detection-nondetection array. More specifically, we need to carefully choose the first $q$ species in the our array, as these are the species that will have restrictions on their factor loadings. While from a theoretical perspective the order of the species will only influence the resulting interpretation of the latent factors and factor loadings matrix and not the model estimates, this decision does have practical implications. We have found that careful consideration of the ordering of species can lead to (1) increased interpretability of the factors and factor loadings; (2) faster model convergence; and (3) improved mixing. Determining the order of the factors is less important when you have an adequate number of observations for all species in the community, but it becomes increasingly important the more rare species you have in your data set. We suggest the following when considering the order of the species in the detection-nondetection array (`y`): 
+
+1. Place a common species first. The first species has all of its factor loadings set to fixed values, and so it can have a large influence on the resulting interpretation on the factor loadings and latent factors. We have also found that having a very rare species first can result in very slow mixing and increased sensitivity to initial values of the latent factor loadings matrix. 
+2. For the remaining $q - 1$ factors, place species that you believe will show different occurrence patterns than the first species, and the other species placed before it. Place these remaining $q - 1$ species in order of decreasing differences from the initial factor. For example, if I had $q = 3$, for the second species in the array, I would place a species that I a priori think is most different from the first species. For the third species in the array, I would place a species that I think will show different occurrence patterns than both the first and second species, but its patterns may not be as noticeably different compared to the first and second species. 
+
+In our HBEF example, it is clear that there is a set of fairly common species as well as very rare species. This is likely related to the specific elevation these species tend to occurr at as a result of varied habitat requirements. Accordingly, we will reorder the species matrix (`hbef2015$y`) to place one of the common species first that occurs at relatively moderate elevations (`OVEN`) and then place a rare species second that tends to occurr at high elevational habitat (`BLPW`). The order of the remaining $N - q = 10$ species does not matter. Below we reorder the species following this logic, and then create a new data object `hbef.ordered` that we will supply to `lfMsPGOcc()`. 
+
+```{r}
+# Current species ordering
+sp.names
+# Reorder species. 
+sp.ordered <- c('OVEN', 'BLPW', 'AMRE', 'BAWW', 'BHVI', 'BLBW', 
+	        'BTBW', 'BTNW', 'CAWA', 'MAWA', 'NAWA', 'REVI')
+# Create new detection-nondetection data matrix in the new order
+y.new <- hbef2015$y[sp.ordered, , ]
+# Create a new data array
+hbef.ordered <- hbef2015
+# Change the data to the new ordered data
+hbef.ordered$y <- y.new
+str(hbef.ordered)
+```
+
+Next we specify the initial values in `inits` and the prior distributions in `priors`. These arguments are optional, as `spOccupancy` will set default initial values and prior distributions if these arguments are not specified. If `verbose = TRUE`, messages will be printed to the screen to indicate what initial values and priors are used by default for each model parameter. Here (and throughout this vignette), we will explicitly specify initial values and priors. 
+
+However, we will point out that all models in `spOccupancy` that use a factor modeling approach can be fairly sensitive to the initial values of the latent factor loadings. This is primarily an issue when there are a large number of rare species. If you encounter difficulties in model convergence when running factor models in `spOccupancy` across multiple chains, we recommend first running a single chain of the model for a moderate number of iterations until the traceplots look like they are settling around a value (i.e., convergence is closed to being reached). Then extract the estimated mean values for the factor loadings matrix ($\bm{\Lambda}$) and supply these as initial values to the `spOccupancy` function when running the full model across multiple chains. When running multiple chains when not paying much attention to the initial values, you may see large discrepancies between certain chains with very large Rhat values for the latent factor loadings matrix (and spatial range parameters for spatially-explicit factor models). However, this may not necessarily be a convergence issue. Rather, what may happen is that depending on the initial values, the specific factors in the model may be estimated in a different order. For example, if estimating a model with two latent factors with two chains, the latent factors may correspond to a latitudinal and a longitudinal gradient in the first chain, but in the second chain these factors could be reversed with the first factor corresponding to the longitudinal gradient and the second factor corresponding to the latitudinal gradient. This is because it is only the sum of the product of the factor loadings and factors that influences occurrence probability, and so the specific ordering of the factors may switch depending on (1) the first $q$ species relationships to the latent factors and (2) the initial values. Thus, we encourage looking at the traceplots of each individual chain for the latent factor loadings (and spatial range parameters if using a spatial factor model). If the chain has an adequately large effective sample size for the parameters and appears to have reached convergence, we then recommend fixing the initial values at the estimated means from the preliminary model run and then running multiple chains to further assess convergence. 
+
+In `lfMsPGOcc()`, we will supply initial values for the following parameters: `alpha.comm` (community-level detection coefficients), `beta.comm` (community-level occurrence coefficients), `alpha` (species-level detection coefficients), `beta` (species-level occurrence coefficients), `tau.sq.beta` (community-level occurrence variance parameters), `tau.sq.alpha` (community-level detection variance parameters), `lambda` (the species-specific factor loadings), and `z` (latent occurrence variables for all species). These are all specified in a single list. Initial values for community-level parameters are either vectors of length corresponding to the number of community-level detection or occurrence parameters in the model (including the intercepts) or a single value if all parameters are assigned the same initial values. Initial values for species level regression coefficients are either matrices with the number of rows indicating the number of species, and each column corresponding to a different regression parameter, or a single value if the same initial value is used for all species and parameters. Initial values for the species-specific factor loadings (`lambda`) are specified as a numeric matrix with $N$ rows and $q$ columns, where $N$ is the number of species and $q$ is the number of latent factors used in the model. The diagonal elements of the matrix must be 1, and values in the upper triangle must be set to 0 to ensure identifiability of the latent factors. The initial values for the latent occurrence matrix are specified as a matrix with $N$ rows corresponding to the number of species and $J$ columns corresponding to the number of sites.
+
+
+```{r}
+# Number of species
+N <- nrow(hbef.ordered$y)
+# Initiate all lambda initial values to 0. 
+lambda.inits <- matrix(0, N, n.factors)
+# Set diagonal elements to 1
+diag(lambda.inits) <- 1
+# Set lower triangular elements to random values from a standard normal distribution
+lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
+# Check it out. Note this is also how spOccupancy specifies default
+# initial values for lambda.
+lambda.inits
+# Create list of initial values. 
+inits <- list(alpha.comm = 0,
+              beta.comm = 0,
+              beta = 0,
+              alpha = 0,
+              tau.sq.beta = 1,
+              tau.sq.alpha = 1,
+              lambda = lambda.inits, 
+              z = apply(hbef.ordered$y, c(1, 2), max, na.rm = TRUE))
+```
+
+Notice that we set initial values of the latent species occurrence ($z$) to 1 if there was at least one observation of the species at the given site, and 0 if the species was not detected at that site (this is also the default value `spOccupancy` will use if initial values for $z$ are not provided). We set the lower triangular elements of the factor loadings matrix to random values from a standard normal distribution, as we have found these parameters to be relatively insensitive to initial values for this specific data set.
+
+We specify the priors in the `priors` argument with the following tags: `beta.comm.normal` (normal prior on the community-level occurrence mean effects), `alpha.comm.normal` (normal prior on the community-level detection mean effects), `tau.sq.beta.ig` (inverse-Gamma prior on the community-level occurrence variance parameters), `tau.sq.alpha.ig` (inverse-Gamma prior on the community-level detection variance parameters). Each tag consists of a list with elements corresponding to the mean and variance for normal priors and scale and shape for inverse-Gamma priors. Values can be specified individually for each parameter or as a single value if the same prior is assigned to all parameters of a given type.
+
+Below we specify normal priors to be relatively vague on the probability scale with a mean of 0 and a variance of 2.72, and specify vague inverse gamma priors on the community-level variance parameters setting both the shape and scale parameters to 0.1.
+
+```{r}
+priors <- list(beta.comm.normal = list(mean = 0, var = 2.72),
+               alpha.comm.normal = list(mean = 0, var = 2.72),
+               tau.sq.beta.ig = list(a = 0.1, b = 0.1),
+               tau.sq.alpha.ig = list(a = 0.1, b = 0.1))
+```
+
+Our next step is to specify the number of samples to produce with the MCMC algorithm (`n.samples`), the length of burn-in (`n.burn`), the rate at which we want to thin the posterior samples (`n.thin`), and the number of MCMC chains to run (`n.chains`). Note that currently `spOccupancy` runs multiple chains sequentially and does not allow chains to be run simultaneously in parallel across multiple threads. Instead, we allow for within-chain parallelization using the `n.omp.threads` argument. We can set `n.omp.threads` to a number greater than 1 and smaller than the number of threads on the computer you are using. Generally, setting `n.omp.threads > 1` will not result in decreased run times for non-spatial joint species distribution models in `spOccupancy`, but can substantially decrease run time when fitting spatially-explicit models [@finley2020spnngp]. Here we set `n.omp.threads = 1`.
+
+```{r}
+n.samples <- 5000
+n.burn <- 1000
+n.thin <- 8
+n.chains <- 3
+```
+
+We are now nearly set to run the latent factor multi-species occupancy model. The verbose argument is a logical value indicating whether or not MCMC sampler progress is reported to the screen. If `verbose = TRUE`, sampler progress is reported after every multiple of the specified number of iterations in the n.report argument. We set `verbose = TRUE` and `n.report = 1000` to report progress after every 1000th MCMC iteration. Additionally, the last three arguments of `lfMsPGOcc()` (and all `spOccupancy` model fitting functions), `k.fold`, `k.fold.threads`, and `k.fold.seed`, allow us to perform k-fold cross-validation after fitting the model. Here we will not perform k-fold cross-validation, but see [the introductory `spOccupancy` vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting#kFold) for details and examples of running `spOccupancy` functions for k-fold cross-validation. 
+
+```{r}
+# Approx run time: 78 seconds
+out.lfMsPGOcc <- lfMsPGOcc(occ.formula = occ.formula, det.formula = det.formula, 
+                           data = hbef.ordered, inits = inits, priors = priors, 
+                           n.factors = n.factors, n.samples = n.samples, 
+                           n.omp.threads = 1, verbose = TRUE, n.report = 1000, 
+                           n.burn = n.burn, n.thin = n.thin, n.chains = n.chains)
+```
+
+The resulting object `out.lfMsPGOcc` is a list of class `lfMsPGOcc` consisting primarily of posterior samples of all community and species-level parameters, as well as some additional objects that are used for summaries, prediction, and model fit/evaluation. We can display a nice summary of these results using the `summary()` function. When using summary, we can specify the level of parameters we want to summarize. We do this using the argument `level`, which takes values `community`, `species`, or `both` to print results for community-level parameters, species-level parameters, or all parameters. The default value prints a summary for all model parameters. 
+
+```{r}
+summary(out.lfMsPGOcc)
+```
+
+We see the `summary()` function displays the posterior mean, standard deviation, and posterior quantiles (2.5%, 50%, and 97.5%) for a quick summarization of model findings, with all summaries of parameters on the logit scale. Note that all `spOccupancy` `summary()` functions have a `quantiles` argument where you can supply the specific quantiles you want to be displayed in the summary output (by default, this is set to `quantiles = c(0.025, 0.5, 0.975)`). Looking at the community-level parameters, we see there is large variation in average occurrence (i.e., the occurrence intercept) across the study region, and more moderate variation in the effect of elevation on occurrence of the 12 bird species across the region. On average, bird occurrence in the community tends to peak at mid-level elevations (i.e., the community-level quadratic effect of elevation is negative). 
+
+Additionally, `summary()` returns Rhat (the Gelman-Rubin diagnostic; @brooks1998) as well as the effective sample size (ESS) for convergence assessments. Here we see most Rhat values are less than 1.1 and the ESS values are sufficiently large. For a complete analysis, we would run the model for longer to ensure all Rhat values were less than 1.1 and ESS values were sufficiently large. Further, we can use the `coda::plot()` function to plot traceplots of the individual model parameters that are contained in the resulting `lfMsPGOcc` object. All posterior samples are stored in objects that end in "samples" in the resulting `out.lfMsPGOcc` object. 
+
+```{r, fig.width = 5, fig.height = 5, fig.align = 'center', units = 'in'}
+# Check out traceplot of the community-level occurrence means. 
+plot(out.lfMsPGOcc$beta.comm.samples, density = FALSE)
+```
+
+The `summary()` function does not present any information on the latent factor loadings or latent factors, but the full posterior samples are available in the `lambda.samples` and `w.samples` tags in the `out.lfMsPGOcc` object, respectively. Below we display the posterior summaries of the latent factor loadings.  
+
+```{r}
+summary(out.lfMsPGOcc$lambda.samples)
+```
+
+The latent factor loadings and latent factors can provide information on the additional environmental drivers of species occurrence patterns, and what species are respondingly similarly to these environmental gradients. See Figure 2 in Doser, Finley, Banerjee (2022) for an example using North American Breeding Bird Survey data. 
+
+As previously discussed, determining the number of factors to include in the model is not straightforward. However, looking at the posterior summaries of the latent factor loadings can provide information on how many factors are necessary for the given data set. In particular, we can look at the posterior mean or median of the latent factor loadings for each factor. If the factor loadings for all species are very close to zero for a given factor, that suggests that factor is not an important driver of species-specific occurrence across space, and thus you may consider removing it from the model. Additionally, we can look at the 95% credible intervals, and if the 95% credible intervals for the factor loadings of all species for a specific factor all contain zero this is further support to reduce the number of factors in the model. In our HBEF example, we see the factor loadings for the second factor for all species are very close to zero, and zero is contained in all 95% credible intervals. On the other hand, the estimated factor loadings for the first factor range from significantly positive to significantly negative values for different species, indicating it is an important driver of occurrence across space. Given these findings, we refit the model below with only a single latent factor. Note also that we fit the model with default initial values and priors, and when we set `verbose = TRUE` this information is clearly printed out to the screen.
+
+```{r}
+# Use default initial values and priors
+# Approx. run time: 75 seconds
+out.lfMsPGOcc.2 <- lfMsPGOcc(occ.formula = occ.formula, det.formula = det.formula, 
+                             data = hbef.ordered, n.factors = 1, n.samples = n.samples, 
+                             n.omp.threads = 1, verbose = TRUE, n.report = 1000, 
+                             n.burn = n.burn, n.thin = n.thin, n.chains = n.chains)
+```
+
+## Posterior Predictive Checks
+
+The `spOccupancy` function `ppcOcc()` performs a posterior predictive check for all `spOccupancy` model objects as an assessment of Goodness of Fit (GoF). The key idea of GoF testing is that a good model should generate data that closely align with the observed data. If there are large differences in the observed data from the model-generated data, our model is likely not very useful [@hooten2015guide]. We can use the `ppcOcc()` and `summary()` functions to generate a Bayesian p-value as a quick assessment of model fit. A Bayesian p-value that hovers around 0.5 indicates adequate model fit, while values less than 0.1 or greater than 0.9 suggest our model does not fit the data well. `ppcOcc` will return an overall Bayesian p-value for the entire community, as well as an individual Bayesian p-value for each species. See [the introductory `spOccupancy` vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting#posterior-predictive-checks) and the `ppcOcc()` help page for additional details. Below we perform a posterior predictive check with the Freeman-Tukey statistic, grouping the data by individual sites. 
+
+```{r}
+# Approx run time: 30 seconds
+ppc.out <- ppcOcc(out.lfMsPGOcc.2, fit.stat = 'freeman-tukey', group = 1)
+# Calculate Bayesian p-values
+summary(ppc.out)
+```
+Here, our overall Bayesian p-value for the full community is close to 0.5, and the individual species Bayesian p-values also indicate adequate model fit. 
+
+## Model Selection using WAIC
+
+The `spOccupancy` function `waicOCC()` calculates the Widely Applicable Information Criterion [@watanabe2010] for all `spOccupancy` fitted model objects. The WAIC is a useful fully Bayesian information criterion that is adequate for comparing a set of hierarchical models and selecting the best-performing model for final analysis (see [the introductory `spOccupancy` vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting#kFold) for further details on WAIC implementation in `spOccupancy`). Smaller values of WAIC indicate a better performing model. Below, we fit a multi-species occupancy model without species correlations using the `msPGOcc()` function, and subsequently compare the model to the model that does account for species correlations. Syntax for the `msPGOcc()` function is identical to that for `lfMsPGOcc()`, with the exception of the `n.factors` argument no longer being included (since species correlations are not accommodated).
+
+```{r}
+# Approx run time: 71 seconds
+out.msPGOcc <- msPGOcc(occ.formula = occ.formula, det.formula = det.formula, 
+                       data = hbef.ordered, inits = inits, priors = priors, 
+                       n.samples = n.samples, n.omp.threads = 1, 
+                       verbose = TRUE, n.report = 1000, n.burn = n.burn, 
+                       n.thin = n.thin, n.chains = n.chains)
+# Compute WAIC for the the latent factor multi-species occupancy model.
+waicOcc(out.lfMsPGOcc.2)
+# Compute WAIC for the basic multi-species occupancy model.
+waicOcc(out.msPGOcc)
+```
+
+Here, we see the WAIC for the latent factor multi-species occupancy model is substantially lower than the WAIC for the multi-species occupancy model, suggesting that accommodating residual species correlations leads to improved model performance in the foliage-gleaning bird data set. 
+
+## Prediction
+
+Finally, we can use the `predict()` function with all `spOccupancy` model-fitting functions to generate a series of posterior predictive samples at new locations, given a set of covariates and their spatial locations. `spOccupancy` supports prediction of both new occurrence values at a set of spatial locations and as of v0.3.0, `spOccupancy` supports predictions of detection probability over a range of covariate values. 
+
+First, we show how to use `predict()` to create a map of species richness across HBEF. The object `hbefElev` (which comes as part of the `spOccupancy` package) contains elevation data at a 30x30m resolution from the National Elevation Dataset across the entire HBEF. We load the data below.
+
+```{r}
+data(hbefElev)
+str(hbefElev)
+```
+
+The column `val` contains the elevation values, while `Easting` and `Northing` contain the spatial coordinates of the prediction sites. Below we standardize our new elevation values using the mean and standard deviation of the elevation values we used to fit the data, and then predict occurrence for each species across all `r nrow(hbefElev)` spatial locations. The `out.pred` object consists of posterior predictive samples of the latent occurrence probability (`psi.0.samples`) as well as the latent occurrence state (`z.0.samples`). We can calculate species richness as a derived quantity by summing up the latent occurrence states for each species at each MCMC sample. 
+
+```{r, eval = FALSE}
+# Not run (note this takes a large amount of memory to run).
+elev.pred <- (hbefElev$val - mean(hbef2015$occ.covs[, 1])) / sd(hbef2015$occ.covs[, 1])
+# Order: intercept, elevation (linear), elevation (quadratic)
+X.0 <- cbind(1, elev.pred, elev.pred^2) 
+# Spatial coordinates
+coords.0 <- as.matrix(hbefElev[, c('Easting', 'Northing')])
+# type = 'occupancy' specifies prediction of occupancy (or occurrence). 
+# This is also the default.
+# Approximate run time: 30 sec
+out.pred <- predict(out.lfMsPGOcc, X.0, coords.0, type = 'occupancy')
+str(out.pred)
+# Species richness samples
+rich.pred <- apply(out.pred$z.0.samples, c(1, 3), sum)
+plot.dat <- data.frame(x = hbefElev$Easting, 
+                       y = hbefElev$Northing, 
+                       rich.mean = apply(rich.pred, 2, mean), 
+                       rich.sd = apply(rich.pred, 2, sd))
+# Plot species richness of the foliage-gleaning bird community
+# across the Hubbard Brook Experimental Forest
+dat.stars <- st_as_stars(plot.dat, dims = c('x', 'y'))
+ggplot() + 
+  geom_stars(data = dat.stars, aes(x = x, y = y, fill = rich.mean)) +
+  scale_fill_viridis_c(na.value = 'transparent') +
+  labs(x = 'Easting', y = 'Northing', fill = '', 
+       title = 'Mean Species Richness') +
+  theme_bw()
+```
+
+Note that when predicting new values using a latent factor multi-species occupancy model (or a latent factor joint species distribution model as we will see with `lfJSDM()`), we make predictions at both sampled and non-sampled locations using the latent factors. At sampled locations, we directly use the posterior samples from the model fit in the prediction algorithm, which will generally lead to improved predictions at the sampled sites compared to a multi-species model that does not account for species correlations and incorporate these factors. At non-sampled sites, we do not know the value of the latent factors, and so we simply draw random values from a standard normal distribution at each iteration of the posterior predictive sampling algorithm. Because these values are drawn form a normal distribution with a mean of 0, including these predictions of the latent factors at new sites will not change the overall mean estimate of occurrence probability at that site, but it will account for the uncertainty we have in the latent factor values, and thus will fully propagate uncertainty from our model fit to the resulting predictions.  
+
+Finally, we can generate predicted values of detection probability across a range of covariate values to generate a marginal response curve for each individual species across any given covariate value. Below we predict and plot the relationships between detection probability and time of day for each of the 12 species, while holding the day of the year at it's mean value (0).
+
+```{r}
+# Minimum observed time of day
+min.tod <- min(hbef2015$det.covs$tod, na.rm = TRUE)
+# Maximum
+max.tod <- max(hbef2015$det.covs$tod, na.rm = TRUE)
+# Generate 100 time of day values between the observed range
+J.0 <- 100
+tod.pred.vals <- seq(from = min.tod, to = max.tod, length.out = J.0)
+# Standardize the new values by the mean and sd of the data
+# used to fit the model. 
+mean.tod <- mean(hbef2015$det.covs$tod, na.rm = TRUE)
+sd.tod <- sd(hbef2015$det.covs$tod, na.rm = TRUE)
+tod.stand <- (tod.pred.vals - mean.tod) / sd.tod
+# Generate covariate matrix for prediction
+X.p.0 <- cbind(1, 0, tod.stand, 0)
+colnames(X.p.0) <- c('intercept', 'day', 'tod', 'day2')
+out.det.pred <- predict(out.lfMsPGOcc, X.p.0, type = 'detection')
+str(out.det.pred)
+```
+
+The `p.0.samples` tag in the `out.det.pred` object consists of the posterior predictive samples of detection probability for each species across the 100 generated time of day values. We finally create a marginal response curve for each species using `ggplot2`. 
+
+```{r, fig.width = 7, fig.height = 5, fig.align = 'center', units = 'in'}
+# Extract the means from the posterior samples and convert to vector
+p.0.ests <- c(apply(out.det.pred$p.0.samples, c(2, 3), mean))
+p.plot.dat <- data.frame(det.prob = p.0.ests, 
+                         sp = rep(sp.names, J.0), 
+                         tod = rep(tod.pred.vals, each = N))
+ggplot(p.plot.dat, aes(x = tod, y = det.prob)) + 
+  geom_line() + 
+  theme_bw() + 
+  scale_y_continuous(limits = c(0, 1)) + 
+  facet_wrap(vars(sp)) + 
+  labs(x = 'Time of day (min since sunrise)', y = 'Detection Probability') 
+```
+
+The relatively flat lines here for most species indicates that detection probability does not vary to a large extent across the time of day range that is sampled in the data, although there is some apparent variability in the effect across species (e.g., BAWW vs. MAWA). 
+
+# Spatial factor multi-species occupancy models
+
+## Basic model description
+
+While the latent factor multi-species occupancy model accounts for species correlations and imperfect detection, it fails to address spatial autocorrelation. The spatial factor multi-species occupancy model is identical to the latent factor multi-species occupancy model (`lfMsPGOcc()`), except the latent factors are now assumed to arise from a spatial process rather than a standard normal distribution, which accounts for spatial autocorrelation in latent species occurrence. More specifically, each latent factor (now called a spatial factor) $\textbf{w}_r$ for each $r = 1, \dots, q$ is modeled using a Nearest Neighbor Gaussian Process [@datta2016hierarchical], i.e., 
+
+\begin{equation}
+    \text{w}_r(\bm{s}_j) \sim N(\bm{0}, \tilde{\bm{C}}_r(\bm{\theta}_r)),
+\end{equation}
+
+where $\tilde{\bm{C}}_r(\bm{\theta}_r)$ is the NNGP-derived covariance matrix for the $r^{\text{th}}$ spatial factor. The vector $\bm{\theta}_r$ consists of parameters governing the spatial process according to a spatial correlation function [@banerjee2014hierarchical]. `spOccupancy` implements four spatial correlation functions: exponential, spherical, Gaussian, and Matern. For the exponential, spherical, and Gaussian functions, $\bm{\theta}_r$ includes a spatial variance parameter, $\sigma^2_r$, and a spatial range parameter, $\phi_r$, while the Matern correlation function includes an additional spatial smoothness parameter, $\nu_r$. 
+
+We assume the same priors and identifiability constraints as the latent factor multi-species occupancy model. We assign a uniform prior the spatial range parameters, $\phi_r$, and the spatial smoothness parameters, $\nu_r$, if using a Matern correlation function. 
+
+## Fitting spatial factor multi-species occupancy models with `sfMsPGOcc`
+
+The function `sfMsPGOcc()` fits spatial factor multi-species occupancy models using P&oacute;lya-Gamma data augmentation. `sfMsPGOcc()` has the following arguments: 
+
+```{r, eval = FALSE}
+sfMsPGOcc(occ.formula, det.formula, data, inits, priors, 
+          tuning, cov.model = 'exponential', NNGP = TRUE, 
+          n.neighbors = 15, search.type = "cb", n.factors, 
+          n.batch, batch.length, accept.rate = 0.43,
+          n.omp.threads = 1, verbose = TRUE, n.report = 100, 
+          n.burn = round(.10 * n.batch * batch.length), 
+          n.thin = 1, n.chains = 1, k.fold, k.fold.threads = 1, 
+          k.fold.seed = 100, ...){
+```
+
+We will walk through each of the arguments to `sfMsPGOcc()` in the context of our HBEF example. The occurrence (`occ.formula`) and detection (`det.formula`) formulas, as well as the list of data (`data`) follow the same form as we saw in `lfMsPGOcc()`. We will specify these again below for clarity.
+
+```{r}
+occ.formula <- ~ scale(Elevation) + I(scale(Elevation)^2)
+det.formula <- ~ scale(day) + scale(tod) + I(scale(day)^2)
+# Remind ourselves what the data look like
+str(hbef.ordered)
+```
+
+Following our findings from using `lfMsPGOcc()`, we will use 1 latent spatial factor. 
+
+```{r}
+n.factors <- 1
+```
+
+We will next specify the initial values in the `inits` argument. Just as before, this argument is optional as `spOccupancy` will by default set the initial values based on the prior distributions. Valid tags for initial values include all the parameters described for the latent factor multi-species occupancy model using `lfMsPGOcc()` as well as parameters associated with the spatial latent processes. These include: `phi` (the spatial range parameter) and `nu` (the spatial smoothness parameter), where the latter is only specified if adopting a Matern covariance function (i.e., `cov.model = 'matern'`). `spOccupancy` supports four spatial covariance models (`exponential`, `spherical`, `gaussian`, and `matern`), which are specified in the `cov.model` argument. Here we will use an exponential correlation function.  When using an exponential correlation function, $\frac{3}{\phi}$ is the effective range, or the distance at which the residual spatial correlation between two sites drops to 0.05 [@banerjee2014hierarchical]. As an initial value for the spatial range parameter `phi`, we compute the mean distance between points in HBEF and then set it equal to 3 divided by this mean distance. Thus, our initial guess for the effective range is the average distance between sites across HBEF. We will set all other initial values to the same values we used for `lfMsPGOcc()`. 
+
+```{r}
+# Pair-wise distance between all sites
+dist.hbef <- dist(hbef.ordered$coords)
+# Exponential correlation model
+cov.model <- "exponential"
+# Specify all other initial values identical to lfMsPGOcc() from before
+# Number of species
+N <- nrow(hbef.ordered$y)
+# Initiate all lambda initial values to 0. 
+lambda.inits <- matrix(0, N, n.factors)
+# Set diagonal elements to 1
+diag(lambda.inits) <- 1
+# Set lower triangular elements to random values from a standard normal dist
+lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
+# Check it out
+lambda.inits
+# Create list of initial values. 
+inits <- list(alpha.comm = 0,
+              beta.comm = 0,
+              beta = 0,
+              alpha = 0,
+              tau.sq.beta = 1,
+              tau.sq.alpha = 1,
+              lambda = lambda.inits, 
+              phi = 3 / mean(dist.hbef),
+              z = apply(hbef.ordered$y, c(1, 2), max, na.rm = TRUE))
+```
+
+The next three arguments (`n.batch`, `batch.length`, and `accept.rate`) are all related to the Adaptive MCMC sampler used when we fit the model. Updates for the spatial range parameter (and the smoothness parameter if `cov.model = 'matern'`) require the use of a Metropolis-Hastings algorithm. We implement an adaptive Metropolis-Hastings algorithm as discussed in @roberts2009examples. This algorithm adjusts the tuning values for each parameter that requires a Metropolis-Hastings update within the sampler itself. This process results in a more efficient sampler than if we were to fix the tuning parameters prior to fitting the model. The parameter `accept.rate` is the target acceptance rate for each parameter, and the algorithm will adjust the tuning parameters to hover around this value. The default value is 0.43, which we suggest leaving as is unless you have a good reason to change it. The tuning parameters are updated after a single "batch". In `lfMsPGOcc()`, we specified an `n.samples` argument which consisted of the total number of samples to run each chain of the MCMC. For `sfMsPGOcc()` (and all spatially-explicit models in `spOccupancy`), we break up the total number of MCMC samples into a set of "batches", where each batch has a specific number of samples. We must specify both the total number of batches (`n.batch`) as well as the number of MCMC samples each batch contains (`batch.length`). Thus, the total number of MCMC samples is `n.batch * batch.length`. Typically, we set `batch.length = 25` and then play around with `n.batch` until convergence is reached. We recommend keeping this at 25 unless you have a specific reason to change it. Here we set `n.batch = 200` for a total of 5000 MCMC samples in each of 3 chains. We will additionally specify a burn-in period of length 3000 and a thinning rate of 2. Importantly, we also need to specify an initial value for the tuning parameters for the spatial decay and smoothness parameters (if applicable). These values are supplied as input in the form of a list with tags `phi` and `nu`. The initial tuning value can be any value greater than 0, but we recommend starting the value out around 0.5. After some initial runs of the model, if you notice the final acceptance rate of a parameter is much larger or smaller than the target acceptance rate (`accept.rate`), you can then change the initial tuning value to get closer to the target rate. Here we set the initial tuning value for `phi` to 1 after some initial exploratory runs of the model.
+
+```{r}
+batch.length <- 25
+n.batch <- 200
+n.burn <- 3000
+n.thin <- 2
+n.chains <- 3
+```
+
+Priors are again specified in a list in the `priors` argument. We assume uniform priors for the spatial decay parameter `phi` and smoothness parameter `nu` (if using the Matern correlation function), with the associated tags `phi.unif` and `nu.unif`. The lower and upper bounds of the uniform distribution are passed as a two-element vector for the uniform priors.
+
+Here we use an exponential correlation function, so we only need to specify priors for the spatial decay parameter `phi` for each of the spatial factors (which in this case is just 1). We recommend determining the bounds of the uniform distribution by computing the smallest distance between sites and the largest distance between sites in the observed data set. We then set the lower bound of the uniform to `3/max` and the upper bound to `3/min`, where `min` and `max` correspond to the predetermined distances between sites. This equates to a vague prior that states that spatial autocorrelation in the spatial factors could only be between sites that are very close to each other, or could span across the entire observed study area. We recommend using these bounds for the prior unless you have prior information about the range of the spatial autocorrelation. The remaining priors are identical to what we saw in `lfMsPGOcc()`. We use the same priors for all other parameters as those we used for `lfMsPGOcc()`. 
+
+```{r}
+min.dist <- min(dist.hbef)
+max.dist <- max(dist.hbef)
+priors <- list(beta.comm.normal = list(mean = 0, var = 2.72),
+               alpha.comm.normal = list(mean = 0, var = 2.72),
+               tau.sq.beta.ig = list(a = 0.1, b = 0.1),
+               tau.sq.alpha.ig = list(a = 0.1, b = 0.1), 
+               phi.unif = list(3 / max.dist, 3 / min.dist))
+```
+
+Importantly, we also need to specify an initial value for the tuning parameters for the spatial decay and smoothness parameters (if applicable). These values are supplied as input in the form of a list with tags `phi` and `nu`. The initial tuning value can be any value greater than 0, but we recommend starting the value out around 0.5. After some initial runs of the model, if you notice the final acceptance rate of a parameter is much larger or smaller than the target acceptance rate (`accept.rate`), you can then change the initial tuning value to get closer to the target rate. Here we set the initial tuning value for `phi` to 1 after some initial exploratory runs of the model.
+
+```{r}
+tuning <- list(phi = 1)
+```
+
+The argument `n.omp.threads` specifies the number of threads to use for within-chain parallelization, which can greatly decrease run time for spatially-explicit models [@finley2020spnngp], while `verbose` specifies whether or not to print the progress of the sampler. We *highly* recommend setting `verbose = TRUE` for all spatial models to ensure the adaptive MCMC is working as you want (and this is the reason for why this is the default for this argument). The argument `n.report` specifies the interval to report the Metropolis-Hastings sampler acceptance rate. Note that `n.report` is specified in terms of batches, not the overall number of samples. Below we set `n.report = 50`, which will result in information on the acceptance rate and tuning parameters every 50th batch (not sample). Ideally, you should see the printed acceptance rate values however around the value supplied to the `accept.rate` argument (which by default is 0.43). If by the end of the MCMC run you see the values are well below the target acceptance rate, we recommend rerunning the model with a larger initial tuning parameter (higher than the final reported value in the displayed output of model progress). If you see the values are well above the target acceptance rate, we recommend rerunning the model with a smaller initial tuning parameter (smaller than the final reported value). 
+
+```{r}
+n.omp.threads <- 1
+verbose <- TRUE
+n.report <- 50 # Report progress at every 50th batch.
+```
+
+The parameters `NNGP`, `n.neighbors`, and `search.type` relate to whether or not you want to fit the model with a Gaussian Process or with NNGP, which is a much more computationally efficient approximation. The argument `NNGP` is a logical value indicating whether to fit the model with an NNGP (`TRUE`) or a regular Gaussian Process (`FALSE`). Currently, `sfMsPGOcc()` only supports NNGP models, so you will receive an error message if you set `NNGP = FALSE` that tells you to switch to `NNGP = TRUE`. We plan to implement these models with a full Gaussian Process in future development. The argument `n.neighbors` and `search.type` specify the number of neighbors used in the NNGP and the nearest neighbor search algorithm, respectively, to use for the NNGP model. Generally, the default values of these arguments will be adequate. @datta2016hierarchical showed that setting `n.neighbors = 15` is usually sufficient, although for certain data sets a good approximation can be achieved with as few as five neighbors, which could substantially decrease run time for the model. We generally recommend leaving `search.type = "cb"`, as this results in a fast code book nearest neighbor search algorithm. However, details on when you may want to change this are described in @finley2020spnngp. We will run an NNGP model using the default value for `search.type` and setting `n.neighbors = 5`. Here we choose 5 neighbors because we found in @doser2021spoccupancy that estimates from a spatial multi-species occupancy model for this data set were relatively robust to the number of neighbors in the model. Generally, we recommend using the default value of `n.neighbors = 15`, and if additional decreases in computation time are desired, you can fit the model with `n.neighbors = 5` and compare their performance using WAIC and/or k-fold cross-validation. 
+
+We now fit the model using `sfMsPGOcc()` and summarize the results using `summary()`. 
+
+```{r}
+# Approx run time: 2 min
+out.sfMsPGOcc <- sfMsPGOcc(occ.formula = occ.formula, 
+                           det.formula = det.formula, 
+                           data = hbef.ordered, 
+                           inits = inits, 
+                           n.batch = n.batch, 
+                           batch.length = batch.length, 
+                           accept.rate = 0.43, 
+                           priors = priors, 
+                           n.factors = n.factors,
+                           cov.model = cov.model, 
+                           tuning = tuning, 
+                           n.omp.threads = n.omp.threads, 
+                           verbose = TRUE, 
+                           NNGP = TRUE, 
+                           n.neighbors = 5, 
+                           n.report = n.report, 
+                           n.burn = n.burn, 
+                           n.thin = n.thin, 
+                           n.chains = n.chains)
+# Take a look at the resulting object
+names(out.sfMsPGOcc)
+summary(out.sfMsPGOcc)
+```
+
+We see pretty adequate convergence and effective sample sizes for the parameters, although we certainly would run the model longer for a full analysis to ensure all Rhat values are less than 1.1. If we compare the community-level parameters from `sfMsPGOcc()` with those from `lfMsPGOcc()`, we see their is a fair amount of correspondence between the two models. 
+
+Next we summarize the spatial factor loadings
+
+```{r}
+summary(out.sfMsPGOcc$lambda.samples)
+```
+
+Here we see variable responses to the latent spatial factor. In particular, we see that common species (OVEN, BTBW, BTNW, REVI) that occur at low-mid level elevations have a positive coefficent, while more rare species (MAWA, NAWA) have negative values of the coefficient. As we did in Figure 2 of Doser, Finley, Banerjee (2022) for the Breeding Bird Survey data, we could plot the factor loadings and the estimated spatial factor side-by-side to better understand what these effects mean for the specific community of interest. 
+
+## Posterior predictive checks
+
+Analogous to `lfMsPGOcc()`, we can perform a posterior predictive check using `ppcOcc()`. 
+
+```{r}
+# Takes a few seconds to run. 
+ppc.occ.out <- ppcOcc(out.sfMsPGOcc, 'freeman-tukey', group = 2)
+summary(ppc.occ.out)
+```
+
+## Model selection using WAIC
+
+Below we compute the WAIC using `waicOcc()` and compare it to the WAIC for the non-spatial multi-species occupancy model. 
+
+```{r}
+waicOcc(out.sfMsPGOcc)
+waicOcc(out.lfMsPGOcc.2)
+```
+
+As always, remember there is Monte Carlo error in these numbers, and so the values you receive will be slightly different if you run this on your own machine. The WAIC for the spatial factor model is much smaller than the WAIC for the latent factor model, suggesting that accounting for spatial autocorrelation improves model fit. However, in a complete analysis we should ensure the models fully converge (and we have adequate ESS) before performing any model selection or comparison. 
+
+## Prediction
+
+Finally, we can use the `predict()` function as we saw with `lfMsPGOcc()` to predict new occurrence or detection probability values given a set of covariates and spatial locations.  
+
+Below, we provide code to produce a map of species richness across HBEF, which is exactly analogous to our code for `lfMsPGOcc()`. 
+
+```{r, eval = FALSE}
+# Not run (note this takes a large amount of memory to run).
+data(hbefElev)
+str(hbefElev)
+elev.pred <- (hbefElev$val - mean(hbef.ordered$occ.covs[, 1])) / sd(hbef.ordered$occ.covs[, 1])
+# Order: intercept, elevation (linear), elevation (quadratic)
+X.0 <- cbind(1, elev.pred, elev.pred^2) 
+# Spatial coordinates
+coords.0 <- as.matrix(hbefElev[, c('Easting', 'Northing')])
+# type = 'occupancy' specified prediction of occupancy (or occurrence). 
+# This is also the default.
+# Approximate run time: 30 sec
+out.pred <- predict(out.sfMsPGOcc, X.0, coords.0, type = 'occupancy')
+str(out.pred)
+# Species richness samples
+rich.pred <- apply(out.pred$z.0.samples, c(1, 3), sum)
+plot.dat <- data.frame(x = hbefElev$Easting, 
+                       y = hbefElev$Northing, 
+                       rich.mean = apply(rich.pred, 2, mean), 
+                       rich.sd = apply(rich.pred, 2, sd))
+# Plot species richness of the foliage-gleaning bird community
+# across the Hubbard Brook Experimental Forest
+dat.stars <- st_as_stars(plot.dat, dims = c('x', 'y'))
+ggplot() + 
+  geom_stars(data = dat.stars, aes(x = x, y = y, fill = rich.mean)) +
+  scale_fill_viridis_c(na.value = 'transparent') +
+  labs(x = 'Easting', y = 'Northing', fill = '', 
+       title = 'Mean Species Richness') +
+  theme_bw()
+```
+
+Additionally, we can generate predicted values of detection probability across a range of covariate values to generate a marginal response curve for each individual species across any given covariate value. As we saw with `lfMsPGOcc()`, below we predict and plot the relationships between detection probability and time of day for each of the 12 species, while holding the day of the year at it's mean value (0).
+
+```{r, fig.width = 7, fig.height = 5, fig.align = 'center', units = 'in'}
+# Minimum observed time of day
+min.tod <- min(hbef2015$det.covs$tod, na.rm = TRUE)
+# Maximum
+max.tod <- max(hbef2015$det.covs$tod, na.rm = TRUE)
+# Generate 100 time of day values between the observed range
+J.0 <- 100
+tod.pred.vals <- seq(from = min.tod, to = max.tod, length.out = J.0)
+mean.tod <- mean(hbef2015$det.covs$tod, na.rm = TRUE)
+sd.tod <- sd(hbef2015$det.covs$tod, na.rm = TRUE)
+tod.stand <- (tod.pred.vals - mean.tod) / sd.tod
+# Generate covariate matrix for prediction
+X.p.0 <- cbind(1, 0, tod.stand, 0)
+colnames(X.p.0) <- c('intercept', 'day', 'tod', 'day2')
+out.det.pred <- predict(out.sfMsPGOcc, X.p.0, type = 'detection')
+str(out.det.pred)
+# Extract the means from the posterior samples and convert to vector
+p.0.ests <- c(apply(out.det.pred$p.0.samples, c(2, 3), mean))
+p.plot.dat <- data.frame(det.prob = p.0.ests, 
+                         sp = rep(sp.names, J.0), 
+                         tod = rep(tod.pred.vals, each = N))
+ggplot(p.plot.dat, aes(x = tod, y = det.prob)) + 
+  geom_line() + 
+  theme_bw() + 
+  scale_y_continuous(limits = c(0, 1)) + 
+  facet_wrap(vars(sp)) + 
+  labs(x = 'Time of day (min since sunrise)', y = 'Detection Probability') 
+```
+
+# Spatial factor joint species distribution models
+
+The `spOccupancy` function `sfJSDM()` fits a spatial factor joint species distribution model. The spatial factor JSDM is a joint species distribution model that ignores imperfect detection, but accounts for species residual correlations and spatial autocorrelation. As in the spatial factor multi-species occupancy model, we account for species correlations using a spatial factor model, where the spatial factors arise from $q$ independent NNGPs. This model is very similar to the NNGP model of @tikhonov2020joint, with the only differences being in the prior distributions and the identifiability constraints placed on the spatial factor loadings matrix. 
+
+While `sfJSDM()` (and it's non-spatial counterpart `lfJSDM()`) are not occupancy models since they do not account for imperfect detection, we included them in `spOccupancy` to allow for direct comparison of these traditional JSDMs (which historically have not accounted for imperfect detection) with the JSDMs with imperfect detection fit by `lfMSPGOcc()` and `sfMsPGOcc()`. We hope inclusion of these functions, together with `lfMsPGOcc()`, `sfMsPGOcc()`, and the multi-species occupancy models that do not account for species correlations (`msPGOcc()` and `spMsPGOcc()`), will provide users and practitioners with practical tools to assess whether or not they need to account for species correlations, imperfect detection, and/or spatial autocorrelation in their specific data sets. 
+
+## Basic model description
+
+Because this model does not account for imperfect detection, we eliminate the detection sub-model and rather directly model a simplifieid version of the replicated detection-nondetection data. Define $y^*_{i, j} = I(\sum_{k = 1}^{K_j}y_{i, j, k} > 0)$, with $I(\cdot)$ an indicator function denoting whether or not species $i$ was detected during at least one of the $K_j$ replicate surveys at site $j$. Note that this model does not require there to be more than one replicate survey at any location (since we do not account for imperfect detection), and thus may be fit to data where `lfMsPGOcc()` and `sfMsPGOcc()` will not provide reliable estimates. However, this comes at the cost of not explicitly accounting for imperfect detection, and thus we need to interpret all covariate effects as effects on a confounded process of detection and occurrence rather than explicitly separating the two as we have seen in the previous two models. The model description of the spatial factor joint species distribution model is identical to the occurrence model of the spatial factor multi-species occupancy model, except we replace the latent occurrence $z_{i, j}$ with the observed data $y^*_{i, j}$. This model can be thought of as a Generalized Linear Mixed Model with a binary response and spatial random effects that are modeled using a spatial factor approach.
+
+## Fitting spatial factor joint species distribution models with `sfJSDM`
+
+The function `sfJSDM()` fits spatial factor joint species distribution models using P&oacute;lya-Gamma data augmentation. `sfJSDM()` has the following arguments: 
+
+```{r, eval = FALSE}
+sfJSDM(formula, data, inits, priors, tuning, 
+       cov.model = 'exponential', NNGP = TRUE, 
+       n.neighbors = 15, search.type = 'cb', n.factors, n.batch, 
+       batch.length, accept.rate = 0.43, n.omp.threads = 1, 
+       verbose = TRUE, n.report = 100, 
+       n.burn = round(.10 * n.batch * batch.length), n.thin = 1, 
+       n.chains = 1, k.fold, k.fold.threads = 1, k.fold.seed, ...)
+```
+
+Notice the similarity between the arguments for `sfJSDM()` and `sfMsPGOcc()`. The main differences when fitting JSDMs in `spOccupancy` is that there is now only a single formula that is specified in the model (`formula`), which is where we specify all covariate effects we think influences our observations. Let's walk through the arguments in the context of our HBEF example. The `data` argument for JSDMs in `spOccupancy` has three required elements: `y` (the collapsed detection-nondetection data), `covs` (the covariates), and `coords` (spatial coordinates of sites). `y` is a matrix with rows corresponding to species and columns corresponding to sites. `covs` is a matrix or data frame with site-specific covariate values. `coords` is a matrix of spatial coordinates that is exactly the same as we have seen before. For our example, we need to collapse the replicated detection-nondetection data into the required format for JSDMs. We can do this using our good friend the `apply()` function.
+
+```{r}
+# Form collapsed detection-nondetection data
+y.star <- apply(hbef.ordered$y, c(1, 2), max, na.rm = TRUE)
+str(y.star)
+```
+
+Notice in the code above we set the value for each site $j$ and each species $i$ to the maximum value observed across the 3 replicate surveys, which is equivalent to setting the value to 1 if the species was observed at that site and 0 if not. Next we specify our covariates. Because we have eliminated the replicate surveys, we can only include site-level covariates in our formula. In the context of our example, this means we cannot include the `day` or `tod` covariates that we placed on the detection portion of our occupancy models, as these covariates vary across the replicate surveys. Instead, we will simply include elevation in our model (which is what we used for modeling latent occurrence in our occupancy models). 
+
+```{r}
+covs <- hbef.ordered$occ.covs
+str(covs)
+```
+
+We finally can just grab the spatial coordinates we used in our occupancy models, and then combine all three elements into a list that we will use to fit the JSDM. 
+
+```{r}
+# Grab spatial coordinates of the sites
+coords <- hbef.ordered$coords
+# Put it all together in a list
+jsdm.list <- list(y = y.star, 
+                  covs = covs, 
+                  coords = coords)
+str(jsdm.list)
+```
+
+We next specify the covariates we wish to include in the model with `formula`, as well as the number of latent spatial factors to include in the model. We will include linear and quadratic elevation as covariates in the model, and fit the model with a single spatial factor. 
+
+```{r}
+jsdm.formula <- ~ scale(Elevation) + I(scale(Elevation)^2)
+n.factors <- 1
+```
+
+The remaining arguments are all identical to those we saw with the spatial factor multi-species occupancy model using `sfMsPGOcc()`. We specify values for all additional arguments below. See the section on "Fitting models with `sfMsPGOcc()`" for specific details on each of these arguments. Note that for the initial values and priors, we only need to specify these values for the "occurrence" values since there is no detection sub-model. Further, we do not need to specify initial values for the latent occurrence values $z$ (since there aren't any). 
+
+```{r}
+# Initial values ----------------------
+# Pair-wise distance between all sites
+dist.hbef <- dist(hbef.ordered$coords)
+# Exponential correlation model
+cov.model <- "exponential"
+# Specify all other initial values identical to sfMsPGOcc() from before
+# Number of species
+N <- nrow(jsdm.list$y)
+# Initiate all lambda initial values to 0. 
+lambda.inits <- matrix(0, N, n.factors)
+# Set diagonal elements to 1
+diag(lambda.inits) <- 1
+# Set lower triangular elements to random values from a standard normal dist
+lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
+# Check it out
+lambda.inits
+# Create list of initial values. 
+inits <- list(beta.comm = 0,
+              beta = 0,
+              tau.sq.beta = 1,
+              lambda = lambda.inits, 
+              phi = 3 / mean(dist.hbef))
+# Priors ------------------------------
+min.dist <- min(dist.hbef)
+max.dist <- max(dist.hbef)
+priors <- list(beta.comm.normal = list(mean = 0, var = 2.72),
+               tau.sq.beta.ig = list(a = 0.1, b = 0.1),
+               phi.unif = list(3 / max.dist, 3 / min.dist))
+# Additional arguments ----------------
+batch.length <- 25
+n.batch <- 200
+n.burn <- 3000
+n.thin <- 2
+n.chains <- 3
+tuning <- list(phi = 1)
+n.omp.threads <- 1
+verbose <- TRUE
+n.report <- 50 # Report progress at every 50th batch.
+```
+
+We are now ready to run the model. We run the model for 200 batches of length 25 (a total of 5000 MCMC samples), discarding the first 3000 as burn-in and thinning by every 2 samples. We fit the model with 5 nearest neighbors. 
+
+```{r}
+# Approx run time: 1.25 min
+out.sfJSDM <- sfJSDM(formula = jsdm.formula, 
+                     data = jsdm.list, 
+                     inits = inits, 
+                     n.batch = n.batch, 
+                     batch.length = batch.length, 
+                     accept.rate = 0.43, 
+                     priors = priors, 
+                     n.factors = n.factors,
+                     cov.model = cov.model, 
+                     tuning = tuning, 
+                     n.omp.threads = n.omp.threads, 
+                     verbose = TRUE, 
+                     NNGP = TRUE, 
+                     n.neighbors = 5, 
+                     n.report = n.report, 
+                     n.burn = n.burn, 
+                     n.thin = n.thin, 
+                     n.chains = n.chains)
+# Take a look at the resulting object
+names(out.sfJSDM)
+summary(out.sfJSDM)
+```
+
+Notice the difference in run time between `sfJSDM()` and `sfMsPGOcc()`. The run time for `sfJSDM()` is almost half that of `sfMsPGOcc()`. This is a nice benefit of models that don't account for imperfect detection, but of course it comes with big sacrifices. Looking at the above output, we see all parameters have converged. 
+
+## Model selection using WAIC
+
+We can compute the WAIC for spatial factor JSDMs using the `waicOcc()` function just as we have seen previously. 
+
+```{r}
+waicOcc(out.sfJSDM)
+```
+
+However, the WAIC values for JSDM models are **not** directly comparable to those from `lfMsPGOcc()` or `sfMsPGOcc()` because they do not use the same data (JSDMs use a collapsed form of the replicated detection-nondetection data used in the occupancy models). This makes comparisons between models that do and do not account for imperfect detection a bit tricky. See Doser, Finley, Banerjee (2022) for one cross-validation approach to comparing predictive performance of JSDMs and occupancy models. While k-fold cross-validation is implemented for JSDMs in `spOccupancy` and all other model-fitting functions, we have yet to implement the specific approach we used in Doser, Finley, Banerjee (2022) to directly compare predictive performance JSDMs and occupancy models. We plan to do this in future development of the package. 
+
+For now, we can do some visual comparisons of the predicted occurrence probabilities from the spatial factor JSDM and the spatial factor multi-species occupancy model. 
+
+```{r, fig.width = 5, fig.height = 5, fig.align = 'center', units = 'in'}
+# Extract mean occurrence probabilities for each species from sfMsPGOcc
+psi.mean.sfMsPGOcc <- apply(out.sfMsPGOcc$psi.samples, c(2, 3), mean)
+# Extract mean occurrence probabilities for each species from sfJSDM
+psi.mean.sfJSDM <- apply(out.sfJSDM$psi.samples, c(2, 3), mean)
+# Plot results for the Red-eyed Vireo (REVI)
+curr.sp <- which(sp.ordered == 'REVI')
+# Color the points blue if sfJSDM > sfMsPGOcc, red otherwise
+my.col <- ifelse(psi.mean.sfMsPGOcc[curr.sp, ] > psi.mean.sfJSDM[curr.sp, ], 
+		 'lightsalmon', 'lightskyblue1')
+plot(psi.mean.sfMsPGOcc[curr.sp, ], psi.mean.sfJSDM[curr.sp, ], pch = 21,
+     bg = my.col, xlab = 'sfMsPGOcc', ylab = 'sfJSDM', main = 'Red-eyed Vireo', 
+     ylim = c(0, 1), xlim = c(0, 1))
+abline(0, 1)
+```
+
+Not surprisingly, most estimates of occurrence probability are smaller for the model that does not account for imperfect detection (`sfJSDM()`). However, the differences here are not very large for this species, which is exactly what we would expect for a fairly common species. Let's take a look at the results for a more rare species (CAWA (Canda Warbler)). 
+
+```{r, fig.width = 5, fig.height = 5, fig.align = 'center', units = 'in'}
+curr.sp <- which(sp.ordered == 'CAWA')
+# Color the points blue if sfJSDM > sfMsPGOcc, red otherwise
+my.col <- ifelse(psi.mean.sfMsPGOcc[curr.sp, ] > psi.mean.sfJSDM[curr.sp, ], 
+		 'lightsalmon', 'lightskyblue1')
+plot(psi.mean.sfMsPGOcc[curr.sp, ], psi.mean.sfJSDM[curr.sp, ], pch = 21,
+     bg = my.col, xlab = 'sfMsPGOcc', ylab = 'sfJSDM', main = 'Canada Warbler', 
+     ylim = c(0, 1), xlim = c(0, 1))
+abline(0, 1)
+```
+
+Here we see a clear discrepancy between the occurrence probability estimates from models that do and do not account for imperfect detection. 
+
+## Prediction
+
+We can use the `predict()` function to generate new predictions of "occurrence" probability values given a set of covariates and spatial locations. Note that these predictions are estimates of a confounded probability of occurrence and detection (hence the quotations around occurrence throughout this section). This code looks analogous to what we saw with `sfMsPGOcc()`. 
+
+```{r, eval = FALSE}
+# Not run (note this takes a large amount of memory to run).
+data(hbefElev)
+str(hbefElev)
+elev.pred <- (hbefElev$val - mean(hbef.ordered$occ.covs[, 1])) / sd(hbef.ordered$occ.covs[, 1])
+# Order: intercept, elevation (linear), elevation (quadratic)
+X.0 <- cbind(1, elev.pred, elev.pred^2) 
+# Spatial coordinates
+coords.0 <- as.matrix(hbefElev[, c('Easting', 'Northing')])
+# Approximate run time: 30 sec
+out.pred <- predict(out.sfJSDM, X.0, coords.0)
+```
+
+# Latent factor joint species distribution models
+
+The `spOccupancy` function `lfJSDM()` fits a latent factor joint species distribution model. This model is analogous to the latent factor multi-species occupancy model, except it does not account for imperfect detection. Alternatively, the model can be viewed as a non-spatial alternative to `sfJSDM()`. Just as we saw with `lfMsPGOcc()`, we will account for species correlations using a latent factor model, where the latent factors are assumed to arise from independent standard normal distributions. 
+
+## Basic model description
+
+The latent factor joint species distribution model is identical to the spatial factor joint species distribution model fit by `sfJSDM()`, except now the latent factors do not have spatial structure and are modeled using independent standard normal distributions. See previous model descriptions for the spatial factor joint species distribution model as well as the latent factor multi-species occupancy model for additional details. 
+
+## Fitting latent factor joint species distribution models with `lfJSDM`
+
+The function `lfJSDM()` fits latent factor joint species distribution models using P&oacute;lya-Gamma data augmentation. `lfJSDM()` has the following arguments. 
+
+```{r, eval = FALSE}
+lfJSDM(formula, data, inits, priors, n.factors, 
+       n.samples, n.omp.threads = 1, verbose = TRUE, n.report = 100, 
+       n.burn = round(.10 * n.samples), n.thin = 1, n.chains = 1,
+       k.fold, k.fold.threads = 1, k.fold.seed, ...)
+```
+
+There are no new arguments in `lfJSDM()` that we have not seen before in previous factor model functions. Notice that like `sfJSDM()`, there is only a single `formula` since we do not explicitly separate imperfect detection from latent occurrence. Similar to `sfJSDM()`, the `data` list should consist of the collapsed detection-nondetection data matrix (`y`), the covariates (`covs`), and the spatial coordinates (`coords`). We can use the same data list we constructed previously for `sfJSDM()`, and we remind ourselves of it's structure below. 
+
+```{r}
+str(jsdm.list)
+```
+
+Specifying covariates in `lfJSDM()` is exactly analogous to what we saw with `sfJSDM()`. Again, note that we can include random intercepts in `formula` using lme4 syntax [@bates2015]. As we have done before, we will use a single latent factor. 
+
+```{r}
+jsdm.formula <- ~ scale(Elevation) + I(scale(Elevation)^2)
+n.factors <- 1
+```
+
+The remaining arguments are all identical to the arguments we saw in `lfMsPGOcc()` when fitting a latent factor multi-species occupancy model. See the section on "Fitting models with `lfMsPGOcc()`" for specific details on these arguments. Note that for the initial values and priors, we only need to specify these values for the "occurrence" values since there is no detection sub-model. Just like with `sfJSDM()`, we do not need to specify initial values for the latent occurrence values $z$ (since we assume perfect detection). 
+
+```{r}
+# Initial values ----------------------
+# Number of species
+N <- nrow(jsdm.list$y)
+# Initiate all lambda initial values to 0. 
+lambda.inits <- matrix(0, N, n.factors)
+# Set diagonal elements to 1
+diag(lambda.inits) <- 1
+# Set lower triangular elements to random values from a standard normal dist
+lambda.inits[lower.tri(lambda.inits)] <- rnorm(sum(lower.tri(lambda.inits)))
+# Create list of initial values. 
+inits <- list(beta.comm = 0,
+              beta = 0,
+              tau.sq.beta = 1,
+              lambda = lambda.inits)
+# Priors ------------------------------
+priors <- list(beta.comm.normal = list(mean = 0, var = 2.72),
+               tau.sq.beta.ig = list(a = 0.1, b = 0.1))
+# Additional arguments ----------------
+n.samples <- 5000
+n.burn <- 3000
+n.thin <- 2
+n.chains <- 3
+n.report <- 1000
+```
+
+With all the arguments set for `lfJSDM()`, we are now ready to run the model. We run the model for 5000 MCMC samples, eliminating the first 3000 as burn-in and thinning by every 2 samples.
+
+```{r}
+# Approx run time: 32 seconds
+out.lfJSDM <- lfJSDM(formula = jsdm.formula, 
+                     data = jsdm.list, 
+                     inits = inits, 
+                     priors = priors, 
+                     n.factors = n.factors, 
+                     n.samples = n.samples, 
+                     n.report = n.report, 
+                     n.burn = n.burn, 
+                     n.thin = n.thin, 
+                     n.chains = n.chains)
+# Quick summary of model results
+summary(out.lfJSDM)
+```
+
+Not surprisingly, `lfJSDM()` is the fastest of the four models we have fit to the Hubbard Brook data. This makes sense since it only accounts for one (residual species correlations) of the three complexities and ignores the other two (imperfect detection and spatial autocorrelation). Note that we also see nice converging and mixing of all model parameters. This is a constant battle we fight with fitting Bayesian models. Generally, the more complex a model is, the longer it takes to run to reach convergence. For more simple models like the latent factor joint species distribution model, models usually run quite fast and we often will see adequate convergence and mixing without having to run too many MCMC samples.
+
+## Model selection using WAIC
+
+We compute the WAIC for latent factor JSDMs using the `waicOcc()` function, and we compare this value to the WAIC for the spatial factor JSDM we fit with `sfJSDM()`. 
+
+```{r}
+waicOcc(out.lfJSDM)
+waicOcc(out.sfJSDM)
+```
+
+The WAIC value for the spatial factor JSDM is smaller than that of the latnet factor JSDM, indicating that accounting for spatial autocorrelation improves model fit. As we noted for `sfJSDM()`, we cannot directly compare the WAIC value from a latent factor JSDM to those obtained from `lfMsPGOcc()` or `sfMsPGOcc()` since they do not use the same data. As with all factor model functions, we can additionaly use k-fold cross-validation to assess out-of-sample predictive performance. See [the main `spOccupancy` vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting) for examples of cross-validation using `spOccupancy` model functions. 
+
+## Prediction
+
+We can use the `predict()` function to generate new predictions of "occurrence" probability values given a set of covariates and spatial locations. Again, note that these predictions are estimates of a confounded probability of occurrence and detection. This code is analogous to what we saw with `lfMsPGOcc()`. 
+
+```{r, eval = FALSE}
+# Not run (note this takes a large amount of memory to run).
+data(hbefElev)
+elev.pred <- (hbefElev$val - mean(hbef2015$occ.covs[, 1])) / sd(hbef2015$occ.covs[, 1])
+# Order: intercept, elevation (linear), elevation (quadratic)
+X.0 <- cbind(1, elev.pred, elev.pred^2) 
+# Spatial coordinates
+coords.0 <- as.matrix(hbefElev[, c('Easting', 'Northing')])
+# Approximate run time: 30 sec
+out.pred <- predict(out.lfJSDM, X.0, coords.0)
+str(out.pred)
+# Species richness samples
+rich.pred <- apply(out.pred$z.0.samples, c(1, 3), sum)
+plot.dat <- data.frame(x = hbefElev$Easting, 
+                       y = hbefElev$Northing, 
+                       rich.mean = apply(rich.pred, 2, mean), 
+                       rich.sd = apply(rich.pred, 2, sd))
+# Plot species richness of the foliage-gleaning bird community
+# across the Hubbard Brook Experimental Forest
+dat.stars <- st_as_stars(plot.dat, dims = c('x', 'y'))
+ggplot() + 
+  geom_stars(data = dat.stars, aes(x = x, y = y, fill = rich.mean)) +
+  scale_fill_viridis_c(na.value = 'transparent') +
+  labs(x = 'Easting', y = 'Northing', fill = '', 
+       title = 'Mean Species Richness') +
+  theme_bw()
+```
+ 
+# References {-}
+
diff --git a/vignettes/mcmcFactorModels.Rmd b/vignettes/mcmcFactorModels.Rmd
new file mode 100644
index 0000000..83aebb8
--- /dev/null
+++ b/vignettes/mcmcFactorModels.Rmd
@@ -0,0 +1,422 @@
+---
+title: "MCMC samplers for joint species distribution models in `spOccupancy`"
+author: "Jeffrey W. Doser, Andrew O. Finley, Sudipto Banerjee"
+date: "`r format(Sys.time(), '%B %d, %Y')`"
+output: 
+  bookdown::pdf_document2:
+    toc: true
+    toc_depth: 3
+    number_sections: true
+pkgdown:
+  as_is: true
+  extension: pdf
+bibliography: [referencesJSDM.bib]
+biblio-style: apalike
+vignette: >
+  %\VignetteIndexEntry{mcmcFactorModels}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+```{r, include = FALSE}
+options(rmarkdown.html_vignette.check_title = FALSE)
+knitr::opts_chunk$set(
+  comment = "", eval = FALSE
+)
+```
+
+\newcommand{\bm}{\boldsymbol}
+\newcommand{\pg}{\text{P{\'o}lya-Gamma}}
+
+# Introduction
+
+This vignette provides statistical details on the MCMC algorithms used to fit all joint species distribution models in `spOccupancy`. In particular, we discuss the Gibbs samplers for each of the following four models presented in Doser, Finley, Banerjee (2022): 
+
+1. A spatial latent factor multi-species occupancy model using `sfMsPGOcc()` that accommodates residual species correlations, imperfect detection, and spatial autocorrelation. 
+2. A latent factor multi-species occupancy model using `lfMsPGOcc()` that accommodates residual species correlations and imperfect detection. 
+3. A spatial latent factor joint species distribution model using `sfJSDM()` that accommodates residual species correlations and spatial autocorrelation.
+4. A latent factor joint species distribution model using `lfJSDM()` that accommodates residual species correlations. 
+
+# $\pg$ data augmentation details
+
+We use $\pg$ data augmentation following [@polson2013] to yield an efficient Gibbs sampler for all joint species distribution models in `spOccupancy`. Traditionally, the species-specific regression coefficients (and intercepts) for occurrence ($\bm{\beta}_i$) and detection ($\bm{\alpha}_i)$ require a Metropolis update, which can lead to slow convergence and bad mixing of MCMC chains [@clark2019]. Instead, we introduce species-specific $\pg$ latent variables for both the occurrence and detection portions of the spatial factor multi-species occupancy model, which induces efficient Gibbs updates for the species-specific occurrence and detection regression coefficients.  
+
+Let $\omega_{i, \beta}(\bm{s}_j)$ for each species $i$ and location $j$ with coordinates $\bm{s}_j$ follow a $\pg$ distribution with parameters 1 and 0 (i.e., $\omega_{i, \beta}(\bm{s}_j) \sim \text{PG}(1, 0)$). Given this species-specific latent variable, we can re-express the Bernoulli process model (Equation 1 in Doser, Finley, Banerjee (2022)) as 
+
+\begin{equation}
+\begin{split}
+\psi_i(\bm{s}_j)^{z_i(\bm{s}_j)} (1 - \psi_i(\bm{s}_j))^{1 - z_i(\bm{s}_j)} &= \frac{\text{exp}(\bm{x}^\top_j\bm{\beta}_i + \text{w}_i^*(\bm{s}_j))^{z_i(\bm{s}_j)}}{1 + \text{exp}(\bm{x}^\top_j\bm{\beta} + \text{w}_i^*(\bm{s}_j))} \\
+&= \text{exp}(\kappa_{i}(\bm{s}_j)[\bm{x}^\top_j\bm{\beta}_i + \text{w}_i^*(\bm{s}_j)]) \times \\
+& \int \text{exp}(-\frac{\omega_{i, \beta}(\bm{s}_j)}{2}(\bm{x}_j^\top\bm{\beta}_i + \text{w}_i^*(\bm{s}_j))^2)p(\omega_{i, \beta}(\bm{s}_j) \mid 1, 0) d\omega_{i, \beta}(\bm{s}_j), 
+\end{split}
+(\#eq:pgLikelihood)
+\end{equation}
+
+where $\kappa_i(\bm{s}_j) = z_i(\bm{s}_j) - 0.5$ and $p(\omega_{i, \beta}(\bm{s}_j))$ is the probability density function of a \pg distribution with parameters 1 and 0 [@polson2013]. Similarly, we define $\omega_{i, k, \alpha}(\bm{s}_j) \sim \text{PG}(1, 0)$ as a latent variable for each site $j$, each species $i$, and each replicate $k$ in the detection portion of the occupancy model, which results in an analogous re-expression of the Bernoulli likelihood for $y_{i, k}(\bm{s}_j)$ as we showed in Equation \@ref(eq:pgLikelihood) for $z_i(\bm{s}_j)$. These re-expressions of the Bernoulli processes result in Gibbs updates for both the occurrence ($\bm{\beta}_i$) and detection ($\bm{\alpha}_i)$ regression coefficients when they are assigned normal priors [@polson2013; clark2019].
+
+# Spatial factor multi-species occupancy model
+
+## Model description
+
+Let $\bm{s}_j$ denote the spatial coordinates of site $j$, for all $j = 1, \dots, J$ sites. Define $z_i(\bm{s}_j)$ as the true latent presence (1) or absence (0) of species $i$ at site $j$ for $i = 1, \dots, N$ species. We assume $z_i(\bm{s}_j)$ arises from a Bernoulli process following
+
+\begin{equation}
+  z_i(\bm{s}_j) \sim \text{Bernoulli}(\psi_i(\bm{s}_j)),
+\end{equation}
+
+where $\psi_i(\bm{s}_j)$ is the probability of occurrence for species $i$ at site $j$. We model $\psi_i(\bm{s}_j)$ according to
+
+\begin{equation}
+  \text{logit}(\psi_i(\bm{s}_j)) = \bm{x}(\bm{s}_j)^\top\bm{\beta}_i + \text{w}^*_i(\bm{s}_j)
+(\#eq:psi)
+\end{equation}
+
+where $\bm{x}_j$ is a $p_{\psi} \times 1$ vector of an intercept and environmental covariates at site $j$, $\bm{\beta}_{i}$ is a $p_{\psi} \times 1$ species-specific coefficient vector (including an intercept parameter), and $\text{w}^*_{i}(\bm{s}_j)$ is a species-specific latent spatial process. We seek to jointly model the species-specific spatial processes to account for residual correlations between species. We use a spatial factor model [@hogan2004bayesian], a dimension reduction approach that can account for correlations among a large number of species. Specifically, we decompose $\text{w}^*_i(\bm{s}_j)$ into a linear combination of $q$ latent variables (i.e., factors) and their associated species-specific coefficients (i.e., factor loadings). In particular, we have
+
+\begin{equation}
+  \text{w}^*_i(\bm{s}_j) = \bm{\lambda}_i^\top\textbf{w}(\bm{s}_j),
+(\#eq:wStar)
+\end{equation}
+
+where $\bm{\lambda}_i$ is the $i$th row of factor loadings from an $N \times q$ matrix $\bm{\Lambda}$, and $\textbf{w}(\bm{s}_j)$ is a $q \times 1$ vector of independent spatial factors at site $j$. We achieve computational improvements and dimension reduction by setting $q << N$. We account for residual species correlations via their individual responses (i.e., loadings) to the $q$ latent spatial factors.
+
+Following @taylor2019spatial and [@tikhonov2020computationally], we model each $r = 1, \dots, q$ independent spatial process $\text{w}_r(\bm{s}_j)$ using an NNGP [@datta2016hierarchical] to achieve computational efficiency when modeling over a large number of spatial locations. More specifically, we have
+
+\begin{equation}
+    \text{w}_r(\bm{s}_j) \sim N(\bm{0}, \tilde{\bm{C}}_r(\bm{\theta}_r)),
+(\#eq:spatialProcess)
+\end{equation}
+
+where $\tilde{\bm{C}}_r(\bm{\theta}_r)$ is the NNGP-derived covariance matrix for the $r^{\text{th}}$ spatial process. The vector $\bm{\theta}_r$ consists of parameters governing the spatial process according to a spatial correlation function [@banerjee2014hierarchical]. For many correlation functions (e.g., exponential, spherical, Gaussian), $\bm{\theta}_r$ includes a spatial variance parameter, $\sigma^2_r$, and a spatial range parameter, $\phi_r$, while the Mat\'ern correlation function includes an additional spatial smoothness parameter, $\nu_r$.
+
+We assume all species-specific parameters ($\beta_{i, t}$ for all $t = 1, \dots, p_{\psi}$) arise from community-level distributions [@dorazio2005; gelfand2005modelling]. Specifically, we assign a normal prior with mean and variance hyperparameters that represent the community-level average and variance among species-specific effects across the community, respectively. For example, we model the non-spatial component of the species-specific occurrence intercept, $\beta_{i, 1}$, following
+
+\begin{equation} 
+  \beta_{i, 1} \sim N(\mu_{\beta_1}, \tau^2_{\beta_1}),
+(\#eq:commEffects)
+\end{equation}
+
+where $\mu_{\beta_1}$ is the average intercept across the community, and $\tau^2_{\beta_1}$ is the variability in the species-specific intercepts across the community.
+
+To estimate $\psi_i(\bm{s}_j)$ and $z_i(\bm{s}_j)$ while explicitly accounting for imperfect detection, we obtain $k = 1, \dots, K_j$ sampling replicates at each site $j$. Let $y_{i, k}(\bm{s}_j)$ denote the detection (1) or nondetection (0) of species $i$ during replicate $k$ at site $j$. We model the observed data $y_{i, k}(\bm{s}_j)$ conditional on the true species-specific occurrence $z_i(\bm{s}_j)$ at site $j$ following
+
+\begin{equation}
+\begin{split}
+&y_{i, j, k} \sim \text{Bernoulli}(\pi_{i, j, k}z_{i, j}), \\
+&\text{logit}(\pi_{i, j, k}) = \bm{v}^{\top}_{i, j, k}\bm{\alpha}_i,
+\end{split}
+\end{equation}
+
+where $\pi_{i, j, k}$ is the probability of detecting species $i$ at site $j$ during replicate $k$ (given it is present at site $j$), which is a function of site and replicate-specific covariates $\bm{V}$ and a vector of species-specific regression coefficients ($\bm{\alpha}_i$). Similarly to the occurrence regression coefficients, the species-specific detection coefficients are envisioned as random effects arising from a common community-level distribution:
+
+\begin{equation}
+\bm{\alpha}_i \sim \text{Normal}(\bm{\mu_{\alpha}}, \bm{T}_{\alpha}),
+\end{equation}
+
+where $\bm{\mu_{\alpha}}$ is a vector of community-level mean effects for each detection covariate effect (including the intercept) and $\bm{T}_{\alpha}$ is a diagonal matrix with diagonal elements $\bm{\tau}^2_{\alpha}$ that represent the variability of each detection covariate effect among species in the community.
+
+We assume normal priors for community-level mean parameters and inverse-Gamma priors for community-level variance parameters. Identifiability of the latent spatial factors requires additional constraints [@hogan2004bayesian]. Following [@taylor2019spatial], we set all elements in the upper triangle of the factor loadings matrix $\bm{\Lambda}$ equal to 0 and its diagonal elements equal to 1. We additionally fix the spatial variance parameters $\sigma^2_{r}$ of each latent spatial processes to 1. We assign standard normal priors for all lower triangular elements in $\bm{\Lambda}$ and assign each spatial range parameter $\phi_{r}$ an independent uniform prior.
+
+## Gibbs sampler {#sfMsPGOccSampler}
+
+Here we describe the Gibbs sampler for fitting the spatial factor multi-species occupancy model using `sfMsPGOcc()`.
+
+### Update community-level occurrence coefficients ($\bm{\mu_{\beta}}$)
+
+We first sample all community-level parameters followed by species level parameters. First we sample the community-level occurrence coefficients. Let $\bm{\mu}_{\beta}$ denote the vector of all community-level occurrence means, and similarly let $\bm{T}_{\beta}$ denote the variance matrix of all community-level occurrence variance parameters. Note that $\bm{T}_{\beta}$ is a diagonal matrix. Let $\bm{\mu}_{\beta} \sim N(\bm{\mu}_{0, \beta}, \bm{\Sigma}_{\beta})$ denote our prior distribution, where $\bm{\Sigma}_{\beta}$ is a diagonal matrix. Note this is equivalent to assigning an independent normal prior for each coefficient. Our full conditional for the community-level regression coefficients $\bm{\mu_{\beta}}$ is then
+
+\begin{equation}
+\bm{\mu_{\beta}} \mid \cdot \sim N([\bm{\Sigma}_{\beta}^{-1} + N\bm{T}_{\beta}^{-1}]^{-1}\Big[\sum_{i = 1}^N(\bm{T}_{\beta}\bm{\beta}_i) + \bm{\Sigma}_{\beta}^{-1}\bm{\mu}_{0, \beta}\Big], [\bm{\Sigma}_{\beta}^{-1} + N\bm{T}_{\beta}^{-1}]^{-1}).
+(\#eq:muBeta)
+\end{equation}
+
+
+### Update community-level detection coefficients ($\bm{\mu_{\alpha}}$)
+
+Next, we sample the community-level detection coefficients. Let $\bm{\mu}_{\alpha}$ denote the vector of all community-level detection means, and similarly let $\bm{T}_{\alpha}$ denote the diagonal variance matrix of all community-level detection variance parameters. Let $\bm{\mu}_{\alpha} \sim N(\bm{\mu}_{0, \alpha}, \bm{\Sigma}_{\alpha})$ denote the prior distribution, where $\bm{\Sigma}_{\alpha}$ is a diagonal matrix. Our full conditional then takes the form
+
+\begin{equation}
+\bm{\mu_{\alpha}} \mid \cdot \sim N([\bm{\Sigma}_{\alpha}^{-1} + N\bm{T}_{\alpha}^{-1}]^{-1}\Big[\sum_{i = 1}^N(\bm{T}_{\alpha}\bm{\alpha}_i) + \bm{\Sigma}_{\alpha}^{-1}\bm{\mu}_{0, \alpha}\Big], [\bm{\Sigma}_{\alpha}^{-1} + N\bm{T}_{\alpha}^{-1}]^{-1}).
+(\#eq:muAlpha)
+\end{equation}
+
+### Update community-level occurrence variances ($\bm{\tau^2_{\beta}}$)
+
+Let $\tau^2_{t, \beta}$ denote the community-level variance for the $t^{\text{th}}$ occurrence parameter ($t = 1, \dots, p_{\psi}$). We assign an inverse gamma normal prior to $\tau^2_{t, \beta}$ with shape parameter $a_{\tau_{t, \beta}}$ and scale parameter $b_{\tau_{t, \beta}}$. Our full conditional is then
+
+\begin{equation}
+\tau^2_{t, \beta} \mid \cdot \sim \text{IG}(a_{\tau_{t, \beta}} + \frac{N}{2}, b_{\tau_{t, \beta}} + \frac{\sum_{i = 1}^N(\beta_{i, t} - \mu_{\beta_t})^2}{2}).
+(\#eq:tauBeta)
+\end{equation}
+
+
+### Update community-level detection variances ($\bm{\tau^2_{\alpha}}$)
+
+Let $\tau^2_{t, \alpha}$ denote the community-level variance for the $t^{\text{th}}$ detection parameter ($t = 1, \dots, p_{\pi}$). We assign an inverse gamma normal prior to $\tau^2_{t, \alpha}$ with shape parameter $a_{\tau_{t, \alpha}}$ and scale parameter $b_{\tau_{t, \alpha}}$. Our full conditional is then
+
+\begin{equation}
+\tau^2_{t, \alpha} \mid \cdot \sim \text{IG}(a_{\tau_{t, \alpha}} + \frac{N}{2}, b_{\tau_{t, \alpha}} + \frac{\sum_{i = 1}^N(\alpha_{i, t} - \mu_{\alpha_t})^2}{2}).
+(\#eq:tauAlpha)
+\end{equation}
+
+### Update species-specific occurrence auxiliary variables ($\omega_{i, \beta}(\bm{s}_j)$)
+
+We next sample the occurrence auxiliary variable ($\omega_{i, \beta}(\bm{s}_j)$ individually for each species $i$ and site $j$. Our full conditional is
+
+\begin{equation}
+\omega_{i, \beta}(\bm{s}_j) \mid \cdot \sim \text{PG}(1, \bm{x}(\bm{s}_j)^{\top}\bm{\beta}_i + \text{w}^*_{i}(\bm{s}_j)).
+(\#eq:omegaBeta)
+\end{equation}
+
+### Update detection auxiliary variables ($\omega_{i, k, \alpha}(\bm{s}_j)$)
+
+We next update the latent $\pg$ auxiliary variable for the detection process, $\omega_{i, k, \alpha}(\bm{s}_j)$, for each replicate $k$ at each site $j$ for each species $i$. Note that we only need to sample $\omega_{i, k, \alpha}(\bm{s}_j)$ when $\sum_{k = 1}^{K_j}y_{i, j, k} = 0$. Following [@polson2013], we have
+
+\begin{equation}
+  \omega_{i, k, \alpha}(\bm{s}_j) \mid \cdot \sim \text{PG}(1, \bm{v}(\bm{s}_j)^\top\bm{\alpha}_i).
+\end{equation}
+
+### Update species-level occurrence regression coefficients ($\bm{\beta}_i$)
+
+We update the species-level occurrence regression coefficients ($\bm{\beta}_i$), including the intercept, from the following multivariate normal full conditional
+
+\begin{equation}
+\bm{\beta}_i \mid \cdot \sim \text{Normal}\Big([\bm{T}_{\beta}^{-1} + \bm{X}^{\top}\bm{S}_{\beta}\bm{X}]^{-1}[\bm{X}^{\top}(\bm{z}_i - 0.5 \bm{1}_J - \bm{S}_{\beta}\text{\textbf{w}}^*_i(\bm{s}_j)) + \bm{T}_{\beta}^{-1}\bm{\mu_{\beta}}], [\bm{T}_{\beta}^{-1} + \bm{X}^{\top}\bm{S}_{\beta}\bm{X}]^{-1}\Big),
+(\#eq:beta)
+\end{equation}
+
+where $\bm{S}_{\beta}$ is a diagonal $J \times J$ matrix with diagonal entries equal to the latent $\pg$ variable values for species $i$, $\bm{z}_i$ is the $J \times 1$ vector of latent occurrence values for species $i$, and $\bm{1}_J$ is a $J \times 1$ vector of 1s.
+
+
+### Update species-level detection regression coefficients ($\bm{\alpha}_i$)
+
+Next, we sample the species-specific detection regression coefficients for species $i$ ($\bm{\alpha}_i$) from
+
+\begin{equation}
+\bm{\alpha}_i \mid \cdot \sim \text{Normal}\Big([\bm{T}_{\alpha}^{-1} + \bm{\tilde{V}}^{\top}\bm{S}_{\alpha}\bm{\tilde{V}}]^{-1}[\bm{\tilde{V}}^{\top}(\bm{\tilde{y}}_i - 0.5 \bm{1}_{J_i^*}) + \bm{T}_{\alpha}^{-1}\bm{\mu_{\alpha}}], [\bm{T}_{\alpha}^{-1} + \bm{\tilde{V}}^{\top}\bm{S}_{\alpha}\bm{\tilde{V}}]^{-1}\Big).
+(\#eq:alpha)
+\end{equation}
+
+The species-level detection regression coefficients $\bm{\alpha}_i$ are only informed by the locations where $z_{i}(\bm{s}_j) = 1$, since we assume no false positive detections. We define $J_i^*$ as the total number of sites at the current iteration of the MCMC with $z_{i}(\bm{s}_j) = 1$. $\bm{S}_{\alpha}$ is a diagonal matrix with diagonal entries equal to the latent $\pg$ variable values at the site/replicate combinations that correspond to $z_i(\bm{s}_j) = 1$. The matrix $\bm{\tilde{V}}$ is the matrix of detection covariates associated with the sites where $z_{i}(\bm{s}_j) = 1$. Similarly, $\bm{\tilde{y}}_i$ is a vector of stacked detection-nondetection data values at the entries associated with $z_{i}(\bm{s}_j) = 1$.
+
+### Update latent spatial factors ($\textbf{w}(\bm{s}_j)$)
+
+Let $N(\bm{s}_j)$ denote the set of $m$ nearest neighbors of $\bm{s}_j$ among $\bm{s}_1, \bm{s}_2, \dots, \bm{s}_{j - 1}$. Let $\textbf{w}_r(N(\bm{s}_j))$ denote the $m$ realizations of the $r^{\text{th}}$ NNGP at the locations in $N(\bm{s}_j)$. Let $C(\cdot, \phi_r)$ denote the correlation function of the original Gaussian Process (GP) from which the $r^{\text{th}}$ NNGP is derived. For any two sets $A_1$ and $A_2$, define $\text{C}_{A_1, A_2}(\phi_r)$ as the correlation matrix between the observations in $A_1$ and $A_2$ for the $r^{\text{th}}$ GP. For $j \geq 1$, we have
+
+\begin{equation}
+  \textbf{b}_r(\bm{s}_j) = \textbf{C}_{\bm{s}_j, N(\bm{s}_j)}(\phi_r)\textbf{C}^{-1}_{N(\bm{s}_j), N(\bm{s}_j)}(\phi_r),
+(\#eq:bNNGP)
+\end{equation}
+
+where $\textbf{b}_r(\bm{s}_1) = \bm{0}$ for all $r = 1, \dots, q$. Further, we have
+
+\begin{equation}
+  f_r(\bm{s}_j) = \textbf{C}_{\bm{s}_j, \bm{s}_j}(\phi_r) - \textbf{C}_{\bm{s}_j, N(\bm{s}_j)}(\phi_r)\textbf{C}^{-1}_{N(\bm{s}_j), N(\bm{s}_j)}(\phi_r)\textbf{C}_{N(\bm{s}_j), \bm{s}_j}(\phi_r),
+(\#eq:fNNGP)
+\end{equation}
+
+where $f_r(\bm{s}_1) = 0$ for all $r = 1, \dots, q$. For any two locations $\bm{s}_1$ and $\bm{s}_2$, if $\bm{s}_1 \in N(\bm{s}_2)$ and is the $l^{\text{th}}$ member of $N(\bm{s}_2)$, then define $b_r(\bm{s}_2, \bm{s}_1)$ as the $l^{\text{th}}$ entry of $\textbf{b}_r(\bm{s}_2)$. Let $U(\bm{s}_1) = \{\bm{s}_2 \in S \mid \bm{s}_1 \in N(\bm{s}_2)\}$ be the collection of locations $\bm{s}_2$ for which $\bm{s}_1$ is a neighbor, where $S$ is the set of all $J$ spatial locations. For every $\bm{s}_2 \in U(\bm{s}_1)$, define $a_r(\bm{s}_2, \bm{s}_1) = \text{w}_r(\bm{s}_2) - \sum_{\bm{s} \in N(\bm{s}_2), \bm{s} \neq \bm{s}_2} \text{w}_r(\bm{s})b_r(\bm{s}_2, \bm{s})$. Extending this to matrix notation, let $\bm{B}(\bm{s}_j)$ be a $q \times mq$ block matrix, with each $q \times q$ diagonal block containing the elements of $\bm{b}_r(\bm{s}_j)$ for each of the $r = 1, \dots q$ spatial factors for each of the specific $m$ neighbors. Let $\bm{F}(\bm{s}_j)$ be a $q \times q$ diagonal matrix with diagonal elements of $f_r(\bm{s}_j)$. Let $\bm{a}(\bm{s}, \bm{s}_j)$ contain the values $a_r(\bm{s}, \bm{s}_j)$ for each of the $r = 1, \dots, q$ latent factors. Using this notation, the full conditional for $\textbf{w}(\bm{s}_j)$ is
+
+\begin{equation}
+  \begin{array}{c}
+    \textbf{w}(\bm{s}_j) \mid \cdot N_q(\bm{\mu}_j\bm{\Sigma}_j, \bm{\Sigma}_j) \mbox{ where, }\\
+    \bm{\mu}_j = \bm{F}(\bm{s}_j)^{-1}\bm{B}(\bm{s}_j)\textbf{w}(N(\bm{s}_j)) + \sum_{\bm{s} \in U(\bm{s}_j)}\bm{B}(\bm{s}, \bm{s}_j)^\top\bm{F}(\bm{s}_j)^{-1}\bm{a}(\bm{s}, \bm{s}_j) + \\
+    \bm{\Lambda}^\top\bm{S}_{j, \beta}((\bm{z}(\bm{s}_j) - 0.5\bm{1}_N)\bm{S}_{j, \beta}^{-1} - \bm{X}(\bm{s}_j)^\top\bm{\beta}) \mbox{ and } \\
+    \bm{\Sigma}_j = \bm{F}(\bm{s}_j)^{-1} + \sum_{\bm{s} \in U(\bm{s}_j)} \bm{B}(\bm{s}, \bm{s}_j)^\top\bm{F}(\bm{s}_j)^{-1}\bm{B}(\bm{s}, \bm{s}_j) + \bm{\Lambda}^\top\bm{S}_{j, \beta}\bm{\Lambda},
+  \end{array}
+(\#eq:w)
+\end{equation}
+
+where $\textbf{w}(N(\bm{s}_j))$ is a stacked $mq \times 1$ vector of the $m$ realizations of each of the $r$ NNGPs at the locations in $N(\bm{s}_j)$, $\bm{S}_{j, \beta}$ is an $N \times N$ diagonal matrix with the $\pg$ auxiliary variables for each species $i$ at site $j$ along the diagonal elements, $\bm{X}(\bm{s}_j)^\top$ is a $N \times (Np_{\psi})$ block-diagonal matrix with the $i$th diagonal block the length $\bm{x}(\bm{s}_j)$ vector of $p_{\psi}$ spatially-varying covariates, and $\bm{\beta}$ is the $(Np_{\psi}) \times 1$ stacked vector of species-specific regression coefficients (including the intercept).
+
+### Update latent spatial factor loadings ($\bm{\Lambda}$)
+
+Recall we set all diagonal elements of $\bm{\Lambda}$ to 1 and all upper triangular elements equal to 0 in order to ensure identifiability of the latent spatial factors. Given this requirement, let $q_i = \text{min}\{i - 1, q\}$ for $2 \leq i \leq N$, and let $\tilde{\bm{\lambda}}_i = (\lambda_{i, 1}, \dots, \lambda_{i, q_i})^\top$ be the vector representing the unrestricted elements in the $i^{\text{th}}$ row of $\bm{\Lambda}$. Define $\textbf{W}$ as the $J \times q$ matrix of latent spatial factors, and let $\textbf{W}_{1:i}$ be the first $i$ columns of \textbf{W}. Using this notation, the full conditional density for $\tilde{\bm{\lambda}}_i$ is $N_q(\bm{\Omega}_{\tilde{\bm{\lambda}}_i}\bm{\mu}_{\tilde{\bm{\lambda}}_i}, \bm{\Omega}_{\tilde{\bm{\lambda}}_i})$, where
+
+\begin{align}
+ \bm{\mu}_{\tilde{\bm{\lambda}}_i} &= \left\{ \begin{matrix}  \textbf{W}_{1:i}^\top\bm{S}_{i, \beta}((\bm{z}_i - 0.5\bm{1}_J)\bm{S}_{i, \beta}^{-1}) - \bm{X}_i^\top\bm{\beta}_i - \dot{\textbf{w}}_i)\hfill &\text{ if }&2\leq i\leq q  \\
+ \textbf{W}^\top\bm{S}_{i, \beta}((\bm{z}_i - 0.5\bm{1}_J)\bm{S}_{i, \beta}^{-1}) - \bm{X}_i^\top\bm{\beta}_i)\hfill &\text{ if }&i > q  \end{matrix}\right.,\text{ and} \\
+ \bm{\Omega}_{\tilde{\bm{\lambda}}_i} &= \left\{ \begin{matrix} (\textbf{W}_{1:(i - 1)}^\top\bm{S}_{i, \beta}\textbf{W}_{1:(i - 1)} + I_{j - 1})^{-1} \hfill &\text{ if }&2\leq i \leq q \\
+ (\textbf{W}^\top\bm{S}_{i, \beta}\textbf{W} + I_{q})^{-1} \hfill &\text{ if }&i > q \end{matrix}\right.,
+\end{align}
+
+where $\bm{S}_{i, \beta}$ is a $J \times J$ matrix with diagonal elements consisting of the latent $\pg$ auxiliary variables for species $i$, $\dot{\textbf{w}}_i$ is the $i^{\text{th}}$ column of $\textbf{W}$, and $\bm{X}_i^{\top}$ is an $N \times p_{\psi}$ matrix of spatially-varying covariates for species $i$ (which we assume are equivalent for all $i$ species).
+
+### Update spatial range parameters ($\bm{\phi}$)
+
+We use a Metropolis within Gibbs step to sample $\bm{\phi}$. The full conditional posterior density for $\phi_r$ for each $r = 1, \dots, q$ is proportional to
+\begin{equation}
+\begin{array}{c}
+p(\phi_r \mid \cdot ) \propto p_r(\phi_r)p(\textbf{w}_r \mid \phi_r) \\
+\propto p(\phi_r) \times \prod_{j = 1}^J N\left(\text{w}_r(\bm{s}_j) \mid \textbf{b}_r(\bm{s}_j)^\top\textbf{w}_r (N(\bm{s}_j)), f_r (\bm{s}_j). \right)
+\end{array}
+(\#eq:fullTheta)
+\end{equation}
+
+We sample $\phi_r$ using a random walk Metropolis step. We use a normal proposal distribution along with a Jacobian transformation.
+
+### Update latent occurrence values ($z_{i}(\bm{s}_j)$)
+
+Finally, we sample the latent occurrence states for each species. We set $z_{i}(\bm{s}_j) = 1$ for all sites where there is at least one detection of species $i$, and so we only need to sample $z_{i}(\bm{s}_j)$ at sites where there are no detections. Thus, for all locations with no detections of the species $i$, we sample $z_{i}(\bm{s}_j)$ according to
+
+\begin{equation}
+z_{i}(\bm{s}_j) \mid \cdot \sim \text{Bernoulli}\Bigg(\frac{\psi_{i}(\bm{s}_j) \prod_{k = 1}^{K_j}(1 - \pi_{i, k}(\bm{s}_j))}{1 - \psi_{i}(\bm{s}_j) + \psi_{i}(\bm{s}_j) \prod_{k = 1}^{K_j}(1 - \pi_{i, k}(\bm{s}_j))}\Bigg).
+(\#eq:z)
+\end{equation}
+
+# Latent factor multi-species occupancy model
+
+The `spOccupancy` function `lfMsPGOcc()` fits a latent factor multi-species occupancy model. The latent factor multi-species occupancy model is identical to the spatial factor multi-species occupancy model, except we do not assume any spatial structure for the latent factors. Instead, we assign each of the $r = 1, \dots, q$ latent factors a standard normal prior. This model is analogous to the model of [@tobler2019joint], except we use a logistic link function and $\pg$ latent variables rather than a probit link function, as well as different restrains on the factor loadings matrix.
+
+## Model description
+
+Let $z_{i, j}$ be the true presence (1) or absence of some species $i$ at site $j$ for a total of $i = 1, \dots, N$ species and $j = 1, \dots, J$ sites. We assume $z_{i, j}$ arises from a Bernoulli process following
+
+\begin{equation}
+\begin{split}
+&z_{i, j} \sim \text{Bernoulli}(\psi_{i, j}), \\
+&\text{logit}(\psi_{i, j}) = \bm{x}^{\top}_{j} \bm{\beta}_i + \text{w}^*_{i, j},
+\end{split}
+(\#eq:zlfMsPGOcc)
+\end{equation}
+
+where $\psi_{i, j}$ is the probability of occurrence of species $i$ at site $j$, which is a function of site-specific covariates $\bm{x}_j$, a vector of species-specific regression coefficients ($\bm{\beta}_i$) for those covariates, and a latent process $\text{w}^*_{i, j}$. We incorporate residual species correlations through the formulation of the latent process $\text{w}^*_{i, j}$. We use a factor modeling approach, which is a dimension reduction approach that can account for correlations among a large number of species. Specifically, we decompose $\text{w}^*_{i, j}$ into a linear combination of $q$ latent variables (i.e., factors) and their associated species-specific coefficients (i.e., factor loadings). Thus, we have
+
+\begin{equation}
+  \text{w}^*_{i, j}) = \bm{\lambda}_i^\top\textbf{w}_j,
+\end{equation}
+
+where $\bm{\lambda}_i$ is the $i$th row of factor loadings from an $N \times q$ matrix $\bm{\Lambda}$, and $\textbf{w}_j$ is a $q \times 1$ vector of independent latent factors at site $j$. We achieve computational improvements by setting $q << N$.  We account for residual species correlations via their individual responses (i.e., loadings) to the $q$ latent spatial factors. We can envision the latent variables $\textbf{w}_j$ as unmeasured site-specific covariates that are treated as random variables in the model estimation procedure. For the non-spatial latent factor model, we assign a standard normal prior distribution to the latent factors (i.e., we assume each latent factor is independent and arises from a normal distribution with mean 0 and standard deviation 1).
+
+We envision the species-specific regression coefficients ($\bm{\beta}_i$) as random effects arising from a common community-level distribution:
+
+\begin{equation}
+\bm{\beta}_i \sim \text{Normal}(\bm{\mu_{\beta}}, \bm{T}_{\beta}),
+\end{equation}
+
+where $\bm{\mu_{\beta}}$ is a vector of community-level mean effects for each occurrence covariate effect (including the intercept) and $\bm{T}_{\beta}$ is a diagonal matrix with diagonal elements $\bm{\tau}^2_{\beta}$ that represent the variability of each occurrence covariate effect among species in the community.
+
+We do not directly observe $z_{i, j}$, but rather we observe an imperfect representation of the latent occurrence process. Let $y_{i, j, k}$ be the observed detection (1) or nondetection (0) of a species $i$ of interest at site $j$ during replicate $k$ for each of $k = 1, \dots, K_j$ replicates at each site $j$. We envision the detection-nondetection data as arising from a Bernoulli process conditional on the true latent occurrence process:
+
+\begin{equation}
+\begin{split}
+&y_{i, j, k} \sim \text{Bernoulli}(p_{i, j, k}z_{i, j}), \\
+&\text{logit}(p_{i, j, k}) = \bm{v}^{\top}_{i, j, k}\bm{\alpha}_i,
+\end{split}
+\end{equation}
+
+where $p_{i, j, k}$ is the probability of detecting species $i$ at site $j$ during replicate $k$ (given it is present at site $j$), which is a function of site and replicate-specific covariates $\bm{V}$ and a vector of species-specific regression coefficients ($\bm{\alpha}_i$). Similarly to the occurrence regression coefficients, the species-specific detection coefficients are envisioned as random effects arising from a common community-level distribution:
+
+\begin{equation}
+\bm{\alpha}_i \sim \text{Normal}(\bm{\mu_{\alpha}}, \bm{T}_{\alpha}),
+\end{equation}
+
+where $\bm{\mu_{\alpha}}$ is a vector of community-level mean effects for each detection covariate effect (including the intercept) and $\bm{T}_{\alpha}$ is a diagonal matrix with diagonal elements $\bm{\tau}^2_{\alpha}$ that represent the variability of each detection covariate effect among species in the community.
+
+We assign multivariate normal priors for the community-level occurrence ($\bm{\mu_{\beta}}$) and detection ($\bm{\mu_{\alpha}}$) means, and assign independent inverse-Gamma priors on the community-level occurrence ($\tau^2_{\beta}$) and detection ($\tau^2_{\alpha}$) variance parameters. To ensure identifiability of the latent factors, we set all elements in the upper triangle of the factor loadings matrix $\bm{\Lambda}$ equal to 0 and its diagonal elements equal to 1. Analogous to the spatial factor multi-species occupancy model, we introduce $\pg$ auxiliary variables for both the occurrence and detection components of the model to induce a Gibbs update for the species-specific occurrence and detection random effects.
+
+## Gibbs sampler {#lfMsPGOccSampler}
+
+The Gibbs sampler for the latent factor multi-species occupancy model is identical to the sampler for the spatial factor multi-species occupancy model, with two exceptions: the spatial range parameters are no longer in the model, and the update for the latent factors is different. See Section \@ref(sfMsPGOccSampler) for the Gibbs updates for all parameters besides the latent factors.
+
+### Update latent factors ($\textbf{w}_j$)
+
+Let $\textbf{w}_j$ denote the $q$ latent factors at site $j$. Our full conditional is
+
+\begin{equation}
+  \begin{array}{c}
+    \textbf{w}_j \mid \cdot N_q(\bm{\mu}_j\bm{\Sigma}_j, \bm{\Sigma}_j) \mbox{ where, }\\
+    \bm{\mu}_j = \bm{\Lambda}^\top\bm{S}_{j, \beta}((\bm{z}_j - 0.5\bm{1}_N)\bm{S}_{j, \beta}^{-1} - \bm{X}_j^\top\bm{\beta}) \mbox{ and } \\
+    \bm{\Sigma}_j = I_q + \bm{\Lambda}^\top\bm{S}_{j, \beta}\bm{\Lambda},
+  \end{array}
+(\#eq:wlfMsPGocc)
+\end{equation}
+
+where $\bm{S}_{j, \beta}$ is an $N \times N$ diagonal matrix with the $\pg$ auxiliary variables for each species $i$ at site $j$ along the diagonal elements, $\bm{X}_j^\top$ is a $N \times (Np_{\psi})$ block-diagonal matrix with the $i$th diagonal block the length $\bm{x}_j$ vector of $p_{\psi}$ spatially-varying covariates, $\bm{\beta}$ is the $(Np_{\psi}) \times 1$ stacked vector of species-specific regression coefficients (including the intercept), and $I_q$ is the $q \times q$ identity matrix.
+
+# Spatial factor joint species distribution model
+
+The `spOccupancy` function `sfJSDM()` fits a spatial factor joint species distribution model. The spatial factor JSDM (`sfJSDM()`) is a joint species distribution model that ignores imperfect detection but accounts for species residual correlations and spatial autocorrelation. As in the spatial factor multi-species occupancy model, we account for species correlations using a spatial factor model, where the spatial factors arise from $q$ independent NNGPs. This is analogous to the NNGP model presented by @tikhonov2020computationally, and is similar to other spatially-explicit JSDMs [@thorson2015spatial; @ovaskainen2016uncovering]. Because this model does not account for imperfect detection, we eliminate the detection sub-model and rather directly model a simplified version of the replicated detection-nondetection data, denoted as $y^*_i(\bm{s}_j)$, where $y^*_i(\bm{s}_j) = I(\sum_{k = 1}^{K_j}y_{i, k}(\bm{s}_j) > 0)$, with $I(\cdot)$ an indicator function denoting whether or not species $i$ was detected during at least one of the $K_j$ replicates at site $j$. Note that in the following description, we will describe the covariate effects as effecting the probability of occurrence. However, since we do not explicitly account for imperfect detection, the estimated probability is really a confounded process of occurrence and detection, and thus all covariate effects should be interpreted as combined effects on occurrence and detection.
+
+## Model description
+
+Let $\bm{s}_j$ denote the spatial coordinates of site $j$, for all $j = 1, \dots, J$ sites. Define $y^*_i(\bm{s}_j)$ as the detection (1) or nondetection (0) of species $i$ at site $j$. We assume $y^*_i(\bm{s}_j)$ arises from a Bernoulli process following
+
+\begin{equation}
+  y^*_i(\bm{s}_j) \sim \text{Bernoulli}(\psi_i(\bm{s}_j)),
+\end{equation}
+
+where $\psi_i(\bm{s}_j)$ is the probability of occurrence for species $i$ at site $j$. We model $\psi_i(\bm{s}_j)$ according to
+
+\begin{equation}
+  \text{logit}(\psi_i(\bm{s}_j)) = \bm{x}(\bm{s}_j)^\top\bm{\beta}_i + \text{w}^*_i(\bm{s}_j)
+\end{equation}
+
+where $\bm{x}_j$ is a $p_{\psi} \times 1$ vector of an intercept and environmental covariates at site $j$, $\bm{\beta}_{i}$ is a $p_{\psi} \times 1$ species-specific coefficient vector (including an intercept parameter), and $\text{w}^*_{i}(\bm{s}_j)$ is a species-specific latent spatial process. Analogous to the spatial factor multi-species occupancy model, we model $\text{w}^*_i(\bm{s}_j)$ using a spatial facotr modeling approach, where we have
+
+\begin{equation}
+  \text{w}^*_i(\bm{s}_j) = \bm{\lambda}_i^\top\textbf{w}(\bm{s}_j),
+\end{equation}
+
+where $\bm{\lambda}_i$ is the $i$th row of factor loadings from an $N \times q$ matrix $\bm{\Lambda}$, and $\textbf{w}(\bm{s}_j)$ is a $q \times 1$ vector of independent spatial factors at site $j$. We achieve computational improvements and dimension reduction by setting $q << N$. We account for residual species correlations via their individual responses (i.e., loadings) to the $q$ latent spatial factors.
+
+We model each $r = 1, \dots, q$ independent spatial process $\text{w}_r(\bm{s}_j)$ using an NNGP [@datta2016hierarchical] to achieve computational efficiency when modeling over a large number of spatial locations. More specifically, we have
+
+\begin{equation}
+    \text{w}_r(\bm{s}_j) \sim N(\bm{0}, \tilde{\bm{C}}_r(\bm{\theta}_r)),
+\end{equation}
+
+where $\tilde{\bm{C}}_r(\bm{\theta}_r)$ is the NNGP-derived covariance matrix for the $r^{\text{th}}$ spatial process. The vector $\bm{\theta}_r$ consists of parameters governing the spatial process according to a spatial correlation function [@banerjee2014hierarchical]. For many correlation functions (e.g., exponential, spherical, Gaussian), $\bm{\theta}_r$ includes a spatial variance parameter, $\sigma^2_r$, and a spatial range parameter, $\phi_r$, while the Mat\'ern correlation function includes an additional spatial smoothness parameter, $\nu_r$.
+
+We assume all species-specific parameters ($\beta_{i, t}$ for all $t = 1, \dots, p_{\psi}$) arise from community-level distributions [@dorazio2005; @gelfand2005modelling]. Specifically, we assign a normal prior with mean and variance hyperparameters that represent the community-level average and variance among species-specific effects across the community, respectively. For example, we model the non-spatial component of the species-specific occurrence intercept, $\beta_{i, 1}$, following
+
+\begin{equation}
+  \beta_{i, 1} \sim N(\mu_{\beta_1}, \tau^2_{\beta_1}),
+\end{equation}
+
+where $\mu_{\beta_1}$ is the average intercept across the community, and $\tau^2_{\beta_1}$ is the variability in the species-specific intercepts across the community.
+
+## Gibbs sampler
+
+The Gibbs sampler for the spatial factor joint species distribution model is analogous to the updates for the occurrence parameters in the spatial factor multi-species occupancy model, with all instances of $\bm{z}_i(\bm{s}_j)$ replaced by $y^*_i(\bm{s}_j)$. See Section \@ref(sfMsPGOccSampler).
+
+# Latent factor joint species distribution model
+
+The `spOccupancy` function `lfJSDM()` fits a latent factor joint species distribution model. The latent factor JSDM (`lfJSDM()`) is a standard joint species distribution model that ignores imperfect detection and spatial autocorrelation but accounts for species residual correlations. As in the latent factor multi-species occupancy model, we account for species correlations using a latent factor model, where the latent factors arise from standard normal distributions. This model is analogous to many varieties of non-spatial JSDMs that leverage a factor modeling approach for dimension reduction [@hui2016boral; @ovaskainen2017make]. The model is identical to the spatial factor joint species distribution model implemented in `sfJSDM()`, except the latent factors are assumed to arise from standard normal distributions instead of a latent spatial process. This model is analogous to the latent factor multi-species occupancy model, except here we do not account for imperfect detection.
+
+## Model description
+
+Define $y^*_{i, j}$ as the detection (1) or nondetection (0) of species $i$ at site $j$ for $i = 1, \dots, N$ species at $j = 1, \dots, J$ sites. We assume $y^*_{i, j}$ arises from a Bernoulli process following
+
+\begin{equation}
+  y^*_{i, j} \sim \text{Bernoulli}(\psi_{i, j}),
+\end{equation}
+
+where $\psi_{i, j}$ is the probability of occurrence for species $i$ at site $j$. We model $\psi_{i, j}$ according to
+
+\begin{equation}
+  \text{logit}(\psi_{i, j}) = \bm{x}_j^\top\bm{\beta}_i + \text{w}^*_{i, j}
+\end{equation}
+
+where $\bm{x}_j$ is a $p_{\psi} \times 1$ vector of an intercept and environmental covariates at site $j$, $\bm{\beta}_{i}$ is a $p_{\psi} \times 1$ species-specific coefficient vector (including an intercept parameter), and $\text{w}^*_{i, j}$ is a species-specific latent process. Analogous to the latent factor multi-species occupancy model, we model $\text{w}^*_{i, j}$ using a factor modeling approach, where we have
+
+\begin{equation}
+  \text{w}^*_{i, j} = \bm{\lambda}_i^\top\textbf{w}_j,
+\end{equation}
+
+where $\bm{\lambda}_i$ is the $i$th row of factor loadings from an $N \times q$ matrix $\bm{\Lambda}$, and $\textbf{w}_j$ is a $q \times 1$ vector of independent latent factors at site $j$. We achieve computational improvements and dimension reduction by setting $q << N$. We account for residual species correlations via their individual responses (i.e., loadings) to the $q$ latent factors. We can envision the latent variables $\textbf{w}_j$ as unmeasured site-specific covariates that are treated as random variables in the model estimation procedure. Analogous to the latent factor multi-species occupancy model, we assign a standard normal prior distribution to the latent factors (i.e., we assume each latent factor is independent and arises from a normal distribution with mean 0 and standard deviation 1).
+
+We assume all species-specific parameters ($\beta_{i, t}$ for all $t = 1, \dots, p_{\psi}$) arise from community-level distributions [@dorazio2005; @gelfand2005modelling]. Specifically, we assign a normal prior with mean and variance hyperparameters that represent the community-level average and variance among species-specific effects across the community, respectively. For example, we model the non-spatial component of the species-specific occurrence intercept, $\beta_{i, 1}$, following
+
+\begin{equation}
+  \beta_{i, 1} \sim N(\mu_{\beta_1}, \tau^2_{\beta_1}),
+\end{equation}
+
+where $\mu_{\beta_1}$ is the average intercept across the community, and $\tau^2_{\beta_1}$ is the variability in the species-specific intercepts across the community.
+
+## Gibbs sampler
+
+The Gibbs sampler for the latent factor joint species distribution model is analogous to the updates for all occurrence parameters in the spatial factor multi-species occupancy model except for the latent factors $\bm{w}_j$, with all instances of $\bm{z}_i(\bm{s}_j)$ replaced by $y^*_i(\bm{s}_j)$. See Section \ref{sfMsPGOccSampler}. Additionally, the updates for the latent factors are identical to the updates in the latent factor multi-species occupancy model, again with all instances of $\bm{z}_{i, j}$ replaced by $y^*_i(\bm{s}_j)$. See Section \ref{lfMsPGOccSampler}.
+
+# References {-}
+
diff --git a/vignettes/mcmcSamplers.Rmd b/vignettes/mcmcSamplers.Rmd
index 0895f9d..8be4d6c 100644
--- a/vignettes/mcmcSamplers.Rmd
+++ b/vignettes/mcmcSamplers.Rmd
@@ -30,7 +30,16 @@ knitr::opts_chunk$set(
 
 # Introduction
 
-This vignette provides statistical details on the MCMC algorithms used to fit each occupancy model in `spOccupancy`. We provide detailed descriptions of the joint posterior distributions for each model, how each parameter is updated in the model fitting process, and provide relevant citations to more specific documentation of the approaches where necessary. We also provide information on the composition sampling algorithms used for each model to predict at out-of-sample locations. 
+This vignette provides statistical details on the MCMC algorithms used to fit the core occupancy models in `spOccupancy`. Specifically, this vignette will walk through the MCMC algorithms for the following models:
+
+1. Occupancy model using `PGOcc()`. 
+2. Spatial occupancy model using `spPGOcc()`. 
+3. Multi-species occupancy model using `msPGOcc()`.
+4. Spatial multi-species occupancy model using `spMsPGOcc()`.
+5. Integrated occupancy model using `intPGOcc()`.
+6. Spatial integrated occupancy model using `spIntPGOcc()`. 
+
+We provide detailed descriptions of the joint posterior distributions for each model, how each parameter is updated in the model fitting process, and provide relevant citations to more specific documentation of the approaches where necessary. We also provide information on the composition sampling algorithms used for each model to predict at out-of-sample locations. Details on models in `spOccupancy` that account for species interactions are provided in a separate vignette. 
 
 # Single-species occupancy model (`PGOcc`)
 
diff --git a/vignettes/modelFitting.Rmd b/vignettes/modelFitting.Rmd
index 7149258..cbb8369 100644
--- a/vignettes/modelFitting.Rmd
+++ b/vignettes/modelFitting.Rmd
@@ -34,7 +34,7 @@ This vignette provides worked examples and explanations for fitting single-speci
 5. Integrated occupancy model using `intPGOcc()`.
 6. Spatial integrated occupancy model using `spIntPGOcc()`. 
 
-We fit all occupancy models using P&oacute;lya-Gamma data augmentation [@polson2013] for computational efficiency (**P**&oacute;lya-**G**amma is where the `PG` comes from in all `spOccupancy` model fitting function names). In this vignette, we will provide a brief description of each model, with full statistical details provided in a separate MCMC sampler vignette. We will also show how `spOccupancy` provides functions for posterior predictive checks as a Goodness of Fit assessment, model comparison and assessment using the Widely Applicable Information Criterion (WAIC), k-fold cross-validation, and out of sample predictions using standard R helper functions (e.g., `predict()`).   
+We fit all occupancy models using P&oacute;lya-Gamma data augmentation [@polson2013] for computational efficiency (**P**&oacute;lya-**G**amma is where the `PG` comes from in all `spOccupancy` model fitting function names). In this vignette, we will provide a brief description of each model, with full statistical details provided in a separate [MCMC sampler vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/mcmcSamplers.pdf). We will also show how `spOccupancy` provides functions for posterior predictive checks as a Goodness of Fit assessment, model comparison and assessment using the Widely Applicable Information Criterion (WAIC), k-fold cross-validation, and out of sample predictions using standard R helper functions (e.g., `predict()`).   
 
 To get started, we load the `spOccupancy` package, as well as the `coda` package, which we will use for some MCMC summary and diagnostics. We will also use the `stars` and `ggplot2` packages to create some basic plots of our results. We then set a seed so you can reproduce the same results as we do.
 
@@ -114,7 +114,9 @@ oven.det.formula <- ~ scale(day) + scale(tod) + I(scale(day)^2)
 str(ovenHBEF)
 ```
 
-Next, we specify the initial values for the MCMC sampler in `inits`. `PGOcc()` (and all other `spOccupancy` model fitting functions) will set initial values by default, but here we will do this explicitly, since in more complicated cases setting initial values close to the presumed solutions can be vital for success of an MCMC-based analysis. The default initial values for occurrence and detection regression coefficients (including the intercepts) are random values from the prior distributions adopted in the model, while the default initial values for the latent occurrence effects are set to 1 if the species was observed at a site and 0 otherwise. Initial values are specified in a list with the following tags: `z` (latent occurrence values), `alpha` (detection intercept and regression coefficients), and `beta` (occurrence intercept and regression coefficients). Below we set all initial values of the regression coefficients to 0, and set initial values for `z` based on the detection-nondetection data matrix. For the occurrence (`beta`) and detection (`alpha`) regression coefficients, the initial values are passed either as a vector of length equal to the number of estimated parameters (including an intercept, and in the order specified in the model formula), or as a single value if setting the same initial value for all parameters (including the intercept). Below we take the latter approach. To specify the initial values for the latent occurrence at each site (`z`), we must ensure we set the value to 1 at all sites where OVEN was detected at least once, because if we detect OVEN at a site then the value of `z` is 1 with complete certainty (under the assumption of the model that there are no false positives). If the initial value for `z` is set to 0 at one or more sites when the species was detected, the occupancy model will fail. `spOccupancy` will provide a clear error message if the supplied initial values for `z` are invalid. Below we use the raw detection-nondetection data and the `apply()` function to set the initial values to 1 if OVEN was detected at that site and 0 otherwise. 
+Next, we specify the initial values for the MCMC sampler in `inits`. `PGOcc()` (and all other `spOccupancy` model fitting functions) will set initial values by default, but here we will do this explicitly, since in more complicated cases setting initial values close to the presumed solutions can be vital for success of an MCMC-based analysis. For all models described in this vignette (in particular the non-spatial models), choice of the initial values is largely inconsequential, with the exception being that specifying initial values close to the presumed solutions can decrease the amount of samples you need to run to arrive at convergence of the MCMC chains. Thus, when first running a model in `spOccupancy`, we recommend fitting the model using the default initial values that `spOccupancy` provides, which are based on the prior distributions. After running the model for a reasonable period, if you find the chains are taking a long time to reach convergence, you then may wish to set the initial values to the mean estimates of the parameters from the initial model fit, as this will likely help reduce the amount of time you need to run the model.
+
+The default initial values for occurrence and detection regression coefficients (including the intercepts) are random values from the prior distributions adopted in the model, while the default initial values for the latent occurrence effects are set to 1 if the species was observed at a site and 0 otherwise. Initial values are specified in a list with the following tags: `z` (latent occurrence values), `alpha` (detection intercept and regression coefficients), and `beta` (occurrence intercept and regression coefficients). Below we set all initial values of the regression coefficients to 0, and set initial values for `z` based on the detection-nondetection data matrix. For the occurrence (`beta`) and detection (`alpha`) regression coefficients, the initial values are passed either as a vector of length equal to the number of estimated parameters (including an intercept, and in the order specified in the model formula), or as a single value if setting the same initial value for all parameters (including the intercept). Below we take the latter approach. To specify the initial values for the latent occurrence at each site (`z`), we must ensure we set the value to 1 at all sites where OVEN was detected at least once, because if we detect OVEN at a site then the value of `z` is 1 with complete certainty (under the assumption of the model that there are no false positives). If the initial value for `z` is set to 0 at one or more sites when the species was detected, the occupancy model will fail. `spOccupancy` will provide a clear error message if the supplied initial values for `z` are invalid. Below we use the raw detection-nondetection data and the `apply()` function to set the initial values to 1 if OVEN was detected at that site and 0 otherwise. 
 
 ```{r}
 # Format with explicit specification of inits for alpha and beta
@@ -196,7 +198,9 @@ The function `ppcOcc()` performs a posterior predictive check on all `spOccupanc
 3. Compute a fit statistic on both the actual data and also on the model-generated 'replicate data'.
 4. Compare the fit statistics for the true data and replicate data. If they are widely different, this suggests a lack of fit of the model to the actual data set at hand. 
 
-To peform a posterior predictive check, we send the resulting `PGOcc` model object as input to the `ppcOcc()` function, along with a fit statistic (`fit.stat`) and a numeric value indicating how to group, or bin, the data (`group`). Currently supported fit statistics include the Freeman-Tukey statistic and the Chi-Squared statistic (`freeman-tukey` or `chi-squared`, respectively, @kery2015applied). Currently, `ppcOcc()` allows the user to group the data by row (site; `group = 1`) or column (replicate; `group = 2`). `ppcOcc()` will then return a set of posterior samples for the fit statistic (or discrepancy measure) using the actual data (`fit.y`) and model generated replicate data set (`fit.y.rep`), summed across all data points in the chosen manner. These values can be used with the `summary()` function to generate a Bayesian p-value, which is the probability, under the fitted model, to obtain a value of the fit statistic that is more extreme (i.e., larger) than the one observed, i.e., for the actual data. Bayesian p-values are sensitive to individual values, so we should also explore the discrepancy measures for each "grouped" data point. `ppcOcc()` returns a matrix of posterior quantiles for the fit statistic for both the observed (`fit.y.group.quants`) and model generated, replicate data (`fit.y.rep.group.quants`) for each "grouped" data point. 
+To perform a posterior predictive check, we send the resulting `PGOcc` model object as input to the `ppcOcc()` function, along with a fit statistic (`fit.stat`) and a numeric value indicating how to group, or bin, the data (`group`). Currently supported fit statistics include the Freeman-Tukey statistic and the Chi-Squared statistic (`freeman-tukey` or `chi-squared`, respectively, @kery2015applied). Currently, `ppcOcc()` allows the user to group the data by row (site; `group = 1`) or column (replicate; `group = 2`). `ppcOcc()` will then return a set of posterior samples for the fit statistic (or discrepancy measure) using the actual data (`fit.y`) and model generated replicate data set (`fit.y.rep`), summed across all data points in the chosen manner. We generally recommend performing a posterior predictive check when grouping data both across sites (`group = 1`) as well as across replicates (`group = 2`), as they may reveal (or fail to reveal) different inadequacies of the model for the specific data set at hand [@kery2015applied]. In particular, binning the data across sites (`group = 1`) may help reveal whether the model fails to adequately represent variation in occurrence and detection probability across space, while binning the data across replicates (`group = 2`) may help reveal whether the model fails to adequately represent variation in detection probability across the different replicate surveys. Similarly, we suggest exploring posterior predictive checks using both the Freeman-Tukey Statistic as well as the Chi-Squared statistic. Generally, the more ways we explore the fit of our model to the data, the more confidence we have that our model is adequately representing the data and we can draw reasonable conclusions from it. By performing posterior predictive checks with the two fit statistics and two ways of grouping the data, `spOccupancy` provides us with four different measures that we can use to assess the validity of our model. Throughout this vignette, we will display different types of posterior predictive checks using different combinations of the fit statistic and grouping approach. In a more complete analysis, we would run all four types of posterior predictive checks for each model we fit as a more complete GoF assessment. 
+
+The resulting values from a call to `ppcOcc()` can be used with the `summary()` function to generate a Bayesian p-value, which is the probability, under the fitted model, to obtain a value of the fit statistic that is more extreme (i.e., larger) than the one observed, i.e., for the actual data. Bayesian p-values are sensitive to individual values, so we should also explore the discrepancy measures for each "grouped" data point. `ppcOcc()` returns a matrix of posterior quantiles for the fit statistic for both the observed (`fit.y.group.quants`) and model generated, replicate data (`fit.y.rep.group.quants`) for each "grouped" data point. 
 
 We next perform a posterior predictive check using the Freeman-Tukey statistic grouping the data by sites. We summarize the posterior predictive check with the `summary()` function, which reports a Bayesian p-value. A Bayesian p-value that hovers around 0.5 indicates adequate model fit, while values less than 0.1 or greater than 0.9 suggest our model does not fit the data well [@hobbs2015]. As always with a simulation-based analysis using MCMC, you will get numerically slightly different values. 
 
@@ -224,7 +228,7 @@ diff.fit <- ppc.out$fit.y.rep.group.quants[3, ] - ppc.out$fit.y.group.quants[3,
 plot(diff.fit, pch = 19, xlab = 'Site ID', ylab = 'Replicate - True Discrepancy')
 ```
 
-We see there are four sites that contriubte to the overall GoF measure far more than in proportion to their number, i.e., whose discrepancy measure for the actual data is much larger than the corresponding discrepancy for the replicate data. Here we will ignore this, but in a real analysis it might be very insightful to further explore these sites to see what could explain this pattern (e.g., are the sites close together in space? Could the data be the result of erroneous recording, or of extraordinary local habitat that is not adequately described by the occurrence covariates?). Although rarely done, closer investigations of outlying values in statistical analyses may sometimes teach one more than mere acceptance of a fitting model. 
+We see there are four sites that contribute to the overall GoF measure far more than in proportion to their number, i.e., whose discrepancy measure for the actual data is much larger than the corresponding discrepancy for the replicate data. Here we will ignore this, but in a real analysis it might be very insightful to further explore these sites to see what could explain this pattern (e.g., are the sites close together in space? Could the data be the result of erroneous recording, or of extraordinary local habitat that is not adequately described by the occurrence covariates?). Although rarely done, closer investigations of outlying values in statistical analyses may sometimes teach one more than mere acceptance of a fitting model. 
 
 ## Model selection using WAIC and k-fold cross-validation {#kFold}
 
@@ -238,7 +242,7 @@ $$
 \text{WAIC} = -2 \times (\text{elpd} - \text{pD}), 
 $$
 
-where elpd is the expected log pointwise predictive density and PD is the effective number of parameters. We calculate elpd by calculating the likelihood for each posterior sample, taking the mean of these likelihood values, taking the log of the mean of the likelihood values, and summing these values across all sites. We calculate the effective number of parameters by calculating the variance of the log likelihood for each site taken over all posterior samples, and then summing these values across all sites. See Appendix S1 from @broms2016model for more details. 
+where elpd is the expected log point-wise predictive density and PD is the effective number of parameters. We calculate elpd by calculating the likelihood for each posterior sample, taking the mean of these likelihood values, taking the log of the mean of the likelihood values, and summing these values across all sites. We calculate the effective number of parameters by calculating the variance of the log likelihood for each site taken over all posterior samples, and then summing these values across all sites. See Appendix S1 from @broms2016model for more details. 
 
 We calculate the WAIC using `waicOcc()` for our OVEN model below (as always, note some slight differences with your solutions due to Monte Carlo error).
 
@@ -299,7 +303,7 @@ out.k.fold <- PGOcc(occ.formula = oven.occ.formula,
 We subsequently refit the intercept only occupancy model, and compare the deviance metrics from the 4-fold cross-validation. 
 
 ```{r}
-# Model fitting information is surpressed for space.
+# Model fitting information is suppressed for space.
 out.int.k.fold <- PGOcc(occ.formula = ~ 1,
                         det.formula = oven.det.formula, 
                         data = ovenHBEF,
@@ -327,7 +331,7 @@ Similar to the results from the WAIC, CV also suggests that the model including
 
 ## Prediction
 
-All model objects resulting from a call to `spOccupancy` model-fitting functions can be used with `predict()` to generate a series of posterior predictive samples at new locations, given the values of all covariates used in the model fitting process. The object `hbefElev` (which comes as part of the `spOccupancy` package) contains elevation data at a 30x30m resolution from the National Elevation Dataset across the entire HBEF. We load the data below
+All model objects resulting from a call to `spOccupancy` model-fitting functions can be used with `predict()` to generate a series of posterior predictive samples at new locations, given the values of all covariates used in the model fitting process. The object `hbefElev` (which comes as part of the `spOccupancy` package) contains elevation data at a 30x30m resolution from the National Elevation Data set across the entire HBEF. We load the data below
 
 ```{r}
 data(hbefElev)
@@ -343,9 +347,9 @@ X.0 <- cbind(1, elev.pred, elev.pred^2)
 out.pred <- predict(out, X.0)
 ```
 
-For `PGOcc` objects, the `predict` function takes two arguments: (1) the `PGOcc` fitted model object; and (2) a matrix or data frame consisting of the design matrix for the prediction locations (which must include and intercept if our model contained one). The resulting object consists of posterior predictive samples for the latent occurrence probabilities (`psi.0.samples`) and latent occurrence values (`z.0.samples`). The beauty of the Bayesian paradigm, and the MCMC computing machinery, is that these predictions all have fully propagated uncertainty. We can use these values to create plots of the predicted mean occurrence values, as well as of their standard deviation as a measure of the uncertainty in these predictions aking to a standard error associated with maximum likelihood estimates. We could also produce a map for the Bayesian credible interval length as an alternative measure of prediction uncertainty or two maps, one showing the lower and the other the upper limit of, say, a 95% credible interval.
+For `PGOcc` objects, the `predict` function takes two arguments: (1) the `PGOcc` fitted model object; and (2) a matrix or data frame consisting of the design matrix for the prediction locations (which must include and intercept if our model contained one). The resulting object consists of posterior predictive samples for the latent occurrence probabilities (`psi.0.samples`) and latent occurrence values (`z.0.samples`). The beauty of the Bayesian paradigm, and the MCMC computing machinery, is that these predictions all have fully propagated uncertainty. We can use these values to create plots of the predicted mean occurrence values, as well as of their standard deviation as a measure of the uncertainty in these predictions akin to a standard error associated with maximum likelihood estimates. We could also produce a map for the Bayesian credible interval length as an alternative measure of prediction uncertainty or two maps, one showing the lower and the other the upper limit of, say, a 95% credible interval.
 
-```{r, warning = FALSE, message = FALSE}
+```{r, warning = FALSE, message = FALSE, fig.width = 7, fig.align = 'center', units = 'in'}
 plot.dat <- data.frame(x = hbefElev$Easting, 
                        y = hbefElev$Northing, 
                        mean.psi = apply(out.pred$psi.0.samples, 2, mean), 
@@ -370,7 +374,6 @@ ggplot() +
   theme_bw()
 ```
 
-
 # Single-species spatial occupancy models {#spPGOcc}
 
 ## Basic model description
@@ -395,7 +398,7 @@ When the number of sites is moderately large, say 1000, the above described spat
 
 ## Fitting single-species spatial occupancy models with `spPGOcc()`
 
-The function `spPGOcc()` fits single-species spatial occupancy models using Poacute;lya-Gamma latent variables, where spatial autocorrelation is accounted for using a spatial Gaussian Process. `spPGOcc()` fits spatial occupancy models using either a full Gaussian process or an NNGP. See @finley2020spnngp for details on using NNGPs with Poacute;lya-Gamma latent variables.
+The function `spPGOcc()` fits single-species spatial occupancy models using P&oacute;lya-Gamma latent variables, where spatial autocorrelation is accounted for using a spatial Gaussian Process. `spPGOcc()` fits spatial occupancy models using either a full Gaussian process or an NNGP. See @finley2020spnngp for details on using NNGPs with P&oacute;lya-Gamma latent variables.
 
 We will fit the same occupancy model for OVEN that we fit previously using `PGOcc()`, but we will now make the model spatially explicit by incorporating a spatial process with `spPGOcc()`. First, let's take a look at the arguments for `spPGOcc()`: 
 
@@ -418,7 +421,9 @@ oven.det.formula <- ~ scale(day) + scale(tod) + I(scale(day)^2)
 str(ovenHBEF) # coords is required for spPGOcc.
 ```
 
-The initial values (`inits`) are again specified in a list. Valid tags for initial values now additionally include the parameters associated with the spatial random effects. These include: `sigma.sq` (spatial variance parameter), `phi` (spatial range parameter), `w` (the latent spatial random effects at each site), and `nu` (spatial smoothness parameter), where the latter is only specified if adopting a Matern covariance function (i.e., `cov.model = 'matern'`). `spOccupancy` supports four spatial covariance models (`exponential`, `spherical`, `gaussian`, and `matern`), which are specified in the `cov.model` argument. Here we will use an exponential covariance model. As a initial value for the spatial range parameter `phi`, we compute the mean distance between points in HBEF and then set it equal to 3 divided by this mean distance. When using an exponential covariance function, $\frac{3}{\phi}$ is the effective range, or the distance at which the residual spatial correlation between two sites drops to 0.05 [@banerjee2003]. Thus our initial guess for this effective range is the average distance betweeen sites across HBEF. 
+The initial values (`inits`) are again specified in a list. Valid tags for initial values now additionally include the parameters associated with the spatial random effects. These include: `sigma.sq` (spatial variance parameter), `phi` (spatial range parameter), `w` (the latent spatial random effects at each site), and `nu` (spatial smoothness parameter), where the latter is only specified if adopting a Matern covariance function (i.e., `cov.model = 'matern'`). `spOccupancy` supports four spatial covariance models (`exponential`, `spherical`, `gaussian`, and `matern`), which are specified in the `cov.model` argument. Throughout this vignette, we will use an exponential covariance model, which we often use as our default covariance model when fitting spatially-explicit models and is commonly used throughout ecology. To determine which covariance function to use, we can fit models with the different covariance functions and compare them using WAIC or k-fold cross-validation to select the best performing function. We will note that the Matern covariance function has the additional spatial smoothness parameter $\nu$ and thus can often be more flexible than the other functions. However, because we need to estimate an additional parameter, this also tends to require more data (i.e., a larger number of sites) than the other covariance functions, and so we encourage use of the three simpler functions if your data set is sparse. We note that model estimates are generally fairly robust to the different covariance functions, although certain functions may provide substantially better estimates depending on the specific form of the underlying spatial autocorrelation in the data. For example, the Gaussian covariance function is often useful for accounting for spatial autocorrelation that is very smooth (i.e., long range spatial dependence). See Chapter 2 in @banerjee2003 for a more thorough discussion of these functions and their mathematical properties.
+
+The default initial values for `phi`, `sigma.sq`, and `nu` are all set to random values from the prior distribution, which generally will be sufficient for the models discussed in this vignette. In all spatially-explicit models described in this vignette, the spatial range parameter `phi` is the most sensitive to initial values. In general, the spatial range parameter will often have poor mixing and take longer to converge than the rest of the parameters in the model, so specifying an initial value that is reasonably close to the resulting value can really help decrease run times for complicated models. As an initial value for the spatial range parameter `phi`, we compute the mean distance between points in HBEF and then set it equal to 3 divided by this mean distance. When using an exponential covariance function, $\frac{3}{\phi}$ is the effective range, or the distance at which the residual spatial correlation between two sites drops to 0.05 [@banerjee2003]. Thus our initial guess for this effective range is the average distance between sites across HBEF. As with all other parameters, we generally recommend using the default initial values for an initial model run, and if the model is taking a very long time to converge you can rerun the model with initial values based on the posterior means of estimated parameters from the initial model fit. For the spatial variance parameter `sigma.sq`, we set the initial value to 2. This corresponds to a fairly small spatial variance, which we expect based on our previous work with this data set. Further, we set the initial values of the latent spatial random effects at each site to 0. The initial values for these random effects has an extremely small influence on the model results, so we generally recommend setting their initial values to 0 as we have done here (this is also the default). However, if you are running your model for a very long time and are seeing very slow convergence of the MCMC chains, setting the initial values of the spatial random effects to the mean estimates from a previous run of the model could help reach convergence faster. 
 
 ```{r, tidy = FALSE}
 # Pair-wise distances between all sites
@@ -434,7 +439,7 @@ oven.inits <- list(alpha = 0,
                    w = rep(0, nrow(ovenHBEF$y)))
 ```
 
-The next three arguments (`n.batch`, `batch.length`, and `accept.rate`) are all related to the Adaptive MCMC sampler used when we fit the model. Updates for the spatial range parameter (and the smoothness parameter if `cov.model = 'matern'`) require the use of a Metropolis-Hastings algorithm. We implement an adaptive Metropolis-Hastings algorithm as discussed in @roberts2009examples. This algorithm adjusts the tuning values for each parameter that requires a Metropolis-Hastings update within the sampler itself. This process results in a more efficient sampler than if we were to fix the tuning parameters prior to fitting the model. The parameter `accept.rate` is the target acceptance rate for each parameter, and the algorithm will adjust the tuning parameters to hover around this value. The default value is 0.43, which we suggest leaving as is unless you have a good reason to change it. The tuning parameters are updated after a single "batch". We must specify the total `n.batch` batches, where each "batch" consists of `batch.length` MCMC samples. Thus, the total number of MCMC samples is `n.batch * batch.length`. Typically, we set `batch.length = 25` and then play around with `n.batch` until convergence is reached. Here we set `n.batch = 400` for a total of 10000 MCMC samples in each of 3 chains. We will additionally specify a burn-in period of length 2000 and a thinning rate of 8. Importantly, we also need to specify an initial value for the tuning parameters for the spatial decay and smoothness parameters (if applicable). These values are supplied as input in the form of a list with tags `phi` and `nu`. The initial tuning value can be any value greater than 0, but we recommend starting the value out around 0.5. After some initial runs of the model, if you notice the final acceptance rate of a parameter is much larger or smaller than the target acceptance rate (`accept.rate`), you can then change the initial tuning value to get closer to the target rate. Here we set the initial tuning value for `phi` to 1 after some initial exploratory runs of the model.
+The next three arguments (`n.batch`, `batch.length`, and `accept.rate`) are all related to the specific type of MCMC sampler we use when we fit the model. The spatial range parameter (and the spatial smoothness parameter if `cov.model = 'matern'`) are the two hardest parameters to estimate in spatially-explicit models in `spOccupancy`. In other words, you will often see slow mixing and convergence of the MCMC chains for these parameters. To try to speed up this slow mixing and convergence of these parameters, we use an algorithm called an adaptive Metropolis-Hastings algorithm for all spatially-explicit models in `spOccupancy`(see @roberts2009examples for more details on this algorithm). In this approach, we break up the total number of MCMC samples into a set of "batches", where each batch has a specific number of MCMC samples. When we fit a spatially-explicit model in `spOccupancy`, instead of specifying the total number of MCMC samples in the `n.samples` argument like we did in `PGOcc()`, we must specify the total number of batches (`n.batch`) as well as the number of MCMC samples each batch contains. Thus, the total number of MCMC samples is `n.batch * batch.length`. Typically, we set `batch.length = 25` and then play around with `n.batch` until convergence of all model parameters is reached. We recommend setting `batch.length = 25` unless you have a specific reason to change it. Here we set `n.batch = 400` for a total of 10,000 MCMC samples in each of 3 chains. We additionally specify a burn-in period of length 2000 and a thinning rate of 20. 
 
 ```{r}
 batch.length <- 25
@@ -442,12 +447,19 @@ n.batch <- 400
 n.burn <- 2000
 n.thin <- 20
 n.chains <- 3
+```
+
+Importantly, we also need to specify an acceptance rate and a tuning parameter for the spatial range parameter (and spatial smoothness parameter if `cov.model = 'matern'`), which are both features of the adaptive algorithm we use to sample these parameters. In this algorithm, we propose new values for `phi` (and `nu`), compare them to our previous values, and use a statistical algorithm to determine if we should accept the new proposed value or keep the old one. The `accept.rate` argument specifies the ideal proportion of times we will accept the newly proposed values for these parameters. @roberts2009examples show that if we accept new values around 43% of the time, this will lead to optimal mixing and convergence of the MCMC chains. Following these recommendations, we should strive for an algorithm that accepts new values about 43% of the time. Thus, we recommend setting `accept.rate = 0.43` unless you have a specific reason not to (this is the default value). The values specified in the `tuning` argument helps control the initial values we will propose for both `phi` and `nu`. These values are supplied as input in the form of a list with tags `phi` and `nu`. The initial tuning value can be any value greater than 0, but we generally recommend starting the value out around 0.5. This initial tuning value is closely related to the ideal (or target) acceptance rate we specified in `accept.rate`. In short, the algorithm we use is "adaptive" because the algorithm will change the tuning values after each batch of the MCMC to yield acceptance rates that are close to our target acceptance rate that we specified in the `accept.rate` argument. Information on the acceptance rates for `phi` and `nu` in your model will be displayed when setting `verbose = TRUE`. After some initial runs of the model, if you notice the final acceptance rate is much larger or smaller than the target acceptance rate (`accept.rate`), you can then change the initial tuning value to get closer to the target rate. While use of this algorithm requires us to specify more arguments in spatially-explicit models, this leads to much shorter run times compared to a more simple approach where we do not have an "adaptive" sampling approach, and it should thus save you time in the long haul when waiting for these models to run. For our example here, we set the initial tuning value to 1 after some initial exploratory runs of the model. 
+
+
+```{r}
 oven.tuning <- list(phi = 1)
+# accept.rate = 0.43 by default, so we do not specify it.
 ```
 
-Priors are again specified in a list in the argument `priors`. We assume an inverse gamma prior for the spatial variance parameter `sigma.sq` (the tag of which is `sigma.sq.ig`), and uniform priors for the spatial decay parameter `phi` and smoothness parameter `nu` (if using the Matern correlation function), with the associated tags `phi.unif` and `nu.unif`. The hyperparameters of the inverse Gamma are passed as a vector of length two, with the first and second elements corresponding to the shape and scale, respectively. The lower and upper bounds of the uniform distribution are passed as a two-element vector for the uniform priors. 
+Priors are again specified in a list in the argument `priors`. We follow standard recommendations for prior distributions from the spatial statistics literature [@banerjee2003]. We assume an inverse gamma prior for the spatial variance parameter `sigma.sq` (the tag of which is `sigma.sq.ig`), and uniform priors for the spatial decay parameter `phi` and smoothness parameter `nu` (if using the Matern correlation function), with the associated tags `phi.unif` and `nu.unif`. The hyperparameters of the inverse Gamma are passed as a vector of length two, with the first and second elements corresponding to the shape and scale, respectively. The lower and upper bounds of the uniform distribution are passed as a two-element vector for the uniform priors. 
 
-Note that the priors for the spatial parameters in a spatially-explicit model must be at least weakly informative for the model to converge [@banerjee2003]. For the inverse-Gamma prior on the spatial variance, we typically set the shape parameter to 2 and the scale parameter equal to our best guess of the spatial variance. Based on our previous work with these data, we expected the residual spatial variation to be relatively small, and so we set the scale parameter below to 1. For the spatial decay parameter, we determine the bounds of the uniform distribution by computing the smallest distance between sites and the largest distance between sites. We then set the lower bound of the uniform to `3/max` and the upper bound to `3/min`, where `min` and `max` correspond to the predetermined distances between sites. 
+Note that the priors for the spatial parameters in a spatially-explicit model must be at least weakly informative for the model to converge [@banerjee2003]. For the inverse-Gamma prior on the spatial variance, we typically set the shape parameter to 2 and the scale parameter equal to our best guess of the spatial variance. The default prior hyperparameter values for the spatial variance $\sigma^2$ are a shape parameter of 2 and a scale parameter of 1. This weakly informative prior suggests a prior mean of 1 for the spatial variance, which is a moderately small amount of spatial variation. Based on our previous work with these data, we expected the residual spatial variation to be relatively small, and so we use this default value and set the scale parameter below to 1. For the spatial decay parameter, our default approach is to set the lower and upper bounds of the uniform prior based on the minimum and maximum distances between sites in the data. More specifically, by default we set the lower bound to `3 / max` and the upper bound to `3 / min`, where `min` and `max` are the minimum and maximum distances between sites in the data set, respectively. This equates to a vague prior that states the spatial autocorrelation in the data could only exist between sites that are very close together, or could span across the entire observed study area. If additional information is known on the extent of the spatial autocorrelation in the data, you may place more restrictive bounds on the uniform prior, which would reduce the amount of time needed for adequate mixing and convergence of the MCMC chains. Here we use this default approach, but will explicit set the values for transparency. 
 
 ```{r}
 min.dist <- min(dist.hbef)
@@ -497,7 +509,7 @@ All Rhat values are less than 1.1, indicating we have reached adequate convergen
 
 ## Posterior predictive checks
 
-For our posterior predictive check, we send the `spPGOcc` model object to the `ppcOcc()` function, this time grouping by replicate, or survey occassion, (`group = 2`) instead of by site (`group = 1`). 
+For our posterior predictive check, we send the `spPGOcc` model object to the `ppcOcc()` function, this time grouping by replicate, or survey occasion, (`group = 2`) instead of by site (`group = 1`). 
 
 ```{r}
 ppc.sp.out <- ppcOcc(out.sp, fit.stat = 'freeman-tukey', group = 2)
@@ -524,7 +536,7 @@ k-fold cross-validation is accomplished by specifying the `k.fold` argument in `
 
 Finally, we can perform out of sample prediction using the `predict` function just as before. Prediction for spatial models is more computationally intensive than for non-spatial models, and so the `predict` function for `spPGOcc` class objects also has options for parallelization (`n.omp.threads`) and reporting sampler progress (`verbose` and `n.report`). Note that for `spPGOcc()`, you also need to supply the coordinates of the out of sample prediction locations in addition to the covariate values.
 
-```{r}
+```{r, warning = FALSE, message = FALSE, fig.width = 7, fig.align = 'center', units = 'in'}
 # Do prediction. 
 coords.0 <- as.matrix(hbefElev[, c('Easting', 'Northing')])
 # Approx. run time: 6 min
@@ -590,7 +602,7 @@ where $p_{i, j, k}$ is the probability of detecting species $i$ at site $j$ duri
 
 where $\bm{\mu}_{\alpha}$ is a vector of community level mean effects for each detection covariate effect (including the intercept) and $\bm{T}_{\alpha}$ is a diagonal matrix with diagonal elements $\bm{\tau}^2_{\alpha}$ that represent the variability of each detection covariate effect among species in the community.
 
-To complete the Bayesian specification of the model, we assign multivariate normal priors for the occurrence ($\bm{\mu}_{\beta}$) and detection ($\bm{\mu}_{\alpha}$) community-level regression coefficient means and independent inverse-Gamma priors for each element of $\bm{\tau}^2_{\beta}$ and $\bm{\tau}^2_{\alpha}$. We again use Poacute;lya-Gamma data augmentation to yield an efficient implementation of the multi-species occupancy model, which is described in depth in the MCMC sampler vignette. 
+To complete the Bayesian specification of the model, we assign multivariate normal priors for the occurrence ($\bm{\mu}_{\beta}$) and detection ($\bm{\mu}_{\alpha}$) community-level regression coefficient means and independent inverse-Gamma priors for each element of $\bm{\tau}^2_{\beta}$ and $\bm{\tau}^2_{\alpha}$. We again use P&oacute;lya-Gamma data augmentation to yield an efficient implementation of the multi-species occupancy model, which is described in depth in the MCMC sampler vignette. 
 
 ## Fitting multi-species occupancy models with `msPGOcc()`
 
@@ -633,7 +645,7 @@ ms.inits <- list(alpha.comm = 0,
 
 In multi-species models, we specify priors on the community-level coefficients (or hyperparameters) rather than the species-level effects.  For nonspatial models, these priors are specified with the following tags: `beta.comm.normal` (normal prior on the community-level occurrence mean effects), `alpha.comm.normal` (normal prior on the community-level detection mean effects), `tau.sq.beta.ig` (inverse-Gamma prior on the community-level occurrence variance parameters), `tau.sq.alpha.ig` (inverse-Gamma prior on the community-level detection variance parameters). Each tag consists of a list with elements corresponding to the mean and variance for normal priors and scale and shape for inverse-Gamma priors. Values can be specified individually for each parameter or as a single value if the same prior is assigned to all parameters of a given type. 
 
-Below we specify normal priors to be relatively vague on the probability scale with a mean of 0 and a variance of 2.72, and specify vague inverse gamma priors on the community-level variance parameters setting both the shape and scale parameters to 0.1. 
+By default, we set the prior hyperparameter values for the community-level means to a mean of 0 and a variance of 2.72, which results in a vague prior on the probability scale as we discussed for the single-species spatial occupancy model. Below we specify these priors explicitly. For the community-level variance parameters, by default we set the scale and shape parameters to 0.1 following the recommendations of [@lunn2013bugs], which results in a weakly informative prior on the community-level variances. This may lead to shrinkage of the community-level variance towards zero under certain circumstances so that all species will have fairly similar values for the species-specific covariate effect [@gelman2006prior], although we have found multi-species occupancy models to be relatively robust to this prior specification. However, if you want to explore the impact of this prior on species-specific covariate effects, we recommend running single-species occupancy models for a select number of species in the data set, and assessing how large the differences in the estimated species-specific effects are between the single-species model estimates and those from the multi-species model. Below we explicitly set these default prior values for our HBEF example. 
 
 ```{r}
 ms.priors <- list(beta.comm.normal = list(mean = 0, var = 2.72),
@@ -744,7 +756,7 @@ occ.ms.sp.formula <- ~ scale(Elevation) + I(scale(Elevation)^2)
 det.ms.sp.formula <- ~ scale(day) + scale(tod) + I(scale(day)^2)
 ```
 
-Our initial values in the `inits` argument will look analogous to what we had specified for the nonspatial multi-species occupancy model using `msPGOcc()`, but we will now also include additional initial values for the parameters controlling the spatial processes: `sigma.sq` is the species-specific spatial variance parameter, `phi` is the species specific spatial decay parameter, and `w` is the latent spatial proccess for each species at each site. We will use an exponential covariance model, but note that when using a Matern covariance model we must also specify initial values for `nu`, the species-specific spatial smoothness parameter. Note that all species-specific spatial parameters are independent of each other. We currently do not leverage any correlation between spatial processes of different species, although this is something we plan to incorporate for future `spOccupancy` development. Initial values for `phi`, `sigma.sq`, and `nu` (if applicable) are specified as vectors with $N$ elements (the number of species being modeled) or as a single value that is used for all species, while the initial values for the latent spatial processes are specified as a matrix with $N$ rows (i.e., species) and $J$ columns (i.e., sites). Here we set the initial value for the spatial variances equal to 2 for all species and set the initial values for the spatial decay parameter to yield an effective range of the average distance between sites across the HBEF.
+Our initial values in the `inits` argument will look analogous to what we had specified for the nonspatial multi-species occupancy model using `msPGOcc()`, but we will now also include additional initial values for the parameters controlling the spatial processes: `sigma.sq` is the species-specific spatial variance parameter, `phi` is the species specific spatial decay parameter, and `w` is the latent spatial process for each species at each site. We will use an exponential covariance model, but note that when using a Matern covariance model we must also specify initial values for `nu`, the species-specific spatial smoothness parameter. Note that all species-specific spatial parameters are independent of each other. We currently do not leverage any correlation between spatial processes of different species, although this is something we plan to incorporate for future `spOccupancy` development. Initial values for `phi`, `sigma.sq`, and `nu` (if applicable) are specified as vectors with $N$ elements (the number of species being modeled) or as a single value that is used for all species, while the initial values for the latent spatial processes are specified as a matrix with $N$ rows (i.e., species) and $J$ columns (i.e., sites). Here we set the initial value for the spatial variances equal to 2 for all species and set the initial values for the spatial decay parameter to yield an effective range of the average distance between sites across the HBEF.
 
 ```{r}
 # Number of species
@@ -765,7 +777,7 @@ ms.inits <- list(alpha.comm = 0,
                  w = matrix(0, N, dim(hbef2015$y)[2]))
 ```
 
-We next specify the priors in the `priors` argument. The priors are the same as those we specified for the non-spatial multi-species model, with the addition of priors for the parameters controlling the species-specific spatial processes. We assume independent priors for all spatial parameters across the different species. For each species, we assign an inverse gamma prior for the spatial variance parameter `sigma.sq` (tag is `sigma.sq.ig`) and uniform priors for the spatial decay parameter `phi` and smoothness parameter `nu` (if `cov.model = 'matern'`), with the associated tags `phi.unif` and `nu.unif`. All priors are specified as lists with two elements. For the inverse-Gamma prior, the first element is a length $N$ vector of shape parameters for each species, and the second element is a length $N$ vector of scale parameters for each species. As we have seen many times before, if the same prior is used for all species, both elements can be specified as single values. For the uniform priors, the first element is a length $N$ vector of the lower bounds for each species, and the second element is a length $N$ vector of upper bounds for each species. If the same prior is used for all species, both the lower and upper bounds can be specified as single values. For the inverse-Gamma prior on the spatial variances, here we set the shape parameter to 2 and the scale parameter also equal to 2. For a more formal analysis, we would likely want to do some exploratory data analysis to obtain a better guess for the spatial variance for each species, and then replace the scale parameter with this estimated guess for each species. For the spatial decay parameter, we determine the bounds of the uniform distribution by computing the smallest distance between sites and the largest distance between sites. We then set the lower bound of the uniform to `3/max` and the upper bound to `3/min`, where `min` and `max` correspond to the predetermined distances between sites. 
+We next specify the priors in the `priors` argument. The priors are the same as those we specified for the non-spatial multi-species model, with the addition of priors for the parameters controlling the species-specific spatial processes. We assume independent priors for all spatial parameters across the different species. Priors (and their default values in `spOccupancy`) for the spatial parameters are exactly analogous to those we saw for the single-species spatially-explicit occupancy model. For each species, we assign an inverse gamma prior for the spatial variance parameter `sigma.sq` (tag is `sigma.sq.ig`) and uniform priors for the spatial decay parameter `phi` and smoothness parameter `nu` (if `cov.model = 'matern'`), with the associated tags `phi.unif` and `nu.unif`. All priors are specified as lists with two elements. For the inverse-Gamma prior, the first element is a length $N$ vector of shape parameters for each species, and the second element is a length $N$ vector of scale parameters for each species. As we have seen many times before, if the same prior is used for all species, both elements can be specified as single values. For the uniform priors, the first element is a length $N$ vector of the lower bounds for each species, and the second element is a length $N$ vector of upper bounds for each species. If the same prior is used for all species, both the lower and upper bounds can be specified as single values. For the inverse-Gamma prior on the spatial variances, here we set the shape parameter to 2 and the scale parameter also equal to 2. For a more formal analysis, we may want to do some exploratory data analysis to obtain a better guess for the spatial variance for each species, and then replace the scale parameter with this estimated guess for each species. For the spatial decay parameter, we determine the bounds of the uniform distribution by computing the smallest distance between sites and the largest distance between sites. We then set the lower bound of the uniform to `3/max` and the upper bound to `3/min`, where `min` and `max` correspond to the predetermined distances between sites. Again, these are the default hyperparameter values for the prior distribution of `phi` that `spOccupancy` will assign.
 
 ```{r}
 # Minimum value is 0, so need to grab second element.
@@ -886,11 +898,11 @@ We do not directly observe $z_j$, but rather we observe an imperfect representat
 
 where $p_{r, a, k}$ is the probability of detecting a species at site $a$ during replicate $k$ (given it is present at site $a$) for data source $r$, which is a function of site, replicate, and data source specific covariates $\bm{V}_r$ and a vector of regression coefficients specific to each data source ($\bm{\alpha}_r$). Note that $z_{j[a]}$ is the true occurrence status at site $j$ corresponding to the $a$th data source site in the given data set $r$. Each data source may be available at all $J$ sites in the region of interest or at a subset of the $J$ sites. Additionally, data sources can overlap in the sites they sample, or they can be obtained at distinct sites within all $J$ sites of interest in the overall region.
 
-We assume multivariate normal priors for the occurrence ($\bm{\beta}$) and data-set specific detection ($\bm{\alpha}$) regression coefficients to complete the Bayesian specification of a single-species occupancy model. P&oacute;lya-Gamma data augmentation is implemented in an analgous manner to that of the previous models to yield an efficient implementation of integrated occupancy models.
+We assume multivariate normal priors for the occurrence ($\bm{\beta}$) and data-set specific detection ($\bm{\alpha}$) regression coefficients to complete the Bayesian specification of a single-species occupancy model. P&oacute;lya-Gamma data augmentation is implemented in an analogous manner to that of the previous models to yield an efficient implementation of integrated occupancy models.
 
 ## Example data sources: Ovenbird occurrence in the White Mountain National Forest
 
-To illustrate an integrated occupancy model, we will use two data sets that come from the White Mountain National Forest (WMNF) in New Hampshire and Maine, USA. Our goal is to model the occurrence of OVEN in the WMNF in 2015. Our first data source is the HBEF data set we have used to illustrate all single data source models. Our second data source comes from the National Ecological Observatory Network (NEON) at Bartlett Experimental Forest [@barnett2019terrestrial; @neonData]. The Barlett Forest and HBEF both lie within the larger WMNF. Suppose we are interested in OVEN occurrence across the entire WMNF. By leveraging both data sources in a single integrated model, we will expand the range of covariates across which we can make reliable predictions, and may obtain results that are more indicative across the entire region of interest and not just a single data source location [@doser2021ICOM]. In this particular case, there is no overlap between the two data sources (i.e., Bartlett Forest and HBEF do not overlap spatially). However, the integrated occupancy models fit by `spOccupancy` can integrate data sources with no overlap, partial overlap, or complete overlap. 
+To illustrate an integrated occupancy model, we will use two data sets that come from the White Mountain National Forest (WMNF) in New Hampshire and Maine, USA. Our goal is to model the occurrence of OVEN in the WMNF in 2015. Our first data source is the HBEF data set we have used to illustrate all single data source models. Our second data source comes from the National Ecological Observatory Network (NEON) at Bartlett Experimental Forest [@barnett2019terrestrial; @neonData]. The Bartlett Forest and HBEF both lie within the larger WMNF. Suppose we are interested in OVEN occurrence across the entire WMNF. By leveraging both data sources in a single integrated model, we will expand the range of covariates across which we can make reliable predictions, and may obtain results that are more indicative across the entire region of interest and not just a single data source location [@doser2021ICOM]. In this particular case, there is no overlap between the two data sources (i.e., Bartlett Forest and HBEF do not overlap spatially). However, the integrated occupancy models fit by `spOccupancy` can integrate data sources with no overlap, partial overlap, or complete overlap. 
 
 The NEON data are provided along with `spOccupancy` in the `neon2015` list. We load the NEON data along with the HBEF data below
 
@@ -1036,7 +1048,7 @@ The `summary` function for integrated models returns the detection parameters se
 
 ## Posterior predictive checks
 
-We perform posterior predictive checks using `ppcOcc()` as before. GoF assessment for integrated models is an active area of research and no consensus has so far appeared on the best approach. In `spOccupancy`, we compute posterior predictive checks separately for each dataset in the integrated model. 
+We perform posterior predictive checks using `ppcOcc()` as before. GoF assessment for integrated models is an active area of research and no consensus has so far appeared on the best approach. In `spOccupancy`, we compute posterior predictive checks separately for each data set in the integrated model. 
 
 ```{r}
 ppc.int.out <- ppcOcc(out.int, 'freeman-tukey', group = 2)
@@ -1133,7 +1145,7 @@ X.0 <- cbind(1, elev.pred, elev.pred^2)
 out.int.pred <- predict(out.int, X.0)
 ```
 
-```{r, warning = FALSE, message = FALSE}
+```{r, warning = FALSE, message = FALSE, fig.width = 7, fig.align = 'center', units = 'in'}
 # Producing an SDM for HBEF alone (posterior mean)
 plot.dat <- data.frame(x = hbefElev$Easting, 
                        y = hbefElev$Northing, 
@@ -1239,7 +1251,7 @@ We see the occurrence and detection parameters have converged. We should run the
 
 ## Posterior predictive checks
 
-Below we perform a poseterior predictive check for each of the data sets included in the occupancy model using `ppcOcc()`. 
+Below we perform a posterior predictive check for each of the data sets included in the occupancy model using `ppcOcc()`. 
 
 ```{r}
 ppc.sp.int.out <- ppcOcc(out.sp.int, 'freeman-tukey', group = 2)
diff --git a/vignettes/randomEffects.Rmd b/vignettes/randomEffects.Rmd
index aeec1aa..59303ce 100644
--- a/vignettes/randomEffects.Rmd
+++ b/vignettes/randomEffects.Rmd
@@ -25,19 +25,18 @@ knitr::opts_chunk$set(
 
 # Introduction
 
-This vignette details how to include random intercepts when fitting single-species or multi-species occupancy models in `spOccupancy`. For an introduction to the basic `spOccupancy` functionality, see the [introductory vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting). As of v0.3.0, `spOccupancy` supports random intercepts in the occurrence and detection portions of all single-species and multi-species occupancy models. That is, random intercepts are supported in `PGOcc`, `spPGOcc`, `msPGOcc`, `spMsPGOcc`, `lfMsPGOcc`, `sfMsPGOcc`, `lfJSDM`, and `sfJSDM`. Future updates will allow for random intercepts in integrated occupancy models (approx. summer 2022) as well as random slopes in all models (approx. fall 2022). Here I show how to simulate data for a spatially explicit single-species occupancy model with random intercepts in occurrence and detection using `simOcc`, and subsequently show how to include random intercepts using `lme4` syntax [@bates2015] with `spPGOcc`. Random intercepts are included in all other single-species and multi-species models in an analogous manner. 
+This vignette details how to include random intercepts when fitting single-species or multi-species occupancy models in `spOccupancy`. For an introduction to the basic `spOccupancy` functionality, see the [introductory vignette](https://www.jeffdoser.com/files/spoccupancy-web/articles/modelfitting). As of v0.3.0, `spOccupancy` supports random intercepts in the occurrence and detection portions of all single-species and multi-species occupancy models. That is, random intercepts are supported in `PGOcc()`, `spPGOcc()`, `msPGOcc()`, `spMsPGOcc()`, `lfJSDM()`, `sfJSDM()`, `lfMsPGOcc()`, and `sfMsPGOcc()`. Future updates will allow for random intercepts in integrated occupancy models (approx. summer 2022) as well as random slopes in all models (approx. early winter 2022). Here I show how to simulate data for a spatially explicit single-species occupancy model with random intercepts in occurrence and detection using `simOcc()`, and subsequently show how to include random intercepts using `lme4` syntax [@bates2015] with `spPGOcc()`. Random intercepts are included in all other single-species and multi-species models in an analogous manner. 
 
-# Simulating data with `simOcc`
+# Simulating data with `simOcc()`
 
-The function `simOcc` simulates single-species detection-nondetection data. `simOcc` has the following arguments. 
+The function `simOcc()` simulates single-species detection-nondetection data. `simOcc()` has the following arguments. 
 
 ```{r, eval = FALSE}
-simOcc(J.x, J.y, n.rep, beta, alpha, psi.RE = list(), 
-       p.RE = list(), sp = FALSE, cov.model, sigma.sq, 
-       phi, nu, ...)
+simOcc(J.x, J.y, n.rep, beta, alpha, psi.RE = list(), p.RE = list(), 
+       sp = FALSE, cov.model, sigma.sq, phi, nu, ...)
 ```
 
-`J.x` and `J.y` indicate the number of spatial locations to simulate data along a horizontal and vertical axis, respectively, such that `J.x * J.y` is the total number of sites (i.e., `J`). `n.rep` is a numeric vector of length `J` that indicates the number of replicates at each of the J sites. `beta` and `alpha` are numeric vectors containing the intercept and any regression coefficient parameters for the occurrence and detection portions of the occupancy model, respectively. `psi.RE` and `p.RE` are lists that are used to specify random intercepts on occurrence and detection, respectively. These are only specified when we want to simulate data with random intercepts. Each list should be comprised of two tags: `levels`, a vector that specifies the number of levels for each random effect included in the model, and `sigma.sq.psi` or `sigma.sq.p`, which specify the variances of the random effects for each random effect included in the model. `sp` is a logical value indicating whether to simulate data with a spatial Gaussian process. `cov.model` specifies the covariance function used to model the spatial dependence structure, with supported values of `exponential`, `matern`, `spherical`, and `gaussian`. Finally, `sigma.sq` is the spatial variance parameter, `phi` is the spatial range parameter, and `nu` is the spatial smoothness parameter (only applicable when `cov.model = 'matern'`. Below we simulate data across 225 sites with 1-4 replicates at a given site, a single covariate effect on occurrence, a single covariate effect on detection, spatial autocorrelation following a spherical correlation function, and a random intercept on occurrence and detection. 
+`J.x` and `J.y` indicate the number of spatial locations to simulate data along a horizontal and vertical axis, respectively, such that `J.x * J.y` is the total number of sites (i.e., `J`). `n.rep` is a numeric vector of length `J` that indicates the number of replicates at each of the J sites. `beta` and `alpha` are numeric vectors containing the intercept and any regression coefficient parameters for the occurrence and detection portions of the occupancy model, respectively. `psi.RE` and `p.RE` are lists that are used to specify random intercepts on occurrence and detection, respectively. These are only specified when we want to simulate data with random intercepts. Each list should be comprised of two tags: `levels`, a vector that specifies the number of levels for each random effect included in the model, and `sigma.sq.psi` or `sigma.sq.p`, which specify the variances of the random effects for each random effect included in the model. `sp` is a logical value indicating whether to simulate data with a spatial Gaussian process. `cov.model` specifies the covariance function used to model the spatial dependence structure, with supported values of `exponential`, `matern`, `spherical`, and `gaussian`. Finally, `sigma.sq` is the spatial variance parameter, `phi` is the spatial range parameter, and `nu` is the spatial smoothness parameter (only applicable when `cov.model = 'matern'`). Below we simulate data across 225 sites with 1-4 replicates at a given site, a single covariate effect on occurrence, a single covariate effect on detection, spatial autocorrelation following a spherical correlation function, and a random intercept on occurrence and detection. 
 
 ```{r}
 library(spOccupancy)
@@ -52,9 +51,9 @@ n.rep <- sample(1:4, J, replace = TRUE)
 beta <- c(0, 0.7)
 # Intercept and covariate effect on detection
 alpha <- c(-0.5, 0.2)
-# Single random intercept on occurrence.
+# Single random intercept on occurrence
 psi.RE <- list(levels = 10, sigma.sq.psi = 2.5)
-# Single random intercept on detection. 
+# Single random intercept on detection 
 p.RE <- list(levels = 25, sigma.sq.p = 1.5)
 # Spatial range
 phi <- 3 / .7
@@ -62,9 +61,9 @@ phi <- 3 / .7
 sigma.sq <- 2
 # Simulate the data
 dat <- simOcc(J.x = J.x, J.y = J.y, n.rep = n.rep, beta = beta, 
-	      alpha = alpha, psi.RE = psi.RE, p.RE = p.RE, 
-	      sp = TRUE, sigma.sq = sigma.sq, phi = phi, 
-	      cov.model = 'spherical')
+              alpha = alpha, psi.RE = psi.RE, p.RE = p.RE, 
+              sp = TRUE, sigma.sq = sigma.sq, phi = phi, 
+              cov.model = 'spherical')
 ```
 
 For the occurrence random effect, we assumed there were 10 levels and a variance of 2.5. For example, we could suppose these levels corresponded to different administrative units across the 225 sites we simulated, and we want to account for potential correlation between sites within each of the units. For the detection random effect, we assumed there were 25 levels and a variance of 1.5. For example, we could suppose there were 25 different observers that collected the data, and we wanted to account for variation in observer skill (and thus detection probability) across the different observers.
@@ -77,7 +76,7 @@ str(dat)
 
 The simulated data object consists of the following objects: `X` (the occurrence design matrix), `X.p` (the detection design matrix), `coords` (the spatial coordinates of each site), `w` (the latent spatial process, `psi` (occurrence probability), `z` (the latent occupancy status), `y` (the detection-nondetection data, `X.p.re` (the detection random effect levels for each site), `X.re` (the occurrence random effect levels for each site), `alpha.star` (the detection random effects for each level of the random effect), `beta.star` (the occurrence random effects for each level of the random effect). 
 
-```{r}
+```{r, fig.width = 5, fig.height = 5, fig.align = 'center', units = 'in'}
 # Detection-nondetection data
 y <- dat$y
 # Occurrence design matrix for fixed effects
@@ -122,26 +121,26 @@ head(occ.covs)
 
 One important thing to note about including random effects in `spOccupancy` is that we must supply the random effects in as numeric values. Notice in the `occ.factor.1` column in `occ.covs`, the random effect levels are specified as a numeric value, which indicates the specific level of the random effect at that given site. `spOccupancy` will return an informative error if we supply random effects as factors or character vectors and will tell us to convert them to numeric in order to fit the model.  
 
-# Fit the model using `spPGOcc`
+# Fit the model using `spPGOcc()`
 
-We now fit the model using `spPGOcc`. Random effects are included in the model formulas using standard `lme4` notation. Below we run a spatially-explicit occupancy model for 400 batches each of length 25 (10000 total MCMC iterations) using a spherical correlation function. We use the default intial values, priors, and tuning values that `spOccupancy` provides. We use a Nearest Neighbor Gaussian Process with 5 neighbors. 
+We now fit the model using `spPGOcc()`. Random effects are included in the model formulas using standard `lme4` notation. Below we run a spatially-explicit occupancy model for 400 batches each of length 25 (10000 total MCMC iterations) using a spherical correlation function. We use the default intial values, priors, and tuning values that `spOccupancy` provides. We use a Nearest Neighbor Gaussian Process with 5 neighbors and the spherical spatial correlation function. 
 
 ```{r}
 n.batch <- 400
 batch.length <- 25
 out.full <- spPGOcc(occ.formula = ~ occ.cov.1 + (1 | occ.factor.1), 
-	            det.formula = ~ det.cov.1 + (1 | det.factor.1), 
-	            data = data.list, 
-	            n.batch = n.batch, 
-	            batch.length = batch.length, 
-	            accept.rate = 0.43, 
-	            cov.model = "spherical", 
-	            NNGP = TRUE, 
-	            n.neighbors = 5,
-	            n.report = 100, 
-	            n.burn = 2000, 
-	            n.thin = 4, 
-	            n.chains = 2) 
+                    det.formula = ~ det.cov.1 + (1 | det.factor.1), 
+                    data = data.list, 
+                    n.batch = n.batch, 
+                    batch.length = batch.length, 
+                    accept.rate = 0.43, 
+                    cov.model = "spherical", 
+                    NNGP = TRUE, 
+                    n.neighbors = 5,
+                    n.report = 100, 
+                    n.burn = 2000, 
+                    n.thin = 4, 
+                    n.chains = 2) 
 summary(out.full)
 ```
 
@@ -149,25 +148,25 @@ The summary of the model output shows our model recovers the values we used to s
 
 ```{r}
 out.no.re <- spPGOcc(occ.formula = ~ occ.cov.1, 
-	             det.formula = ~ det.cov.1, 
-	             data = data.list, 
-	             n.batch = n.batch, 
-	             batch.length = batch.length, 
-	             accept.rate = 0.43, 
-	             cov.model = "spherical", 
-	             NNGP = TRUE, 
-	             n.neighbors = 5,
-	             n.report = 100, 
-	             n.burn = 2000, 
-	             n.thin = 2, 
-	             n.chains = 2) 
+                     det.formula = ~ det.cov.1, 
+                     data = data.list, 
+                     n.batch = n.batch, 
+                     batch.length = batch.length, 
+                     accept.rate = 0.43, 
+                     cov.model = "spherical", 
+                     NNGP = TRUE, 
+                     n.neighbors = 5,
+                     n.report = 100, 
+                     n.burn = 2000, 
+                     n.thin = 2, 
+                     n.chains = 2) 
 waicOcc(out.full)
 waicOcc(out.no.re)
 ```
 
-We see the WAIC is substantially smaller for the model that includes the occurrence and detection random effects. Finally, let's look at the predicted occurrence values from the model to see how they compare to the values we used to simulate the data. 
+We see the WAIC is substantially smaller for the model that includes the occurrence and detection random effects. Finally, let's look at the predicted occurrence values from the full model to see how they compare to the values we used to simulate the data. 
 
-```{r}
+```{r, fig.width = 7, fig.height = 5, fig.align = 'center', units = 'in'}
 par(mfrow = c(1, 2))
 plot(coords, type = "n", xlab = "", ylab = "", asp = TRUE, main = "Simulated Occurrence", 
      bty = 'n')
@@ -179,7 +178,35 @@ plot(coords, type = "n", xlab = "", ylab = "", asp = TRUE, main = "Predicted Occ
 points(coords, pch=15, cex = 2.1, col = rgb(0,0,0,(psi.means-min(psi.means))/diff(range(psi.means))))
 ```
 
-TODO: need to add in something about prediction and how that is slightly complicated with random effects. 
+Just like with models with only fixed effects, we can predict new values of occurrence or detection probability given a set of covariate values and spatial coordinates using the `predict()` function. However, prediction becomes a bit more complicated when we have non-structured random effects in the model, as we could imagine predicting at observed levels of the random effect, or predicting at new levels of the random effect. `spOccupancy` allows for prediction at both observed levels and new levels of random effects, and also allows for prediction to take into account the random effects, or simply ignore the random effects when making predictions and just generate predictions using the fixed effects. All `predict()` functions for `spOccupancy` objects contain the argument `ignore.RE`, which is a logical value that takes value `TRUE` or `FALSE`. When `ignore.RE = FALSE` (the default), both sampled levels and non-sampled levels of random effects are supported for prediction. For sampled levels, the posterior distribution for the random intercept corresponding to that level of the random effect will be used in the prediction, which will likely result in a more accurate estimate of occurrence/detection probability for that site. For non-sampled levels, random values are drawn from a normal distribution using the posterior samples of the random effect variance, which results in fully propagated uncertainty in predictions with models that incorporate random effects. Alternatively, if `ignore.RE = TRUE`, the random effects will not be used for prediction and predictions will simply be generated using the fixed effects in the model. 
+
+Suppose we wish to predict occurrence at a new site, which has spatial coordinates `(0.32, 0.55)`, a value of `0.34` for our occurrence covariate (`occ.cov.1`) and a random effect level of 5 (which is a level that is sampled in our original data set). Below we predict occurrence at this new site, using the random effect in the prediction by setting `ignore.RE = FALSE` (which we could just not specify since it is the default). Note that when predicting using random effects, the column name of the random effect must match the name of the random effect used when fitting the model. 
+
+```{r}
+# Create the design matrix for the new site
+X.0 <- cbind(1, 0.34, 5)
+# Make sure column names of random effects align with how we fit the model. 
+colnames(X.0) <- c('intercept', 'occ.cov.1', 'occ.factor.1')
+# Coordinates of new cite
+coords.0 <- cbind(0.32, 0.55)
+# Predict at new site
+pred.out <- predict(out.full, X.0, coords.0, ignore.RE = FALSE, verbose = FALSE)
+str(pred.out)
+# Get summary of occurrence probability at new site
+summary(pred.out$psi.0.samples)
+```
+
+Alternatively, we can just predict occurrence probability at the new site using the fixed effects only by setting `ignore.RE = TRUE`. 
+
+```{r}
+# Remove random effect from design matrix
+X.0 <- X.0[, -3, drop = FALSE] 
+pred.no.re <- predict(out.full, X.0, coords.0, ignore.RE = TRUE, verbose = FALSE)
+# Get summary of occurrence probability
+summary(pred.no.re$psi.0.samples)
+```
+
+Notice that here we see a large discrepancy between the predicted occurrence probability when we use the random effect and when we don't use the random effect. Predicting with random effects will allow you to fully propagate uncertainty in our predictions, but may not be desired if predicting at new locations where the level of the random effect is unknown or not sampled in the data. 
 
 # References {-}
 
diff --git a/vignettes/referencesJSDM.bib b/vignettes/referencesJSDM.bib
new file mode 100644
index 0000000..ad991e2
--- /dev/null
+++ b/vignettes/referencesJSDM.bib
@@ -0,0 +1,1392 @@
+
+@article{finley2020svc,
+    author = {Andrew O Finley and Sudipto Banerjee},
+	doi = {https://doi.org/10.1016/j.envsoft.2019.104608},
+	issn = {1364-8152},
+	journal = {Environmental Modelling \& Software},
+	keywords = {MCMC, Multivariate Gaussian process, Kriging, R},
+	pages = {104608},
+	title = {{Bayesian spatially varying coefficient models in the spBayes R package}},
+	url = {https://www.sciencedirect.com/science/article/pii/S1364815219310412},
+	volume = {125},
+	year = {2020},
+	Bdsk-Url-1 = {https://www.sciencedirect.com/science/article/pii/S1364815219310412},
+	Bdsk-Url-2 = {https://doi.org/10.1016/j.envsoft.2019.104608}
+	}
+
+
+@article{gelfand2004,
+	author = {Gelfand, Alan E. and Schmidt, Alexandra M. and Banerjee, Sudipto and Sirmans, C. F.},
+	da = {2004/12/01},
+	date-added = {2021-11-07 13:08:32 -0500},
+	date-modified = {2021-11-07 13:08:32 -0500},
+	doi = {10.1007/BF02595775},
+	id = {Gelfand2004},
+	isbn = {1863-8260},
+	journal = {Test},
+	number = {2},
+	pages = {263--312},
+	title = {Nonstationary multivariate process modeling through spatially varying coregionalization},
+	ty = {JOUR},
+	url = {https://doi.org/10.1007/BF02595775},
+	volume = {13},
+	year = {2004},
+	Bdsk-Url-1 = {https://doi.org/10.1007/BF02595775}}
+
+
+@article{carpenter2017,
+title={Stan: A probabilistic programming language},
+author={Carpenter, Bob and Gelman, Andrew and Hoffman, Matthew D and Lee, Daniel and Goodrich, Ben and Betancourt, Michael and Brubaker, Marcus and Guo, Jiqiang and Li, Peter and Riddell, Allen},
+journal={Journal of Statistical Software},
+volume={76},
+number={1},
+year={2017},
+publisher={Columbia Univ., New York, NY (United States); Harvard Univ., Cambridge, MA (United States)}
+}
+
+@article{banerjee2012,
+	author = {Banerjee, Sudipto and Fuentes, Montserrat},
+	doi = {https://doi.org/10.1002/wics.187},
+	eprint = {https://wires.onlinelibrary.wiley.com/doi/pdf/10.1002/wics.187},
+	journal = {WIREs Computational Statistics},
+	keywords = {Bayesian spatial statistics, Gaussian predictive process},
+	number = {1},
+	pages = {59-66},
+	title = {Bayesian modeling for large spatial datasets},
+	url = {https://wires.onlinelibrary.wiley.com/doi/abs/10.1002/wics.187},
+	volume = {4},
+	year = {2012},
+	Bdsk-Url-1 = {https://wires.onlinelibrary.wiley.com/doi/abs/10.1002/wics.187},
+	Bdsk-Url-2 = {https://doi.org/10.1002/wics.187}}
+
+  
+  @Article{deValpine2017,
+    title = {Programming with models: writing statistical algorithms
+      for general model structures with {NIMBLE}},
+    journal = {Journal of Computational and Graphical Statistics},
+    volume = {26},
+    issue = {2},
+    pages = {403-413},
+    year = {2017},
+    author = {Perry {de Valpine} and Daniel Turek and Christopher
+      Paciorek and Cliff Anderson-Bergman and Duncan {Temple Lang} and
+      Ras Bodik},
+    doi = {10.1080/10618600.2016.1172487},
+  }
+
+@MISC{plummer03,
+    author = {Martyn Plummer},
+    title = {{JAGS: A program for analysis of Bayesian graphical models using Gibbs sampling}},
+    year = {2003}
+}
+
+@article{mackenzie2002,
+  title={Estimating site occupancy rates when detection probabilities are less than one},
+  author={MacKenzie, Darryl I and Nichols, James D and Lachman, Gideon B and Droege, Sam and Royle, J Andrew and Langtimm, Catherine A},
+  journal={Ecology},
+  volume={83},
+  number={8},
+  pages={2248--2255},
+  year={2002},
+  publisher={Wiley Online Library}
+}
+
+@article{dorazio2005,
+  title={Estimating size and composition of biological communities by modeling the occurrence of species},
+  author={Dorazio, Robert M and Royle, J Andrew},
+  journal={Journal of the American Statistical Association},
+  volume={100},
+  number={470},
+  pages={389--398},
+  year={2005},
+  publisher={Taylor \& Francis}
+}
+
+@article{guelat2018,
+  title={Effects of spatial autocorrelation and imperfect detection on species distribution models},
+  author={Gu{\'e}lat, J{\'e}r{\^o}me and K{\'e}ry, Marc},
+  journal={Methods in Ecology and Evolution},
+  volume={9},
+  number={6},
+  pages={1614--1625},
+  year={2018},
+  publisher={Wiley Online Library}
+}
+
+@article{fiske2011,
+  title={{Unmarked: an R package for fitting hierarchical models of wildlife occurrence and abundance}},
+  author={Fiske, Ian and Chandler, Richard},
+  journal={Journal of Statistical Software},
+  volume={43},
+  number={10},
+  pages={1--23},
+  year={2011}
+}
+
+@article{clark2019,
+  title={{Efficient Bayesian analysis of occupancy models with logit link functions}},
+  author={Clark, Allan E and Altwegg, Res},
+  journal={Ecology and Evolution},
+  volume={9},
+  number={2},
+  pages={756--768},
+  year={2019},
+  publisher={Wiley Online Library}
+}
+
+@article{polson2013,
+  title={{Bayesian inference for logistic models using P{\'o}lya--Gamma latent variables}},
+  author={Polson, Nicholas G and Scott, James G and Windle, Jesse},
+  journal={Journal of the American Statistical Association},
+  volume={108},
+  number={504},
+  pages={1339--1349},
+  year={2013},
+  publisher={Taylor \& Francis}
+}
+
+@article{miller2019recent,
+  title={The recent past and promising future for data integration methods to estimate species’ distributions},
+  author={Miller, David AW and Pacifici, Krishna and Sanderlin, Jamie S and Reich, Brian J},
+  journal={Methods in Ecology and Evolution},
+  volume={10},
+  number={1},
+  pages={22--37},
+  year={2019},
+  publisher={Wiley Online Library}
+}
+
+@article{zipkin2021addressing,
+  title={Addressing data integration challenges to link ecological processes across scales},
+  author={Zipkin, Elise F and Zylstra, Erin R and Wright, Alexander D and Saunders, Sarah P and Finley, Andrew O and Dietze, Michael C and Itter, Malcolm S and Tingley, Morgan W},
+  journal={Frontiers in Ecology and the Environment},
+  volume={19},
+  number={1},
+  pages={30--38},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{beissinger2016incorporating,
+  title={Incorporating imperfect detection into joint models of communities: A response to Warton et al.},
+  author={Beissinger, Steven R and Iknayan, Kelly J and Guillera-Arroita, Gurutzeta and Zipkin, Elise F and Dorazio, Robert M and Royle, J Andrew and K{\'e}ry, Marc},
+  journal={Trends in ecology \& evolution},
+  volume={31},
+  number={10},
+  pages={736--737},
+  year={2016}
+}
+
+@article{heaton2019case,
+  title={A case study competition among methods for analyzing large spatial data},
+  author={Heaton, Matthew J and Datta, Abhirup and Finley, Andrew O and Furrer, Reinhard and Guinness, Joseph and Guhaniyogi, Rajarshi and Gerber, Florian and Gramacy, Robert B and Hammerling, Dorit and Katzfuss, Matthias and others},
+  journal={Journal of Agricultural, Biological and Environmental Statistics},
+  volume={24},
+  number={3},
+  pages={398--425},
+  year={2019},
+  publisher={Springer}
+}
+
+@article{wright2021spatial,
+  title={{Spatial Gaussian processes improve multi-species occupancy models when range boundaries are uncertain and nonoverlapping}},
+  author={Wright, Wilson J and Irvine, Kathryn M and Rodhouse, Thomas J and Litt, Andrea R},
+  journal={Ecology and Evolution},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{finley2020spnngp,
+  title={{spNNGP R package for nearest neighbor Gaussian process models}},
+  author={Finley, Andrew O and Datta, Abhirup and Banerjee, Sudipto},
+  journal={arXiv preprint arXiv:2001.09111},
+  year={2020}
+}
+
+@article{datta2016hierarchical,
+  title={{Hierarchical nearest-neighbor Gaussian process models for large geostatistical datasets}},
+  author={Datta, Abhirup and Banerjee, Sudipto and Finley, Andrew O and Gelfand, Alan E},
+  journal={Journal of the American Statistical Association},
+  volume={111},
+  number={514},
+  pages={800--812},
+  year={2016},
+  publisher={Taylor \& Francis}
+}
+
+@article{finley2019efficient,
+  title={{Efficient algorithms for Bayesian nearest neighbor Gaussian processes}},
+  author={Finley, Andrew O and Datta, Abhirup and Cook, Bruce D and Morton, Douglas C and Andersen, Hans E and Banerjee, Sudipto},
+  journal={Journal of Computational and Graphical Statistics},
+  volume={28},
+  number={2},
+  pages={401--414},
+  year={2019},
+  publisher={Taylor \& Francis}
+}
+
+@article{dagum1998openmp,
+  title={OpenMP: an industry standard API for shared-memory programming},
+  author={Dagum, Leonardo and Menon, Ramesh},
+  journal={IEEE Computational Science and Engineering},
+  volume={5},
+  number={1},
+  pages={46--55},
+  year={1998},
+  publisher={IEEE}
+}
+
+@article{finley2007spbayes,
+  title={{spBayes: an R package for univariate and multivariate hierarchical point-referenced spatial models}},
+  author={Finley, Andrew O and Banerjee, Sudipto and Carlin, Bradley P},
+  journal={Journal of Statistical Software},
+  volume={19},
+  number={4},
+  pages={1},
+  year={2007},
+  publisher={NIH Public Access}
+}
+
+@article{dewan2010integrated,
+  title={An integrated sampling and analysis approach for improved biodiversity monitoring},
+  author={DeWan, Amielle A and Zipkin, Elise F},
+  journal={Environmental Management},
+  volume={45},
+  number={5},
+  pages={1223--1230},
+  year={2010},
+  publisher={Springer}
+}
+
+@article{zipkin2010multi,
+  title={Multi-species occurrence models to evaluate the effects of conservation and management actions},
+  author={Zipkin, Elise F and Royle, J Andrew and Dawson, Deanna K and Bates, Scott},
+  journal={Biological Conservation},
+  volume={143},
+  number={2},
+  pages={479--484},
+  year={2010},
+  publisher={Elsevier}
+}
+
+@article{devarajan2020multi,
+  title={Multi-species occupancy models: review, roadmap, and recommendations},
+  author={Devarajan, Kadambari and Morelli, Toni Lyn and Tenan, Simone},
+  journal={Ecography},
+  volume={43},
+  number={11},
+  pages={1612--1624},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{johnson2013spatial,
+  title={Spatial occupancy models for large data sets},
+  author={Johnson, Devin S and Conn, Paul B and Hooten, Mevin B and Ray, Justina C and Pond, Bruce A},
+  journal={Ecology},
+  volume={94},
+  number={4},
+  pages={801--808},
+  year={2013},
+  publisher={Wiley Online Library}
+}
+
+@Manual{hSDM,
+  title = {hSDM: Hierarchical Bayesian Species Distribution Models},
+  author = {Ghislain Vieilledent},
+  year = {2019},
+  note = {R package version 1.4.1},
+  url = {https://CRAN.R-project.org/package=hSDM},
+}
+
+@Manual{ubms,
+  title = {ubms: Bayesian Models for Data from Unmarked Animals using 'Stan'},
+  author = {Ken Kellner},
+  year = {2021},
+  note = {R package version 1.0.2},
+  url = {https://CRAN.R-project.org/package=ubms},
+}
+
+@article{tikhonov2020joint,
+  title={{Joint species distribution modelling with the r-package Hmsc}},
+  author={Tikhonov, Gleb and Opedal, {\O}ystein H and Abrego, Nerea and Lehikoinen, Aleksi and de Jonge, Melinda MJ and Oksanen, Jari and Ovaskainen, Otso},
+  journal={Methods in Ecology and Evolution},
+  volume={11},
+  number={3},
+  pages={442--447},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@book{banerjee2003,
+  title={Hierarchical modeling and analysis for spatial data},
+  author={Banerjee, Sudipto and Carlin, Bradley P and Gelfand, Alan E},
+  year={2003},
+  publisher={Chapman and Hall/CRC}
+}
+
+@Book{ggplot2,
+  author = {Hadley Wickham},
+  title = {ggplot2: Elegant Graphics for Data Analysis},
+  publisher = {Springer-Verlag New York},
+  year = {2016},
+  isbn = {978-3-319-24277-4},
+  url = {https://ggplot2.tidyverse.org},
+}
+
+@Article{coda,
+    title = {{CODA: Convergence Diagnosis and Output Analysis for MCMC}},
+    author = {Martyn Plummer and Nicky Best and Kate Cowles and Karen Vines},
+    journal = {R News},
+    year = {2006},
+    volume = {6},
+    number = {1},
+    pages = {7--11},
+    url = {https://journal.r-project.org/archive/},
+    pdf = {https://www.r-project.org/doc/Rnews/Rnews_2006-1.pdf},
+}
+
+@article{bates2015,
+  title = {Fitting Linear Mixed-Effects Models Using {lme4}},
+  author = {Douglas Bates and Martin M{\"a}chler and Ben Bolker and Steve Walker},
+  journal = {Journal of Statistical Software},
+  year = {2015},
+  volume = {67},
+  number = {1},
+  pages = {1--48},
+  doi = {10.18637/jss.v067.i01},
+}
+
+@article{mccullagh2019,
+  title={Generalized linear models},
+  author={McCullagh, Peter},
+  year={2019},
+  publisher={Routledge}
+}
+
+@article{watanabe2010,
+  title={Asymptotic equivalence of Bayes cross validation and widely applicable information criterion in singular learning theory.},
+  author={Watanabe, Sumio},
+  journal={Journal of Machine Learning Research},
+  volume={11},
+  number={12},
+  year={2010}
+}
+
+@article{finley2011comparing,
+  title={Comparing spatially-varying coefficients models for analysis of ecological data with non-stationary and anisotropic residual dependence},
+  author={Finley, Andrew O},
+  journal={Methods in Ecology and Evolution},
+  volume={2},
+  number={2},
+  pages={143--154},
+  year={2011},
+  publisher={Wiley Online Library}
+}
+
+@article{tobler2019joint,
+  title={Joint species distribution models with species correlations and imperfect detection},
+  author={Tobler, Mathias W and K{\'e}ry, Marc and Hui, Francis KC and Guillera-Arroita, Gurutzeta and Knaus, Peter and Sattler, Thomas},
+  journal={Ecology},
+  volume={100},
+  number={8},
+  pages={e02754},
+  year={2019},
+  publisher={Wiley Online Library}
+}
+
+@article{mackenzie2003estimating,
+  title={Estimating site occupancy, colonization, and local extinction when a species is detected imperfectly},
+  author={MacKenzie, Darryl I and Nichols, James D and Hines, James E and Knutson, Melinda G and Franklin, Alan B},
+  journal={Ecology},
+  volume={84},
+  number={8},
+  pages={2200--2207},
+  year={2003},
+  publisher={Wiley Online Library}
+}
+
+@article{doser2022integrated,
+  title={Integrated community occupancy models: A framework to assess occurrence and biodiversity dynamics using multiple data sources},
+  author={Doser, Jeffrey W and Leuenberger, Wendy and Sillett, T Scott and Hallworth, Michael T and Zipkin, Elise F},
+  journal={Methods in Ecology and Evolution},
+  year={2022}
+}
+
+@article{lauret2021Ecology,
+author = {Lauret, Valentin and Labach, Hélène and Authier, Matthieu and Gimenez, Olivier},
+title = {Using single visits into integrated occupancy models to make the most of existing monitoring programs},
+year = {2021},
+journal = {Ecology},
+pages = {e03535},
+keywords = {bottlenose dolphin, data integration, ecological monitoring, integrated species distribution models, occupancy models, single-visit models},
+doi = {https://doi.org/10.1002/ecy.3535},
+url = {https://esajournals.onlinelibrary.wiley.com/doi/abs/10.1002/ecy.3535},
+eprint = {https://esajournals.onlinelibrary.wiley.com/doi/pdf/10.1002/ecy.3535},
+}
+
+@article{hines2010tigers,
+  title={{Tigers on trails: Occupancy modeling for cluster sampling}},
+  author={Hines, James E and Nichols, James D and Royle, J Andrew and MacKenzie, Darryl I and Gopalaswamy, AM and Kumar, N Samba and Karanth, KU},
+  journal={Ecological Applications},
+  volume={20},
+  number={5},
+  pages={1456--1466},
+  year={2010},
+  publisher={Wiley Online Library}
+}
+
+@article{pardieck2020north,
+  title={North American breeding bird survey dataset 1966--2019},
+  author={Pardieck, K.L. and Ziolkowski Jr, D.J. and Lutmerding, M. and Aponte, V.I. and Hudson, M-A.R.},
+  journal={U.S. Geological Survey data release, https://doi.org/10.5066/P9J6QUF6},
+  year={2020}
+}
+
+@article{brooks1998,
+  title = {General Methods for Monitoring Convergence of Iterative Simulations},
+  author = {Stephen P. Brooks and Andrew Gelman},
+  journal = {Journal of Computational and Graphical Statistics},
+  volume = {7},
+  number = {4},
+  pages = {434-455},
+  year = {1998}
+}
+
+@article{hooten2015guide,
+  title={{A guide to Bayesian model selection for ecologists}},
+  author={Hooten, Mevin B and Hobbs, N Thompson},
+  journal={Ecological Monographs},
+  volume={85},
+  number={1},
+  pages={3--28},
+  year={2015},
+  publisher={Wiley Online Library}
+}
+
+@article{broms2016model,
+  title={Model selection and assessment for multi-species occupancy models},
+  author={Broms, Kristin M and Hooten, Mevin B and Fitzpatrick, Ryan M},
+  journal={Ecology},
+  volume={97},
+  number={7},
+  pages={1759--1770},
+  year={2016},
+  publisher={Wiley Online Library}
+}
+
+@misc{hbefData,
+  howpublished = {https://doi.org/10.6073/pasta/faca2b2cf2db9d415c39b695cc7fc21},
+  author = {Rodenhouse, Nicholas L and Sillett, T. Scott},
+  language = {en},
+  title = {{Valley-wide Bird Survey, Hubbard Brook Experimental Forest, 1999-2016 (ongoing)}},
+  publisher = {Environmental Data Initiative},
+  year = {2021}, 
+  note = {Accessed: 2021-09-07}
+}
+
+@article{Swanson2013,
+author = {Swanson, Alan K. and Dobrowski, Solomon Z. and Finley, Andrew O. and Thorne, James H. and Schwartz, Michael K.},
+title = {Spatial regression methods capture prediction uncertainty in species distribution model projections through time},
+journal = {Global Ecology and Biogeography},
+volume = {22},
+number = {2},
+pages = {242-251},
+keywords = {California, climate change, conservation planning, GLM, GLMM, historic data, species distribution models, transferability, uncertainty},
+year = {2013}
+}
+
+@article{doser2021integrating,
+  title={Integrating automated acoustic vocalization data and point count surveys for estimation of bird abundance},
+  author={Doser, Jeffrey W and Finley, Andrew O and Weed, Aaron S and Zipkin, Elise F},
+  journal={Methods in Ecology and Evolution},
+  volume={12},
+  number={6},
+  pages={1040--1049},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{zulian2021integrating,
+  title={Integrating citizen-science and planned-survey data improves species distribution estimates},
+  author={Zulian, Viviane and Miller, David AW and Ferraz, Gon{\c{c}}alo},
+  journal={Diversity and Distributions},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{zipkin2009impacts,
+  title={Impacts of forest fragmentation on species richness: a hierarchical approach to community modelling},
+  author={Zipkin, Elise F and DeWan, Amielle and Andrew Royle, J},
+  journal={Journal of Applied Ecology},
+  volume={46},
+  number={4},
+  pages={815--822},
+  year={2009},
+  publisher={Wiley Online Library}
+}
+
+@article{saunders2020community,
+  title={Community science validates climate suitability projections from ecological niche modeling},
+  author={Saunders, Sarah P and Michel, Nicole L and Bateman, Brooke L and Wilsey, Chad B and Dale, Kathy and LeBaron, Geoffrey S and Langham, Gary M},
+  journal={Ecological Applications},
+  volume={30},
+  number={6},
+  pages={e02128},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{sutherland2014demographic,
+  title={A demographic, spatially explicit patch occupancy model of metapopulation dynamics and persistence},
+  author={Sutherland, CS and Elston, DA and Lambin, X},
+  journal={Ecology},
+  volume={95},
+  number={11},
+  pages={3149--3160},
+  year={2014},
+  publisher={Wiley Online Library}
+}
+
+@article{pillay2021using,
+  title={Using interview surveys and multispecies occupancy models to inform vertebrate conservation},
+  author={Pillay, Rajeev and Miller, David AW and Raghunath, R and Joshi, Atul A and Mishra, Charudutt and Johnsingh, AJT and Madhusudan, MD},
+  journal={Conservation Biology},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{kery2004monitoring,
+  title={Monitoring programs need to take into account imperfect species detectability},
+  author={K{\'e}ry, Marc and Schmid, Hans},
+  journal={Basic and applied ecology},
+  volume={5},
+  number={1},
+  pages={65--73},
+  year={2004},
+  publisher={Elsevier}
+}
+
+@article{tyre2003improving,
+  title={Improving precision and reducing bias in biological surveys: estimating false-negative error rates},
+  author={Tyre, Andrew J and Tenhumberg, Brigitte and Field, Scott A and Niejalke, Darren and Parris, Kirsten and Possingham, Hugh P},
+  journal={Ecological Applications},
+  volume={13},
+  number={6},
+  pages={1790--1801},
+  year={2003},
+  publisher={Wiley Online Library}
+}
+
+@article{gelfand2005modelling,
+  title={Modelling species diversity through species level hierarchical modelling},
+  author={Gelfand, Alan E and Schmidt, Alexandra M and Wu, Shanshan and Silander Jr, John A and Latimer, Andrew and Rebelo, Anthony G},
+  journal={Journal of the Royal Statistical Society: Series C (Applied Statistics)},
+  volume={54},
+  number={1},
+  pages={1--20},
+  year={2005},
+  publisher={Wiley Online Library}
+}
+
+@article{dorazio2006estimating,
+  title={Estimating species richness and accumulation by modeling species occurrence and detectability},
+  author={Dorazio, Robert M and Royle, J Andrew and S{\"o}derstr{\"o}m, Bo and Glimsk{\"a}r, Anders},
+  journal={Ecology},
+  volume={87},
+  number={4},
+  pages={842--854},
+  year={2006},
+  publisher={Wiley Online Library}
+}
+
+@article{dorazio2014accounting,
+  title={Accounting for imperfect detection and survey bias in statistical analysis of presence-only data},
+  author={Dorazio, Robert M},
+  journal={Global Ecology and Biogeography},
+  volume={23},
+  number={12},
+  pages={1472--1484},
+  year={2014},
+  publisher={Wiley Online Library}
+}
+
+@book{keryRoyle2021, 
+  author={K{\'e}ry, Marc and Royle, J Andrew},
+  year = {2021}, 
+  title = {Applied hierarchical modeling in ecology: Analysis of distribution, abundance, and species richness in R and BUGS: Volume 2: Dynamic and advanced models.}, 
+  location = {London, UK}, 
+  publisher = {Academic Press}
+}
+
+@article{hepler2021spatiotemporal,
+  title={A spatiotemporal model for multivariate occupancy data},
+  author={Hepler, Staci A and Erhardt, Robert J},
+  journal={Environmetrics},
+  volume={32},
+  number={2},
+  pages={e2657},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{ver2018spatial,
+  title={Spatial autoregressive models for statistical inference from ecological data},
+  author={Ver Hoef, Jay M and Peterson, Erin E and Hooten, Mevin B and Hanks, Ephraim M and Fortin, Marie-Jos{\`e}e},
+  journal={Ecological Monographs},
+  volume={88},
+  number={1},
+  pages={36--59},
+  year={2018},
+  publisher={Wiley Online Library}
+}
+
+@article{hines2006presence,
+  title={PRESENCE 3.1 Software to estimate patch occupancy and related parameters},
+  author={Hines, James E},
+  journal={http://www. mbr-pwrc. usgs. gov/software/presence. html},
+  year={2006},
+  publisher={USGS-PWRC}
+}
+
+@article{white1999program,
+  title={{Program MARK: survival estimation from populations of marked animals}},
+  author={White, Gary C and Burnham, Kenneth P},
+  journal={Bird Study},
+  volume={46},
+  number={sup1},
+  pages={S120--S139},
+  year={1999},
+  publisher={Taylor \& Francis}
+}
+
+@article{hodges2010adding,
+  title={Adding spatially-correlated errors can mess up the fixed effect you love},
+  author={Hodges, James S and Reich, Brian J},
+  journal={The American Statistician},
+  volume={64},
+  number={4},
+  pages={325--334},
+  year={2010},
+  publisher={Taylor \& Francis}
+}
+
+@article{sauer1994observer,
+  title={Observer differences in the North American breeding bird survey},
+  author={Sauer, John R and Peterjohn, Bruce G and Link, William A},
+  journal={The Auk},
+  volume={111},
+  number={1},
+  pages={50--62},
+  year={1994},
+  publisher={Oxford University Press}
+}
+
+@book{kery2016applied,
+  title={Applied Hierarchical Modeling in Ecology: Volume 1: Prelude and Static Models},
+  author={K{\'e}ry, Marc and Royle, J Andrew},
+  year={2016},
+  publisher={Elsevier Science}
+}
+
+@article{pacifici2017integrating,
+  title={Integrating multiple data sources in species distribution modeling: a framework for data fusion},
+  author={Pacifici, Krishna and Reich, Brian J and Miller, David AW and Gardner, Beth and Stauffer, Glenn and Singh, Susheela and McKerrow, Alexa and Collazo, Jaime A},
+  journal={Ecology},
+  volume={98},
+  number={3},
+  pages={840--850},
+  year={2017},
+  publisher={Wiley Online Library}
+}
+
+@article{chen2013imperfect,
+  title={Imperfect detection is the rule rather than the exception in plant distribution studies},
+  author={Chen, Guoke and K{\'e}ry, Marc and Plattner, Matthias and Ma, Keping and Gardner, Beth},
+  journal={Journal of Ecology},
+  volume={101},
+  number={1},
+  pages={183--191},
+  year={2013},
+  publisher={Wiley Online Library}
+}
+
+@article{kellner2014accounting,
+  title={Accounting for imperfect detection in ecology: a quantitative review},
+  author={Kellner, Kenneth F and Swihart, Robert K},
+  journal={PloS one},
+  volume={9},
+  number={10},
+  pages={e111436},
+  year={2014},
+  publisher={Public Library of Science San Francisco, USA}
+}
+
+@article{guisan2000predictive,
+  title={Predictive habitat distribution models in ecology},
+  author={Guisan, Antoine and Zimmermann, Niklaus E},
+  journal={Ecological Modelling},
+  volume={135},
+  number={2-3},
+  pages={147--186},
+  year={2000},
+  publisher={Elsevier}
+}
+
+@article{pulliam2000relationship,
+  title={On the relationship between niche and distribution},
+  author={Pulliam, H Ronald},
+  journal={Ecology Letters},
+  volume={3},
+  number={4},
+  pages={349--361},
+  year={2000},
+  publisher={Wiley Online Library}
+}
+
+@article{elith2009species,
+  title={Species distribution models: Ecological explanation and prediction across space and time},
+  author={Elith, Jane and Leathwick, John R},
+  journal={Annual Review of Ecology, Evolution, and Systematics},
+  volume={40},
+  pages={677--697},
+  year={2009},
+  publisher={Annual Reviews}
+}
+
+@article{araujo2012uses,
+  title={Uses and misuses of bioclimatic envelope modeling},
+  author={Ara{\'u}jo, Miguel B and Peterson, A Townsend},
+  journal={Ecology},
+  volume={93},
+  number={7},
+  pages={1527--1539},
+  year={2012},
+  publisher={Wiley Online Library}
+}
+
+@article{pollock2014understanding,
+  title={{Understanding co-occurrence by modelling species simultaneously with a Joint Species Distribution Model (JSDM)}},
+  author={Pollock, Laura J and Tingley, Reid and Morris, William K and Golding, Nick and O'Hara, Robert B and Parris, Kirsten M and Vesk, Peter A and McCarthy, Michael A},
+  journal={Methods in Ecology and Evolution},
+  volume={5},
+  number={5},
+  pages={397--406},
+  year={2014},
+  publisher={Wiley Online Library}
+}
+
+@article{ovaskainen2010modeling,
+  title={Modeling species co-occurrence by multivariate logistic regression generates new hypotheses on fungal interactions},
+  author={Ovaskainen, Otso and Hottola, Jenni and Siitonen, Juha},
+  journal={Ecology},
+  volume={91},
+  number={9},
+  pages={2514--2521},
+  year={2010},
+  publisher={Wiley Online Library}
+}
+
+@article{lany2020complementary,
+  title={Complementary strengths of spatially-explicit and multi-species distribution models},
+  author={Lany, Nina K and Zarnetske, Phoebe L and Finley, Andrew O and McCullough, Deborah G},
+  journal={Ecography},
+  volume={43},
+  number={3},
+  pages={456--466},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{kays2020empirical,
+  title={An empirical evaluation of camera trap study design: How many, how long and when?},
+  author={Kays, Roland and Arbogast, Brian S and Baker-Whatton, Megan and Beirne, Chris and Boone, Hailey M and Bowler, Mark and Burneo, Santiago F and Cove, Michael V and Ding, Ping and Espinosa, Santiago and others},
+  journal={Methods in Ecology and Evolution},
+  volume={11},
+  number={6},
+  pages={700--713},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{kery2010predicting,
+  title={Predicting species distributions from checklist data using site-occupancy models},
+  author={K{\'e}ry, Marc and Gardner, Beth and Monnerat, Christian},
+  journal={Journal of Biogeography},
+  volume={37},
+  number={10},
+  pages={1851--1862},
+  year={2010},
+  publisher={Wiley Online Library}
+}
+
+@article{sullivan2009ebird,
+  title={{eBird: A citizen-based bird observation network in the biological sciences}},
+  author={Sullivan, Brian L and Wood, Christopher L and Iliff, Marshall J and Bonney, Rick E and Fink, Daniel and Kelling, Steve},
+  journal={Biological Conservation},
+  volume={142},
+  number={10},
+  pages={2282--2292},
+  year={2009},
+  publisher={Elsevier}
+}
+
+@article{datta2016nearest,
+  title={{On nearest-neighbor Gaussian process models for massive spatial data}},
+  author={Datta, Abhirup and Banerjee, Sudipto and Finley, Andrew O and Gelfand, Alan E},
+  journal={Wiley Interdisciplinary Reviews: Computational Statistics},
+  volume={8},
+  number={5},
+  pages={162--171},
+  year={2016},
+  publisher={Wiley Online Library}
+}
+
+@article{royle2007bayesian,
+  title={{A Bayesian state-space formulation of dynamic occupancy models}},
+  author={Royle, J Andrew and K{\'e}ry, Marc},
+  journal={Ecology},
+  volume={88},
+  number={7},
+  pages={1813--1823},
+  year={2007},
+  publisher={Wiley Online Library}
+}
+
+@article{austin2007species,
+  title={{Species distribution models and ecological theory: A critical assessment and some possible new approaches}},
+  author={Austin, Mike},
+  journal={Ecological Modelling},
+  volume={200},
+  number={1-2},
+  pages={1--19},
+  year={2007},
+  publisher={Elsevier}
+}
+
+@article{nichols2007occupancy,
+  title={Occupancy estimation and modeling with multiple states and state uncertainty},
+  author={Nichols, James D and Hines, James E and Mackenzie, Darryl I and Seamans, Mark E and Gutierrez, Ralph J},
+  journal={Ecology},
+  volume={88},
+  number={6},
+  pages={1395--1400},
+  year={2007},
+  publisher={Wiley Online Library}
+}
+
+@article{royle2005general,
+  title={A general class of multinomial mixture models for anuran calling survey data},
+  author={Royle, J Andrew and Link, William A},
+  journal={Ecology},
+  volume={86},
+  number={9},
+  pages={2505--2512},
+  year={2005},
+  publisher={Wiley Online Library}
+}
+
+@article{mackenzie2004investigating,
+  title={Investigating species co-occurrence patterns when species are detected imperfectly},
+  author={MacKenzie, Darryl I and Bailey, Larissa L and Nichols, James D},
+  journal={Journal of Animal Ecology},
+  volume={73},
+  number={3},
+  pages={546--555},
+  year={2004},
+  publisher={Wiley Online Library}
+}
+
+@article{royle2006generalized,
+  title={Generalized site occupancy models allowing for false positive and false negative errors},
+  author={Royle, J Andrew and Link, William A},
+  journal={Ecology},
+  volume={87},
+  number={4},
+  pages={835--841},
+  year={2006},
+  publisher={Wiley Online Library}
+}
+
+@article{jarzyna2014,
+author = {Jarzyna, Marta A. and Finley, Andrew O. and Porter, William F. and Maurer, Brian A. and Beier, Colin M. and Zuckerberg, Benjamin},
+title = {Accounting for the space-varying nature of the relationships between temporal community turnover and the environment},
+journal = {Ecography},
+volume = {37},
+number = {11},
+pages = {1073-1083},
+doi = {https://doi.org/10.1111/ecog.00747},
+url = {https://onlinelibrary.wiley.com/doi/abs/10.1111/ecog.00747},
+eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1111/ecog.00747},
+year = {2014}
+}
+
+@misc{gelman2013bayesian,
+  title={{Bayesian Data Analysis}. 3rd edition},
+  author={Gelman, A and Carlin, JB and Stern, HS and Dunson, DB and Vehtari, A and Rubin, BD},
+  year={2013},
+  publisher={Chapman and Hall/CRC. https://doi. org/10.1201/b16018}
+}
+
+@article{joseph2020neural,
+  title={Neural hierarchical models of ecological populations},
+  author={Joseph, Maxwell B},
+  journal={Ecology letters},
+  volume={23},
+  number={4},
+  pages={734--747},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{miller2011improving,
+  title={Improving occupancy estimation when two types of observational error occur: Non-detection and species misidentification},
+  author={Miller, David A and Nichols, James D and McClintock, Brett T and Grant, Evan H Campbell and Bailey, Larissa L and Weir, Linda A},
+  journal={Ecology},
+  volume={92},
+  number={7},
+  pages={1422--1428},
+  year={2011},
+  publisher={Wiley Online Library}
+}
+
+@article{conn2015using,
+  title={Using spatiotemporal statistical models to estimate animal abundance and infer ecological dynamics from survey counts},
+  author={Conn, Paul B and Johnson, Devin S and Hoef, Jay M Ver and Hooten, Mevin B and London, Joshua M and Boveng, Peter L},
+  journal={Ecological Monographs},
+  volume={85},
+  number={2},
+  pages={235--252},
+  year={2015},
+  publisher={Wiley Online Library}
+}
+
+@article{finley2015spBayes,
+ title={{spBayes for Large Univariate and Multivariate Point-Referenced Spatio-Temporal Data Models}},
+ volume={63},
+ url={https://www.jstatsoft.org/index.php/jss/article/view/v063i13},
+ doi={10.18637/jss.v063.i13},
+ number={13},
+ journal={Journal of Statistical Software},
+ author={Finley, Andrew O. and Banerjee, Sudipto and Gelfand, Alan E.},
+ year={2015},
+ pages={1–28}
+}
+
+@article{taylor2019spatial,
+  title={{Spatial factor models for high-dimensional and large spatial data: An application in forest variable mapping}},
+  author={Taylor-Rodriguez, Daniel and Finley, Andrew O and Datta, Abhirup and Babcock, Chad and Andersen, Hans-Erik and Cook, Bruce D and Morton, Douglas C and Banerjee, Sudipto},
+  journal={Statistica Sinica},
+  volume={29},
+  pages={1155},
+  year={2019},
+  publisher={NIH Public Access}
+}
+
+@article{tikhonov2020computationally,
+  title={Computationally efficient joint species distribution modeling of big spatial data},
+  author={Tikhonov, Gleb and Duan, Li and Abrego, Nerea and Newell, Graeme and White, Matt and Dunson, David and Ovaskainen, Otso},
+  journal={Ecology},
+  volume={101},
+  number={2},
+  pages={e02929},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{hogg2021effectiveness,
+  title={Effectiveness of Joint Species Distribution Models in the Presence of Imperfect Detection},
+  author={Hogg, Stephanie and Wang, Yan and Stone, Lewi},
+  journal={Methods in Ecology and Evolution},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{leibold2004metacommunity,
+  title={The metacommunity concept: a framework for multi-scale community ecology},
+  author={Leibold, Mathew A and Holyoak, Marcel and Mouquet, Nicolas and Amarasekare, Priyanga and Chase, Jonathan M and Hoopes, Martha F and Holt, Robert D and Shurin, Jonathan B and Law, Richard and Tilman, David and others},
+  journal={Ecology letters},
+  volume={7},
+  number={7},
+  pages={601--613},
+  year={2004},
+  publisher={Wiley Online Library}
+}
+
+@article{chase2011disentangling,
+  title={Disentangling the importance of ecological niches from stochastic processes across scales},
+  author={Chase, Jonathan M and Myers, Jonathan A},
+  journal={Philosophical transactions of the Royal Society B: Biological sciences},
+  volume={366},
+  number={1576},
+  pages={2351--2363},
+  year={2011},
+  publisher={The Royal Society}
+}
+
+@article{pearce1998bioclimatic,
+  title={Bioclimatic analysis to enhance reintroduction biology of the endangered helmeted honeyeater (Lichenostomus melanops cassidix) in southeastern Australia},
+  author={Pearce, Jennie and Lindenmayer, David},
+  journal={Restoration ecology},
+  volume={6},
+  number={3},
+  pages={238--243},
+  year={1998},
+  publisher={Wiley Online Library}
+}
+
+@article{bateman2020north,
+  title={North American birds require mitigation and adaptation to reduce vulnerability to climate change},
+  author={Bateman, Brooke L and Wilsey, Chad and Taylor, Lotem and Wu, Joanna and LeBaron, Geoffrey S and Langham, Gary},
+  journal={Conservation Science and Practice},
+  volume={2},
+  number={8},
+  pages={e242},
+  year={2020},
+  publisher={Wiley Online Library}
+}
+
+@article{clark2017generalized,
+  title={Generalized joint attribute modeling for biodiversity analysis: Median-zero, multivariate, multifarious data},
+  author={Clark, James S and Nemergut, Diana and Seyednasrollah, Bijan and Turner, Phillip J and Zhang, Stacy},
+  journal={Ecological Monographs},
+  volume={87},
+  number={1},
+  pages={34--56},
+  year={2017},
+  publisher={Wiley Online Library}
+}
+
+@article{guillera2015my,
+  title={Is my species distribution model fit for purpose? Matching data and models to applications},
+  author={Guillera-Arroita, Gurutzeta and Lahoz-Monfort, Jos{\'e} J and Elith, Jane and Gordon, Ascelin and Kujala, Heini and Lentini, Pia E and McCarthy, Michael A and Tingley, Reid and Wintle, Brendan A},
+  journal={Global Ecology and Biogeography},
+  volume={24},
+  number={3},
+  pages={276--292},
+  year={2015},
+  publisher={Wiley Online Library}
+}
+
+@article{latimer2009hierarchical,
+  title={Hierarchical models facilitate spatial analysis of large data sets: a case study on invasive plant species in the northeastern United States},
+  author={Latimer, AM and Banerjee, S and Sang Jr, H and Mosher, ES and Silander Jr, JA},
+  journal={Ecology letters},
+  volume={12},
+  number={2},
+  pages={144--154},
+  year={2009},
+  publisher={Wiley Online Library}
+}
+
+@article{latimer2006building,
+  title={Building statistical models to analyze species distributions},
+  author={Latimer, Andrew M and Wu, Shanshan and Gelfand, Alan E and Silander Jr, John A},
+  journal={Ecological applications},
+  volume={16},
+  number={1},
+  pages={33--50},
+  year={2006},
+  publisher={Wiley Online Library}
+}
+
+@book{banerjee2014hierarchical,
+  title={Hierarchical modeling and analysis for spatial data},
+  author={Banerjee, Sudipto and Carlin, Bradley P and Gelfand, Alan E},
+  year={2014},
+  publisher={CRC press}
+}
+
+@article{warton2015so,
+  title={So many variables: joint modeling in community ecology},
+  author={Warton, David I and Blanchet, F Guillaume and O’Hara, Robert B and Ovaskainen, Otso and Taskinen, Sara and Walker, Steven C and Hui, Francis KC},
+  journal={Trends in Ecology \& Evolution},
+  volume={30},
+  number={12},
+  pages={766--779},
+  year={2015},
+  publisher={Elsevier}
+}
+
+@article{ovaskainen2017make,
+  title={How to make more out of community data? A conceptual framework and its implementation as models and software},
+  author={Ovaskainen, Otso and Tikhonov, Gleb and Norberg, Anna and Guillaume Blanchet, F and Duan, Leo and Dunson, David and Roslin, Tomas and Abrego, Nerea},
+  journal={Ecology letters},
+  volume={20},
+  number={5},
+  pages={561--576},
+  year={2017},
+  publisher={Wiley Online Library}
+}
+
+@article{hui2013mix,
+  title={To mix or not to mix: comparing the predictive performance of mixture models vs. separate species distribution models},
+  author={Hui, Francis KC and Warton, David I and Foster, Scott D and Dunstan, Piers K},
+  journal={Ecology},
+  volume={94},
+  number={9},
+  pages={1913--1919},
+  year={2013},
+  publisher={Wiley Online Library}
+}
+
+@article{clark2014more,
+  title={More than the sum of the parts: forest climate response from joint species distribution models},
+  author={Clark, James S and Gelfand, Alan E and Woodall, Christopher W and Zhu, Kai},
+  journal={Ecological Applications},
+  volume={24},
+  number={5},
+  pages={990--999},
+  year={2014},
+  publisher={Wiley Online Library}
+}
+
+@article{stjernman2019estimating,
+  title={Estimating effects of arable land use intensity on farmland birds using joint species modeling},
+  author={Stjernman, Martin and Sahlin, Ullrika and Olsson, Ola and Smith, Henrik G},
+  journal={Ecological Applications},
+  volume={29},
+  number={4},
+  pages={e01875},
+  year={2019},
+  publisher={Wiley Online Library}
+}
+
+@article{schliep2018joint,
+  title={Joint species distribution modelling for spatio-temporal occurrence and ordinal abundance data},
+  author={Schliep, Erin M and Lany, Nina K and Zarnetske, Phoebe L and Schaeffer, Robert N and Orians, Colin M and Orwig, David A and Preisser, Evan L},
+  journal={Global Ecology and Biogeography},
+  volume={27},
+  number={1},
+  pages={142--155},
+  year={2018},
+  publisher={Wiley Online Library}
+}
+
+@article{mata2017conserving,
+  title={Conserving herbivorous and predatory insects in urban green spaces},
+  author={Mata, Luis and Threlfall, Caragh G and Williams, Nicholas SG and Hahs, Amy K and Malipatil, Mallik and Stork, Nigel E and Livesley, Stephen J},
+  journal={Scientific Reports},
+  volume={7},
+  number={1},
+  pages={1--12},
+  year={2017},
+  publisher={Nature Publishing Group}
+}
+
+@article{cabodevilla2022implementation,
+  title={The implementation of irrigation leads to declines in farmland birds},
+  author={Cabodevilla, Xabier and Wright, Alexander D and Villanua, Diego and Arroyo, Beatriz and Zipkin, Elise F},
+  journal={Agriculture, Ecosystems \& Environment},
+  volume={323},
+  pages={107701},
+  year={2022},
+  publisher={Elsevier}
+}
+
+@article{doser2021spoccupancy,
+  title={{spOccupancy: An R package for single species, multispecies, and integrated spatial occupancy models}},
+  author={Doser, Jeffrey W and Finley, Andrew O and K{\'e}ry, Marc and Zipkin, Elise F},
+  journal={arXiv preprint arXiv:2111.12163},
+  year={2021}
+}
+
+@article{thorson2015spatial,
+  title={Spatial factor analysis: a new tool for estimating joint species distributions and correlations in species range},
+  author={Thorson, James T and Scheuerell, Mark D and Shelton, Andrew O and See, Kevin E and Skaug, Hans J and Kristensen, Kasper},
+  journal={Methods in Ecology and Evolution},
+  volume={6},
+  number={6},
+  pages={627--637},
+  year={2015},
+  publisher={Wiley Online Library}
+}
+
+@article{ovaskainen2016uncovering,
+  title={Uncovering hidden spatial structure in species communities with spatially explicit joint species distribution models},
+  author={Ovaskainen, Otso and Roy, David B and Fox, Richard and Anderson, Barbara J},
+  journal={Methods in Ecology and Evolution},
+  volume={7},
+  number={4},
+  pages={428--436},
+  year={2016},
+  publisher={Wiley Online Library}
+}
+
+@article{hogan2004bayesian,
+  title={Bayesian factor analysis for spatially correlated data, with application to summarizing area-level material deprivation from census data},
+  author={Hogan, Joseph W and Tchernis, Rusty},
+  journal={Journal of the American Statistical Association},
+  volume={99},
+  number={466},
+  pages={314--324},
+  year={2004},
+  publisher={Taylor \& Francis}
+}
+
+@article{ren2013hierarchical,
+  title={Hierarchical factor models for large spatially misaligned data: A low-rank predictive process approach},
+  author={Ren, Qian and Banerjee, Sudipto},
+  journal={Biometrics},
+  volume={69},
+  number={1},
+  pages={19--30},
+  year={2013},
+  publisher={Wiley Online Library}
+}
+
+@article{norberg2019comprehensive,
+  title={A comprehensive evaluation of predictive performance of 33 species distribution models at species and community levels},
+  author={Norberg, Anna and Abrego, Nerea and Blanchet, F Guillaume and Adler, Frederick R and Anderson, Barbara J and Anttila, Jani and Ara{\'u}jo, Miguel B and Dallas, Tad and Dunson, David and Elith, Jane and others},
+  journal={Ecological monographs},
+  volume={89},
+  number={3},
+  pages={e01370},
+  year={2019},
+  publisher={Wiley Online Library}
+}
+
+@article{lopes2004bayesian,
+  title={Bayesian model assessment in factor analysis},
+  author={Lopes, Hedibert Freitas and West, Mike},
+  journal={Statistica Sinica},
+  pages={41--67},
+  year={2004},
+  publisher={JSTOR}
+}
+
+@article{hui2016boral,
+  title={boral--Bayesian ordination and regression analysis of multivariate abundance data in R},
+  author={Hui, Francis KC},
+  journal={Methods in Ecology and Evolution},
+  volume={7},
+  number={6},
+  pages={744--750},
+  year={2016},
+  publisher={Wiley Online Library}
+}
+
+@article{broms2015accounting,
+  title={Accounting for imperfect detection in Hill numbers for biodiversity studies},
+  author={Broms, Kristin M and Hooten, Mevin B and Fitzpatrick, Ryan M},
+  journal={Methods in Ecology and Evolution},
+  volume={6},
+  number={1},
+  pages={99--108},
+  year={2015},
+  publisher={Wiley Online Library}
+}
+
+@article{gesch2002national,
+  title={The national elevation dataset},
+  author={Gesch, Dean and Oimoen, Michael and Greenlee, Susan and Nelson, Charles and Steuck, Michael and Tyler, Dean},
+  journal={Photogrammetric Engineering and Remote Sensing},
+  volume={68},
+  number={1},
+  pages={5--32},
+  year={2002},
+  publisher={ASPRS AMERICAN SOCIETY FOR PHOTOGRAMMETRY AND}
+}
+
+@article{Homer2015,
+  title={Completion of the 2011 National Land Cover Database for the conterminous United States--representing a decade of land cover change information},
+  author={Homer, Collin and Dewitz, Jon and Yang, Limin and Jin, Suming and Danielson, Patrick and Xian, George and Coulston, John and Herold, Nathaniel and Wickham, James and Megown, Kevin},
+  journal={Photogrammetric Engineering \& Remote Sensing},
+  volume={81},
+  number={5},
+  pages={345--354},
+  year={2015},
+  publisher={American Society for Photogrammetry and Remote Sensing}
+}
+
+@article{rushing2019modeling,
+  title={Modeling spatially and temporally complex range dynamics when detection is imperfect},
+  author={Rushing, Clark S and Royle, J Andrew and Ziolkowski, David J and Pardieck, Keith L},
+  journal={Scientific Reports},
+  volume={9},
+  number={1},
+  pages={1--9},
+  year={2019},
+  publisher={Nature Publishing Group}
+}
+
+@article{zipkin2012evaluating,
+  title={Evaluating the predictive abilities of community occupancy models using AUC while accounting for imperfect detection},
+  author={Zipkin, Elise F and Grant, Evan H Campbell and Fagan, William F},
+  journal={Ecological Applications},
+  volume={22},
+  number={7},
+  pages={1962--1972},
+  year={2012},
+  publisher={Wiley Online Library}
+}
+
+@article{wilkinson2019comparison,
+  title={A comparison of joint species distribution models for presence--absence data},
+  author={Wilkinson, David P and Golding, Nick and Guillera-Arroita, Gurutzeta and Tingley, Reid and McCarthy, Michael A},
+  journal={Methods in Ecology and Evolution},
+  volume={10},
+  number={2},
+  pages={198--211},
+  year={2019},
+  publisher={Wiley Online Library}
+}
+
+@article{hui2015model,
+  title={Model-based approaches to unconstrained ordination},
+  author={Hui, Francis KC and Taskinen, Sara and Pledger, Shirley and Foster, Scott D and Warton, David I},
+  journal={Methods in Ecology and Evolution},
+  volume={6},
+  number={4},
+  pages={399--411},
+  year={2015},
+  publisher={Wiley Online Library}
+}
+
+@article{poggiato2021interpretations,
+  title={On the interpretations of joint modeling in community ecology},
+  author={Poggiato, Giovanni and M{\"u}nkem{\"u}ller, Tamara and Bystrova, Daria and Arbel, Julyan and Clark, James S and Thuiller, Wilfried},
+  journal={Trends in ecology \& evolution},
+  volume={36},
+  number={5},
+  pages={391--401},
+  year={2021},
+  publisher={Elsevier}
+}
+
+@article{konig2021scale,
+  title={Scale dependency of joint species distribution models challenges interpretation of biotic interactions},
+  author={K{\"o}nig, Christian and W{\"u}est, Rafael O and Graham, Catherine H and Karger, Dirk Nikolaus and Sattler, Thomas and Zimmermann, Niklaus E and Zurell, Damaris},
+  journal={Journal of Biogeography},
+  volume={48},
+  number={7},
+  pages={1541--1551},
+  year={2021},
+  publisher={Wiley Online Library}
+}
+
+@article{finley2009aoas,
+author = {Andrew O. Finley and Sudipto Banerjee and Ronald E. McRoberts},
+title = {{Hierarchical spatial models for predicting tree species assemblages across large domains}},
+volume = {3},
+journal = {The Annals of Applied Statistics},
+number = {3},
+publisher = {Institute of Mathematical Statistics},
+pages = {1052 -- 1079},
+keywords = {Bayesian inference, logistic regression, Markov chain Monte Carlo, spatial predictive process, spatially-varying coefficients, species assemblages},
+year = {2009},
+doi = {10.1214/09-AOAS250},
+URL = {https://doi.org/10.1214/09-AOAS250}
+}
+
+@article{roberts2009examples,
+  title={Examples of adaptive MCMC},
+  author={Roberts, Gareth O and Rosenthal, Jeffrey S},
+  journal={Journal of computational and graphical statistics},
+  volume={18},
+  number={2},
+  pages={349--367},
+  year={2009},
+  publisher={Taylor \& Francis}
+}
+