diff --git a/DESCRIPTION b/DESCRIPTION index 34813457a..7fcf646d5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -46,6 +46,7 @@ Suggests: vdiffr (>= 1.0.7) VignetteBuilder: knitr +Config/Needs/check: mrc-ide/epireview Config/Needs/website: epiverse-trace/epiversetheme, mrc-ide/epireview Config/testthat/edition: 3 Encoding: UTF-8 diff --git a/R/coercion.R b/R/coercion.R index a57e0cecc..1b7364ff7 100644 --- a/R/coercion.R +++ b/R/coercion.R @@ -104,6 +104,21 @@ as.data.frame.multi_epidist <- function(x, ...) { #' to function via the `...` argument. The argument should be called `article`, #' as it will be matched by name by `$`. #' +#' To specify a probability distribution pass a `character` string to the +#' function via the `...` argument. The argument should be called `prob_dist`. +#' For example, to specify a gamma distribution: +#' `as_epidist(x, prob_dist = "gamma")`. +#' +#' ***Warning***: distributions specified via the `prob_dist` argument will +#' overwrite the probability distribution specified in the `x` argument. For +#' example, if the probability distribution is given in an \pkg{epireview} +#' entry and the `prob_dist` argument is specified then the function may error +#' or return an unparameterised `` if the parameterisation becomes +#' incompatible. +#' +#' Valid probability distributions are: `"gamma"`, `"lnorm"`, `"weibull"`, +#' `"nbinom"`, `"geom"`, `"pois"`, `"norm"`, `"exp"`. +#' #' @inheritParams base::print #' @param ... [dots] Extra arguments to be passed to the method. #' @@ -233,10 +248,11 @@ epidist_df_to_epidist <- function(x, ...) { #' @inherit epidist return #' @keywords internal #' @noRd -epireview_to_epidist <- function(x, ...) { +epireview_to_epidist <- function(x, ...) { # nolint cyclocomp_linter # capture dots and extract article info if supplied dots <- list(...) article <- dots$article + prob_dist_in <- dots$prob_dist # validate multi-row entries if (nrow(x) > 1) { stopifnot( @@ -314,6 +330,12 @@ epireview_to_epidist <- function(x, ...) { ) names(uncertainty) <- names(prob_dist_params) } + # overwrite prob_dist with user specified if given + if (!is.null(prob_dist_in)) { + prob_dist <- prob_dist_in + # erase uncertainty, new prob_dist will likely have different param names + uncertainty <- create_epidist_uncertainty() + } # vectorise switch (cannot use vapply due to various return FUN.VALUE) param_type <- sapply( # nolint undesirable_function_linter x$parameter_value_type, diff --git a/inst/WORDLIST b/inst/WORDLIST index 5cc19318f..e2355d648 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -20,6 +20,7 @@ etc EVD extdata facetted +Faye jsonlite Lifecycle lnorm diff --git a/man/as_epidist.Rd b/man/as_epidist.Rd index e487ee69e..79b93d432 100644 --- a/man/as_epidist.Rd +++ b/man/as_epidist.Rd @@ -23,4 +23,19 @@ To create the full citation the information from the article table from the \pkg{epireview} package of the corresponding entry will need to be passed to function via the \code{...} argument. The argument should be called \code{article}, as it will be matched by name by \code{$}. + +To specify a probability distribution pass a \code{character} string to the +function via the \code{...} argument. The argument should be called \code{prob_dist}. +For example, to specify a gamma distribution: +\code{as_epidist(x, prob_dist = "gamma")}. + +\emph{\strong{Warning}}: distributions specified via the \code{prob_dist} argument will +overwrite the probability distribution specified in the \code{x} argument. For +example, if the probability distribution is given in an \pkg{epireview} +entry and the \code{prob_dist} argument is specified then the function may error +or return an unparameterised \verb{} if the parameterisation becomes +incompatible. + +Valid probability distributions are: \code{"gamma"}, \code{"lnorm"}, \code{"weibull"}, +\code{"nbinom"}, \code{"geom"}, \code{"pois"}, \code{"norm"}, \code{"exp"}. } diff --git a/tests/testthat/test-coercion.R b/tests/testthat/test-coercion.R index b9a9aabdf..0a98c1292 100644 --- a/tests/testthat/test-coercion.R +++ b/tests/testthat/test-coercion.R @@ -90,3 +90,129 @@ test_that("as_epidist works for ebola serial interval (issue #303)", { # populate mean and sd summary statistics without uncertainty expect_true(all(!is.na(ebola_serial_epidist$summary_stats[c("mean", "sd")]))) }) + +test_that("as_epidist works for ebola SI assumed prob_dist (issue #310)", { + # {epireview} is not a dependency so only run if already on system + skip_if_not_installed("epireview") + # suppress warning and message about loading data + ebola_data <- suppressWarnings( + suppressMessages( + epireview::load_epidata("ebola") + ) + ) + ebola_params <- ebola_data$params + ebola_serial <- ebola_params[ + which( + grepl(pattern = "Faye", x = ebola_params$article_label) & + grepl(pattern = "serial", ebola_params$parameter_type) + ), + ] + # suppress warning and message about citation + ebola_serial_epidist <- suppressWarnings( + suppressMessages( + as_epidist(ebola_serial, prob_dist = "gamma") + ) + ) + expect_s3_class(ebola_serial_epidist, class = "epidist") + # Faye 2015 parameterise an assumed gamma distribution + expect_s3_class( + ebola_serial_epidist$prob_dist, + class = "distribution" + ) + # populate mean and sd summary statistics without uncertainty + expect_true(all(!is.na(ebola_serial_epidist$summary_stats[c("mean", "sd")]))) +}) + +test_that("as_epidist works for lassa incubation overwritten prob_dist", { + # {epireview} is not a dependency so only run if already on system + skip_if_not_installed("epireview") + # suppress warning and message about loading data + lassa_data <- suppressWarnings( + suppressMessages( + epireview::load_epidata("lassa") + ) + ) + lassa_params <- lassa_data$params + lassa_incub <- lassa_params[ + which(lassa_params$article_label == "Akhmetzhanov 2019" & + lassa_params$parameter_type == "Human delay - incubation period"), + ] + # suppress warning and message about citation + lassa_incub_epidist <- suppressWarnings( + suppressMessages( + as_epidist(lassa_incub, prob_dist = "lnorm") + ) + ) + expect_s3_class(lassa_incub_epidist, class = "epidist") + # Akhmetzhanov 2019 has information to parameterise a gamma distribution + expect_s3_class( + lassa_incub_epidist$prob_dist, + class = "distribution" + ) + expect_identical(family(lassa_incub_epidist), "lnorm") + expect_true(is_parameterised(lassa_incub_epidist)) + # populate mean and sd summary statistics without uncertainty + expect_true(all(!is.na(lassa_incub_epidist$summary_stats[c("mean", "sd")]))) +}) + +test_that("as_epidist works for overwritten prob_dist with same parameters", { + # {epireview} is not a dependency so only run if already on system + skip_if_not_installed("epireview") + # suppress warning and message about loading data + ebola_data <- suppressWarnings( + suppressMessages( + epireview::load_epidata("ebola") + ) + ) + ebola_params <- ebola_data$params + ebola_si <- ebola_params[ + which( + ebola_params$distribution_par1_type == "Shape" & + ebola_params$article_label == "Fallah 2015 (1)" + ), + ] + # suppress warning and message about citation + ebola_si_gamma <- suppressWarnings( + suppressMessages( + as_epidist(ebola_si) + ) + ) + ebola_si_weibull <- suppressWarnings( + suppressMessages( + as_epidist(ebola_si, prob_dist = "weibull") + ) + ) + expect_s3_class(ebola_si_gamma, class = "epidist") + expect_s3_class(ebola_si_weibull, class = "epidist") + expect_true(is_parameterised(ebola_si_gamma)) + expect_true(is_parameterised(ebola_si_weibull)) + expect_identical(family(ebola_si_gamma), "gamma") + expect_identical(family(ebola_si_weibull), "weibull") +}) + +test_that("as_epidist fails as expected with overwritten prob_dist", { + # {epireview} is not a dependency so only run if already on system + skip_if_not_installed("epireview") + # suppress warning and message about loading data + ebola_data <- suppressWarnings( + suppressMessages( + epireview::load_epidata("ebola") + ) + ) + ebola_params <- ebola_data$params + ebola_si <- ebola_params[ + which( + ebola_params$distribution_par1_type == "Shape" & + ebola_params$article_label == "Fallah 2015 (1)" + ), + ] + # suppress warning and message about citation + ebola_si_lnorm <- suppressWarnings( + suppressMessages( + as_epidist(ebola_si, prob_dist = "lnorm") + ) + ) + expect_s3_class(ebola_si_lnorm, class = "epidist") + expect_false(is_parameterised(ebola_si_lnorm)) + expect_identical(family(ebola_si_lnorm), "lnorm") +}) diff --git a/vignettes/articles/data_from_epireview.Rmd b/vignettes/articles/data_from_epireview.Rmd index 346375d11..dab294b3f 100644 --- a/vignettes/articles/data_from_epireview.Rmd +++ b/vignettes/articles/data_from_epireview.Rmd @@ -1,5 +1,6 @@ --- title: "Using {epireview} with {epiparameter}" +bibliography: ../references.json --- ```{r, include = FALSE} @@ -208,4 +209,62 @@ plot(ebola_si_epidist) generate(ebola_si_epidist, times = 10) ``` +## Specifying the probability distribution if unknown + +There may be instances where a delay distribution is reported, but either a probability distribution is not fit to the data, or it is not reported which probability distribution the parameters correspond to. In these cases, and when a parametric probability distribution is required for a particular epidemiological task then assuming a probability distribution can be useful. + +::: {.alert .alert-danger} +**Please use this feature with caution**. Assuming an incorrect probability distribution and applying this in an epidemiological method can lead to erroneous results. Additionally, if a probability distribution is specified by the user it will overwrite any probability distribution specified in input data (e.g. {epireview} parameter data) which can lead to an error if the distribution name supplied and parameters input are incompatible See `?as_epidist` details for more information. +::: + +Just as the example above we will load the Ebola parameters using the `epireview::load_epidata()` function and subset to just the parameters (`$params`). + +```{r, load-and-subset-ebola} +ebola_data <- load_epidata("ebola") +ebola_params <- ebola_data$params +``` + +Here we will use the serial interval for Ebola reported by @fayeChainsTransmissionControl2015a. This is stored, over two rows of the parameter table, as the mean and standard deviation, but there is no probability distribution specified. + +The code chunk below subsets the Ebola parameter table to just return the serial interval from @fayeChainsTransmissionControl2015a. + +```{r, subset-ebola-serial-interval} +ebola_si <- ebola_params[ + which( + grepl(pattern = "Faye", x = ebola_params$article_label, fixed = TRUE) & + grepl(pattern = "serial", ebola_params$parameter_type, fixed = TRUE) + ), +] +``` + +If we were to supply this data to `as_epidist()` we would get an unparameterised `` object because no probability distribution is stated. + +```{r, convert-to-epidist-unspecified-prob-dist} +ebola_si_epidist <- as_epidist(ebola_si) +ebola_si_epidist +is_parameterised(ebola_si_epidist) +``` + +Given that we can convert the mean and standard deviation into parameters of a probability distribution if we assume a distribution form, we can supply this data to `as_epidist()`. + +```{r, convert-to-epidist-assumed-prob-dist} +ebola_si_epidist <- as_epidist(ebola_si, prob_dist = "gamma") +ebola_si_epidist +is_parameterised(ebola_si_epidist) +``` + +The Ebola serial interval `` can now be used for various probability distribution methods. + +```{r, ebola-epidist-dist-methods} +get_parameters(ebola_si_epidist) +density(ebola_si_epidist, at = 20) +plot(ebola_si_epidist) +cdf(ebola_si_epidist, q = 10) +plot(ebola_si_epidist, cumulative = TRUE) +quantile(ebola_si_epidist, p = 0.5) +generate(ebola_si_epidist, times = 10) +``` + ## Limitations + +## References diff --git a/vignettes/references.json b/vignettes/references.json index 0970f52b4..9449f0849 100644 --- a/vignettes/references.json +++ b/vignettes/references.json @@ -665,5 +665,92 @@ ] ] } + }, + { + "id": "fayeChainsTransmissionControl2015a", + "type": "article-journal", + "container-title": "The Lancet Infectious Diseases", + "DOI": "10.1016/S1473-3099(14)71075-8", + "ISSN": "14733099", + "issue": "3", + "journalAbbreviation": "The Lancet Infectious Diseases", + "language": "en", + "page": "320-326", + "source": "DOI.org (Crossref)", + "title": "Chains of transmission and control of Ebola virus disease in Conakry, Guinea, in 2014: an observational study", + "title-short": "Chains of transmission and control of Ebola virus disease in Conakry, Guinea, in 2014", + "URL": "https://linkinghub.elsevier.com/retrieve/pii/S1473309914710758", + "volume": "15", + "author": [ + { + "family": "Faye", + "given": "Ousmane" + }, + { + "family": "Boëlle", + "given": "Pierre-Yves" + }, + { + "family": "Heleze", + "given": "Emmanuel" + }, + { + "family": "Faye", + "given": "Oumar" + }, + { + "family": "Loucoubar", + "given": "Cheikh" + }, + { + "family": "Magassouba", + "given": "N'Faly" + }, + { + "family": "Soropogui", + "given": "Barré" + }, + { + "family": "Keita", + "given": "Sakoba" + }, + { + "family": "Gakou", + "given": "Tata" + }, + { + "family": "Bah", + "given": "El Hadji Ibrahima" + }, + { + "family": "Koivogui", + "given": "Lamine" + }, + { + "family": "Sall", + "given": "Amadou Alpha" + }, + { + "family": "Cauchemez", + "given": "Simon" + } + ], + "accessed": { + "date-parts": [ + [ + "2024", + 6, + 17 + ] + ] + }, + "issued": { + "date-parts": [ + [ + "2015", + 3 + ] + ] + } } ] \ No newline at end of file