Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Specify probability distribution in as_epidist() #335

Merged
merged 9 commits into from
Jun 24, 2024
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ Suggests:
vdiffr (>= 1.0.7)
VignetteBuilder:
knitr
Config/Needs/check: mrc-ide/epireview
Config/Needs/website: epiverse-trace/epiversetheme, mrc-ide/epireview
Config/testthat/edition: 3
Encoding: UTF-8
Expand Down
24 changes: 23 additions & 1 deletion R/coercion.R
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,21 @@ as.data.frame.multi_epidist <- function(x, ...) {
#' to function via the `...` argument. The argument should be called `article`,
#' as it will be matched by name by `$`.
#'
#' To specify a probability distribution pass a `character` string to the
#' function via the `...` argument. The argument should be called `prob_dist`.
#' For example, to specify a gamma distribution:
#' `as_epidist(x, prob_dist = "gamma")`.
#'
#' ***Warning***: distributions specified via the `prob_dist` argument will
#' overwrite the probability distribution specified in the `x` argument. For
#' example, if the probability distribution is given in an \pkg{epireview}
#' entry and the `prob_dist` argument is specified then the function may error
#' or return an unparameterised `<epidist>` if the parameterisation becomes
#' incompatible.
#'
#' Valid probability distributions are: `"gamma"`, `"lnorm"`, `"weibull"`,
#' `"nbinom"`, `"geom"`, `"pois"`, `"norm"`, `"exp"`.
#'
#' @inheritParams base::print
#' @param ... [dots] Extra arguments to be passed to the method.
#'
Expand Down Expand Up @@ -233,10 +248,11 @@ epidist_df_to_epidist <- function(x, ...) {
#' @inherit epidist return
#' @keywords internal
#' @noRd
epireview_to_epidist <- function(x, ...) {
epireview_to_epidist <- function(x, ...) { # nolint cyclocomp_linter
# capture dots and extract article info if supplied
dots <- list(...)
article <- dots$article
prob_dist_in <- dots$prob_dist
# validate multi-row entries
if (nrow(x) > 1) {
stopifnot(
Expand Down Expand Up @@ -314,6 +330,12 @@ epireview_to_epidist <- function(x, ...) {
)
names(uncertainty) <- names(prob_dist_params)
}
# overwrite prob_dist with user specified if given
if (!is.null(prob_dist_in)) {
prob_dist <- prob_dist_in
# erase uncertainty, new prob_dist will likely have different param names
uncertainty <- create_epidist_uncertainty()
}
# vectorise switch (cannot use vapply due to various return FUN.VALUE)
param_type <- sapply( # nolint undesirable_function_linter
x$parameter_value_type,
Expand Down
1 change: 1 addition & 0 deletions inst/WORDLIST
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ etc
EVD
extdata
facetted
Faye
jsonlite
Lifecycle
lnorm
Expand Down
15 changes: 15 additions & 0 deletions man/as_epidist.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

126 changes: 126 additions & 0 deletions tests/testthat/test-coercion.R
Original file line number Diff line number Diff line change
Expand Up @@ -90,3 +90,129 @@ test_that("as_epidist works for ebola serial interval (issue #303)", {
# populate mean and sd summary statistics without uncertainty
expect_true(all(!is.na(ebola_serial_epidist$summary_stats[c("mean", "sd")])))
})

test_that("as_epidist works for ebola SI assumed prob_dist (issue #310)", {
# {epireview} is not a dependency so only run if already on system
skip_if_not_installed("epireview")
joshwlambert marked this conversation as resolved.
Show resolved Hide resolved
# suppress warning and message about loading data
ebola_data <- suppressWarnings(
suppressMessages(
epireview::load_epidata("ebola")
)
)
ebola_params <- ebola_data$params
ebola_serial <- ebola_params[
which(
grepl(pattern = "Faye", x = ebola_params$article_label) &
grepl(pattern = "serial", ebola_params$parameter_type)
),
]
# suppress warning and message about citation
ebola_serial_epidist <- suppressWarnings(
suppressMessages(
as_epidist(ebola_serial, prob_dist = "gamma")
)
)
expect_s3_class(ebola_serial_epidist, class = "epidist")
# Faye 2015 parameterise an assumed gamma distribution
expect_s3_class(
ebola_serial_epidist$prob_dist,
class = "distribution"
)
# populate mean and sd summary statistics without uncertainty
expect_true(all(!is.na(ebola_serial_epidist$summary_stats[c("mean", "sd")])))
})

test_that("as_epidist works for lassa incubation overwritten prob_dist", {
# {epireview} is not a dependency so only run if already on system
skip_if_not_installed("epireview")
# suppress warning and message about loading data
lassa_data <- suppressWarnings(
suppressMessages(
epireview::load_epidata("lassa")
)
)
lassa_params <- lassa_data$params
lassa_incub <- lassa_params[
which(lassa_params$article_label == "Akhmetzhanov 2019" &
lassa_params$parameter_type == "Human delay - incubation period"),
]
# suppress warning and message about citation
lassa_incub_epidist <- suppressWarnings(
suppressMessages(
as_epidist(lassa_incub, prob_dist = "lnorm")
)
)
expect_s3_class(lassa_incub_epidist, class = "epidist")
# Akhmetzhanov 2019 has information to parameterise a gamma distribution
expect_s3_class(
lassa_incub_epidist$prob_dist,
class = "distribution"
)
expect_identical(family(lassa_incub_epidist), "lnorm")
expect_true(is_parameterised(lassa_incub_epidist))
# populate mean and sd summary statistics without uncertainty
expect_true(all(!is.na(lassa_incub_epidist$summary_stats[c("mean", "sd")])))
})

test_that("as_epidist works for overwritten prob_dist with same parameters", {
# {epireview} is not a dependency so only run if already on system
skip_if_not_installed("epireview")
# suppress warning and message about loading data
ebola_data <- suppressWarnings(
suppressMessages(
epireview::load_epidata("ebola")
)
)
ebola_params <- ebola_data$params
ebola_si <- ebola_params[
which(
ebola_params$distribution_par1_type == "Shape" &
ebola_params$article_label == "Fallah 2015 (1)"
),
]
# suppress warning and message about citation
ebola_si_gamma <- suppressWarnings(
suppressMessages(
as_epidist(ebola_si)
)
)
ebola_si_weibull <- suppressWarnings(
suppressMessages(
as_epidist(ebola_si, prob_dist = "weibull")
)
)
expect_s3_class(ebola_si_gamma, class = "epidist")
expect_s3_class(ebola_si_weibull, class = "epidist")
expect_true(is_parameterised(ebola_si_gamma))
expect_true(is_parameterised(ebola_si_weibull))
expect_identical(family(ebola_si_gamma), "gamma")
expect_identical(family(ebola_si_weibull), "weibull")
})

test_that("as_epidist fails as expected with overwritten prob_dist", {
# {epireview} is not a dependency so only run if already on system
skip_if_not_installed("epireview")
# suppress warning and message about loading data
ebola_data <- suppressWarnings(
suppressMessages(
epireview::load_epidata("ebola")
)
)
ebola_params <- ebola_data$params
ebola_si <- ebola_params[
which(
ebola_params$distribution_par1_type == "Shape" &
ebola_params$article_label == "Fallah 2015 (1)"
),
]
# suppress warning and message about citation
ebola_si_lnorm <- suppressWarnings(
suppressMessages(
as_epidist(ebola_si, prob_dist = "lnorm")
)
)
expect_s3_class(ebola_si_lnorm, class = "epidist")
expect_false(is_parameterised(ebola_si_lnorm))
expect_identical(family(ebola_si_lnorm), "lnorm")
})
59 changes: 59 additions & 0 deletions vignettes/articles/data_from_epireview.Rmd
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
---
title: "Using {epireview} with {epiparameter}"
bibliography: ../references.json
---

```{r, include = FALSE}
Expand Down Expand Up @@ -208,4 +209,62 @@ plot(ebola_si_epidist)
generate(ebola_si_epidist, times = 10)
```

## Specifying the probability distribution if unknown

There may be instances where a delay distribution is reported, but either a probability distribution is not fit to the data, or it is not reported which probability distribution the parameters correspond to. In these cases, and when a parametric probability distribution is required for a particular epidemiological task then assuming a probability distribution can be useful.

::: {.alert .alert-danger}
**Please use this feature with caution**. Assuming an incorrect probability distribution and applying this in an epidemiological method can lead to erroneous results. Additionally, if a probability distribution is specified by the user it will overwrite any probability distribution specified in input data (e.g. {epireview} parameter data) which can lead to an error if the distribution name supplied and parameters input are incompatible See `?as_epidist` details for more information.
:::

Just as the example above we will load the Ebola parameters using the `epireview::load_epidata()` function and subset to just the parameters (`$params`).

```{r, load-and-subset-ebola}
ebola_data <- load_epidata("ebola")
ebola_params <- ebola_data$params
```

Here we will use the serial interval for Ebola reported by @fayeChainsTransmissionControl2015a. This is stored, over two rows of the parameter table, as the mean and standard deviation, but there is no probability distribution specified.

The code chunk below subsets the Ebola parameter table to just return the serial interval from @fayeChainsTransmissionControl2015a.

```{r, subset-ebola-serial-interval}
joshwlambert marked this conversation as resolved.
Show resolved Hide resolved
ebola_si <- ebola_params[
which(
grepl(pattern = "Faye", x = ebola_params$article_label, fixed = TRUE) &
grepl(pattern = "serial", ebola_params$parameter_type, fixed = TRUE)
),
]
```

If we were to supply this data to `as_epidist()` we would get an unparameterised `<epidist>` object because no probability distribution is stated.

```{r, convert-to-epidist-unspecified-prob-dist}
ebola_si_epidist <- as_epidist(ebola_si)
ebola_si_epidist
is_parameterised(ebola_si_epidist)
```

Given that we can convert the mean and standard deviation into parameters of a probability distribution if we assume a distribution form, we can supply this data to `as_epidist()`.

```{r, convert-to-epidist-assumed-prob-dist}
ebola_si_epidist <- as_epidist(ebola_si, prob_dist = "gamma")
ebola_si_epidist
is_parameterised(ebola_si_epidist)
```

The Ebola serial interval `<epidist>` can now be used for various probability distribution methods.

```{r, ebola-epidist-dist-methods}
get_parameters(ebola_si_epidist)
density(ebola_si_epidist, at = 20)
plot(ebola_si_epidist)
cdf(ebola_si_epidist, q = 10)
plot(ebola_si_epidist, cumulative = TRUE)
quantile(ebola_si_epidist, p = 0.5)
generate(ebola_si_epidist, times = 10)
```

## Limitations

## References
87 changes: 87 additions & 0 deletions vignettes/references.json
Original file line number Diff line number Diff line change
Expand Up @@ -665,5 +665,92 @@
]
]
}
},
{
"id": "fayeChainsTransmissionControl2015a",
"type": "article-journal",
"container-title": "The Lancet Infectious Diseases",
"DOI": "10.1016/S1473-3099(14)71075-8",
"ISSN": "14733099",
"issue": "3",
"journalAbbreviation": "The Lancet Infectious Diseases",
"language": "en",
"page": "320-326",
"source": "DOI.org (Crossref)",
"title": "Chains of transmission and control of Ebola virus disease in Conakry, Guinea, in 2014: an observational study",
"title-short": "Chains of transmission and control of Ebola virus disease in Conakry, Guinea, in 2014",
"URL": "https://linkinghub.elsevier.com/retrieve/pii/S1473309914710758",
"volume": "15",
"author": [
{
"family": "Faye",
"given": "Ousmane"
},
{
"family": "Boëlle",
"given": "Pierre-Yves"
},
{
"family": "Heleze",
"given": "Emmanuel"
},
{
"family": "Faye",
"given": "Oumar"
},
{
"family": "Loucoubar",
"given": "Cheikh"
},
{
"family": "Magassouba",
"given": "N'Faly"
},
{
"family": "Soropogui",
"given": "Barré"
},
{
"family": "Keita",
"given": "Sakoba"
},
{
"family": "Gakou",
"given": "Tata"
},
{
"family": "Bah",
"given": "El Hadji Ibrahima"
},
{
"family": "Koivogui",
"given": "Lamine"
},
{
"family": "Sall",
"given": "Amadou Alpha"
},
{
"family": "Cauchemez",
"given": "Simon"
}
],
"accessed": {
"date-parts": [
[
"2024",
6,
17
]
]
},
"issued": {
"date-parts": [
[
"2015",
3
]
]
}
}
]
Loading