Skip to content

Commit

Permalink
Merge pull request #5 from epinowcast/4-setup-vignette
Browse files Browse the repository at this point in the history
Issue #4: Setup vignette
  • Loading branch information
kaitejohnson authored Jan 31, 2025
2 parents 7ab1c26 + 315e0bd commit 9c21a1b
Show file tree
Hide file tree
Showing 13 changed files with 619 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
extra-packages: any::rcmdcheck, any::testthat
needs: check

- uses: r-lib/actions/check-r-package@v2
Expand Down
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@ URL: https://github.com/epinowcast/baselinenowcast/
BugReports: https://github.com/epinowcast/baselinenowcast/issues
Depends:
R (>= 4.0.0)
Imports:
cli
Suggests:
dplyr,
epinowcast,
bookdown,
dplyr,
ggplot2,
spelling,
rmarkdown,
Expand All @@ -44,5 +47,7 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Remotes:
epinowcast=epinowcast/epinowcast@v0.3.0
Config/Needs/hexsticker: hexSticker, sysfonts, ggplot2
Config/Needs/website: r-lib/pkgdown, epinowcast/enwtheme
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# Generated by roxygen2: do not edit by hand

export(get_delay_estimate)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# baselinenowcast 0.0.0.1000

- Introduced first function to get the delay estimate and an outline of the Getting Started vignette
- Added package skeleton.
80 changes: 80 additions & 0 deletions R/get_delay_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' Estimate a delay distribution from a reporting triangle
#' @description
#' Provides an estimate of the reporting delay as a function
#' of the delay, based on the reporting triangle and the specified maximum
#' delay and number of reference date observations to be used in the estimation.
#' This point estimate of the delay is computed empirically, using an
#' iterative algorithm starting from the most recent observations. It was
#' modified from the code originally developed by the Karlsruhe Institute
#' of Technology RESPINOW German Hospitalization Nowcasting Hub,
#' Modified from: https://github.com/KITmetricslab/RESPINOW-Hub/blob/7cce3ae2728116e8c8cc0e4ab29074462c24650e/code/baseline/functions.R#L55 #nolint
#' @param triangle matrix of the reporting triangle, with rows representing
#' the time points of reference and columns representing the delays
#' indexed at 0
#' @param max_delay integer indicating the maximum delay to estimate, in units
#' of the delay. The default is to use the whole reporting triangle,
#' `ncol(triangle) -1`.
#' @param n_history integer indicating the number of reference dates to be
#' used in the estimate of the reporting delay, always starting from the most
#' recent reporting delay. The default is to use the whole reporting triangle,
#' so `nrow(triangle)-1`
#' @returns delay_df dataframe of length `max_delay` with columns `delay`
#' and `pmf`, indicating the point estimate of the empirical probability
#' mass on each delay
#' @export
#' @examples
#' library(epinowcast)
#' nat_germany_hosp <-
#' germany_covid19_hosp[location == "DE"][age_group == "00+"]
#' nat_germany_hosp <- enw_filter_report_dates(
#' nat_germany_hosp,
#' latest_date = "2021-10-01"
#' )
#' pobs <- enw_preprocess_data(nat_germany_hosp, max_delay = 21)
#' triangle_raw <- pobs$reporting_triangle[[1]] |>
#' dplyr::select(-`.group`, -reference_date) |>
#' as.matrix() |>
#' unname()
#' delay_df <- get_delay_estimate(triangle_raw,
#' max_delay = 20,
#' n_history = 30
#' )
get_delay_estimate <- function(triangle,
max_delay = ncol(triangle) - 1,
n_history = nrow(triangle)) {
# Check that the input reporting triangle is formatted properly.
validate_triangle(triangle,
max_delay = max_delay,
n_history = n_history
)

# Filter the triangle down to nrow = n_history + 1, ncol = max_delay
nr0 <- nrow(triangle)
trunc_triangle <- triangle[(nr0 - n_history + 1):nr0, 1:(max_delay + 1)]
rt <- handle_neg_vals(trunc_triangle)
n_delays <- ncol(rt)
n_dates <- nrow(rt)
factor <- vector(length = max_delay - 1)
expectation <- rt
for (co in 2:(n_delays)) {
block_top_left <- rt[1:(n_dates - co + 1), 1:(co - 1), drop = FALSE]
block_top <- rt[1:(n_dates - co + 1), co, drop = FALSE]
factor[co - 1] <- sum(block_top) / max(sum(block_top_left), 1)
block_bottom_left <- expectation[(n_dates - co + 2):n_dates, 1:(co - 1),
drop = FALSE
]
# We compute the expectation so that we can get the delay estimate
expectation[(n_dates - co + 2):n_dates, co] <- factor[co - 1] * rowSums(
block_bottom_left
)
}

# Use the completed reporting square to get the point estimate of the delay
# distribution
pmf <- colSums(expectation) / sum(expectation)
delay_df <- data.frame(
delay = 0:max_delay,
pmf = pmf
)
return(delay_df)
}
41 changes: 41 additions & 0 deletions R/handle_neg_vals.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Handle negative values in the reporting triangle
#' @description
#' Takes in a reporting triangle and returns a matrix in the same format
#' as the input triangle, but with negative values of reporting handled via
#' passing them to the subsequent days delay.
#' Modified from https://github.com/KITmetricslab/RESPINOW-Hub/blob/main/code/baseline/functions.R #nolint
#' @param triangle the reporting triangle as a matrix, where rows are the
#' time points and columns are the delays, already truncated to the maximum
#' delay and the number of historical observations
#' @return pos_triangle a positive integer matrix with negative values of
#' reporting handled via passing them to the subsequent days delay
handle_neg_vals <- function(triangle) {
integer_cols <- seq_len(ncol(triangle))
pos_triangle <- triangle
for (i in seq_len(nrow(triangle))) {
to_subtract <- 0
row <- pos_triangle[i, ]
# Loop over the columns starting from the last column back to max delay
# column, and if there is a negative value, we add this to the
# next day and set that one as 0.
for (j in rev(integer_cols)) {
value <- row[[j]]
if (!is.na(value)) {
# Either adds 0 or the previous days negative value
value <- value + to_subtract
if (value < 0) {
# Want to subtract from subsequent day
to_subtract <- value
pos_triangle[i, j] <- 0 # Set the negative value in the RT to 0
} else {
pos_triangle[i, j] <- value
to_subtract <- 0
}
}
}
}
for (col in integer_cols) {
pos_triangle[[col]] <- as.integer(pos_triangle[[col]])
}
return(pos_triangle)
}
49 changes: 49 additions & 0 deletions R/validate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#' Validate triangle
#' @description
#' Various checks to make sure that the reporting triangle passed in to
#' `estimate_delay()` is formatted properly.
#' @param triangle a matrix of values with rows indicating the time points and
#' columns indicating the delays
#' @inheritParams get_delay_estimate
#'
#' @returns NULL, invisibly
validate_triangle <- function(triangle,
max_delay,
n_history) {
if (nrow(triangle) <= ncol(triangle)) {
cli::cli_abort(
message = c(
"Number of observations must be greater than the maximum",
"delay"
)
)
}

if (nrow(triangle) < n_history) {
cli::cli_abort(
message = c(
"Number of observations in input data not sufficient for",
"user specified number of historical observations to use",
"for estimaton."
)
)
}

if (ncol(triangle) < (max_delay + 1)) {
cli::cli_abort(
message = c(
"Number of delays in input data not sufficient for",
"user specified maximum delay"
)
)
}

if ((max_delay < 1 || n_history < 1)) {
cli::cli_abort(
message = c(
"Insufficient `max_delay` or `n_history`, must be greater than or",
" equal to 1."
)
)
}
}
59 changes: 59 additions & 0 deletions man/get_delay_estimate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/handle_neg_vals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/validate_triangle.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion tests/testthat.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
library(testthat)
library(withr)
library(baselinenowcast)

test_results <- test_check("baselinenowcast")
Expand Down
40 changes: 40 additions & 0 deletions tests/testthat/test-get_delay_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
test_that("get_delay_estimate function works correctly", {
set.seed(123)
triangle <- matrix(rpois(30 * 5, lambda = 25), nrow = 30, ncol = 5)

# Test 1: Basic functionality
result <- get_delay_estimate(triangle)
expect_is(result, "data.frame")
expect_identical(as.integer(nrow(result)), as.integer(ncol(triangle)))
expect_identical(colnames(result), c("delay", "pmf"))
expect_true(all(result$pmf >= 0 & result$pmf <= 1))
expect_equal(sum(result$pmf), 1, tolerance = 1e-6)

# Test 2: Custom max_delay
result_max_delay <- get_delay_estimate(triangle, max_delay = 3)
expect_identical(as.integer(nrow(result_max_delay)), 4L)

# Test 3: Custom n_history
result_n_history <- get_delay_estimate(triangle, n_history = 20)
expect_is(result_n_history, "data.frame")

# Test 4: Input validation *These should be more useful error messages*
expect_error(get_delay_estimate(triangle, max_delay = 0))
expect_error(get_delay_estimate(triangle, n_history = 0))
expect_error(get_delay_estimate(triangle, max_delay = 10, n_history = 40))

# Test 5: Handling of missing values
triangle_with_na <- triangle
triangle_with_na[1, 2] <- NA
result_with_na <- get_delay_estimate(triangle_with_na)
expect_is(result_with_na, "data.frame")

# Test 6: Consistency of results
result1 <- get_delay_estimate(triangle)
result2 <- get_delay_estimate(triangle)
expect_identical(result1, result2)

# Test 7: Check that the function errors if its not passed a triangle
triangle_single_day <- triangle[1, ]
expect_error(get_delay_estimate(triangle_single_day))
})
Loading

0 comments on commit 9c21a1b

Please sign in to comment.