-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #5 from epinowcast/4-setup-vignette
Issue #4: Setup vignette
- Loading branch information
Showing
13 changed files
with
619 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,3 @@ | ||
# Generated by roxygen2: do not edit by hand | ||
|
||
export(get_delay_estimate) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
# baselinenowcast 0.0.0.1000 | ||
|
||
- Introduced first function to get the delay estimate and an outline of the Getting Started vignette | ||
- Added package skeleton. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#' Estimate a delay distribution from a reporting triangle | ||
#' @description | ||
#' Provides an estimate of the reporting delay as a function | ||
#' of the delay, based on the reporting triangle and the specified maximum | ||
#' delay and number of reference date observations to be used in the estimation. | ||
#' This point estimate of the delay is computed empirically, using an | ||
#' iterative algorithm starting from the most recent observations. It was | ||
#' modified from the code originally developed by the Karlsruhe Institute | ||
#' of Technology RESPINOW German Hospitalization Nowcasting Hub, | ||
#' Modified from: https://github.com/KITmetricslab/RESPINOW-Hub/blob/7cce3ae2728116e8c8cc0e4ab29074462c24650e/code/baseline/functions.R#L55 #nolint | ||
#' @param triangle matrix of the reporting triangle, with rows representing | ||
#' the time points of reference and columns representing the delays | ||
#' indexed at 0 | ||
#' @param max_delay integer indicating the maximum delay to estimate, in units | ||
#' of the delay. The default is to use the whole reporting triangle, | ||
#' `ncol(triangle) -1`. | ||
#' @param n_history integer indicating the number of reference dates to be | ||
#' used in the estimate of the reporting delay, always starting from the most | ||
#' recent reporting delay. The default is to use the whole reporting triangle, | ||
#' so `nrow(triangle)-1` | ||
#' @returns delay_df dataframe of length `max_delay` with columns `delay` | ||
#' and `pmf`, indicating the point estimate of the empirical probability | ||
#' mass on each delay | ||
#' @export | ||
#' @examples | ||
#' library(epinowcast) | ||
#' nat_germany_hosp <- | ||
#' germany_covid19_hosp[location == "DE"][age_group == "00+"] | ||
#' nat_germany_hosp <- enw_filter_report_dates( | ||
#' nat_germany_hosp, | ||
#' latest_date = "2021-10-01" | ||
#' ) | ||
#' pobs <- enw_preprocess_data(nat_germany_hosp, max_delay = 21) | ||
#' triangle_raw <- pobs$reporting_triangle[[1]] |> | ||
#' dplyr::select(-`.group`, -reference_date) |> | ||
#' as.matrix() |> | ||
#' unname() | ||
#' delay_df <- get_delay_estimate(triangle_raw, | ||
#' max_delay = 20, | ||
#' n_history = 30 | ||
#' ) | ||
get_delay_estimate <- function(triangle, | ||
max_delay = ncol(triangle) - 1, | ||
n_history = nrow(triangle)) { | ||
# Check that the input reporting triangle is formatted properly. | ||
validate_triangle(triangle, | ||
max_delay = max_delay, | ||
n_history = n_history | ||
) | ||
|
||
# Filter the triangle down to nrow = n_history + 1, ncol = max_delay | ||
nr0 <- nrow(triangle) | ||
trunc_triangle <- triangle[(nr0 - n_history + 1):nr0, 1:(max_delay + 1)] | ||
rt <- handle_neg_vals(trunc_triangle) | ||
n_delays <- ncol(rt) | ||
n_dates <- nrow(rt) | ||
factor <- vector(length = max_delay - 1) | ||
expectation <- rt | ||
for (co in 2:(n_delays)) { | ||
block_top_left <- rt[1:(n_dates - co + 1), 1:(co - 1), drop = FALSE] | ||
block_top <- rt[1:(n_dates - co + 1), co, drop = FALSE] | ||
factor[co - 1] <- sum(block_top) / max(sum(block_top_left), 1) | ||
block_bottom_left <- expectation[(n_dates - co + 2):n_dates, 1:(co - 1), | ||
drop = FALSE | ||
] | ||
# We compute the expectation so that we can get the delay estimate | ||
expectation[(n_dates - co + 2):n_dates, co] <- factor[co - 1] * rowSums( | ||
block_bottom_left | ||
) | ||
} | ||
|
||
# Use the completed reporting square to get the point estimate of the delay | ||
# distribution | ||
pmf <- colSums(expectation) / sum(expectation) | ||
delay_df <- data.frame( | ||
delay = 0:max_delay, | ||
pmf = pmf | ||
) | ||
return(delay_df) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#' Handle negative values in the reporting triangle | ||
#' @description | ||
#' Takes in a reporting triangle and returns a matrix in the same format | ||
#' as the input triangle, but with negative values of reporting handled via | ||
#' passing them to the subsequent days delay. | ||
#' Modified from https://github.com/KITmetricslab/RESPINOW-Hub/blob/main/code/baseline/functions.R #nolint | ||
#' @param triangle the reporting triangle as a matrix, where rows are the | ||
#' time points and columns are the delays, already truncated to the maximum | ||
#' delay and the number of historical observations | ||
#' @return pos_triangle a positive integer matrix with negative values of | ||
#' reporting handled via passing them to the subsequent days delay | ||
handle_neg_vals <- function(triangle) { | ||
integer_cols <- seq_len(ncol(triangle)) | ||
pos_triangle <- triangle | ||
for (i in seq_len(nrow(triangle))) { | ||
to_subtract <- 0 | ||
row <- pos_triangle[i, ] | ||
# Loop over the columns starting from the last column back to max delay | ||
# column, and if there is a negative value, we add this to the | ||
# next day and set that one as 0. | ||
for (j in rev(integer_cols)) { | ||
value <- row[[j]] | ||
if (!is.na(value)) { | ||
# Either adds 0 or the previous days negative value | ||
value <- value + to_subtract | ||
if (value < 0) { | ||
# Want to subtract from subsequent day | ||
to_subtract <- value | ||
pos_triangle[i, j] <- 0 # Set the negative value in the RT to 0 | ||
} else { | ||
pos_triangle[i, j] <- value | ||
to_subtract <- 0 | ||
} | ||
} | ||
} | ||
} | ||
for (col in integer_cols) { | ||
pos_triangle[[col]] <- as.integer(pos_triangle[[col]]) | ||
} | ||
return(pos_triangle) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
#' Validate triangle | ||
#' @description | ||
#' Various checks to make sure that the reporting triangle passed in to | ||
#' `estimate_delay()` is formatted properly. | ||
#' @param triangle a matrix of values with rows indicating the time points and | ||
#' columns indicating the delays | ||
#' @inheritParams get_delay_estimate | ||
#' | ||
#' @returns NULL, invisibly | ||
validate_triangle <- function(triangle, | ||
max_delay, | ||
n_history) { | ||
if (nrow(triangle) <= ncol(triangle)) { | ||
cli::cli_abort( | ||
message = c( | ||
"Number of observations must be greater than the maximum", | ||
"delay" | ||
) | ||
) | ||
} | ||
|
||
if (nrow(triangle) < n_history) { | ||
cli::cli_abort( | ||
message = c( | ||
"Number of observations in input data not sufficient for", | ||
"user specified number of historical observations to use", | ||
"for estimaton." | ||
) | ||
) | ||
} | ||
|
||
if (ncol(triangle) < (max_delay + 1)) { | ||
cli::cli_abort( | ||
message = c( | ||
"Number of delays in input data not sufficient for", | ||
"user specified maximum delay" | ||
) | ||
) | ||
} | ||
|
||
if ((max_delay < 1 || n_history < 1)) { | ||
cli::cli_abort( | ||
message = c( | ||
"Insufficient `max_delay` or `n_history`, must be greater than or", | ||
" equal to 1." | ||
) | ||
) | ||
} | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
library(testthat) | ||
library(withr) | ||
library(baselinenowcast) | ||
|
||
test_results <- test_check("baselinenowcast") | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
test_that("get_delay_estimate function works correctly", { | ||
set.seed(123) | ||
triangle <- matrix(rpois(30 * 5, lambda = 25), nrow = 30, ncol = 5) | ||
|
||
# Test 1: Basic functionality | ||
result <- get_delay_estimate(triangle) | ||
expect_is(result, "data.frame") | ||
expect_identical(as.integer(nrow(result)), as.integer(ncol(triangle))) | ||
expect_identical(colnames(result), c("delay", "pmf")) | ||
expect_true(all(result$pmf >= 0 & result$pmf <= 1)) | ||
expect_equal(sum(result$pmf), 1, tolerance = 1e-6) | ||
|
||
# Test 2: Custom max_delay | ||
result_max_delay <- get_delay_estimate(triangle, max_delay = 3) | ||
expect_identical(as.integer(nrow(result_max_delay)), 4L) | ||
|
||
# Test 3: Custom n_history | ||
result_n_history <- get_delay_estimate(triangle, n_history = 20) | ||
expect_is(result_n_history, "data.frame") | ||
|
||
# Test 4: Input validation *These should be more useful error messages* | ||
expect_error(get_delay_estimate(triangle, max_delay = 0)) | ||
expect_error(get_delay_estimate(triangle, n_history = 0)) | ||
expect_error(get_delay_estimate(triangle, max_delay = 10, n_history = 40)) | ||
|
||
# Test 5: Handling of missing values | ||
triangle_with_na <- triangle | ||
triangle_with_na[1, 2] <- NA | ||
result_with_na <- get_delay_estimate(triangle_with_na) | ||
expect_is(result_with_na, "data.frame") | ||
|
||
# Test 6: Consistency of results | ||
result1 <- get_delay_estimate(triangle) | ||
result2 <- get_delay_estimate(triangle) | ||
expect_identical(result1, result2) | ||
|
||
# Test 7: Check that the function errors if its not passed a triangle | ||
triangle_single_day <- triangle[1, ] | ||
expect_error(get_delay_estimate(triangle_single_day)) | ||
}) |
Oops, something went wrong.