Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue #4: Setup vignette #5

Merged
merged 26 commits into from
Jan 31, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ jobs:

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
extra-packages: any::rcmdcheck, any::testthat
needs: check

- uses: r-lib/actions/check-r-package@v2
Expand Down
7 changes: 6 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,12 @@ URL: https://github.com/epinowcast/baselinenowcast/
BugReports: https://github.com/epinowcast/baselinenowcast/issues
Depends:
R (>= 4.0.0)
Imports:
cli
Suggests:
dplyr,
epinowcast,
bookdown,
dplyr,
ggplot2,
spelling,
rmarkdown,
Expand All @@ -44,5 +47,7 @@ Encoding: UTF-8
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
VignetteBuilder: knitr
Remotes:
epinowcast=epinowcast/epinowcast@v0.3.0
Config/Needs/hexsticker: hexSticker, sysfonts, ggplot2
Config/Needs/website: r-lib/pkgdown, epinowcast/enwtheme
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1 +1,3 @@
# Generated by roxygen2: do not edit by hand

export(get_delay_estimate)
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
# baselinenowcast 0.0.0.1000

- Introduced first function to get the delay estimate and an outline of the Getting Started vignette
- Added package skeleton.
80 changes: 80 additions & 0 deletions R/get_delay_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#' Estimate a delay distribution from a reporting triangle
#' @description
#' Provides an estimate of the reporting delay as a function
#' of the delay, based on the reporting triangle and the specified maximum
#' delay and number of reference date observations to be used in the estimation.
#' This point estimate of the delay is computed empirically, using an
#' iterative algorithm starting from the most recent observations. It was
#' modified from the code originally developed by the Karlsruhe Institute
#' of Technology RESPINOW German Hospitalization Nowcasting Hub,
#' Modified from: https://github.com/KITmetricslab/RESPINOW-Hub/blob/7cce3ae2728116e8c8cc0e4ab29074462c24650e/code/baseline/functions.R#L55 #nolint
#' @param triangle matrix of the reporting triangle, with rows representing
#' the time points of reference and columns representing the delays
#' indexed at 0
#' @param max_delay integer indicating the maximum delay to estimate, in units
#' of the delay. The default is to use the whole reporting triangle,
#' `ncol(triangle) -1`.
#' @param n_history integer indicating the number of reference dates to be
#' used in the estimate of the reporting delay, always starting from the most
#' recent reporting delay. The default is to use the whole reporting triangle,
#' so `nrow(triangle)-1`
#' @returns delay_df dataframe of length `max_delay` with columns `delay`
#' and `pmf`, indicating the point estimate of the empirical probability
#' mass on each delay
#' @export
#' @examples
#' library(epinowcast)
#' nat_germany_hosp <-
#' germany_covid19_hosp[location == "DE"][age_group == "00+"]
#' nat_germany_hosp <- enw_filter_report_dates(
#' nat_germany_hosp,
#' latest_date = "2021-10-01"
#' )
#' pobs <- enw_preprocess_data(nat_germany_hosp, max_delay = 21)
#' triangle_raw <- pobs$reporting_triangle[[1]] |>
#' dplyr::select(-`.group`, -reference_date) |>
#' as.matrix() |>
#' unname()
#' delay_df <- get_delay_estimate(triangle_raw,
#' max_delay = 20,
#' n_history = 30
#' )
get_delay_estimate <- function(triangle,
max_delay = ncol(triangle) - 1,
n_history = nrow(triangle)) {
# Check that the input reporting triangle is formatted properly.
validate_triangle(triangle,
max_delay = max_delay,
n_history = n_history
)

# Filter the triangle down to nrow = n_history + 1, ncol = max_delay
nr0 <- nrow(triangle)
trunc_triangle <- triangle[(nr0 - n_history + 1):nr0, 1:(max_delay + 1)]
rt <- handle_neg_vals(trunc_triangle)
n_delays <- ncol(rt)
n_dates <- nrow(rt)
factor <- vector(length = max_delay - 1)
expectation <- rt
for (co in 2:(n_delays)) {
block_top_left <- rt[1:(n_dates - co + 1), 1:(co - 1), drop = FALSE]
block_top <- rt[1:(n_dates - co + 1), co, drop = FALSE]
factor[co - 1] <- sum(block_top) / max(sum(block_top_left), 1)
block_bottom_left <- expectation[(n_dates - co + 2):n_dates, 1:(co - 1),
drop = FALSE
]
# We compute the expectation so that we can get the delay estimate
expectation[(n_dates - co + 2):n_dates, co] <- factor[co - 1] * rowSums(
block_bottom_left
)
}

# Use the completed reporting square to get the point estimate of the delay
# distribution
pmf <- colSums(expectation) / sum(expectation)
delay_df <- data.frame(
delay = 0:max_delay,
pmf = pmf
)
return(delay_df)
}
41 changes: 41 additions & 0 deletions R/handle_neg_vals.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#' Handle negative values in the reporting triangle
#' @description
#' Takes in a reporting triangle and returns a matrix in the same format
#' as the input triangle, but with negative values of reporting handled via
#' passing them to the subsequent days delay.
#' Modified from https://github.com/KITmetricslab/RESPINOW-Hub/blob/main/code/baseline/functions.R #nolint
#' @param triangle the reporting triangle as a matrix, where rows are the
#' time points and columns are the delays, already truncated to the maximum
#' delay and the number of historical observations
#' @return pos_triangle a positive integer matrix with negative values of
#' reporting handled via passing them to the subsequent days delay
handle_neg_vals <- function(triangle) {
integer_cols <- seq_len(ncol(triangle))
pos_triangle <- triangle
for (i in seq_len(nrow(triangle))) {
to_subtract <- 0
row <- pos_triangle[i, ]
# Loop over the columns starting from the last column back to max delay
# column, and if there is a negative value, we add this to the
# next day and set that one as 0.
for (j in rev(integer_cols)) {
value <- row[[j]]
if (!is.na(value)) {
# Either adds 0 or the previous days negative value
value <- value + to_subtract
if (value < 0) {
# Want to subtract from subsequent day
to_subtract <- value
pos_triangle[i, j] <- 0 # Set the negative value in the RT to 0
} else {
pos_triangle[i, j] <- value
to_subtract <- 0
}
}
}
}
for (col in integer_cols) {
pos_triangle[[col]] <- as.integer(pos_triangle[[col]])
}
return(pos_triangle)
}
49 changes: 49 additions & 0 deletions R/validate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
#' Validate triangle
#' @description
#' Various checks to make sure that the reporting triangle passed in to
#' `estimate_delay()` is formatted properly.
#' @param triangle a matrix of values with rows indicating the time points and
#' columns indicating the delays
#' @inheritParams get_delay_estimate
#'
#' @returns NULL, invisibly
validate_triangle <- function(triangle,
max_delay,
n_history) {
if (nrow(triangle) <= ncol(triangle)) {
cli::cli_abort(
message = c(
"Number of observations must be greater than the maximum",
"delay"
)
)
}

if (nrow(triangle) < n_history) {
cli::cli_abort(
message = c(
"Number of observations in input data not sufficient for",
"user specified number of historical observations to use",
"for estimaton."
)
)
}

if (ncol(triangle) < (max_delay + 1)) {
cli::cli_abort(
message = c(
"Number of delays in input data not sufficient for",
"user specified maximum delay"
)
)
}

if ((max_delay < 1 || n_history < 1)) {
cli::cli_abort(
message = c(
"Insufficient `max_delay` or `n_history`, must be greater than or",
" equal to 1."
)
)
}
}
59 changes: 59 additions & 0 deletions man/get_delay_estimate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/handle_neg_vals.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

28 changes: 28 additions & 0 deletions man/validate_triangle.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion tests/testthat.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
library(testthat)
library(withr)
library(baselinenowcast)

test_results <- test_check("baselinenowcast")
Expand Down
40 changes: 40 additions & 0 deletions tests/testthat/test-get_delay_estimate.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
test_that("get_delay_estimate function works correctly", {
set.seed(123)
triangle <- matrix(rpois(30 * 5, lambda = 25), nrow = 30, ncol = 5)

# Test 1: Basic functionality
result <- get_delay_estimate(triangle)
expect_is(result, "data.frame")
expect_identical(as.integer(nrow(result)), as.integer(ncol(triangle)))
expect_identical(colnames(result), c("delay", "pmf"))
expect_true(all(result$pmf >= 0 & result$pmf <= 1))
expect_equal(sum(result$pmf), 1, tolerance = 1e-6)

# Test 2: Custom max_delay
result_max_delay <- get_delay_estimate(triangle, max_delay = 3)
expect_identical(as.integer(nrow(result_max_delay)), 4L)

# Test 3: Custom n_history
result_n_history <- get_delay_estimate(triangle, n_history = 20)
expect_is(result_n_history, "data.frame")

# Test 4: Input validation *These should be more useful error messages*
expect_error(get_delay_estimate(triangle, max_delay = 0))
expect_error(get_delay_estimate(triangle, n_history = 0))
expect_error(get_delay_estimate(triangle, max_delay = 10, n_history = 40))

# Test 5: Handling of missing values
triangle_with_na <- triangle
triangle_with_na[1, 2] <- NA
result_with_na <- get_delay_estimate(triangle_with_na)
expect_is(result_with_na, "data.frame")

# Test 6: Consistency of results
result1 <- get_delay_estimate(triangle)
result2 <- get_delay_estimate(triangle)
expect_identical(result1, result2)

# Test 7: Check that the function errors if its not passed a triangle
triangle_single_day <- triangle[1, ]
expect_error(get_delay_estimate(triangle_single_day))
})
Loading
Loading