diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 991bda2..4564b37 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -49,7 +49,7 @@ jobs:
 
       - uses: r-lib/actions/setup-r-dependencies@v2
         with:
-          extra-packages: any::rcmdcheck
+          extra-packages: any::rcmdcheck, any::testthat
           needs: check
 
       - uses: r-lib/actions/check-r-package@v2
diff --git a/DESCRIPTION b/DESCRIPTION
index ec8bd93..fc1cc94 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -30,9 +30,12 @@ URL: https://github.com/epinowcast/baselinenowcast/
 BugReports: https://github.com/epinowcast/baselinenowcast/issues
 Depends:
     R (>= 4.0.0)
+Imports:
+  cli
 Suggests:
-    dplyr,
+    epinowcast,
     bookdown,
+    dplyr,
     ggplot2,
     spelling,
     rmarkdown,
@@ -44,5 +47,7 @@ Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
 RoxygenNote: 7.3.2
 VignetteBuilder: knitr
+Remotes:
+  epinowcast=epinowcast/epinowcast@v0.3.0
 Config/Needs/hexsticker: hexSticker, sysfonts, ggplot2
 Config/Needs/website: r-lib/pkgdown, epinowcast/enwtheme
diff --git a/NAMESPACE b/NAMESPACE
index e651b94..92938d6 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1 +1,3 @@
 # Generated by roxygen2: do not edit by hand
+
+export(get_delay_estimate)
diff --git a/NEWS.md b/NEWS.md
index 7b180ec..3691513 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,4 @@
 # baselinenowcast 0.0.0.1000
 
+-   Introduced first function to get the delay estimate and an outline of the Getting Started vignette
 -   Added package skeleton.
diff --git a/R/get_delay_estimate.R b/R/get_delay_estimate.R
new file mode 100644
index 0000000..eca844b
--- /dev/null
+++ b/R/get_delay_estimate.R
@@ -0,0 +1,80 @@
+#' Estimate a delay distribution from a reporting triangle
+#' @description
+#' Provides an estimate of the reporting delay as a function
+#' of the delay, based on the reporting triangle and the specified maximum
+#' delay and number of reference date observations to be used in the estimation.
+#' This point estimate of the delay is computed empirically, using an
+#' iterative algorithm starting from the most recent observations. It was
+#' modified from the code originally developed by the Karlsruhe Institute
+#' of Technology RESPINOW German Hospitalization Nowcasting Hub,
+#' Modified from: https://github.com/KITmetricslab/RESPINOW-Hub/blob/7cce3ae2728116e8c8cc0e4ab29074462c24650e/code/baseline/functions.R#L55 #nolint
+#' @param triangle matrix of the reporting triangle, with rows representing
+#' the time points of reference and columns representing the delays
+#' indexed at 0
+#' @param max_delay integer indicating the maximum delay to estimate, in units
+#' of the delay. The default is to use the whole reporting triangle,
+#'  `ncol(triangle) -1`.
+#' @param n_history integer indicating the number of reference dates to be
+#' used in the estimate of the reporting delay, always starting from the most
+#' recent reporting delay. The default is to use the whole reporting triangle,
+#' so `nrow(triangle)-1`
+#' @returns delay_df dataframe of length `max_delay` with columns `delay`
+#' and `pmf`, indicating the point estimate of the empirical probability
+#' mass on each delay
+#' @export
+#' @examples
+#' library(epinowcast)
+#' nat_germany_hosp <-
+#'   germany_covid19_hosp[location == "DE"][age_group == "00+"]
+#' nat_germany_hosp <- enw_filter_report_dates(
+#'   nat_germany_hosp,
+#'   latest_date = "2021-10-01"
+#' )
+#' pobs <- enw_preprocess_data(nat_germany_hosp, max_delay = 21)
+#' triangle_raw <- pobs$reporting_triangle[[1]] |>
+#'   dplyr::select(-`.group`, -reference_date) |>
+#'   as.matrix() |>
+#'   unname()
+#' delay_df <- get_delay_estimate(triangle_raw,
+#'   max_delay = 20,
+#'   n_history = 30
+#' )
+get_delay_estimate <- function(triangle,
+                               max_delay = ncol(triangle) - 1,
+                               n_history = nrow(triangle)) {
+  # Check that the input reporting triangle is formatted properly.
+  validate_triangle(triangle,
+    max_delay = max_delay,
+    n_history = n_history
+  )
+
+  # Filter the triangle down to nrow = n_history + 1, ncol = max_delay
+  nr0 <- nrow(triangle)
+  trunc_triangle <- triangle[(nr0 - n_history + 1):nr0, 1:(max_delay + 1)]
+  rt <- handle_neg_vals(trunc_triangle)
+  n_delays <- ncol(rt)
+  n_dates <- nrow(rt)
+  factor <- vector(length = max_delay - 1)
+  expectation <- rt
+  for (co in 2:(n_delays)) {
+    block_top_left <- rt[1:(n_dates - co + 1), 1:(co - 1), drop = FALSE]
+    block_top <- rt[1:(n_dates - co + 1), co, drop = FALSE]
+    factor[co - 1] <- sum(block_top) / max(sum(block_top_left), 1)
+    block_bottom_left <- expectation[(n_dates - co + 2):n_dates, 1:(co - 1),
+      drop = FALSE
+    ]
+    # We compute the expectation so that we can get the delay estimate
+    expectation[(n_dates - co + 2):n_dates, co] <- factor[co - 1] * rowSums(
+      block_bottom_left
+    )
+  }
+
+  # Use the completed reporting square to get the point estimate of the delay
+  # distribution
+  pmf <- colSums(expectation) / sum(expectation)
+  delay_df <- data.frame(
+    delay = 0:max_delay,
+    pmf = pmf
+  )
+  return(delay_df)
+}
diff --git a/R/handle_neg_vals.R b/R/handle_neg_vals.R
new file mode 100644
index 0000000..b954a35
--- /dev/null
+++ b/R/handle_neg_vals.R
@@ -0,0 +1,41 @@
+#' Handle negative values in the reporting triangle
+#' @description
+#' Takes in a reporting triangle and returns a matrix in the same format
+#' as the input triangle, but with negative values of reporting handled via
+#' passing them to the subsequent days delay.
+#' Modified from https://github.com/KITmetricslab/RESPINOW-Hub/blob/main/code/baseline/functions.R #nolint
+#' @param triangle the reporting triangle as a matrix, where rows are the
+#' time points and columns are the delays, already truncated to the maximum
+#' delay and the number of historical observations
+#' @return pos_triangle a positive integer matrix with negative values of
+#' reporting handled via passing them to the subsequent days delay
+handle_neg_vals <- function(triangle) {
+  integer_cols <- seq_len(ncol(triangle))
+  pos_triangle <- triangle
+  for (i in seq_len(nrow(triangle))) {
+    to_subtract <- 0
+    row <- pos_triangle[i, ]
+    # Loop over the columns starting from the last column back to max delay
+    # column, and if there is a negative value, we add this to the
+    # next day and set that one as 0.
+    for (j in rev(integer_cols)) {
+      value <- row[[j]]
+      if (!is.na(value)) {
+        # Either adds 0 or the previous days negative value
+        value <- value + to_subtract
+        if (value < 0) {
+          # Want to subtract from subsequent day
+          to_subtract <- value
+          pos_triangle[i, j] <- 0 # Set the negative value in the RT to 0
+        } else {
+          pos_triangle[i, j] <- value
+          to_subtract <- 0
+        }
+      }
+    }
+  }
+  for (col in integer_cols) {
+    pos_triangle[[col]] <- as.integer(pos_triangle[[col]])
+  }
+  return(pos_triangle)
+}
diff --git a/R/validate.R b/R/validate.R
new file mode 100644
index 0000000..ec6e04f
--- /dev/null
+++ b/R/validate.R
@@ -0,0 +1,49 @@
+#' Validate triangle
+#' @description
+#' Various checks to make sure that the reporting triangle passed in to
+#' `estimate_delay()` is formatted properly.
+#' @param triangle a matrix of values with rows indicating the time points and
+#' columns indicating the delays
+#' @inheritParams get_delay_estimate
+#'
+#' @returns NULL, invisibly
+validate_triangle <- function(triangle,
+                              max_delay,
+                              n_history) {
+  if (nrow(triangle) <= ncol(triangle)) {
+    cli::cli_abort(
+      message = c(
+        "Number of observations must be greater than the maximum",
+        "delay"
+      )
+    )
+  }
+
+  if (nrow(triangle) < n_history) {
+    cli::cli_abort(
+      message = c(
+        "Number of observations in input data not sufficient for",
+        "user specified number of historical observations to use",
+        "for estimaton."
+      )
+    )
+  }
+
+  if (ncol(triangle) < (max_delay + 1)) {
+    cli::cli_abort(
+      message = c(
+        "Number of delays in input data not sufficient for",
+        "user specified maximum delay"
+      )
+    )
+  }
+
+  if ((max_delay < 1 || n_history < 1)) {
+    cli::cli_abort(
+      message = c(
+        "Insufficient `max_delay` or `n_history`, must be greater than or",
+        " equal to 1."
+      )
+    )
+  }
+}
diff --git a/man/get_delay_estimate.Rd b/man/get_delay_estimate.Rd
new file mode 100644
index 0000000..1b4c740
--- /dev/null
+++ b/man/get_delay_estimate.Rd
@@ -0,0 +1,59 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_delay_estimate.R
+\name{get_delay_estimate}
+\alias{get_delay_estimate}
+\title{Estimate a delay distribution from a reporting triangle}
+\usage{
+get_delay_estimate(
+  triangle,
+  max_delay = ncol(triangle) - 1,
+  n_history = nrow(triangle)
+)
+}
+\arguments{
+\item{triangle}{matrix of the reporting triangle, with rows representing
+the time points of reference and columns representing the delays
+indexed at 0}
+
+\item{max_delay}{integer indicating the maximum delay to estimate, in units
+of the delay. The default is to use the whole reporting triangle,
+\code{ncol(triangle) -1}.}
+
+\item{n_history}{integer indicating the number of reference dates to be
+used in the estimate of the reporting delay, always starting from the most
+recent reporting delay. The default is to use the whole reporting triangle,
+so \code{nrow(triangle)-1}}
+}
+\value{
+delay_df dataframe of length \code{max_delay} with columns \code{delay}
+and \code{pmf}, indicating the point estimate of the empirical probability
+mass on each delay
+}
+\description{
+Provides an estimate of the reporting delay as a function
+of the delay, based on the reporting triangle and the specified maximum
+delay and number of reference date observations to be used in the estimation.
+This point estimate of the delay is computed empirically, using an
+iterative algorithm starting from the most recent observations. It was
+modified from the code originally developed by the Karlsruhe Institute
+of Technology RESPINOW German Hospitalization Nowcasting Hub,
+Modified from: https://github.com/KITmetricslab/RESPINOW-Hub/blob/7cce3ae2728116e8c8cc0e4ab29074462c24650e/code/baseline/functions.R#L55 #nolint
+}
+\examples{
+library(epinowcast)
+nat_germany_hosp <-
+  germany_covid19_hosp[location == "DE"][age_group == "00+"]
+nat_germany_hosp <- enw_filter_report_dates(
+  nat_germany_hosp,
+  latest_date = "2021-10-01"
+)
+pobs <- enw_preprocess_data(nat_germany_hosp, max_delay = 21)
+triangle_raw <- pobs$reporting_triangle[[1]] |>
+  dplyr::select(-`.group`, -reference_date) |>
+  as.matrix() |>
+  unname()
+delay_df <- get_delay_estimate(triangle_raw,
+  max_delay = 20,
+  n_history = 30
+)
+}
diff --git a/man/handle_neg_vals.Rd b/man/handle_neg_vals.Rd
new file mode 100644
index 0000000..e88940d
--- /dev/null
+++ b/man/handle_neg_vals.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/handle_neg_vals.R
+\name{handle_neg_vals}
+\alias{handle_neg_vals}
+\title{Handle negative values in the reporting triangle}
+\usage{
+handle_neg_vals(triangle)
+}
+\arguments{
+\item{triangle}{the reporting triangle as a matrix, where rows are the
+time points and columns are the delays, already truncated to the maximum
+delay and the number of historical observations}
+}
+\value{
+pos_triangle a positive integer matrix with negative values of
+reporting handled via passing them to the subsequent days delay
+}
+\description{
+Takes in a reporting triangle and returns a matrix in the same format
+as the input triangle, but with negative values of reporting handled via
+passing them to the subsequent days delay.
+Modified from https://github.com/KITmetricslab/RESPINOW-Hub/blob/main/code/baseline/functions.R #nolint
+}
diff --git a/man/validate_triangle.Rd b/man/validate_triangle.Rd
new file mode 100644
index 0000000..89055b7
--- /dev/null
+++ b/man/validate_triangle.Rd
@@ -0,0 +1,28 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/validate.R
+\name{validate_triangle}
+\alias{validate_triangle}
+\title{Validate triangle}
+\usage{
+validate_triangle(triangle, max_delay, n_history)
+}
+\arguments{
+\item{triangle}{a matrix of values with rows indicating the time points and
+columns indicating the delays}
+
+\item{max_delay}{integer indicating the maximum delay to estimate, in units
+of the delay. The default is to use the whole reporting triangle,
+\code{ncol(triangle) -1}.}
+
+\item{n_history}{integer indicating the number of reference dates to be
+used in the estimate of the reporting delay, always starting from the most
+recent reporting delay. The default is to use the whole reporting triangle,
+so \code{nrow(triangle)-1}}
+}
+\value{
+NULL, invisibly
+}
+\description{
+Various checks to make sure that the reporting triangle passed in to
+\code{estimate_delay()} is formatted properly.
+}
diff --git a/tests/testthat.R b/tests/testthat.R
index 484df2d..a8230d0 100644
--- a/tests/testthat.R
+++ b/tests/testthat.R
@@ -1,5 +1,4 @@
 library(testthat)
-library(withr)
 library(baselinenowcast)
 
 test_results <- test_check("baselinenowcast")
diff --git a/tests/testthat/test-get_delay_estimate.R b/tests/testthat/test-get_delay_estimate.R
new file mode 100644
index 0000000..08ddb3c
--- /dev/null
+++ b/tests/testthat/test-get_delay_estimate.R
@@ -0,0 +1,40 @@
+test_that("get_delay_estimate function works correctly", {
+  set.seed(123)
+  triangle <- matrix(rpois(30 * 5, lambda = 25), nrow = 30, ncol = 5)
+
+  # Test 1: Basic functionality
+  result <- get_delay_estimate(triangle)
+  expect_is(result, "data.frame")
+  expect_identical(as.integer(nrow(result)), as.integer(ncol(triangle)))
+  expect_identical(colnames(result), c("delay", "pmf"))
+  expect_true(all(result$pmf >= 0 & result$pmf <= 1))
+  expect_equal(sum(result$pmf), 1, tolerance = 1e-6)
+
+  # Test 2: Custom max_delay
+  result_max_delay <- get_delay_estimate(triangle, max_delay = 3)
+  expect_identical(as.integer(nrow(result_max_delay)), 4L)
+
+  # Test 3: Custom n_history
+  result_n_history <- get_delay_estimate(triangle, n_history = 20)
+  expect_is(result_n_history, "data.frame")
+
+  # Test 4: Input validation *These should be more useful error messages*
+  expect_error(get_delay_estimate(triangle, max_delay = 0))
+  expect_error(get_delay_estimate(triangle, n_history = 0))
+  expect_error(get_delay_estimate(triangle, max_delay = 10, n_history = 40))
+
+  # Test 5: Handling of missing values
+  triangle_with_na <- triangle
+  triangle_with_na[1, 2] <- NA
+  result_with_na <- get_delay_estimate(triangle_with_na)
+  expect_is(result_with_na, "data.frame")
+
+  # Test 6: Consistency of results
+  result1 <- get_delay_estimate(triangle)
+  result2 <- get_delay_estimate(triangle)
+  expect_identical(result1, result2)
+
+  # Test 7: Check that the function errors if its not passed a triangle
+  triangle_single_day <- triangle[1, ]
+  expect_error(get_delay_estimate(triangle_single_day))
+})
diff --git a/vignettes/baselinenowcast.Rmd b/vignettes/baselinenowcast.Rmd
index 8bb2a5f..0b39662 100644
--- a/vignettes/baselinenowcast.Rmd
+++ b/vignettes/baselinenowcast.Rmd
@@ -20,12 +20,299 @@ vignette: >
 
 # Introduction
 
+Incomplete reporting of epidemiological data at recent times can result in case
+count data that is right-truncated.
+Right-truncated case counts can be misleading to interpret at face-value, as
+they will typically show a decline in the number of reported observations in
+the most recent time points.
+These are the time points where the highest proportion of the data has yet to
+be observed in the dataset.
 
+The imputation of the cases that will eventually be observed up until the
+current time is referred to as a nowcast.
 
+A number of methods have been developed to nowcast epidemiological case count
+data.
+
+The purpose of `baselinenowcast` is to provide a nowcast computed directly from
+the most recent observations to estimate a delay distribution empirically, and
+apply that to the partially observed data to generate a nowcast.
+
+In the below section, we will describe an example of a nowcasting problem, and
+demonstrate how to use `baselinenowcast` to estimate a delay distribution from
+the data and apply that estimate to generate a probabilistic nowcast.
+
+# Packages
+
+As well as the `baselinenowcast` package this vignette also uses `epinowcast`,
+`ggplot2`, and `dplyr`.
+The installation of `epinowcast` is not required for using the package,
+however, its pre and post-processing functions provide a lot of the data
+wrangling needed to set up the nowcasting problem.
+We note that no components of the vignette require installing `CmdStan`,
+which is a downstream dependency of `epinowcast`.
+We will just be using the `R` components of `epinowcast`, which can be
+installed using the example lines of code below, so there is no need to
+additionally install `CmdStan`.
 ```{r setup, message = FALSE}
+# Installing epinowcast
+# install.packages( #nolint
+#  "epinowcast", repos = "https://epinowcast.r-universe.dev" #nolint
+# ) #nolint
 # Load packages
-library(baselinenowcast) # nolint
-library(ggplot2) # nolint
+library(baselinenowcast)
+library(epinowcast)
+library(ggplot2)
+library(dplyr)
 # Set seed for reproducibility
 set.seed(123)
 ```
+
+# Data
+
+Nowcasting of right-truncated case counts involves the estimation of reporting
+delays for recently reported data.
+For this, we need case counts both by when they were diagnosed (often
+called "reference date") and by when they were
+reported (i.e. when administratively recorded via public health surveillance;
+often called "report date"). The difference between the reference date and the
+report date is the reporting delay.
+For this quick start, we use daily level data from the
+[Robert Koch Institute via the Germany Nowcasting hub](https://github.com/KITmetricslab/hospitalization-nowcast-hub/wiki/Truth-data#role-an-definition-of-the-seven-day-hospitalization-incidence).
+These data represent hospitalisation counts by date of positive test and date
+of test report in Germany up to October 1, 2021.
+
+# Filtering
+
+We will filter the data to just look at the national-level data, for all age
+groups.
+We will pretend that we are making a nowcast as of July 1, 2021, therefore we
+will exclude all reference dates and report dates after that date.
+`germany_covid19_hosp` is provided as package data from `epinowcast`
+```{r load-data}
+observed_long <- epinowcast::germany_covid19_hosp |>
+  enw_filter_report_dates(latest_date = "2021-07-01") |>
+  dplyr::filter(location == "DE", age_group == "00+")
+```
+
+Let's start by plotting all of the values reported on the day of admissions,
+and then compare that to what we will eventually observe as of the final date
+in the complete dataset.
+
+# Plotting
+
+The red line shows the cumulative number of confirmed admissions on each report
+date, across all delays, using the data available as of July 1, 2021.
+It demonstrates the characteristic behavior of right-truncation.
+This is because we have not yet observed the data that will become available
+for the longer delays at recent time points.
+
+Our task will be to estimate what the "final" cumulative number of cases will
+at each reference date, observed as of the "fully observed" data on October
+2021.
+```{r plot-the-data-by-reference-date}
+ggplot() +
+  geom_line(
+    # Plot the data summed across reporting days
+    data = observed_long |>
+      group_by(reference_date) |>
+      summarise(total_confirmed = sum(confirm)),
+    aes(x = reference_date, y = total_confirmed), color = "darkred"
+  ) +
+  geom_line(
+    data = epinowcast::germany_covid19_hosp |>
+      dplyr::filter(location == "DE", age_group == "00+") |>
+      group_by(reference_date) |>
+      summarise(total_confirmed = sum(confirm)) |>
+      dplyr::filter(reference_date <= "2021-07-01"),
+    aes(x = reference_date, y = total_confirmed), color = "black"
+  ) +
+  theme_bw() +
+  xlab("Reference date") +
+  ylab("Confirmed admissions") +
+  scale_y_continuous(trans = "log10") +
+  ggtitle("Comparing all admissions by reference date and those with no delay")
+```
+Here the black line represents the quantity we will evaluate our nowcast
+against, and the red line represents the data we have available to us up until
+July 1st, 2021.
+
+# Pre-processing
+
+In order to compute a nowcast for this data, we will need to start by creating
+what we call a reporting triangle.
+This is a wide formatted dataframe where each row represents one of the time
+points being reference and each column represents the delay, starting from 0 up
+until the maximum delay.
+The entries represent the number of new cases assigned to that reference
+time point with a particular delay.
+The reporting triangle will be used to estimate the delay distribution, or the
+proportion of the final number of cases reported on a particular delay.
+Since this data is both reported and referenced daily, we will use the time
+scale of days to create the reporting triangle, but the delay and the reference
+date can have any temporal granularity.
+
+In this example, we will both fit our delay distribution, and apply it to
+generate a nowcast using the same data, the national level data from Germany
+for all age groups.
+However, these components can be separated, so for example, we could use the
+national level data for all age groups to estimate a delay distribution, and
+then we could apply that elsewhere, for example to the data stratified by
+age group and location.
+This type of "borrowing" from another training dataset can be really useful
+when you have low counts or relatively sparse data, which is likely to be the
+case for smaller populations.
+
+In the below sections, we will specify our nowcast date, the maximum delay,
+and the number of observations by reference date that we want to use to
+estimate the delay distribution.
+We recommend choosing the maximum delay and number of historical observations
+based on an exploratory data analysis, as these specifications will change
+significantly depending on the dataset.
+```{r user-specificatons}
+nowcast_date <- "2021-07-01"
+# Specify the maximum delay, which will determine the length of your delay
+# distribution. Empirical data outside this delay window will not be used for
+# training.
+max_delay <- 40
+# Specify the number of reference dates to use to estimate the delay
+# distribution.Note this assumes you want the most recent observations
+# (though we can consider changing this)
+n_history <- 60
+```
+
+Next we will use the `epinowcast` function, `enw_preprocess_data()` and the
+data in the form a of a long tidy dataframe indexed by reference date and
+report date and filtered to the strata we are interested in, to generate a
+reporting triangle.
+```{r use-epinowcast-preprocessing}
+# Noting that this is the only way epinowcast preprocessing would work --
+# return to this later. IDate was throwing errors if we used the dplyr processed
+# observed long above.
+observed_long <- epinowcast::germany_covid19_hosp[location == "DE"][age_group == "00+"] |> # nolint
+  enw_filter_report_dates(latest_date = nowcast_date) |>
+  enw_filter_reference_dates(include_days = n_history - 1)
+head(observed_long)
+# Get the reporting triangle, adding an additional day because epinowcast
+# we want the max_delay + 1 entries since 0 is a valid delay.
+pobs <- enw_preprocess_data(
+  obs = observed_long,
+  max_delay = max_delay + 1
+)
+
+triangle_full <- pobs$reporting_triangle[[1]]
+head(triangle_full)
+```
+
+# Estimate delay
+
+Now that we have the reporting triangle, we are now ready to pass it in to the
+`baselinenowcast` package to estimate the delay distribution.
+We will tell the function the maximum delay and the number of observations we
+want to use, though the default will be to use the whole reporting triangle.
+If the reporting triangle is too small for the user-specified delays and number
+of training observations, the function will throw an error.
+We only want to pass in the reporting triangle (for a single group!) to this
+function.
+If reference date are repeated because the reporting triangle contains multiple
+strata, the function will throw an error.
+
+The `get_delay_estimate()` function expects the following inputs:
+  - `triangle`: a matrix with the reporting triangle for a single strata. Here
+  the rows represent the time points and the columns represent the observed
+  delays, starting at 0.
+  - `max_delay`: an integer indicating the maximum delay to estimate. This must
+  be less than or equal to the number of rows in `triangle` minus 1, since we
+  assume `triangle` is indexed at 0.
+  - `n_history`: an integer indicating the number of observations by reference
+  date to use to fit the delay distribution. This must be less than or equal to
+  the number of rows in `triangle`.
+`enw_preprocess()` returns a `triangle` with the columns `.group` and
+`.reference_date` and delays indicated by the column names. While we will
+eventually write methods that will map from other input formats such as this,
+we will start by demonstrating the functionality on only the matrix of the
+reporting triangle.
+
+```{r estimate-delay}
+triangle <- triangle_full |>
+  dplyr::select(-`.group`, -reference_date) |> # remove unnecessary columns
+  as.matrix() |>
+  unname()
+head(triangle)
+
+delay_df <- get_delay_estimate(
+  triangle = triangle,
+  max_delay = max_delay,
+  n_history = n_history
+)
+
+ggplot(delay_df) +
+  geom_line(aes(x = delay, y = cumsum(pmf))) +
+  xlab("Delay") +
+  ylab("Cumulativee proportion reported") +
+  ggtitle("Empirical point estimate of cumulative proportion reported by delay") + # nolint
+  theme_bw()
+
+ggplot(delay_df) +
+  geom_line(aes(x = delay, y = pmf)) +
+  xlab("Delay") +
+  ylab("Proportion reported") +
+  ggtitle("Empirical point estimate of proportion reported by delay") +
+  theme_bw()
+```
+
+# Apply delay to generate point nowcast
+
+The next step in our workflow is to take the estimated delay distribution and
+apply it to the partially observed reporting triangle, generating an estimate
+of the number of new cases confirmed at each reference date and delay.
+This will generate a point estimate of what we can call the reporting square,
+which is the complete set of reference dates and delays.
+In this case, we will be applying the delay to the same reporting triangle we
+used to generate the estimate, but this doesn't always have to be the case.
+The reporting triangle we are applying it to must have the same `max_delay`
+as the delay estimate.
+
+```{r}
+# point_reporting_square <- apply_delay( #nolint
+#   triangle_to_nowcast = triangle,#nolint
+#   delay_df#nolint
+# )#nolint
+```
+
+# Estimate uncertainty
+
+So far, we've demonstrated how to generate a point estimate of a nowcast.
+We would like to generate a probabilistic nowcasts, which will require
+estimating the empirical error between the expected observations and the
+actual observations when applying the delay distribution.
+This assumes a negative binomial observation model.
+
+*Note, we could probably create objects of different classes for the outputs
+of each of these functions, but I am not sure this is necessary*
+
+```{r}
+# disp_params <- estimate_uncertainty( #nolint
+# triangle_to_nowcast = triangle #nolint
+# delay_pmf = delay_df$pmf #nolint
+# ) #nolint
+```
+
+# Generate probabilistic nowcast
+
+Now that we have estimated the dispersion, we can use the delay distribution,
+the reporting triangle that we want to nowcast, and the dispersion parameters to
+generate a probabilistic nowcast.
+
+```{r}
+# nowcast <- generate_nowcast( #nolint
+# triangle_to_nowcast = triangle, #nolint
+# delay_df = delay_df, #nolint
+# disp_params = disp_params #nolint
+# ) #nolint
+```
+
+In the above, we might want to make sure we include the necessary metadata if
+the user passes it in, so we could write methods that dispatch on objects of
+a particular type so that they spit back out things with dates if those were
+inputted.