From 52f24b0d085b43f98334faed31e0f4afc1f188df Mon Sep 17 00:00:00 2001
From: Juan Montenegro Torres
<60274234+Juanmontenegro99@users.noreply.github.com>
Date: Sat, 26 Aug 2023 22:37:48 -0500
Subject: [PATCH] #26 modified function and test
---
R/spatiotemporal.R | 381 ++++++++++++++-------------
man/endemic_outliers.Rd | 20 +-
man/morans_index.Rd | 16 +-
tests/testthat/test-spatiotemporal.R | 44 ++--
4 files changed, 235 insertions(+), 226 deletions(-)
diff --git a/R/spatiotemporal.R b/R/spatiotemporal.R
index 34dff5b6..d8d9021a 100644
--- a/R/spatiotemporal.R
+++ b/R/spatiotemporal.R
@@ -1,186 +1,195 @@
-#' Neighborhoods from real travel distances
-#'
-#' @description Function to build neighborhoods from real travel distances
-#' inside Colombia by land or river transport.
-#'
-#' @param query_vector Codes of the municipalities to consider for the
-#' neighborhoods.
-#' @param threshold Maximum traveling time around each municipality.
-
-#' @return neighborhood object according to the introduced threshold.
-#'
-#' @examples
-#' \dontrun{
-#' query_vector <- c(5001, 5002, 5004, 5021, 5030, 5615, 5607)
-#' neighborhood(query_vector, 2)
-#' }
-#'
-#' @export
-neighborhoods <- function(query_vector, threshold = 2) {
- stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector)))
- path <- system.file("data", "distance_matrix.rda", package = "epiCo")
- load(path)
- distance_matrix <- distance_matrix
- distance <- distance_matrix[
- which(row.names(distance_matrix) %in%
- query_vector),
- which(names(distance_matrix) %in%
- query_vector)
- ]
- excluded <- query_vector[!query_vector %in% rownames(distance)]
- for (i in excluded) {
- warning("municipality ", i, " was not found")
- }
- adjacency_matrix <- as.matrix(distance <= threshold)
- list_weights <- spdep::mat2listw(adjacency_matrix)
- neighborhoods <- list_weights$neighbours
- return(neighborhoods)
-}
-
-#' Calculate spatial correlation of given municipalities in an incidence_rate
-#' object.
-#'
-#' @description Function to calculate spatial autocorrelation via Moran's Index
-#' from a given incidence_rate object grouped by municipality.
-#'
-#' @importFrom magrittr %>%
-#'
-#' @param incidence_rate Incidence rate object with only one observation for a
-#' group of municipalities.
-#' @param threshold Maximum traveling time around each municipality.
-#' @param plot if TRUE, returns a plot of influential observations in the
-#' Moran's plot.
-
-#' @return List of Moran's I clustering analysis, giving the quadrant of each
-#' observation, influential values.
-#'
-#' @examples
-#' \dontrun{
-#' morans_index(incidence_rate, 2, FALSE)
-#' }
-#' @export
-morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) {
- stopifnot(
- "`incidence_rate` must have observations for only one given date" =
- ncol(incidence_rate$rates) == length(incidence_rate$rates)
- )
- path_1 <- system.file("data", "divipola_table.rda", package = "epiCo")
- load(path_1)
- divipola_table <- divipola_table
- # Match with DIVIPOLA order
- incidence_rate_ordered <- incidence_rate[, order(match(
- colnames(incidence_rate$counts), divipola_table$COD_MPIO
- ))]
- mpios <- colnames(incidence_rate_ordered$counts)
-
- # Logarithmic transformation
- incidence_rate_log <- log(incidence_rate_ordered$rates)
- mpios_filtered <- as.numeric(mpios[which(incidence_rate_log > -Inf)])
- incidence_rate_log <- incidence_rate_log[which(incidence_rate_log > -Inf)]
-
- # Neighborhood structure
- nb <- neighborhoods(mpios_filtered, threshold)
- weights <- spdep::nb2listw(nb, style = "W")
-
- # Moran's I
- morans_i <- spdep::moran.plot(incidence_rate_log,
- listw = weights, plot = FALSE
- )
- moran_data_frame <- as.data.frame(morans_i)
- # Clusters
- moran_data_frame <- dplyr::mutate(moran_data_frame,
- cluster = dplyr::case_when(
- x > mean(x) & wx >
- mean(wx) ~ "HH",
- x < mean(x) & wx <
- mean(wx) ~ "LL",
- x > mean(x) & wx <
- mean(wx) ~ "HL",
- x < mean(x) & wx >
- mean(wx) ~ "LH"
- )
- )
- inf_mpios <- moran_data_frame[which(moran_data_frame$is_inf), ]
- morans_index <- c(
- list(municipios = moran_data_frame$labels),
- list(quadrant = moran_data_frame$cluster),
- list(influential = moran_data_frame$is_inf),
- list(logIncidence = moran_data_frame$x),
- list(lagIncidence = moran_data_frame$wx)
- )
- if (!all(is.na(morans_index$quadrant))) {
- cat(paste("Influential municipalities are:", "\n"))
- # Influential observations
- for (i in seq_len(nrow(inf_mpios))) {
- if (!is.na(inf_mpios$cluster[i])) {
- # nolint start: string_boundary_linter
- relative_incidence <- ifelse(substr(
- inf_mpios$cluster[i], 1, 1
- ) == "H", "high", "low")
- # nolint end: string_boundary_linter
- relative_correlation <- ifelse(substr(
- inf_mpios$cluster[i], 2, 2
- ) == "H", "high", "low")
- cat(paste(
- inf_mpios$labels[i], "with", relative_incidence,
- "incidence and", relative_correlation, "spatial correlation",
- "\n"
- ))
- }
- }
- }
- # Plot
- if (plot) {
- if (!all(is.na(morans_index$quadrant))) {
- path_2 <- system.file("data", "spatial_polygons_col_2.rda",
- package = "epiCo"
- )
- load(path_2)
- spatial_polygons_col_2 <- spatial_polygons_col_2
- pal <- leaflet::colorFactor(
- palette = c(
- "#ba0001", "#357a38", "#2c7c94",
- "#fbe45b"
- ),
- domain = c("HH", "LL", "LH", "HL"),
- ordered = TRUE
- )
- pal_test <- pal(c("LL", "HH"))
- rm(pal_test)
- shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in%
- as.integer(inf_mpios$labels), ]
- shapes_plot <- shapes[, order(match(
- as.integer(inf_mpios$labels),
- shapes$MPIO_CDPMP
- ))]
- shapes_plot$CLUSTER <- inf_mpios$cluster
- shapes_plot$MPIO_CDPMP <- inf_mpios$labels
- shapes_plot$NOM_MPIO <- divipola_table$NOM_MPIO[
- divipola_table$COD_MPIO %in% shapes_plot$MPIO_CDPMP
- ]
- # shapes_ordered
- popup_data <- paste0(
- "", "Municipality Name: ", "",
- shapes_plot$NOM_MPIO, "
",
- "", "Municipality Code: ", "",
- shapes_plot$MPIO_CDPMP, "
",
- "", "Cluster: ", "",
- shapes_plot$CLUSTER, "
"
- )
- leaflet::leaflet(shapes_plot) %>%
- leaflet::addTiles() %>%
- leaflet::addPolygons(
- stroke = TRUE,
- weight = 1,
- smoothFactor = 0.2,
- fillColor = ~ pal(CLUSTER),
- popup = popup_data,
- color = "white",
- fillOpacity = 0.75
- )
- } else {
- warning("There are no influential municipalities to plot")
- }
- }
- return(morans_index)
-}
+#' Neighborhoods from real travel distances
+#'
+#' @description Function to build neighborhoods from real travel distances
+#' inside Colombia by land or river transport.
+#'
+#' @param query_vector Codes of the municipalities to consider for the
+#' neighborhoods.
+#' @param threshold Maximum traveling time around each municipality.
+
+#' @return neighborhood object according to the introduced threshold.
+#'
+#' @examples
+#' \dontrun{
+#' query_vector <- c(5001, 5002, 5004, 5021, 5030, 5615, 5607)
+#' neighborhood(query_vector, 2)
+#' }
+#'
+#' @export
+neighborhoods <- function(query_vector, threshold = 2) {
+ stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector)))
+ path <- system.file("data", "distance_matrix.rda", package = "epiCo")
+ load(path)
+ distance_matrix <- distance_matrix
+ distance <- distance_matrix[
+ which(row.names(distance_matrix) %in%
+ query_vector),
+ which(names(distance_matrix) %in%
+ query_vector)
+ ]
+ excluded <- query_vector[!query_vector %in% rownames(distance)]
+ for (i in excluded) {
+ warning("municipality ", i, " was not found")
+ }
+ adjacency_matrix <- as.matrix(distance <= threshold)
+ list_weights <- spdep::mat2listw(adjacency_matrix)
+ neighborhoods <- list_weights$neighbours
+ return(neighborhoods)
+}
+
+#' Calculate spatial correlation of given municipalities in an incidence_rate
+#' object.
+#'
+#' @description Function to calculate spatial autocorrelation via Moran's Index
+#' from a given incidence_rate object grouped by municipality.
+#'
+#' @importFrom magrittr %>%
+#'
+#' @param incidence_historic An incidence object with the historic weekly
+#' observations
+#' @param level Administration level at which incidence counts are grouped.
+#' @param scale Scale to consider when calculating the incidence_rate.
+#' @param threshold Maximum traveling time around each municipality.
+#' @param plot if TRUE, returns a plot of influential observations in the
+#' Moran's plot.
+
+#' @return List of Moran's I clustering analysis, giving the quadrant of each
+#' observation, influential values.
+#'
+#' @examples
+#' \dontrun{
+#' morans_index(incidence_rate, 2, FALSE)
+#' }
+#' @export
+morans_index <- function(incidence_historic, level, scale = 100000,
+ threshold = 2, plot = TRUE) {
+ stopifnot(
+ "`incidence_object` must have incidence class" =
+ (inherits(incidence_historic, "incidence"))
+ )
+
+ incidence_rate <- incidence_rate(
+ incidence_object = incidence_historic,
+ level = level, scale = scale
+ )
+
+ path_1 <- system.file("data", "divipola_table.rda", package = "epiCo")
+ load(path_1)
+ divipola_table <- divipola_table
+ # Match with DIVIPOLA order
+ incidence_rate_ordered <- incidence_rate[, order(match(
+ colnames(incidence_rate$counts), divipola_table$COD_MPIO
+ ))]
+ mpios <- colnames(incidence_rate_ordered$counts)
+
+ # Logarithmic transformation
+ incidence_rate_log <- log(incidence_rate_ordered$rates)
+ mpios_filtered <- as.numeric(mpios[which(incidence_rate_log > -Inf)])
+ incidence_rate_log <- incidence_rate_log[which(incidence_rate_log > -Inf)]
+
+ # Neighborhood structure
+ nb <- neighborhoods(mpios_filtered, threshold)
+ weights <- spdep::nb2listw(nb, style = "W")
+
+ # Moran's I
+ morans_i <- spdep::moran.plot(incidence_rate_log,
+ listw = weights, plot = FALSE
+ )
+ moran_data_frame <- as.data.frame(morans_i)
+ # Clusters
+ moran_data_frame <- dplyr::mutate(moran_data_frame,
+ cluster = dplyr::case_when(
+ x > mean(x) & wx >
+ mean(wx) ~ "HH",
+ x < mean(x) & wx <
+ mean(wx) ~ "LL",
+ x > mean(x) & wx <
+ mean(wx) ~ "HL",
+ x < mean(x) & wx >
+ mean(wx) ~ "LH"
+ )
+ )
+ inf_mpios <- moran_data_frame[which(moran_data_frame$is_inf), ]
+ morans_index <- c(
+ list(municipios = moran_data_frame$labels),
+ list(quadrant = moran_data_frame$cluster),
+ list(influential = moran_data_frame$is_inf),
+ list(logIncidence = moran_data_frame$x),
+ list(lagIncidence = moran_data_frame$wx)
+ )
+ if (!all(is.na(morans_index$quadrant))) {
+ cat(paste("Influential municipalities are:", "\n"))
+ # Influential observations
+ for (i in seq_len(nrow(inf_mpios))) {
+ if (!is.na(inf_mpios$cluster[i])) {
+ # nolint start: string_boundary_linter
+ relative_incidence <- ifelse(substr(
+ inf_mpios$cluster[i], 1, 1
+ ) == "H", "high", "low")
+ # nolint end: string_boundary_linter
+ relative_correlation <- ifelse(substr(
+ inf_mpios$cluster[i], 2, 2
+ ) == "H", "high", "low")
+ cat(paste(
+ inf_mpios$labels[i], "with", relative_incidence,
+ "incidence and", relative_correlation, "spatial correlation",
+ "\n"
+ ))
+ }
+ }
+ }
+ # Plot
+ if (plot) {
+ if (!all(is.na(morans_index$quadrant))) {
+ path_2 <- system.file("data", "spatial_polygons_col_2.rda",
+ package = "epiCo"
+ )
+ load(path_2)
+ spatial_polygons_col_2 <- spatial_polygons_col_2
+ pal <- leaflet::colorFactor(
+ palette = c(
+ "#ba0001", "#357a38", "#2c7c94",
+ "#fbe45b"
+ ),
+ domain = c("HH", "LL", "LH", "HL"),
+ ordered = TRUE
+ )
+ pal_test <- pal(c("LL", "HH"))
+ rm(pal_test)
+ shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in%
+ as.integer(inf_mpios$labels), ]
+ shapes_plot <- shapes[, order(match(
+ as.integer(inf_mpios$labels),
+ shapes$MPIO_CDPMP
+ ))]
+ shapes_plot$CLUSTER <- inf_mpios$cluster
+ shapes_plot$MPIO_CDPMP <- inf_mpios$labels
+ shapes_plot$NOM_MPIO <- divipola_table$NOM_MPIO[
+ divipola_table$COD_MPIO %in% shapes_plot$MPIO_CDPMP
+ ]
+ # shapes_ordered
+ popup_data <- paste0(
+ "", "Municipality Name: ", "",
+ shapes_plot$NOM_MPIO, "
",
+ "", "Municipality Code: ", "",
+ shapes_plot$MPIO_CDPMP, "
",
+ "", "Cluster: ", "",
+ shapes_plot$CLUSTER, "
"
+ )
+ leaflet::leaflet(shapes_plot) %>%
+ leaflet::addTiles() %>%
+ leaflet::addPolygons(
+ stroke = TRUE,
+ weight = 1,
+ smoothFactor = 0.2,
+ fillColor = ~ pal(CLUSTER),
+ popup = popup_data,
+ color = "white",
+ fillOpacity = 0.75
+ )
+ } else {
+ warning("There are no influential municipalities to plot")
+ }
+ }
+ return(morans_index)
+}
diff --git a/man/endemic_outliers.Rd b/man/endemic_outliers.Rd
index 41a6d041..0c2c2950 100644
--- a/man/endemic_outliers.Rd
+++ b/man/endemic_outliers.Rd
@@ -1,17 +1,10 @@
% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/endemic_channel.R, R/endemic_outliers.R
+% Please edit documentation in R/endemic_channel.R
\name{endemic_outliers}
\alias{endemic_outliers}
\title{Modifies the historic incidence to handle with the observations of epidemic
years}
\usage{
-endemic_outliers(
- historic,
- outlier_years,
- outliers_handling,
- geom_method = "shifted"
-)
-
endemic_outliers(
historic,
outlier_years,
@@ -41,14 +34,9 @@ geometric mean and take into account
calculation, see: geom_mean}
}
\value{
-A modified historic incidence
-
A modified historic incidence
}
\description{
-Function that modifies an historic incidence by including,
-ignoring, or replacing the observations of epidemic years
-
Function that modifies an historic incidence by including,
ignoring, or replacing the observations of epidemic years
}
@@ -59,10 +47,4 @@ endemic_outliers(historic, outlier_years, outliers_handling,
)
}
-\dontrun{
-endemic_outliers(historic, outlier_years, outliers_handling,
- geom_method = "shifted"
-)
-}
-
}
diff --git a/man/morans_index.Rd b/man/morans_index.Rd
index 69e56184..10a43516 100644
--- a/man/morans_index.Rd
+++ b/man/morans_index.Rd
@@ -5,11 +5,21 @@
\title{Calculate spatial correlation of given municipalities in an incidence_rate
object.}
\usage{
-morans_index(incidence_rate, threshold = 2, plot = TRUE)
+morans_index(
+ incidence_historic,
+ level,
+ scale = 1e+05,
+ threshold = 2,
+ plot = TRUE
+)
}
\arguments{
-\item{incidence_rate}{Incidence rate object with only one observation for a
-group of municipalities.}
+\item{incidence_historic}{An incidence object with the historic weekly
+observations}
+
+\item{level}{Administration level at which incidence counts are grouped.}
+
+\item{scale}{Scale to consider when calculating the incidence_rate.}
\item{threshold}{Maximum traveling time around each municipality.}
diff --git a/tests/testthat/test-spatiotemporal.R b/tests/testthat/test-spatiotemporal.R
index b8909f00..b2f619e0 100644
--- a/tests/testthat/test-spatiotemporal.R
+++ b/tests/testthat/test-spatiotemporal.R
@@ -24,7 +24,7 @@ test_that("Neighborhoods are built as expected", {
## Incidence rate objects for morans Index
-# Functional incidence rate
+# Functional incidence object
set.seed(3)
sample_groups <- c(5001, 5148, 5615, 5088, 5266, 5440, 5318, 5368, 5659)
sample_data <- sample(sample_groups, 200, replace = TRUE)
@@ -36,9 +36,8 @@ incidence_object <- incidence::incidence(sample_df$CASES,
interval = "month",
group = sample_df$GROUP
)
-incidence_rate <- incidence_rate(incidence_object, 2)
-# Failing incidence rate
+# Failing incidence object
sample_data_2 <- as.integer(sample(1:50, 200, replace = TRUE))
sample_dates_2 <- as.Date("2018-12-31") + sample_data_2
sample_groups_2 <- sample(c(5001, 5264, 5615, 5607), 200, replace = TRUE)
@@ -47,25 +46,34 @@ incidence_object_2 <- incidence::incidence(sample_df_2$CASES,
interval = "weeks",
group = sample_df_2$GROUP
)
-incidence_rate_2 <- incidence_rate(incidence_object_2, 2)
-# Extra incidence rate
-sample_df_3 <- sample_df[!sample_df$GROUP %in% c(5001, 5148), ]
-incidence_object_3 <- incidence::incidence(sample_df_3$CASES,
- interval = "weeks",
- group = sample_df_3$GROUP
-)
-incidence_rate_3 <- incidence_rate(incidence_object_3, 2)
test_that("Morans Index errors and warnings are thrown", {
- expect_error(morans_index(incidence_object, 2, FALSE))
- expect_error(morans_index(incidence_rate_2, 2, FALSE))
- expect_warning(morans_index(incidence_rate, 12, TRUE), type = "list")
+ expect_error(morans_index(c(20, 53, 90, 63), 2, plot = FALSE))
+ expect_error(morans_index(incidence_object_2,
+ level = 2,
+ threshold = 2, plot = FALSE
+ ))
+ expect_warning(morans_index(incidence_object,
+ level = 2,
+ threshold = 12, plot = TRUE
+ ), type = "list")
})
test_that("Morans Index works as expected", {
- expect_type(morans_index(incidence_rate, 1, FALSE), type = "list")
- expect_length(morans_index(incidence_rate, 1, FALSE), n = 5)
- expect_type(morans_index(incidence_rate, 2, TRUE), type = "list")
- expect_type(morans_index(incidence_rate, 12, TRUE), type = "list")
+ expect_type(morans_index(incidence_object,
+ level = 2, threshold = 1, plot = FALSE
+ ), type = "list")
+ expect_length(morans_index(incidence_object,
+ level = 2,
+ threshold = 1, plot = FALSE
+ ), n = 5)
+ expect_type(morans_index(incidence_object,
+ level = 2,
+ threshold = 2, plot = TRUE
+ ), type = "list")
+ expect_type(morans_index(incidence_object,
+ level = 2,
+ threshold = 12, plot = TRUE
+ ), type = "list")
})