From 52f24b0d085b43f98334faed31e0f4afc1f188df Mon Sep 17 00:00:00 2001
From: Juan Montenegro Torres
 <60274234+Juanmontenegro99@users.noreply.github.com>
Date: Sat, 26 Aug 2023 22:37:48 -0500
Subject: [PATCH] #26 modified function and test

---
 R/spatiotemporal.R                   | 381 ++++++++++++++-------------
 man/endemic_outliers.Rd              |  20 +-
 man/morans_index.Rd                  |  16 +-
 tests/testthat/test-spatiotemporal.R |  44 ++--
 4 files changed, 235 insertions(+), 226 deletions(-)

diff --git a/R/spatiotemporal.R b/R/spatiotemporal.R
index 34dff5b6..d8d9021a 100644
--- a/R/spatiotemporal.R
+++ b/R/spatiotemporal.R
@@ -1,186 +1,195 @@
-#' Neighborhoods from real travel distances
-#'
-#' @description Function to build neighborhoods from real travel distances
-#' inside Colombia by land or river transport.
-#'
-#' @param query_vector Codes of the municipalities to consider for the
-#' neighborhoods.
-#' @param threshold Maximum traveling time around each municipality.
-
-#' @return neighborhood object according to the introduced threshold.
-#'
-#' @examples
-#' \dontrun{
-#' query_vector <- c(5001, 5002, 5004, 5021, 5030, 5615, 5607)
-#' neighborhood(query_vector, 2)
-#' }
-#'
-#' @export
-neighborhoods <- function(query_vector, threshold = 2) {
-  stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector)))
-  path <- system.file("data", "distance_matrix.rda", package = "epiCo")
-  load(path)
-  distance_matrix <- distance_matrix
-  distance <- distance_matrix[
-    which(row.names(distance_matrix) %in%
-      query_vector),
-    which(names(distance_matrix) %in%
-      query_vector)
-  ]
-  excluded <- query_vector[!query_vector %in% rownames(distance)]
-  for (i in excluded) {
-    warning("municipality ", i, " was not found")
-  }
-  adjacency_matrix <- as.matrix(distance <= threshold)
-  list_weights <- spdep::mat2listw(adjacency_matrix)
-  neighborhoods <- list_weights$neighbours
-  return(neighborhoods)
-}
-
-#' Calculate spatial correlation of given municipalities in an incidence_rate
-#' object.
-#'
-#' @description Function to calculate spatial autocorrelation via Moran's Index
-#' from a given incidence_rate object grouped by municipality.
-#'
-#' @importFrom magrittr %>%
-#'
-#' @param incidence_rate Incidence rate object with only one observation for a
-#' group of municipalities.
-#' @param threshold Maximum traveling time around each municipality.
-#' @param plot if TRUE, returns a plot of influential observations in the
-#' Moran's plot.
-
-#' @return List of Moran's I clustering analysis, giving the quadrant of each
-#' observation, influential values.
-#'
-#' @examples
-#' \dontrun{
-#' morans_index(incidence_rate, 2, FALSE)
-#' }
-#' @export
-morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) {
-  stopifnot(
-    "`incidence_rate` must have observations for only one given date" =
-      ncol(incidence_rate$rates) == length(incidence_rate$rates)
-  )
-  path_1 <- system.file("data", "divipola_table.rda", package = "epiCo")
-  load(path_1)
-  divipola_table <- divipola_table
-  # Match with DIVIPOLA order
-  incidence_rate_ordered <- incidence_rate[, order(match(
-    colnames(incidence_rate$counts), divipola_table$COD_MPIO
-  ))]
-  mpios <- colnames(incidence_rate_ordered$counts)
-
-  # Logarithmic transformation
-  incidence_rate_log <- log(incidence_rate_ordered$rates)
-  mpios_filtered <- as.numeric(mpios[which(incidence_rate_log > -Inf)])
-  incidence_rate_log <- incidence_rate_log[which(incidence_rate_log > -Inf)]
-
-  # Neighborhood structure
-  nb <- neighborhoods(mpios_filtered, threshold)
-  weights <- spdep::nb2listw(nb, style = "W")
-
-  # Moran's I
-  morans_i <- spdep::moran.plot(incidence_rate_log,
-    listw = weights, plot = FALSE
-  )
-  moran_data_frame <- as.data.frame(morans_i)
-  # Clusters
-  moran_data_frame <- dplyr::mutate(moran_data_frame,
-    cluster = dplyr::case_when(
-      x > mean(x) & wx >
-        mean(wx) ~ "HH",
-      x < mean(x) & wx <
-        mean(wx) ~ "LL",
-      x > mean(x) & wx <
-        mean(wx) ~ "HL",
-      x < mean(x) & wx >
-        mean(wx) ~ "LH"
-    )
-  )
-  inf_mpios <- moran_data_frame[which(moran_data_frame$is_inf), ]
-  morans_index <- c(
-    list(municipios = moran_data_frame$labels),
-    list(quadrant = moran_data_frame$cluster),
-    list(influential = moran_data_frame$is_inf),
-    list(logIncidence = moran_data_frame$x),
-    list(lagIncidence = moran_data_frame$wx)
-  )
-  if (!all(is.na(morans_index$quadrant))) {
-    cat(paste("Influential municipalities are:", "\n"))
-    # Influential observations
-    for (i in seq_len(nrow(inf_mpios))) {
-      if (!is.na(inf_mpios$cluster[i])) {
-        # nolint start: string_boundary_linter
-        relative_incidence <- ifelse(substr(
-          inf_mpios$cluster[i], 1, 1
-        ) == "H", "high", "low")
-        # nolint end: string_boundary_linter
-        relative_correlation <- ifelse(substr(
-          inf_mpios$cluster[i], 2, 2
-        ) == "H", "high", "low")
-        cat(paste(
-          inf_mpios$labels[i], "with", relative_incidence,
-          "incidence and", relative_correlation, "spatial correlation",
-          "\n"
-        ))
-      }
-    }
-  }
-  # Plot
-  if (plot) {
-    if (!all(is.na(morans_index$quadrant))) {
-      path_2 <- system.file("data", "spatial_polygons_col_2.rda",
-        package = "epiCo"
-      )
-      load(path_2)
-      spatial_polygons_col_2 <- spatial_polygons_col_2
-      pal <- leaflet::colorFactor(
-        palette = c(
-          "#ba0001", "#357a38", "#2c7c94",
-          "#fbe45b"
-        ),
-        domain = c("HH", "LL", "LH", "HL"),
-        ordered = TRUE
-      )
-      pal_test <- pal(c("LL", "HH"))
-      rm(pal_test)
-      shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in%
-        as.integer(inf_mpios$labels), ]
-      shapes_plot <- shapes[, order(match(
-        as.integer(inf_mpios$labels),
-        shapes$MPIO_CDPMP
-      ))]
-      shapes_plot$CLUSTER <- inf_mpios$cluster
-      shapes_plot$MPIO_CDPMP <- inf_mpios$labels
-      shapes_plot$NOM_MPIO <- divipola_table$NOM_MPIO[
-        divipola_table$COD_MPIO %in% shapes_plot$MPIO_CDPMP
-      ]
-      # shapes_ordered
-      popup_data <- paste0(
-        "<b>", "Municipality Name: ", "</b>",
-        shapes_plot$NOM_MPIO, "<br>",
-        "<b>", "Municipality Code: ", "</b>",
-        shapes_plot$MPIO_CDPMP, "<br>",
-        "<b>", "Cluster: ", "</b>",
-        shapes_plot$CLUSTER, "<br>"
-      )
-      leaflet::leaflet(shapes_plot) %>%
-        leaflet::addTiles() %>%
-        leaflet::addPolygons(
-          stroke = TRUE,
-          weight = 1,
-          smoothFactor = 0.2,
-          fillColor = ~ pal(CLUSTER),
-          popup = popup_data,
-          color = "white",
-          fillOpacity = 0.75
-        )
-    } else {
-      warning("There are no influential municipalities to plot")
-    }
-  }
-  return(morans_index)
-}
+#' Neighborhoods from real travel distances
+#'
+#' @description Function to build neighborhoods from real travel distances
+#' inside Colombia by land or river transport.
+#'
+#' @param query_vector Codes of the municipalities to consider for the
+#' neighborhoods.
+#' @param threshold Maximum traveling time around each municipality.
+
+#' @return neighborhood object according to the introduced threshold.
+#'
+#' @examples
+#' \dontrun{
+#' query_vector <- c(5001, 5002, 5004, 5021, 5030, 5615, 5607)
+#' neighborhood(query_vector, 2)
+#' }
+#'
+#' @export
+neighborhoods <- function(query_vector, threshold = 2) {
+  stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector)))
+  path <- system.file("data", "distance_matrix.rda", package = "epiCo")
+  load(path)
+  distance_matrix <- distance_matrix
+  distance <- distance_matrix[
+    which(row.names(distance_matrix) %in%
+      query_vector),
+    which(names(distance_matrix) %in%
+      query_vector)
+  ]
+  excluded <- query_vector[!query_vector %in% rownames(distance)]
+  for (i in excluded) {
+    warning("municipality ", i, " was not found")
+  }
+  adjacency_matrix <- as.matrix(distance <= threshold)
+  list_weights <- spdep::mat2listw(adjacency_matrix)
+  neighborhoods <- list_weights$neighbours
+  return(neighborhoods)
+}
+
+#' Calculate spatial correlation of given municipalities in an incidence_rate
+#' object.
+#'
+#' @description Function to calculate spatial autocorrelation via Moran's Index
+#' from a given incidence_rate object grouped by municipality.
+#'
+#' @importFrom magrittr %>%
+#'
+#' @param incidence_historic An incidence object with the historic weekly
+#' observations
+#' @param level Administration level at which incidence counts are grouped.
+#' @param scale Scale to consider when calculating the incidence_rate.
+#' @param threshold Maximum traveling time around each municipality.
+#' @param plot if TRUE, returns a plot of influential observations in the
+#' Moran's plot.
+
+#' @return List of Moran's I clustering analysis, giving the quadrant of each
+#' observation, influential values.
+#'
+#' @examples
+#' \dontrun{
+#' morans_index(incidence_rate, 2, FALSE)
+#' }
+#' @export
+morans_index <- function(incidence_historic, level, scale = 100000,
+                         threshold = 2, plot = TRUE) {
+  stopifnot(
+    "`incidence_object` must have incidence class" =
+      (inherits(incidence_historic, "incidence"))
+  )
+
+  incidence_rate <- incidence_rate(
+    incidence_object = incidence_historic,
+    level = level, scale = scale
+  )
+
+  path_1 <- system.file("data", "divipola_table.rda", package = "epiCo")
+  load(path_1)
+  divipola_table <- divipola_table
+  # Match with DIVIPOLA order
+  incidence_rate_ordered <- incidence_rate[, order(match(
+    colnames(incidence_rate$counts), divipola_table$COD_MPIO
+  ))]
+  mpios <- colnames(incidence_rate_ordered$counts)
+
+  # Logarithmic transformation
+  incidence_rate_log <- log(incidence_rate_ordered$rates)
+  mpios_filtered <- as.numeric(mpios[which(incidence_rate_log > -Inf)])
+  incidence_rate_log <- incidence_rate_log[which(incidence_rate_log > -Inf)]
+
+  # Neighborhood structure
+  nb <- neighborhoods(mpios_filtered, threshold)
+  weights <- spdep::nb2listw(nb, style = "W")
+
+  # Moran's I
+  morans_i <- spdep::moran.plot(incidence_rate_log,
+    listw = weights, plot = FALSE
+  )
+  moran_data_frame <- as.data.frame(morans_i)
+  # Clusters
+  moran_data_frame <- dplyr::mutate(moran_data_frame,
+    cluster = dplyr::case_when(
+      x > mean(x) & wx >
+        mean(wx) ~ "HH",
+      x < mean(x) & wx <
+        mean(wx) ~ "LL",
+      x > mean(x) & wx <
+        mean(wx) ~ "HL",
+      x < mean(x) & wx >
+        mean(wx) ~ "LH"
+    )
+  )
+  inf_mpios <- moran_data_frame[which(moran_data_frame$is_inf), ]
+  morans_index <- c(
+    list(municipios = moran_data_frame$labels),
+    list(quadrant = moran_data_frame$cluster),
+    list(influential = moran_data_frame$is_inf),
+    list(logIncidence = moran_data_frame$x),
+    list(lagIncidence = moran_data_frame$wx)
+  )
+  if (!all(is.na(morans_index$quadrant))) {
+    cat(paste("Influential municipalities are:", "\n"))
+    # Influential observations
+    for (i in seq_len(nrow(inf_mpios))) {
+      if (!is.na(inf_mpios$cluster[i])) {
+        # nolint start: string_boundary_linter
+        relative_incidence <- ifelse(substr(
+          inf_mpios$cluster[i], 1, 1
+        ) == "H", "high", "low")
+        # nolint end: string_boundary_linter
+        relative_correlation <- ifelse(substr(
+          inf_mpios$cluster[i], 2, 2
+        ) == "H", "high", "low")
+        cat(paste(
+          inf_mpios$labels[i], "with", relative_incidence,
+          "incidence and", relative_correlation, "spatial correlation",
+          "\n"
+        ))
+      }
+    }
+  }
+  # Plot
+  if (plot) {
+    if (!all(is.na(morans_index$quadrant))) {
+      path_2 <- system.file("data", "spatial_polygons_col_2.rda",
+        package = "epiCo"
+      )
+      load(path_2)
+      spatial_polygons_col_2 <- spatial_polygons_col_2
+      pal <- leaflet::colorFactor(
+        palette = c(
+          "#ba0001", "#357a38", "#2c7c94",
+          "#fbe45b"
+        ),
+        domain = c("HH", "LL", "LH", "HL"),
+        ordered = TRUE
+      )
+      pal_test <- pal(c("LL", "HH"))
+      rm(pal_test)
+      shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in%
+        as.integer(inf_mpios$labels), ]
+      shapes_plot <- shapes[, order(match(
+        as.integer(inf_mpios$labels),
+        shapes$MPIO_CDPMP
+      ))]
+      shapes_plot$CLUSTER <- inf_mpios$cluster
+      shapes_plot$MPIO_CDPMP <- inf_mpios$labels
+      shapes_plot$NOM_MPIO <- divipola_table$NOM_MPIO[
+        divipola_table$COD_MPIO %in% shapes_plot$MPIO_CDPMP
+      ]
+      # shapes_ordered
+      popup_data <- paste0(
+        "<b>", "Municipality Name: ", "</b>",
+        shapes_plot$NOM_MPIO, "<br>",
+        "<b>", "Municipality Code: ", "</b>",
+        shapes_plot$MPIO_CDPMP, "<br>",
+        "<b>", "Cluster: ", "</b>",
+        shapes_plot$CLUSTER, "<br>"
+      )
+      leaflet::leaflet(shapes_plot) %>%
+        leaflet::addTiles() %>%
+        leaflet::addPolygons(
+          stroke = TRUE,
+          weight = 1,
+          smoothFactor = 0.2,
+          fillColor = ~ pal(CLUSTER),
+          popup = popup_data,
+          color = "white",
+          fillOpacity = 0.75
+        )
+    } else {
+      warning("There are no influential municipalities to plot")
+    }
+  }
+  return(morans_index)
+}
diff --git a/man/endemic_outliers.Rd b/man/endemic_outliers.Rd
index 41a6d041..0c2c2950 100644
--- a/man/endemic_outliers.Rd
+++ b/man/endemic_outliers.Rd
@@ -1,17 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/endemic_channel.R, R/endemic_outliers.R
+% Please edit documentation in R/endemic_channel.R
 \name{endemic_outliers}
 \alias{endemic_outliers}
 \title{Modifies the historic incidence to handle with the observations of epidemic
 years}
 \usage{
-endemic_outliers(
-  historic,
-  outlier_years,
-  outliers_handling,
-  geom_method = "shifted"
-)
-
 endemic_outliers(
   historic,
   outlier_years,
@@ -41,14 +34,9 @@ geometric mean and take into account
 calculation, see: geom_mean}
 }
 \value{
-A modified historic incidence
-
 A modified historic incidence
 }
 \description{
-Function that modifies an historic incidence by including,
-ignoring, or replacing the observations of epidemic years
-
 Function that modifies an historic incidence by including,
 ignoring, or replacing the observations of epidemic years
 }
@@ -59,10 +47,4 @@ endemic_outliers(historic, outlier_years, outliers_handling,
 )
 }
 
-\dontrun{
-endemic_outliers(historic, outlier_years, outliers_handling,
-  geom_method = "shifted"
-)
-}
-
 }
diff --git a/man/morans_index.Rd b/man/morans_index.Rd
index 69e56184..10a43516 100644
--- a/man/morans_index.Rd
+++ b/man/morans_index.Rd
@@ -5,11 +5,21 @@
 \title{Calculate spatial correlation of given municipalities in an incidence_rate
 object.}
 \usage{
-morans_index(incidence_rate, threshold = 2, plot = TRUE)
+morans_index(
+  incidence_historic,
+  level,
+  scale = 1e+05,
+  threshold = 2,
+  plot = TRUE
+)
 }
 \arguments{
-\item{incidence_rate}{Incidence rate object with only one observation for a
-group of municipalities.}
+\item{incidence_historic}{An incidence object with the historic weekly
+observations}
+
+\item{level}{Administration level at which incidence counts are grouped.}
+
+\item{scale}{Scale to consider when calculating the incidence_rate.}
 
 \item{threshold}{Maximum traveling time around each municipality.}
 
diff --git a/tests/testthat/test-spatiotemporal.R b/tests/testthat/test-spatiotemporal.R
index b8909f00..b2f619e0 100644
--- a/tests/testthat/test-spatiotemporal.R
+++ b/tests/testthat/test-spatiotemporal.R
@@ -24,7 +24,7 @@ test_that("Neighborhoods are built as expected", {
 
 ##  Incidence rate objects for morans Index
 
-# Functional incidence rate
+# Functional incidence object
 set.seed(3)
 sample_groups <- c(5001, 5148, 5615, 5088, 5266, 5440, 5318, 5368, 5659)
 sample_data <- sample(sample_groups, 200, replace = TRUE)
@@ -36,9 +36,8 @@ incidence_object <- incidence::incidence(sample_df$CASES,
   interval = "month",
   group = sample_df$GROUP
 )
-incidence_rate <- incidence_rate(incidence_object, 2)
 
-# Failing incidence rate
+# Failing incidence object
 sample_data_2 <- as.integer(sample(1:50, 200, replace = TRUE))
 sample_dates_2 <- as.Date("2018-12-31") + sample_data_2
 sample_groups_2 <- sample(c(5001, 5264, 5615, 5607), 200, replace = TRUE)
@@ -47,25 +46,34 @@ incidence_object_2 <- incidence::incidence(sample_df_2$CASES,
   interval = "weeks",
   group = sample_df_2$GROUP
 )
-incidence_rate_2 <- incidence_rate(incidence_object_2, 2)
 
-# Extra incidence rate
-sample_df_3 <- sample_df[!sample_df$GROUP %in% c(5001, 5148), ]
-incidence_object_3 <- incidence::incidence(sample_df_3$CASES,
-  interval = "weeks",
-  group = sample_df_3$GROUP
-)
-incidence_rate_3 <- incidence_rate(incidence_object_3, 2)
 
 test_that("Morans Index errors and warnings are thrown", {
-  expect_error(morans_index(incidence_object, 2, FALSE))
-  expect_error(morans_index(incidence_rate_2, 2, FALSE))
-  expect_warning(morans_index(incidence_rate, 12, TRUE), type = "list")
+  expect_error(morans_index(c(20, 53, 90, 63), 2, plot = FALSE))
+  expect_error(morans_index(incidence_object_2,
+    level = 2,
+    threshold = 2, plot = FALSE
+  ))
+  expect_warning(morans_index(incidence_object,
+    level = 2,
+    threshold = 12, plot = TRUE
+  ), type = "list")
 })
 
 test_that("Morans Index works as expected", {
-  expect_type(morans_index(incidence_rate, 1, FALSE), type = "list")
-  expect_length(morans_index(incidence_rate, 1, FALSE), n = 5)
-  expect_type(morans_index(incidence_rate, 2, TRUE), type = "list")
-  expect_type(morans_index(incidence_rate, 12, TRUE), type = "list")
+  expect_type(morans_index(incidence_object,
+    level = 2, threshold = 1, plot = FALSE
+  ), type = "list")
+  expect_length(morans_index(incidence_object,
+    level = 2,
+    threshold = 1, plot = FALSE
+  ), n = 5)
+  expect_type(morans_index(incidence_object,
+    level = 2,
+    threshold = 2, plot = TRUE
+  ), type = "list")
+  expect_type(morans_index(incidence_object,
+    level = 2,
+    threshold = 12, plot = TRUE
+  ), type = "list")
 })