From 52f24b0d085b43f98334faed31e0f4afc1f188df Mon Sep 17 00:00:00 2001 From: Juan Montenegro Torres <60274234+Juanmontenegro99@users.noreply.github.com> Date: Sat, 26 Aug 2023 22:37:48 -0500 Subject: [PATCH] #26 modified function and test --- R/spatiotemporal.R | 381 ++++++++++++++------------- man/endemic_outliers.Rd | 20 +- man/morans_index.Rd | 16 +- tests/testthat/test-spatiotemporal.R | 44 ++-- 4 files changed, 235 insertions(+), 226 deletions(-) diff --git a/R/spatiotemporal.R b/R/spatiotemporal.R index 34dff5b6..d8d9021a 100644 --- a/R/spatiotemporal.R +++ b/R/spatiotemporal.R @@ -1,186 +1,195 @@ -#' Neighborhoods from real travel distances -#' -#' @description Function to build neighborhoods from real travel distances -#' inside Colombia by land or river transport. -#' -#' @param query_vector Codes of the municipalities to consider for the -#' neighborhoods. -#' @param threshold Maximum traveling time around each municipality. - -#' @return neighborhood object according to the introduced threshold. -#' -#' @examples -#' \dontrun{ -#' query_vector <- c(5001, 5002, 5004, 5021, 5030, 5615, 5607) -#' neighborhood(query_vector, 2) -#' } -#' -#' @export -neighborhoods <- function(query_vector, threshold = 2) { - stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector))) - path <- system.file("data", "distance_matrix.rda", package = "epiCo") - load(path) - distance_matrix <- distance_matrix - distance <- distance_matrix[ - which(row.names(distance_matrix) %in% - query_vector), - which(names(distance_matrix) %in% - query_vector) - ] - excluded <- query_vector[!query_vector %in% rownames(distance)] - for (i in excluded) { - warning("municipality ", i, " was not found") - } - adjacency_matrix <- as.matrix(distance <= threshold) - list_weights <- spdep::mat2listw(adjacency_matrix) - neighborhoods <- list_weights$neighbours - return(neighborhoods) -} - -#' Calculate spatial correlation of given municipalities in an incidence_rate -#' object. -#' -#' @description Function to calculate spatial autocorrelation via Moran's Index -#' from a given incidence_rate object grouped by municipality. -#' -#' @importFrom magrittr %>% -#' -#' @param incidence_rate Incidence rate object with only one observation for a -#' group of municipalities. -#' @param threshold Maximum traveling time around each municipality. -#' @param plot if TRUE, returns a plot of influential observations in the -#' Moran's plot. - -#' @return List of Moran's I clustering analysis, giving the quadrant of each -#' observation, influential values. -#' -#' @examples -#' \dontrun{ -#' morans_index(incidence_rate, 2, FALSE) -#' } -#' @export -morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) { - stopifnot( - "`incidence_rate` must have observations for only one given date" = - ncol(incidence_rate$rates) == length(incidence_rate$rates) - ) - path_1 <- system.file("data", "divipola_table.rda", package = "epiCo") - load(path_1) - divipola_table <- divipola_table - # Match with DIVIPOLA order - incidence_rate_ordered <- incidence_rate[, order(match( - colnames(incidence_rate$counts), divipola_table$COD_MPIO - ))] - mpios <- colnames(incidence_rate_ordered$counts) - - # Logarithmic transformation - incidence_rate_log <- log(incidence_rate_ordered$rates) - mpios_filtered <- as.numeric(mpios[which(incidence_rate_log > -Inf)]) - incidence_rate_log <- incidence_rate_log[which(incidence_rate_log > -Inf)] - - # Neighborhood structure - nb <- neighborhoods(mpios_filtered, threshold) - weights <- spdep::nb2listw(nb, style = "W") - - # Moran's I - morans_i <- spdep::moran.plot(incidence_rate_log, - listw = weights, plot = FALSE - ) - moran_data_frame <- as.data.frame(morans_i) - # Clusters - moran_data_frame <- dplyr::mutate(moran_data_frame, - cluster = dplyr::case_when( - x > mean(x) & wx > - mean(wx) ~ "HH", - x < mean(x) & wx < - mean(wx) ~ "LL", - x > mean(x) & wx < - mean(wx) ~ "HL", - x < mean(x) & wx > - mean(wx) ~ "LH" - ) - ) - inf_mpios <- moran_data_frame[which(moran_data_frame$is_inf), ] - morans_index <- c( - list(municipios = moran_data_frame$labels), - list(quadrant = moran_data_frame$cluster), - list(influential = moran_data_frame$is_inf), - list(logIncidence = moran_data_frame$x), - list(lagIncidence = moran_data_frame$wx) - ) - if (!all(is.na(morans_index$quadrant))) { - cat(paste("Influential municipalities are:", "\n")) - # Influential observations - for (i in seq_len(nrow(inf_mpios))) { - if (!is.na(inf_mpios$cluster[i])) { - # nolint start: string_boundary_linter - relative_incidence <- ifelse(substr( - inf_mpios$cluster[i], 1, 1 - ) == "H", "high", "low") - # nolint end: string_boundary_linter - relative_correlation <- ifelse(substr( - inf_mpios$cluster[i], 2, 2 - ) == "H", "high", "low") - cat(paste( - inf_mpios$labels[i], "with", relative_incidence, - "incidence and", relative_correlation, "spatial correlation", - "\n" - )) - } - } - } - # Plot - if (plot) { - if (!all(is.na(morans_index$quadrant))) { - path_2 <- system.file("data", "spatial_polygons_col_2.rda", - package = "epiCo" - ) - load(path_2) - spatial_polygons_col_2 <- spatial_polygons_col_2 - pal <- leaflet::colorFactor( - palette = c( - "#ba0001", "#357a38", "#2c7c94", - "#fbe45b" - ), - domain = c("HH", "LL", "LH", "HL"), - ordered = TRUE - ) - pal_test <- pal(c("LL", "HH")) - rm(pal_test) - shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in% - as.integer(inf_mpios$labels), ] - shapes_plot <- shapes[, order(match( - as.integer(inf_mpios$labels), - shapes$MPIO_CDPMP - ))] - shapes_plot$CLUSTER <- inf_mpios$cluster - shapes_plot$MPIO_CDPMP <- inf_mpios$labels - shapes_plot$NOM_MPIO <- divipola_table$NOM_MPIO[ - divipola_table$COD_MPIO %in% shapes_plot$MPIO_CDPMP - ] - # shapes_ordered - popup_data <- paste0( - "", "Municipality Name: ", "", - shapes_plot$NOM_MPIO, "
", - "", "Municipality Code: ", "", - shapes_plot$MPIO_CDPMP, "
", - "", "Cluster: ", "", - shapes_plot$CLUSTER, "
" - ) - leaflet::leaflet(shapes_plot) %>% - leaflet::addTiles() %>% - leaflet::addPolygons( - stroke = TRUE, - weight = 1, - smoothFactor = 0.2, - fillColor = ~ pal(CLUSTER), - popup = popup_data, - color = "white", - fillOpacity = 0.75 - ) - } else { - warning("There are no influential municipalities to plot") - } - } - return(morans_index) -} +#' Neighborhoods from real travel distances +#' +#' @description Function to build neighborhoods from real travel distances +#' inside Colombia by land or river transport. +#' +#' @param query_vector Codes of the municipalities to consider for the +#' neighborhoods. +#' @param threshold Maximum traveling time around each municipality. + +#' @return neighborhood object according to the introduced threshold. +#' +#' @examples +#' \dontrun{ +#' query_vector <- c(5001, 5002, 5004, 5021, 5030, 5615, 5607) +#' neighborhood(query_vector, 2) +#' } +#' +#' @export +neighborhoods <- function(query_vector, threshold = 2) { + stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector))) + path <- system.file("data", "distance_matrix.rda", package = "epiCo") + load(path) + distance_matrix <- distance_matrix + distance <- distance_matrix[ + which(row.names(distance_matrix) %in% + query_vector), + which(names(distance_matrix) %in% + query_vector) + ] + excluded <- query_vector[!query_vector %in% rownames(distance)] + for (i in excluded) { + warning("municipality ", i, " was not found") + } + adjacency_matrix <- as.matrix(distance <= threshold) + list_weights <- spdep::mat2listw(adjacency_matrix) + neighborhoods <- list_weights$neighbours + return(neighborhoods) +} + +#' Calculate spatial correlation of given municipalities in an incidence_rate +#' object. +#' +#' @description Function to calculate spatial autocorrelation via Moran's Index +#' from a given incidence_rate object grouped by municipality. +#' +#' @importFrom magrittr %>% +#' +#' @param incidence_historic An incidence object with the historic weekly +#' observations +#' @param level Administration level at which incidence counts are grouped. +#' @param scale Scale to consider when calculating the incidence_rate. +#' @param threshold Maximum traveling time around each municipality. +#' @param plot if TRUE, returns a plot of influential observations in the +#' Moran's plot. + +#' @return List of Moran's I clustering analysis, giving the quadrant of each +#' observation, influential values. +#' +#' @examples +#' \dontrun{ +#' morans_index(incidence_rate, 2, FALSE) +#' } +#' @export +morans_index <- function(incidence_historic, level, scale = 100000, + threshold = 2, plot = TRUE) { + stopifnot( + "`incidence_object` must have incidence class" = + (inherits(incidence_historic, "incidence")) + ) + + incidence_rate <- incidence_rate( + incidence_object = incidence_historic, + level = level, scale = scale + ) + + path_1 <- system.file("data", "divipola_table.rda", package = "epiCo") + load(path_1) + divipola_table <- divipola_table + # Match with DIVIPOLA order + incidence_rate_ordered <- incidence_rate[, order(match( + colnames(incidence_rate$counts), divipola_table$COD_MPIO + ))] + mpios <- colnames(incidence_rate_ordered$counts) + + # Logarithmic transformation + incidence_rate_log <- log(incidence_rate_ordered$rates) + mpios_filtered <- as.numeric(mpios[which(incidence_rate_log > -Inf)]) + incidence_rate_log <- incidence_rate_log[which(incidence_rate_log > -Inf)] + + # Neighborhood structure + nb <- neighborhoods(mpios_filtered, threshold) + weights <- spdep::nb2listw(nb, style = "W") + + # Moran's I + morans_i <- spdep::moran.plot(incidence_rate_log, + listw = weights, plot = FALSE + ) + moran_data_frame <- as.data.frame(morans_i) + # Clusters + moran_data_frame <- dplyr::mutate(moran_data_frame, + cluster = dplyr::case_when( + x > mean(x) & wx > + mean(wx) ~ "HH", + x < mean(x) & wx < + mean(wx) ~ "LL", + x > mean(x) & wx < + mean(wx) ~ "HL", + x < mean(x) & wx > + mean(wx) ~ "LH" + ) + ) + inf_mpios <- moran_data_frame[which(moran_data_frame$is_inf), ] + morans_index <- c( + list(municipios = moran_data_frame$labels), + list(quadrant = moran_data_frame$cluster), + list(influential = moran_data_frame$is_inf), + list(logIncidence = moran_data_frame$x), + list(lagIncidence = moran_data_frame$wx) + ) + if (!all(is.na(morans_index$quadrant))) { + cat(paste("Influential municipalities are:", "\n")) + # Influential observations + for (i in seq_len(nrow(inf_mpios))) { + if (!is.na(inf_mpios$cluster[i])) { + # nolint start: string_boundary_linter + relative_incidence <- ifelse(substr( + inf_mpios$cluster[i], 1, 1 + ) == "H", "high", "low") + # nolint end: string_boundary_linter + relative_correlation <- ifelse(substr( + inf_mpios$cluster[i], 2, 2 + ) == "H", "high", "low") + cat(paste( + inf_mpios$labels[i], "with", relative_incidence, + "incidence and", relative_correlation, "spatial correlation", + "\n" + )) + } + } + } + # Plot + if (plot) { + if (!all(is.na(morans_index$quadrant))) { + path_2 <- system.file("data", "spatial_polygons_col_2.rda", + package = "epiCo" + ) + load(path_2) + spatial_polygons_col_2 <- spatial_polygons_col_2 + pal <- leaflet::colorFactor( + palette = c( + "#ba0001", "#357a38", "#2c7c94", + "#fbe45b" + ), + domain = c("HH", "LL", "LH", "HL"), + ordered = TRUE + ) + pal_test <- pal(c("LL", "HH")) + rm(pal_test) + shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in% + as.integer(inf_mpios$labels), ] + shapes_plot <- shapes[, order(match( + as.integer(inf_mpios$labels), + shapes$MPIO_CDPMP + ))] + shapes_plot$CLUSTER <- inf_mpios$cluster + shapes_plot$MPIO_CDPMP <- inf_mpios$labels + shapes_plot$NOM_MPIO <- divipola_table$NOM_MPIO[ + divipola_table$COD_MPIO %in% shapes_plot$MPIO_CDPMP + ] + # shapes_ordered + popup_data <- paste0( + "", "Municipality Name: ", "", + shapes_plot$NOM_MPIO, "
", + "", "Municipality Code: ", "", + shapes_plot$MPIO_CDPMP, "
", + "", "Cluster: ", "", + shapes_plot$CLUSTER, "
" + ) + leaflet::leaflet(shapes_plot) %>% + leaflet::addTiles() %>% + leaflet::addPolygons( + stroke = TRUE, + weight = 1, + smoothFactor = 0.2, + fillColor = ~ pal(CLUSTER), + popup = popup_data, + color = "white", + fillOpacity = 0.75 + ) + } else { + warning("There are no influential municipalities to plot") + } + } + return(morans_index) +} diff --git a/man/endemic_outliers.Rd b/man/endemic_outliers.Rd index 41a6d041..0c2c2950 100644 --- a/man/endemic_outliers.Rd +++ b/man/endemic_outliers.Rd @@ -1,17 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/endemic_channel.R, R/endemic_outliers.R +% Please edit documentation in R/endemic_channel.R \name{endemic_outliers} \alias{endemic_outliers} \title{Modifies the historic incidence to handle with the observations of epidemic years} \usage{ -endemic_outliers( - historic, - outlier_years, - outliers_handling, - geom_method = "shifted" -) - endemic_outliers( historic, outlier_years, @@ -41,14 +34,9 @@ geometric mean and take into account calculation, see: geom_mean} } \value{ -A modified historic incidence - A modified historic incidence } \description{ -Function that modifies an historic incidence by including, -ignoring, or replacing the observations of epidemic years - Function that modifies an historic incidence by including, ignoring, or replacing the observations of epidemic years } @@ -59,10 +47,4 @@ endemic_outliers(historic, outlier_years, outliers_handling, ) } -\dontrun{ -endemic_outliers(historic, outlier_years, outliers_handling, - geom_method = "shifted" -) -} - } diff --git a/man/morans_index.Rd b/man/morans_index.Rd index 69e56184..10a43516 100644 --- a/man/morans_index.Rd +++ b/man/morans_index.Rd @@ -5,11 +5,21 @@ \title{Calculate spatial correlation of given municipalities in an incidence_rate object.} \usage{ -morans_index(incidence_rate, threshold = 2, plot = TRUE) +morans_index( + incidence_historic, + level, + scale = 1e+05, + threshold = 2, + plot = TRUE +) } \arguments{ -\item{incidence_rate}{Incidence rate object with only one observation for a -group of municipalities.} +\item{incidence_historic}{An incidence object with the historic weekly +observations} + +\item{level}{Administration level at which incidence counts are grouped.} + +\item{scale}{Scale to consider when calculating the incidence_rate.} \item{threshold}{Maximum traveling time around each municipality.} diff --git a/tests/testthat/test-spatiotemporal.R b/tests/testthat/test-spatiotemporal.R index b8909f00..b2f619e0 100644 --- a/tests/testthat/test-spatiotemporal.R +++ b/tests/testthat/test-spatiotemporal.R @@ -24,7 +24,7 @@ test_that("Neighborhoods are built as expected", { ## Incidence rate objects for morans Index -# Functional incidence rate +# Functional incidence object set.seed(3) sample_groups <- c(5001, 5148, 5615, 5088, 5266, 5440, 5318, 5368, 5659) sample_data <- sample(sample_groups, 200, replace = TRUE) @@ -36,9 +36,8 @@ incidence_object <- incidence::incidence(sample_df$CASES, interval = "month", group = sample_df$GROUP ) -incidence_rate <- incidence_rate(incidence_object, 2) -# Failing incidence rate +# Failing incidence object sample_data_2 <- as.integer(sample(1:50, 200, replace = TRUE)) sample_dates_2 <- as.Date("2018-12-31") + sample_data_2 sample_groups_2 <- sample(c(5001, 5264, 5615, 5607), 200, replace = TRUE) @@ -47,25 +46,34 @@ incidence_object_2 <- incidence::incidence(sample_df_2$CASES, interval = "weeks", group = sample_df_2$GROUP ) -incidence_rate_2 <- incidence_rate(incidence_object_2, 2) -# Extra incidence rate -sample_df_3 <- sample_df[!sample_df$GROUP %in% c(5001, 5148), ] -incidence_object_3 <- incidence::incidence(sample_df_3$CASES, - interval = "weeks", - group = sample_df_3$GROUP -) -incidence_rate_3 <- incidence_rate(incidence_object_3, 2) test_that("Morans Index errors and warnings are thrown", { - expect_error(morans_index(incidence_object, 2, FALSE)) - expect_error(morans_index(incidence_rate_2, 2, FALSE)) - expect_warning(morans_index(incidence_rate, 12, TRUE), type = "list") + expect_error(morans_index(c(20, 53, 90, 63), 2, plot = FALSE)) + expect_error(morans_index(incidence_object_2, + level = 2, + threshold = 2, plot = FALSE + )) + expect_warning(morans_index(incidence_object, + level = 2, + threshold = 12, plot = TRUE + ), type = "list") }) test_that("Morans Index works as expected", { - expect_type(morans_index(incidence_rate, 1, FALSE), type = "list") - expect_length(morans_index(incidence_rate, 1, FALSE), n = 5) - expect_type(morans_index(incidence_rate, 2, TRUE), type = "list") - expect_type(morans_index(incidence_rate, 12, TRUE), type = "list") + expect_type(morans_index(incidence_object, + level = 2, threshold = 1, plot = FALSE + ), type = "list") + expect_length(morans_index(incidence_object, + level = 2, + threshold = 1, plot = FALSE + ), n = 5) + expect_type(morans_index(incidence_object, + level = 2, + threshold = 2, plot = TRUE + ), type = "list") + expect_type(morans_index(incidence_object, + level = 2, + threshold = 12, plot = TRUE + ), type = "list") })