diff --git a/.gitignore b/.gitignore index e75435c1..5a40bd39 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ po/*~ # RStudio Connect folder rsconnect/ +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index c39a24d1..8b2f099b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,6 @@ RoxygenNote: 7.2.3 Imports: dplyr, spdep, - geosphere, incidence, lubridate, ggplot2, diff --git a/NAMESPACE b/NAMESPACE index da713c50..2780a99b 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,13 +1,10 @@ # Generated by roxygen2: do not edit by hand export(age_risk) -export(auto_endemic_channel) export(describe_ethnicity) export(describe_occupation) export(detect_outbreaks_ewma) export(endemic_channel) -export(endemic_outliers) -export(endemic_plot) export(epi_calendar) export(ewma) export(geom_mean) diff --git a/R/demographics.R b/R/demographics.R index fa2c5a1f..78e9edd3 100644 --- a/R/demographics.R +++ b/R/demographics.R @@ -1,421 +1,463 @@ -#' Returns the population pyramid of the consulted region -#' -#' @description Function that returns the population pyramid of the municipality -#' or department of an specific year -#' @param divipola_code A numeric code accounting for the territory of interest -#' @param year A numeric input for year of interest -#' @param gender A boolean to consult data disaggregated by gender -#' @param total A boolean for returning the total number rather than the -#' proportion of the country's population -#' @param plot A boolean for displaying a plot -#' -#' @importFrom rlang .data -#' -#' @return A dataframe with the proportion or total count of individuals -#' @examples -#' \dontrun{ -#' population_pyramid(15001, 2015, total = TRUE, plot = TRUE) -#' } -#' @export -#' -population_pyramid <- function(divipola_code, year, - gender = TRUE, total = TRUE, plot = FALSE) { - stopifnot( - "`year` is only available from 2005 to 2026, - please select a valid year" = (year >= 2005 & year <= 2026), - "`divipola_code` must be numeric" = (is.numeric(divipola_code) & - length(divipola_code) == 1) - ) - path <- system.file("data", "divipola_table.rda", package = "epiCo") - load(path) - divipola_table <- divipola_table - - if (divipola_code == 0) { - path_0 <- system.file("data", "population_projection_col_0.rda", - package = "epiCo" - ) - load(path_0) - population_projection_col_0 <- population_projection_col_0 - pop_data_dpto <- dplyr::filter( - population_projection_col_0, - population_projection_col_0$DP == divipola_code & - population_projection_col_0$ANO == year - ) - - female_total <- as.numeric(pop_data_dpto[104:204]) - male_total <- as.numeric(pop_data_dpto[3:103]) - } else if (divipola_code %in% divipola_table$COD_DPTO) { - path_1 <- system.file("data", "population_projection_col_1.rda", - package = "epiCo" - ) - load(path_1) - population_projection_col_1 <- population_projection_col_1 - pop_data_dpto <- dplyr::filter( - population_projection_col_1, - .data$DP == divipola_code & .data$ANO == year - ) - - female_total <- as.numeric(pop_data_dpto[104:204]) - male_total <- as.numeric(pop_data_dpto[3:103]) - } else if (divipola_code %in% divipola_table$COD_MPIO) { - path_2 <- system.file("data", "population_projection_col_2.rda", - package = "epiCo" - ) - load(path_2) - population_projection_col_2 <- population_projection_col_2 - pop_data_mun <- dplyr::filter( - population_projection_col_2, - .data$DPMP == divipola_code & .data$ANO == year - ) - - female_total <- as.numeric(pop_data_mun[104:204]) - male_total <- as.numeric(pop_data_mun[3:103]) - } else { - stop("There is no location assigned to the consulted DIVIPOLA code") - } - - if (!total) { - female_total <- female_total / sum(female_total) - male_total <- male_total / sum(male_total) - } - - if (gender) { - pop_pyramid <- data.frame( - age = rep(0:100, 2), - population = c(female_total, male_total), - gender = c(rep("F", 101), rep("M", 101)) - ) - } else { - pop_pyramid <- data.frame( - age = 0:100, - population = c(female_total + male_total) - ) - } - - if (plot) { - if (gender) { - pop_pyramid$population <- c(-1 * female_total, male_total) - - pop_pyramid_plot <- ggplot2::ggplot( - pop_pyramid, - ggplot2::aes( - x = .data$age, - y = .data$population, - fill = .data$gender - ) - ) + - ggplot2::geom_bar( - data = dplyr::filter(pop_pyramid, .data$gender == "F"), - stat = "identity" - ) + - ggplot2::geom_bar( - data = dplyr::filter(pop_pyramid, .data$gender == "M"), - stat = "identity" - ) + - ggplot2::coord_flip() - - pop_pyramid$population <- c(female_total, male_total) - } else { - pop_pyramid_plot <- ggplot2::ggplot( - pop_pyramid, - ggplot2::aes( - x = .data$age, - y = .data$population - ) - ) + - ggplot2::geom_bar(stat = "identity") - } - - if (total) { - pop_pyramid_plot <- pop_pyramid_plot + - ggplot2::ylab("Total population") - } else { - pop_pyramid_plot <- pop_pyramid_plot + - ggplot2::ylab("Proportion of population") - } - - print(pop_pyramid_plot) - } - - return(pop_pyramid) -} - - - -#' Returns the probability mass function of being infected given age and gender -#' -#' @description Function that returns the probability of being infected given -#' age and gender -#' -#' @param age A vector with the ages of cases in years -#' @param gender A vector with the gender of cases 'F' and 'M' -#' @param population_pyramid A dataframe with the count of individuals -#' @param plot A boolean for displaying a plot -#' -#' @importFrom rlang .data -#' -#' @return A dataframe with the proportion or total count of individuals -#' @export -#' -#' -age_risk <- function(age, gender = NULL, population_pyramid, plot = FALSE) { - stopifnot("`age` must be a numeric vector" = is.numeric(age)) - if (!is.null(gender)) { - stopifnot( - "`population_pyramid` should include gender" = - (length(population_pyramid) == 3) - ) - ages_female <- age[gender == "F"] - pyramid_female <- dplyr::filter(population_pyramid, .data$gender == "F") - hist_female <- graphics::hist(ages_female, - breaks = 0:101, - right = FALSE, - plot = FALSE - ) - - age_risk_female <- data.frame( - age = pyramid_female$age, - prob = hist_female$counts / pyramid_female$population, - gender = rep("F", 101), - stringsAsFactors = FALSE - ) - - ages_male <- age[gender == "M"] - pyramid_male <- dplyr::filter(population_pyramid, .data$gender == "M") - hist_male <- graphics::hist(ages_male, - breaks = 0:101, - right = FALSE, - plot = FALSE - ) - - age_risk_male <- data.frame( - age = pyramid_male$age, - prob = hist_male$counts / pyramid_male$population, - gender = rep("M", 101), - stringsAsFactors = FALSE - ) - - age_risk <- rbind(age_risk_female, age_risk_male) - } else { - hist_total <- graphics::hist(age, - breaks = 0:101, - right = FALSE, - plot = FALSE - ) - - age_risk <- data.frame( - age = population_pyramid$age, - prob = hist_total$counts / population_pyramid$population - ) - } - - - if (plot) { - if (!is.null(gender)) { - age_risk$prob <- c(-1 * age_risk_female$prob, age_risk_male$prob) - - age_risk_plot <- ggplot2::ggplot( - age_risk, - ggplot2::aes( - x = .data$age, - y = .data$prob, - fill = .data$gender - ) - ) + - ggplot2::geom_bar( - data = dplyr::filter(age_risk, .data$gender == "F"), - stat = "identity" - ) + - ggplot2::geom_bar( - data = dplyr::filter(age_risk, .data$gender == "M"), - stat = "identity" - ) + - ggplot2::coord_flip() - } else { - age_risk_plot <- ggplot2::ggplot(age_risk, ggplot2::aes( - x = .data$age, - y = .data$prob - )) + - ggplot2::geom_bar(stat = "identity") - } - - print(age_risk_plot) - } - - - return(age_risk) -} - -#' Provides the sociological description of ethnicities in Colombia -#' -#' @description Function that returns the description of consulted ethnicities -#' @param ethnic_labels A numeric vector with the codes of ethnicities to -#' consult -#' @param language "ES" for description in spanish "EN" for english -#' -#' @return A printed message with the description of the ethnicities -#' @examples -#' \dontrun{ -#' describe_ethnicity(c(1, 2, 3, 4)) -#' } -#' @export -describe_ethnicity <- function(ethnic_labels, language = "ES") { - stopifnot( - "`ethnic_labels` must be a numeric vector" = - is.numeric(ethnic_labels) - ) - ethnic_labels <- as.data.frame(ethnic_labels) - - #### ESPAÑOL #### - indigena_es <- "Persona de ascendencia amerindia que comparten sentimientos - de identificacion con su pasado aborigen, manteniendo rasgos y valores - propios de su cultura tradicional, asi como formas de organizacion - y control social propios" - - rom_es <- "Son comunidades que tienen una identidad etnica y cultural propia; - se caracterizan por una tradicion nomada, y tienen su propio idioma - que es el romanes" - - raizal_es <- "Poblacion ubicada en el Archipielago de San Andres, Providencia - y Santa Catalina, con raices culturales afroanglo-antillanas, - cuyos integrantes tienen rasgos socioculturales y linguisticos - claramente diferenciados del resto de la poblacion afrocolombiana" - - palenquero_es <- "Poblacion ubicada en el municipio de San Basilio de - Palenque, departamento de Bolivar, donde se habla el palenquero, - lenguaje criollo" - - afro_es <- "Persona de ascendencia afrocolombiana que poseen una cultura - propia, y tienen sus propias tradiciones y costumbre dentro de la relacion - campo-poblado" - - #### ENGLISH #### - indigena_en <- "A person of Amerindian descent who shares feelings of - identification with their aboriginal past, maintaining traits and values - of their traditional culture, as well as their own forms of organization - and social control" - - rom_en <- "They are communities that have their own ethnic and cultural - identity; They are characterized by a nomadic tradition, and have their own - language, which is Romanesque" - - raizal_en <- "Population located in the Archipelago of San Andres, Providencia - and Santa Catalina, with Afro-Anglo-Antillean cultural roots, whose members - have clearly differentiated sociocultural and linguistic traits - from the rest of the Afro-Colombian population" - - palenquero_en <- "Population located in the municipality of San Basilio de - Palenque, department of Bolivar, where palenquero is spoken, - a Creole language" - - afro_en <- "Person of Afro-Colombian descent who have their own culture, - and have their own traditions and customs within the - rural-populated relationship" - - ##### - - descriptions_es <- c(indigena_es, rom_es, raizal_es, palenquero_es, afro_es) - description_en <- c(indigena_en, rom_en, raizal_en, palenquero_en, afro_en) - - labels <- order(unique(ethnic_labels$ethnic_labels)) - - if (language == "EN") { - return(description_en[labels]) - } else { - return(descriptions_es[labels]) - } -} - -#' Get ISCO-88 occupation labels from codes -#' -#' @description Function that translates a vector of ISCO-88 occupation codes -#' into a vector of labels -#' @param isco_codes A numeric vector of ISCO-88 occupation codes -#' (major, submajor, minor or unit) -#' @param output_level A string parameter that defines the level of the desired -#' label (major, submajor, minor or unit) -#' -#' @return A string vector of ISCO-88 labels -#' @examples -#' \dontrun{ -#' describe_occupation(1111, level = 1) -#' } -#' @export -describe_occupation <- function(isco_codes, output_level) { - stopifnot("`isco_codes` must be a numeric vector" = is.numeric(isco_codes)) - path <- system.file("data", "isco88_table.rda", package = "epiCo") - load(path) - isco88_table <- isco88_table - input_level <- dplyr::case_when( - isco_codes %in% c(0, 110) ~ "Armed Forces", - nchar(isco_codes) == 1 ~ "major", - nchar(isco_codes) == 2 ~ "sub_major", - nchar(isco_codes) == 3 ~ "minor", - nchar(isco_codes) == 4 ~ "unit", - TRUE ~ NA_character_ - ) - tryCatch( - { - output_level_index <- as.numeric(sapply(output_level, switch, - "major" = 1, - "major_label" = 1, - "sub_major" = 2, - "sub_major_label" = 2, - "minor" = 3, - "minor_label" = 3, - "unit" = 4, - "unit_label" = 4, - simplify = "array" - )) - }, - error = function(e) { - stop( - "Output level does not exist, please check your input", - call. = FALSE - ) - } - ) - - - input_level_index <- sapply(input_level, switch, - "major" = 1, - "sub_major" = 2, - "minor" = 3, - "unit" = 4, - "Armed Forces" = 0, - simplify = "array" - ) - - isco88_labels <- as.list(input_level) - for (i in seq(1, length(isco_codes))) { - tryCatch( - { - isco_code <- isco_codes[i] - if (isco_code == 0 | isco_code == 110) { - isco88_labels[[i]] <- as.array(isco88_table[ - isco88_table$major == 0, - output_level - ]) - } else if (input_level_index[i] < output_level_index) { - index_start <- match(isco_code, isco88_table[, input_level[i]]) - n_match <- sum(isco88_table[, input_level[i]] == isco_code) - index_end <- index_start + n_match - 1 - isco88_labels[[i]] <- as.array(isco88_table[ - index_start:index_end, - output_level - ]) - } else { - isco88_labels[i] <- isco88_table[which(isco88_table[input_level[i]] - == isco_code)[1], output_level] - } - }, - error = function(e) { - stop( - "Code ", isco_code, " does not exist, please check your input", - call. = FALSE - ) - } - ) - } - return(isco88_labels) -} +#' Returns the population pyramid of the consulted region +#' +#' @description Function that returns the population pyramid of the municipality +#' or department of an specific year +#' @param divipola_code A numeric code accounting for the territory of interest +#' @param year A numeric input for year of interest +#' @param gender A boolean to consult data disaggregated by gender +#' @param range A numeric value from 1 to 100 for the age range to use +#' @param total A boolean for returning the total number rather than the +#' proportion of the country's population +#' @param plot A boolean for displaying a plot +#' +#' @importFrom rlang .data +#' +#' @return A dataframe with the proportion or total count of individuals +#' @examples +#' \dontrun{ +#' population_pyramid(15001, 2015, total = TRUE, plot = TRUE) +#' } +#' @export +#' +population_pyramid <- function(divipola_code, year, + gender = TRUE, range = 5, total = TRUE, + plot = FALSE) { + stopifnot( + "`year` is only available from 2005 to 2026, + please select a valid year" = (year >= 2005 & year <= 2026), + "`divipola_code` must be numeric" = (is.numeric(divipola_code) & + length(divipola_code) == 1), + "`range` must be a numeric value between 1 and 100" = (is.numeric(range)) + ) + path <- system.file("extdata", "divipola_table.rda", package = "epiCo") + load(path) + divipola_table <- divipola_table + + if (divipola_code == 0) { + path_0 <- system.file("extdata", "population_projection_col_0.rda", + package = "epiCo" + ) + load(path_0) + population_projection_col_0 <- population_projection_col_0 + pop_data_dpto <- dplyr::filter( + population_projection_col_0, + ((population_projection_col_0$DP == divipola_code) & + (population_projection_col_0$ANO == year)) + ) + + female_counts <- as.numeric(pop_data_dpto[104:204]) + male_counts <- as.numeric(pop_data_dpto[3:103]) + } else if (divipola_code %in% divipola_table$COD_DPTO) { + path_1 <- system.file("extdata", "population_projection_col_1.rda", + package = "epiCo" + ) + load(path_1) + population_projection_col_1 <- population_projection_col_1 + pop_data_dpto <- dplyr::filter( + population_projection_col_1, + ((.data$DP == divipola_code) & (.data$ANO == year)) + ) + + female_counts <- as.numeric(pop_data_dpto[104:204]) + male_counts <- as.numeric(pop_data_dpto[3:103]) + } else if (divipola_code %in% divipola_table$COD_MPIO) { + path_2 <- system.file("extdata", "population_projection_col_2.rda", + package = "epiCo" + ) + load(path_2) + population_projection_col_2 <- population_projection_col_2 + pop_data_mun <- dplyr::filter( + population_projection_col_2, + ((.data$DPMP == divipola_code) & (.data$ANO == year)) + ) + + female_counts <- as.numeric(pop_data_mun[104:204]) + male_counts <- as.numeric(pop_data_mun[3:103]) + } else { + stop("There is no location assigned to the consulted DIVIPOLA code") + } + + female_total <- vector(length = length(seq( + 1, length(female_counts) - range, + range + ))) + male_total <- vector(length = length(seq( + 1, length(female_counts) - range, + range + ))) + cont <- 1 + for (h in seq(1, length(female_counts) - range, range)) { + female_total[cont] <- sum(female_counts[h:h + range]) + male_total[cont] <- sum(male_counts[h:h + range]) + cont <- cont + 1 + } + + if (!total) { + female_total <- female_total / sum(female_total) + male_total <- male_total / sum(male_total) + } + + if (gender) { + pop_pyramid <- data.frame( + age = rep(seq(0, length(female_counts) - range, range), 2), + population = c(female_total, male_total), + gender = c( + rep("F", ceiling((length(female_counts) - range) / range)), + rep("M", ceiling((length(male_counts) - range) / range)) + ) + ) + } else { + pop_pyramid <- data.frame( + age = seq(1, length(female_counts) - range, range), + population = c(female_total + male_total) + ) + } + + if (plot) { + if (gender) { + pop_pyramid$population <- c(-1 * female_total, male_total) + + pop_pyramid_plot <- ggplot2::ggplot( + pop_pyramid, + ggplot2::aes( + x = .data$age, + y = .data$population, + fill = .data$gender + ) + ) + + ggplot2::geom_bar( + data = dplyr::filter(pop_pyramid, .data$gender == "F"), + stat = "identity" + ) + + ggplot2::geom_bar( + data = dplyr::filter(pop_pyramid, .data$gender == "M"), + stat = "identity" + ) + + ggplot2::coord_flip() + + pop_pyramid$population <- c(female_total, male_total) + } else { + pop_pyramid_plot <- ggplot2::ggplot( + pop_pyramid, + ggplot2::aes( + x = .data$age, + y = .data$population + ) + ) + + ggplot2::geom_bar(stat = "identity") + + ggplot2::coord_flip() + } + + if (total) { + pop_pyramid_plot <- pop_pyramid_plot + + ggplot2::ylab("Total population") + } else { + pop_pyramid_plot <- pop_pyramid_plot + + ggplot2::ylab("Proportion of population") + } + + print(pop_pyramid_plot) + } + + return(pop_pyramid) +} + + + +#' Returns the probability mass function of being infected given age and gender +#' +#' @description Function that returns the probability of being infected given +#' age and gender +#' +#' @param age A vector with the ages of cases in years +#' @param gender A vector with the gender of cases 'F' and 'M' +#' @param population_pyramid A dataframe with the count of individuals +#' @param plot A boolean for displaying a plot +#' +#' @importFrom rlang .data +#' +#' @return A dataframe with the proportion or total count of individuals +#' @export +#' +#' +age_risk <- function(age, gender = NULL, population_pyramid, plot = FALSE) { + stopifnot("`age` must be a numeric vector" = is.numeric(age)) + if (!is.null(gender)) { + stopifnot( + "`gender` does not have the same number of elements as `age`" = + (length(gender) == length(age)), + "`population_pyramid` should include gender" = + (length(population_pyramid) == 3) + ) + age_female <- age[gender == "F"] + pyramid_female <- dplyr::filter(population_pyramid, .data$gender == "F") + hist_female <- graphics::hist(age_female, + breaks = c( + 0, + pyramid_female$age + + (pyramid_female$age[2] - + pyramid_female$age[1]) + ), + plot = FALSE + ) + + age_risk_female <- data.frame( + age = pyramid_female$age, + prob = hist_female$counts / pyramid_female$population, + gender = rep("F", length(pyramid_female$age)), + stringsAsFactors = FALSE + ) + + age_male <- age[gender == "M"] + pyramid_male <- dplyr::filter(population_pyramid, .data$gender == "M") + hist_male <- graphics::hist(age_male, + breaks = c( + 0, + pyramid_male$age + + (pyramid_male$age[2] - + pyramid_male$age[1]) + ), + plot = FALSE + ) + + age_risk_male <- data.frame( + age = pyramid_male$age, + prob = hist_male$counts / pyramid_male$population, + gender = rep("M", length(pyramid_male$age)), + stringsAsFactors = FALSE + ) + + age_risk <- rbind(age_risk_female, age_risk_male) ###### + } else { + if (length(population_pyramid) == 3) { + population_pyramid <- aggregate(population ~ age, population_pyramid, sum) + } + hist_total <- graphics::hist(age, + breaks = c( + 0, + population_pyramid$age + + (population_pyramid$age[2] - + population_pyramid$age[1]) + ), + plot = FALSE + ) + + age_risk <- data.frame( + age = population_pyramid$age, + prob = hist_total$counts / population_pyramid$population + ) + } + + + if (plot) { + if (!is.null(gender)) { + age_risk$prob <- c(-1 * age_risk_female$prob, age_risk_male$prob) + + age_risk_plot <- ggplot2::ggplot( + age_risk, + ggplot2::aes( + x = .data$age, + y = .data$prob, + fill = .data$gender + ) + ) + + ggplot2::geom_bar( + data = dplyr::filter(age_risk, .data$gender == "F"), + stat = "identity" + ) + + ggplot2::geom_bar( + data = dplyr::filter(age_risk, .data$gender == "M"), + stat = "identity" + ) + + ggplot2::coord_flip() + } else { + age_risk_plot <- ggplot2::ggplot(age_risk, ggplot2::aes( + x = .data$age, + y = .data$prob + )) + + ggplot2::geom_bar(stat = "identity") + + ggplot2::coord_flip() + } + + print(age_risk_plot) + } + + + return(age_risk) +} + +#' Provides the sociological description of ethnicities in Colombia +#' +#' @description Function that returns the description of consulted ethnicities +#' @param ethnic_labels A numeric vector with the codes of ethnicities to +#' consult +#' @param language "ES" for description in spanish "EN" for english +#' +#' @return A printed message with the description of the ethnicities +#' @examples +#' \dontrun{ +#' describe_ethnicity(c(1, 2, 3, 4)) +#' } +#' @export +describe_ethnicity <- function(ethnic_labels, language = "ES") { + stopifnot( + "`ethnic_labels` must be a numeric vector" = + is.numeric(ethnic_labels) + ) + ethnic_labels <- as.data.frame(ethnic_labels) + + #### ESPAOL #### + indigena_es <- "Persona de ascendencia amerindia que comparten sentimientos + de identificacion con su pasado aborigen, manteniendo rasgos y valores + propios de su cultura tradicional, asi como formas de organizacion + y control social propios" + + rom_es <- "Son comunidades que tienen una identidad etnica y cultural propia; + se caracterizan por una tradicion nomada, y tienen su propio idioma + que es el romanes" + + raizal_es <- "Poblacion ubicada en el Archipielago de San Andres, Providencia + y Santa Catalina, con raices culturales afroanglo-antillanas, + cuyos integrantes tienen rasgos socioculturales y linguisticos + claramente diferenciados del resto de la poblacion afrocolombiana" + + palenquero_es <- "Poblacion ubicada en el municipio de San Basilio de + Palenque, departamento de Bolivar, donde se habla el palenquero, + lenguaje criollo" + + afro_es <- "Persona de ascendencia afrocolombiana que poseen una cultura + propia, y tienen sus propias tradiciones y costumbre dentro de la relacion + campo-poblado" + + #### ENGLISH #### + indigena_en <- "A person of Amerindian descent who shares feelings of + identification with their aboriginal past, maintaining traits and values + of their traditional culture, as well as their own forms of organization + and social control" + + rom_en <- "They are communities that have their own ethnic and cultural + identity; They are characterized by a nomadic tradition, and have their own + language, which is Romanesque" + + raizal_en <- "Population located in the Archipelago of San Andres, Providencia + and Santa Catalina, with Afro-Anglo-Antillean cultural roots, whose members + have clearly differentiated sociocultural and linguistic traits + from the rest of the Afro-Colombian population" + + palenquero_en <- "Population located in the municipality of San Basilio de + Palenque, department of Bolivar, where palenquero is spoken, + a Creole language" + + afro_en <- "Person of Afro-Colombian descent who have their own culture, + and have their own traditions and customs within the + rural-populated relationship" + + ##### + + descriptions_es <- c(indigena_es, rom_es, raizal_es, palenquero_es, afro_es) + description_en <- c(indigena_en, rom_en, raizal_en, palenquero_en, afro_en) + + labels <- order(unique(ethnic_labels$ethnic_labels)) + + if (language == "EN") { + return(description_en[labels]) + } else { + return(descriptions_es[labels]) + } +} + +# nolint start +#' Get ISCO-88 occupation labels from codes +#' +#' @description Function that translates a vector of ISCO-88 occupation codes +#' into a vector of labels +#' @param isco_codes A numeric vector of ISCO-88 occupation codes +#' (major, submajor, minor or unit) +#' @param output_level A string parameter that defines the level of the desired +#' label (major, submajor, minor or unit) +#' +#' @return A string vector of ISCO-88 labels +#' @examples +#' \dontrun{ +#' describe_occupation(1111, level = 1) +#' } +#' @export +describe_occupation <- function(isco_codes, output_level) { + stopifnot("`isco_codes` must be a numeric vector" = is.numeric(isco_codes)) + path <- system.file("extdata", "isco88_table.rda", package = "epiCo") + load(path) + isco88_table <- isco88_table + input_level <- dplyr::case_when( + isco_codes %in% c(0, 110) ~ "Armed Forces", + nchar(isco_codes) == 1 ~ "major", + nchar(isco_codes) == 2 ~ "sub_major", + nchar(isco_codes) == 3 ~ "minor", + nchar(isco_codes) == 4 ~ "unit", + TRUE ~ NA_character_ + ) + tryCatch( + { + output_level_index <- as.numeric(sapply(output_level, switch, + "major" = 1, + "major_label" = 1, + "sub_major" = 2, + "sub_major_label" = 2, + "minor" = 3, + "minor_label" = 3, + "unit" = 4, + "unit_label" = 4, + simplify = "array" + )) + }, + error = function(e) { + stop( + "Output level does not exist, please check your input", + call. = FALSE + ) + } + ) + + + input_level_index <- sapply(input_level, switch, + "major" = 1, + "sub_major" = 2, + "minor" = 3, + "unit" = 4, + "Armed Forces" = 0, + simplify = "array" + ) + + isco88_labels <- as.list(input_level) + for (i in seq(1, length(isco_codes))) { + tryCatch( + { + isco_code <- isco_codes[i] + if (isco_code == 0 | isco_code == 110) { + isco88_labels[[i]] <- as.array(isco88_table[ + isco88_table$major == 0, + output_level + ]) + } else if (input_level_index[i] < output_level_index) { + index_start <- match(isco_code, isco88_table[, input_level[i]]) + n_match <- sum(isco88_table[, input_level[i]] == isco_code) + index_end <- index_start + n_match - 1 + isco88_labels[[i]] <- as.array(isco88_table[ + index_start:index_end, + output_level + ]) + } else { + isco88_labels[i] <- isco88_table[which(isco88_table[input_level[i]] + == isco_code)[1], output_level] + } + }, + error = function(e) { + stop( + "Code ", isco_code, " does not exist, please check your input", + call. = FALSE + ) + } + ) + } + return(isco88_labels) +} +# nolint end diff --git a/R/distance_matrix.R b/R/distance_matrix.R deleted file mode 100644 index 1df93d17..00000000 --- a/R/distance_matrix.R +++ /dev/null @@ -1,8 +0,0 @@ -#' Distance matrix between municipalities -#' -#' @title distance_matrix -#' @description Distance matrix between all Colombia's municipalities by land -#' or river. -#' @name distance_matrix -#' @usage data(distance_matrix) -"distance_matrix" diff --git a/R/endemic_channel.R b/R/endemic_channel.R index c02642e9..73f99ccf 100644 --- a/R/endemic_channel.R +++ b/R/endemic_channel.R @@ -1,168 +1,3 @@ -#' Returns an automated endemic channel of a disease -#' -#' @description Function that performs automated data wrangling necessary for -#' an automated endemic channel given a method and an specific disease, location -#' and year. -#' -#' @param disease_name Disease to be consulted -#' @param divipola_code DIVIPOLA code of the municipality where the disease is -#' consulted -#' @param year Year of observations -#' @param observations A numeric vector with the current observations (Optional) -#' @param location TBD -#' @param window A numeric value to specify the number of previous and -#' posterior periods to include in the calculation of the current period mean -#' @param method A string with the mean calculation method of preference -#' (median, mean, or geometric) or to use unusual behavior method (Poisson -#' Distribution Test for hypoendemic settings) -#' @param geom_method A string with the selected method for geometric mean -#' calculation, see: geom_mean -#' @param outlier_years A numeric vector with the outlier years -#' @param outliers_handling A string with the handling decision regarding -#' outlier years -#' @param ci = 0.95 A numeric value to specify the confidence interval to use -#' with the geometric method -#' @param plot A boolean for displaying a plot -#' -#' @return TBD -#' -#' @examples -#' \dontrun{ -#' unusual_behaviour(incidence_historic) -#' } -#' -#' @export -auto_endemic_channel <- function(disease_name, divipola_code, year, - observations = NULL, - location = "O", window = 7, - method = "geometric", geom_method = "shifted", - outlier_years = NULL, - outliers_handling = "ignored", - ci = 0.95, - plot = TRUE) { - ## Data import and cleaning #### - - years_to_analyze <- seq(year - window + 1, year) - - events_to_analyze <- disease_name - - tags_to_analyze <- c( - "FEC_NOT", - "COD_PAIS_O", "COD_DPTO_O", "COD_MUN_O", - "COD_DPTO_R", "COD_MUN_R", - "COD_DPTO_N", "COD_MUN_N" - ) - - disease_data <- data.frame(matrix(ncol = length(tags_to_analyze), nrow = 0)) - colnames(disease_data) <- tags_to_analyze - - for (y in years_to_analyze) { - for (e in events_to_analyze) { - temp_data <- sivirep::import_data_event(y, e) - temp_data$FEC_NOT <- as.character(temp_data$FEC_NOT) - temp_data$FEC_NOT <- format( - as.Date(temp_data$FEC_NOT, - # nolint start: nonportable_path_linter - tryFormats = c("%Y-%m-%d", "%d/%m/%Y") - # nolint end: nonportable_path_linter - ), - "%Y-%m-%d" - ) - disease_data <- rbind( - disease_data, - dplyr::select( - temp_data, - tags_to_analyze - ) - ) - } - } - - ## Dates and DIVIPOLA codes preparation and cleaning - - disease_data <- disease_data %>% dplyr::mutate( - COD_MUN_R = dplyr::case_when( - .data$COD_DPTO_R == 1 ~ .data$COD_PAIS_O, # 1 indicates residence abroad - nchar(.data$COD_MUN_R) == 1 ~ - as.numeric(paste(.data$COD_DPTO_R, - .data$COD_MUN_R, - sep = "00" - )), - nchar(.data$COD_MUN_R) == 2 ~ - as.numeric(paste(.data$COD_DPTO_R, - .data$COD_MUN_R, - sep = "0" - )), - nchar(.data$COD_MUN_R) == 3 ~ - as.numeric(paste0(.data$COD_DPTO_R, - .data$COD_MUN_R - )), - TRUE ~ NA_real_ - ), - COD_MUN_O = dplyr::case_when( - # 1 indicates infection occurred abroad - .data$COD_DPTO_O == 1 ~ .data$COD_PAIS_O, - nchar(.data$COD_MUN_O) == 1 ~ - as.numeric(paste(.data$COD_DPTO_O, - .data$COD_MUN_O, - sep = "00" - )), - nchar(.data$COD_MUN_O) == 2 ~ - as.numeric(paste(.data$COD_DPTO_O, - .data$COD_MUN_O, - sep = "0" - )), - nchar(.data$COD_MUN_O) == 3 ~ - as.numeric(paste0(.data$COD_DPTO_O, - .data$COD_MUN_O - )), - TRUE ~ NA_real_ - ), - EPI_WEEK = lubridate::epiweek(.data$FEC_NOT), - EPI_MONTH = lubridate::month(.data$FEC_NOT), - EPI_YEAR = lubridate::epiyear(.data$FEC_NOT) - ) - - # Cleaning of cases without specified municipalities - - disease_data <- dplyr::filter(disease_data, !is.na(disease_data$COD_MUN_O)) - disease_data <- dplyr::filter(disease_data, !is.na(disease_data$COD_MUN_R)) - disease_data <- dplyr::filter(disease_data, !is.na(disease_data$COD_MUN_N)) - - # Cleaning of cases out of the years range - - disease_data <- dplyr::filter( - disease_data, - .data$EPI_YEAR %in% years_to_analyze - ) - - # Cleaning of cases from abroad - - disease_data <- dplyr::filter(disease_data, .data$COD_PAIS_O == 170) - - # Cleaning of typos - path <- system.file("data", "divipola_table.rda", package = "epiCo") - load(path) - divipola_table <- divipola_table - - typos <- which(disease_data$COD_PAIS_O == 170 & - !(disease_data$COD_MUN_O %in% divipola_table$COD_MPIO)) - - disease_data <- disease_data[-typos, ] - - ##### - - - disease_data <- dplyr::filter(disease_data, .data$COD_MUN_O == divipola_code) - - interval <- ifelse(method == "unusual_behavior", "1 month", "1 epiweek") - - incidence_historic <- incidence::incidence(disease_data$FEC_NOT, - interval = interval - ) - - endemic_channel(observations, incidence_historic, plot = TRUE) -} #' Returns the endemic channel of a disease #' #' Function that builds the endemic channel of a disease time series based on @@ -199,29 +34,107 @@ endemic_channel <- function(incidence_historic, observations = NULL, stopifnot( "`incidence_historic` must be an incidence object" = inherits(incidence_historic, "incidence"), - "`observations`must be numeric and positive" = - (is.numeric(observations) & sign(observations) != -1), + "`incidence_historic` must contain at least one complete epidemiological + year" = + ((incidence_historic$interval == "1 week" & + length(incidence_historic$dates) >= 52) || + (incidence_historic$interval == "1 month" & + length(incidence_historic$dates) >= 12)), "incidence interval should be `1 month`, `1 week` or `1 epiweek`" = incidence_historic$interval %in% - c("1 month", "1 week", "1 epiweek") - ) - ifelse(incidence_historic$interval == "1 month", - period <- 12, - period <- 52 + c("1 month", "1 week", "1 epiweek"), + "`method` should be `median`, `mean`, `geometric` or `unusual behavior`" = + method %in% + c("median", "mean", "geometric", "unusual_behavior"), + "`ci` must be a number between 0 and 1" = + (ci >= 0 & ci <= 1 & is.numeric(ci)), + "`plot` must be a boolean" = + (is.logical(plot)) ) - obs <- c(observations, rep(NA, period - length(observations))) - years <- unique(lubridate::epiyear(incidence::get_dates(incidence_historic))) - extra_weeks <- which(lubridate::epiweek(incidence_historic$dates) == 53) + if (!is.null(observations)) { + stopifnot( + "`observations` must be numeric and positive" = + (is.numeric(observations) && + all(observations >= 0)), + "`observations` size doesn't correspond to incidence interval" = + ((incidence_historic$interval == "1 week" & + length(observations) == 52) || + (incidence_historic$interval == "1 month" & + length(observations) == 12)) + ) + } - ifelse(incidence_historic$interval == "1 month", + extra_weeks <- which(lubridate::epiweek(incidence_historic$dates) == 53) + if (incidence_historic$interval == "1 month") { + period <- 12 + if (lubridate::month(incidence_historic$dates[1]) != 1) { + first_year <- lubridate::year(incidence_historic$dates[1]) + new_date <- paste(as.character(first_year + 1), "01-01", sep = "-") + incidence_historic <- incidence_historic[incidence_historic$dates >= + as.Date(new_date)] + msg <- paste( + "Data prior to", new_date, + "were not used for the endemic channel calculation." + ) + warning(msg) + } + if (lubridate::month(incidence_historic$dates[ + length(incidence_historic$dates) + ]) != 12) { + last_year <- lubridate::year(incidence_historic$dates[ + length(incidence_historic$dates) + ]) + new_date <- paste(as.character(last_year - 1), "12-01", sep = "-") + incidence_historic <- incidence_historic[incidence_historic$dates <= + as.Date(new_date)] + msg <- paste( + "Data after", new_date, + "were not used for the endemic channel calculation." + ) + warning(msg) + } counts_historic <- as.numeric(incidence::get_counts( incidence_historic - )), + )) + } else if (incidence_historic$interval == "1 week") { + period <- 52 + first_year <- lubridate::epiyear(incidence_historic$dates[1]) + if (incidence_historic$dates[1] != epiCo::epi_calendar(first_year)[1]) { + new_date <- epiCo::epi_calendar(first_year + 1)[1] + incidence_historic <- incidence_historic[incidence_historic$dates >= + new_date] + msg <- paste( + "Data prior to", new_date, + "were not used for the endemic channel calculation." + ) + warning(msg) + } + last_year <- lubridate::epiyear(incidence_historic$dates[ + length(incidence_historic$dates) + ]) + last_epidate <- epiCo::epi_calendar(last_year)[ + length(epiCo::epi_calendar(last_year)) + ] + if (incidence_historic$dates[length(incidence_historic$dates)] != + last_epidate) { + new_date <- epiCo::epi_calendar(last_year - 1)[ + length(epiCo::epi_calendar(last_year - 1)) + ] + incidence_historic <- incidence_historic[incidence_historic$dates <= + as.Date(new_date)] + msg <- paste( + "Data after", new_date, + "were not used for the endemic channel calculation." + ) + warning(msg) + } counts_historic <- as.numeric(incidence::get_counts( incidence_historic )[-extra_weeks]) - ) + } + obs <- c(observations, rep(NA, period - length(observations))) + years <- unique(lubridate::epiyear(incidence::get_dates(incidence_historic))) historic <- as.data.frame(matrix(counts_historic, nrow = length(years), @@ -235,63 +148,66 @@ endemic_channel <- function(incidence_historic, observations = NULL, geom_method ) - if (method == "median") { - central <- as.numeric(apply(historic, - MARGIN = 2, - FUN = stats::quantile, p = 0.5 - )) - up_lim <- as.numeric(apply(historic, - MARGIN = 2, - FUN = stats::quantile, p = 0.75 - )) - low_lim <- as.numeric(apply(historic, - MARGIN = 2, - FUN = stats::quantile, p = 0.25 - )) - } else if (method == "mean") { - central <- as.numeric(colMeans(historic)) - interval <- as.numeric(apply(historic, - MARGIN = 2, FUN = function(x) { - stats::qt( - p = c((1 - ci) / 2), - df = length(x) - 1 - ) * - stats::sd(x) / sqrt(length(x)) - } - )) - up_lim <- central + abs(interval) - low_lim <- central - abs(interval) - } else if (method == "geometric") { - central <- as.numeric(apply(historic, - MARGIN = 2, - FUN = geom_mean, method = geom_method - )) - interval <- as.numeric(apply(historic, - MARGIN = 2, FUN = function(x) { - stats::qt( - p = c((1 - ci) / 2), - df = length(x) - 1 - ) * - stats::sd(x) / sqrt(length(x)) + switch(method, + median = { + central <- as.numeric(apply(historic, + MARGIN = 2, + FUN = stats::quantile, p = 0.5 + )) + up_lim <- as.numeric(apply(historic, + MARGIN = 2, + FUN = stats::quantile, p = 0.75 + )) + low_lim <- as.numeric(apply(historic, + MARGIN = 2, + FUN = stats::quantile, p = 0.25 + )) + }, + mean = { + central <- as.numeric(colMeans(historic)) + interval <- as.numeric(apply(historic, + MARGIN = 2, FUN = function(x) { + stats::qt( + p = c((1 - ci) / 2), + df = length(x) - 1 + ) * + stats::sd(x) / sqrt(length(x)) + } + )) + up_lim <- central + abs(interval) + low_lim <- central - abs(interval) + }, + geometric = { + central <- as.numeric(apply(historic, + MARGIN = 2, + FUN = geom_mean, method = geom_method + )) + interval <- as.numeric(apply(historic, + MARGIN = 2, FUN = function(x) { + stats::qt( + p = c((1 - ci) / 2), + df = length(x) - 1 + ) * + stats::sd(x) / sqrt(length(x)) + } + )) + up_lim <- central + abs(interval) + low_lim <- central - abs(interval) + }, + unusual_behavior = { + central <- as.numeric(colMeans(historic)) + up_lim <- NULL + low_lim <- NULL + for (c in central) { + poiss_test <- stats::poisson.test(round(c), + alternative = "two.sided", + conf.level = ci + ) + up_lim <- c(up_lim, poiss_test$conf.int[2]) + low_lim <- c(low_lim, poiss_test$conf.int[1]) } - )) - up_lim <- central + abs(interval) - low_lim <- central - abs(interval) - } else if (method == "unusual_behavior") { - central <- as.numeric(colMeans(historic)) - up_lim <- NULL - low_lim <- NULL - for (c in central) { - poiss_test <- stats::poisson.test(round(c), - alternative = "two.sided", - conf.level = ci - ) - up_lim <- c(up_lim, poiss_test$conf.int[2]) - low_lim <- c(low_lim, poiss_test$conf.int[1]) } - } else { - return("Error in central tendency method") - } + ) channel_data <- data.frame( central = central, @@ -348,7 +264,7 @@ endemic_channel <- function(incidence_historic, observations = NULL, #' ) #' } #' -#' @export +#' @keywords internal endemic_outliers <- function(historic, outlier_years, outliers_handling, geom_method = "shifted") { if (outliers_handling == "included") { @@ -404,7 +320,7 @@ endemic_outliers <- function(historic, outlier_years, outliers_handling, #' endemic_plot(channel_data, method, outlier_years, outliers_handling) #' } #' -#' @export +#' @keywords internal endemic_plot <- function(channel_data, method, outlier_years, outliers_handling) { endemic_channel_plot <- ggplot2::ggplot( diff --git a/R/population_projection_col.R b/R/population_projection_col.R deleted file mode 100644 index 0ff39cc5..00000000 --- a/R/population_projection_col.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Colombian Population projection -#' -#' @title population_projection_col -#' @description Documentation for datasets regarding population projection -#' -#' @name epiCo-population-data -NULL - -#' Colombian population projection -#' @rdname population_projection_col_0 -#' @description Population projection of Colombia. -#' @usage data(population_projection_col_0) -"population_projection_col_0" -#' Department population projection -#' @rdname population_projection_col -#' @description Population projection of the departments of Colombia. -#' @usage data(population_projection_col_1) -"population_projection_col_1" -#' Municipality population projection -#' @rdname population_projection_col -#' @description Population projection of the municipalities of Colombia. -#' @usage data(population_projection_col_2) -"population_projection_col_2" diff --git a/R/spatial_polygons_col.R b/R/spatial_polygons_col.R deleted file mode 100644 index a704032a..00000000 --- a/R/spatial_polygons_col.R +++ /dev/null @@ -1,23 +0,0 @@ -#' Spatial polygons of Colombia -#' -#' @title spatial_polygons_col -#' @description Documentation for datasets regarding spatial polygons -#' -#' @name epiCo-spatial-data -NULL - -#' Colombian spatial polygons -#' @rdname spatial_polygons_col -#' @description Spatial polygon of Colombia. -#' @usage data(spatial_polygons_col_0) -"spatial_polygons_col_0" -#' Departments spatial polygons -#' @rdname spatial_polygons_col -#' @description Spatial polygons of the departments of Colombia. -#' @usage data(spatial_polygons_col_1) -"spatial_polygons_col_1" -#' Municipalities spatial polygons -#' @rdname spatial_polygons_col -#' @description Spatial polygons of the municipalities of Colombia. -#' @usage data(spatial_polygons_col_2) -"spatial_polygons_col_2" diff --git a/R/spatiotemporal.R b/R/spatiotemporal.R index 34dff5b6..62211f25 100644 --- a/R/spatiotemporal.R +++ b/R/spatiotemporal.R @@ -6,7 +6,6 @@ #' @param query_vector Codes of the municipalities to consider for the #' neighborhoods. #' @param threshold Maximum traveling time around each municipality. - #' @return neighborhood object according to the introduced threshold. #' #' @examples @@ -18,7 +17,7 @@ #' @export neighborhoods <- function(query_vector, threshold = 2) { stopifnot("`query_vector` must be numeric" = (is.numeric(query_vector))) - path <- system.file("data", "distance_matrix.rda", package = "epiCo") + path <- system.file("extdata", "distance_matrix.rda", package = "epiCo") load(path) distance_matrix <- distance_matrix distance <- distance_matrix[ @@ -45,8 +44,11 @@ neighborhoods <- function(query_vector, threshold = 2) { #' #' @importFrom magrittr %>% #' -#' @param incidence_rate Incidence rate object with only one observation for a -#' group of municipalities. +#' @param incidence_object An object is the incidence of an observation for the +#' different locations. +#' @param level Administration level at which incidence counts are grouped. +#' (0=national, 1=state/department, 2=city/municipality). +#' @param scale Scale to consider when calculating the incidence_rate. #' @param threshold Maximum traveling time around each municipality. #' @param plot if TRUE, returns a plot of influential observations in the #' Moran's plot. @@ -56,15 +58,22 @@ neighborhoods <- function(query_vector, threshold = 2) { #' #' @examples #' \dontrun{ -#' morans_index(incidence_rate, 2, FALSE) +#' morans_index(incidence_object, 2, FALSE) #' } #' @export -morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) { +morans_index <- function(incidence_object, level, scale = 100000, threshold = 2, + plot = TRUE) { stopifnot( - "`incidence_rate` must have observations for only one given date" = - ncol(incidence_rate$rates) == length(incidence_rate$rates) + "`incidence_object` must have incidence class" = + (inherits(incidence_object, "incidence")), + "`threshold` must be numeric" = (is.numeric(threshold)), + "`plot` must be boolean" = (is.logical(plot)) + ) + incidence_rate <- incidence_rate( + incidence_object = incidence_object, + level = level, scale = scale ) - path_1 <- system.file("data", "divipola_table.rda", package = "epiCo") + path_1 <- system.file("extdata", "divipola_table.rda", package = "epiCo") load(path_1) divipola_table <- divipola_table # Match with DIVIPOLA order @@ -131,12 +140,15 @@ morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) { } # Plot if (plot) { - if (!all(is.na(morans_index$quadrant))) { - path_2 <- system.file("data", "spatial_polygons_col_2.rda", + if (all(is.na(morans_index$quadrant))) { + warning("There are no influential municipalities to plot") + } else { + path_2 <- system.file("extdata", "spatial_polygons_col_2.rda", package = "epiCo" ) load(path_2) spatial_polygons_col_2 <- spatial_polygons_col_2 + # nolint start pal <- leaflet::colorFactor( palette = c( "#ba0001", "#357a38", "#2c7c94", @@ -145,8 +157,7 @@ morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) { domain = c("HH", "LL", "LH", "HL"), ordered = TRUE ) - pal_test <- pal(c("LL", "HH")) - rm(pal_test) + # nolint end shapes <- spatial_polygons_col_2[spatial_polygons_col_2$MPIO_CDPMP %in% as.integer(inf_mpios$labels), ] shapes_plot <- shapes[, order(match( @@ -167,7 +178,8 @@ morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) { "", "Cluster: ", "", shapes_plot$CLUSTER, "
" ) - leaflet::leaflet(shapes_plot) %>% + # nolint start + clusters_plot <- leaflet::leaflet(shapes_plot) %>% leaflet::addTiles() %>% leaflet::addPolygons( stroke = TRUE, @@ -178,8 +190,8 @@ morans_index <- function(incidence_rate, threshold = 2, plot = TRUE) { color = "white", fillOpacity = 0.75 ) - } else { - warning("There are no influential municipalities to plot") + # nolint end + return(list(moran_data = morans_index, leaflet_map = clusters_plot)) } } return(morans_index) diff --git a/R/utils.R b/R/utils.R index 967bebe4..0a1ea994 100644 --- a/R/utils.R +++ b/R/utils.R @@ -61,9 +61,9 @@ epi_calendar <- function(year, jan_days = 4) { #' @description Function that estimates incidence rates from a incidence class #' object and population projections. #' @param incidence_object An incidence object. -#' @param level Administration level at which incidence counts are grouped. +#' @param level Administration level at which incidence counts are grouped +#' (0 = national, 1 = state/department, 2 = city/municipality). #' @param scale Scale to consider when calculating the incidence_rate. -#' (0=national, 1=state/department, 2=city/municipality). #' #' @return A modified incidence object where counts are normalized with the #' population. @@ -85,7 +85,7 @@ incidence_rate <- function(incidence_object, level, scale = 100000) { dates <- lubridate::year(incidence_object$dates) years <- unique(dates) if (level == 0) { - path_0 <- system.file("data", "population_projection_col_0.rda", + path_0 <- system.file("extdata", "population_projection_col_0.rda", package = "epiCo" ) load(path_0) @@ -94,7 +94,7 @@ incidence_rate <- function(incidence_object, level, scale = 100000) { populations$code <- population_projection_col_0$DP groups <- 0 } else if (level == 1) { - path_1 <- system.file("data", "population_projection_col_1.rda", + path_1 <- system.file("extdata", "population_projection_col_1.rda", package = "epiCo" ) load(path_1) @@ -103,7 +103,7 @@ incidence_rate <- function(incidence_object, level, scale = 100000) { populations$code <- population_projection_col_1$DP groups <- as.numeric(colnames(incidence_object$counts)) } else if (level == 2) { - path_2 <- system.file("data", "population_projection_col_2.rda", + path_2 <- system.file("extdata", "population_projection_col_2.rda", package = "epiCo" ) load(path_2) @@ -123,7 +123,8 @@ incidence_rate <- function(incidence_object, level, scale = 100000) { # } else { populations <- dplyr::filter( populations, - .data$code %in% groups & .data$ANO %in% years + .data$code %in% groups, + .data$ANO %in% years ) incidence_rates <- incidence_object$counts diff --git a/README.md b/README.md index 718899c9..61ddc812 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,17 @@ -# epiCo +# epiCo -*epiCo* provides functions for clustering, regression, now casting, and sup reporting analyses vector-borne diseases in Colombia. - -**R** from common *health information systems*. +*epiCo* provides statistical tools for the analysis of demographic trends, spatiotemporal behavior, and outbreaks characterization of vector-borne diseases in Colombia. - [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![R-CMD-check](https://github.com/epiverse-trace/epico/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/epiverse-trace/epico/actions/workflows/R-CMD-check.yaml) [![Codecov test coverage](https://codecov.io/gh/epiverse-trace/epico/branch/main/graph/badge.svg)](https://app.codecov.io/gh/epiverse-trace/epico?branch=main) -[![lifecycle-concept](https://raw.githubusercontent.com/reconverse/reconverse.github.io/master/images/badge-concept.svg)](https://www.reconverse.org/lifecycle.html#concept) +[![lifecycle-experimental](https://raw.githubusercontent.com/reconverse/reconverse.github.io/master/images/badge-experimental.svg)](https://www.reconverse.org/lifecycle.html#experimental) ## Installation @@ -27,32 +24,34 @@ You can install the development version of *epiCo* from remotes::install_github("epiverse-trace/epiCo") ``` -## Example +## Motivation -These examples illustrate some of the current functionalities: +When reviewing the current epidemiological bulletins published by the local Secretariats of Health in Colombia we identified an opportunity to create a tool to: +- Include spatial and demographic risk assesments into their routine epidemiological reports to better identify population groups for potential interventions +- Facilitate the understanding of the different epidemiological profiles within a region in Colombia regarding onset, duration, magnitude, and frequency of the outbreaks +- Strengthen the transparency of methods used for the outbreaks analyses +- Provide more informative context by performing correlation and regression analyses with local socioeconomic and climate data -``` r -library(epiCo) -library(incidence) -library(qcc) +The package allows to perform interoperable analyses of linelist data from +[SIVIGILA](https://www.ins.gov.co/Direcciones/Vigilancia/Paginas/SIVIGILA.aspx) (accesible using Epiverse-TRACE package [sivirep](https://github.com/epiverse-trace/sivirep)) with spatial, socioeconomic and climate data (accesible using Epiverse-TRACE package [ColOpenData](). -## Occupation labels +*epiCo* can be used to perform the following main tasks at municipality, departmental, or national level: -isco_codes <- c(7321, 2411, 4121, 3439, 3431) -isco_labels <- get_occupationLabels(isco_codes, output_level = "unit_label") +1) To identify demographic vulnerabilities from linelist data and socioeconomic census, including risk assesment based on age, gender, occupation, and ethnicity. +2) To asses hot-spots analyses (as Local Moran's index) based on real travel distances in Colombia estimated from [Bravo-Vega C., Santos-Vega M., & Cordovez J.M. (2022)](https://doi.org/10.1371/journal.pntd.0010270). +3) To generate automated outbreaks characterization (onset, duration, magnitude, and frequency) using traditional methods as the [endemic channel](https://iris.paho.org/handle/10665.2/8562) and poisson tests for unusual behavior, or by Statistical Process Control methods as ARIMA Control Chart for outliers detection. +4) To estimate the probability of the effective reproductive number (Rt) to be higher than one (i.e. outbreak onset) based on work by [Parag, K.V., & Donnelly, C.A. (2022)](https://doi.org/10.1371/journal.pcbi.1010004) & [Codeço, C.T., Villela, D.A., & Coelho, F.C. (2018)](https://doi.org/10.1016/j.epidem.2018.05.011). +5) To perform correlation analyses between categorial socioeconomic data and climate time series, and epidemiological data to generate a report of potential drivers and trends of VBDs outbreaks. +6) *Future features will include nowcasting assesment as underreport estimation and short term forecasting* -## Incidence rates estimation +All functionalities are performed automatically from epidemiological, demographic, spatial and socioconomic data published by Colombian institutions but methods can also be customized, as well as inputed data, so hipothetical information can be tested within the package. -data("dengue_orinoquia_2017") -incidence_object <- incidence(dengue_orinoquia_2017$FEC_NOT, groups = dengue_orinoquia_2017$COD_MUN_O, interval = "1 week") -incidenceRate_object <- estimate_incidenceRate(incidence_object, level = 2) - -## Outbreaks detection +## Example -# EWMA method +These examples illustrate some of the current functionalities: -incidence_arauca <- incidence_object$counts[,"81001"] -outbreaks_object <- detect_outbreaks_EWMA(incidence_arauca, lambda = 0.2, nsigmas = 2) +``` r +library(epiCo) ``` diff --git a/data/epi_data.rda b/data/epi_data.rda new file mode 100644 index 00000000..a40d0fe8 Binary files /dev/null and b/data/epi_data.rda differ diff --git a/epiCo.Rproj b/epiCo.Rproj new file mode 100644 index 00000000..8d82f1f3 --- /dev/null +++ b/epiCo.Rproj @@ -0,0 +1,17 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/data/distance_matrix.rda b/inst/extdata/distance_matrix.rda similarity index 100% rename from data/distance_matrix.rda rename to inst/extdata/distance_matrix.rda diff --git a/inst/extdata/divipola_table.rda b/inst/extdata/divipola_table.rda new file mode 100644 index 00000000..0406f0fe Binary files /dev/null and b/inst/extdata/divipola_table.rda differ diff --git a/inst/extdata/epi_data.rda b/inst/extdata/epi_data.rda new file mode 100644 index 00000000..a40d0fe8 Binary files /dev/null and b/inst/extdata/epi_data.rda differ diff --git a/inst/extdata/isco88_table.rda b/inst/extdata/isco88_table.rda new file mode 100644 index 00000000..86ca1cdf Binary files /dev/null and b/inst/extdata/isco88_table.rda differ diff --git a/data/population_projection_col_0.rda b/inst/extdata/population_projection_col_0.rda similarity index 100% rename from data/population_projection_col_0.rda rename to inst/extdata/population_projection_col_0.rda diff --git a/data/population_projection_col_1.rda b/inst/extdata/population_projection_col_1.rda similarity index 100% rename from data/population_projection_col_1.rda rename to inst/extdata/population_projection_col_1.rda diff --git a/data/population_projection_col_2.rda b/inst/extdata/population_projection_col_2.rda similarity index 100% rename from data/population_projection_col_2.rda rename to inst/extdata/population_projection_col_2.rda diff --git a/data/spatial_polygons_col_0.rda b/inst/extdata/spatial_polygons_col_0.rda similarity index 100% rename from data/spatial_polygons_col_0.rda rename to inst/extdata/spatial_polygons_col_0.rda diff --git a/data/spatial_polygons_col_1.rda b/inst/extdata/spatial_polygons_col_1.rda similarity index 100% rename from data/spatial_polygons_col_1.rda rename to inst/extdata/spatial_polygons_col_1.rda diff --git a/data/spatial_polygons_col_2.rda b/inst/extdata/spatial_polygons_col_2.rda similarity index 100% rename from data/spatial_polygons_col_2.rda rename to inst/extdata/spatial_polygons_col_2.rda diff --git a/man/auto_endemic_channel.Rd b/man/auto_endemic_channel.Rd deleted file mode 100644 index 7f46aef9..00000000 --- a/man/auto_endemic_channel.Rd +++ /dev/null @@ -1,67 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/endemic_channel.R -\name{auto_endemic_channel} -\alias{auto_endemic_channel} -\title{Returns an automated endemic channel of a disease} -\usage{ -auto_endemic_channel( - disease_name, - divipola_code, - year, - observations = NULL, - location = "O", - window = 7, - method = "geometric", - geom_method = "shifted", - outlier_years = NULL, - outliers_handling = "ignored", - ci = 0.95, - plot = TRUE -) -} -\arguments{ -\item{disease_name}{Disease to be consulted} - -\item{divipola_code}{DIVIPOLA code of the municipality where the disease is -consulted} - -\item{year}{Year of observations} - -\item{observations}{A numeric vector with the current observations (Optional)} - -\item{location}{TBD} - -\item{window}{A numeric value to specify the number of previous and -posterior periods to include in the calculation of the current period mean} - -\item{method}{A string with the mean calculation method of preference -(median, mean, or geometric) or to use unusual behavior method (Poisson -Distribution Test for hypoendemic settings)} - -\item{geom_method}{A string with the selected method for geometric mean -calculation, see: geom_mean} - -\item{outlier_years}{A numeric vector with the outlier years} - -\item{outliers_handling}{A string with the handling decision regarding -outlier years} - -\item{ci}{= 0.95 A numeric value to specify the confidence interval to use -with the geometric method} - -\item{plot}{A boolean for displaying a plot} -} -\value{ -TBD -} -\description{ -Function that performs automated data wrangling necessary for -an automated endemic channel given a method and an specific disease, location -and year. -} -\examples{ -\dontrun{ -unusual_behaviour(incidence_historic) -} - -} diff --git a/man/detect_outbreaks_EWMA.Rd b/man/detect_outbreaks_ewma.Rd similarity index 100% rename from man/detect_outbreaks_EWMA.Rd rename to man/detect_outbreaks_ewma.Rd diff --git a/man/distance_matrix.Rd b/man/distance_matrix.Rd deleted file mode 100644 index 69dc2b47..00000000 --- a/man/distance_matrix.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/distance_matrix.R -\docType{data} -\name{distance_matrix} -\alias{distance_matrix} -\title{distance_matrix} -\format{ -An object of class \code{data.frame} with 1121 rows and 1121 columns. -} -\usage{ -data(distance_matrix) -} -\description{ -Distance matrix between all Colombia's municipalities by land -or river. -} -\details{ -Distance matrix between municipalities -} -\keyword{datasets} diff --git a/man/endemic_outliers.Rd b/man/endemic_outliers.Rd index 41a6d041..8b654c93 100644 --- a/man/endemic_outliers.Rd +++ b/man/endemic_outliers.Rd @@ -1,17 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/endemic_channel.R, R/endemic_outliers.R +% Please edit documentation in R/endemic_channel.R \name{endemic_outliers} \alias{endemic_outliers} \title{Modifies the historic incidence to handle with the observations of epidemic years} \usage{ -endemic_outliers( - historic, - outlier_years, - outliers_handling, - geom_method = "shifted" -) - endemic_outliers( historic, outlier_years, @@ -41,14 +34,9 @@ geometric mean and take into account calculation, see: geom_mean} } \value{ -A modified historic incidence - A modified historic incidence } \description{ -Function that modifies an historic incidence by including, -ignoring, or replacing the observations of epidemic years - Function that modifies an historic incidence by including, ignoring, or replacing the observations of epidemic years } @@ -59,10 +47,5 @@ endemic_outliers(historic, outlier_years, outliers_handling, ) } -\dontrun{ -endemic_outliers(historic, outlier_years, outliers_handling, - geom_method = "shifted" -) -} - } +\keyword{internal} diff --git a/man/endemic_plot.Rd b/man/endemic_plot.Rd index cb18b6ca..200edebe 100644 --- a/man/endemic_plot.Rd +++ b/man/endemic_plot.Rd @@ -29,3 +29,4 @@ endemic_plot(channel_data, method, outlier_years, outliers_handling) } } +\keyword{internal} diff --git a/man/epiCo-population-data.Rd b/man/epiCo-population-data.Rd deleted file mode 100644 index 68473fa9..00000000 --- a/man/epiCo-population-data.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/population_projection_col.R -\name{epiCo-population-data} -\alias{epiCo-population-data} -\title{population_projection_col} -\description{ -Documentation for datasets regarding population projection -} -\details{ -Colombian Population projection -} diff --git a/man/epiCo-spatial-data.Rd b/man/epiCo-spatial-data.Rd deleted file mode 100644 index 593920a2..00000000 --- a/man/epiCo-spatial-data.Rd +++ /dev/null @@ -1,11 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_polygons_col.R -\name{epiCo-spatial-data} -\alias{epiCo-spatial-data} -\title{spatial_polygons_col} -\description{ -Documentation for datasets regarding spatial polygons -} -\details{ -Spatial polygons of Colombia -} diff --git a/man/epi_data.Rd b/man/epi_data.Rd new file mode 100644 index 00000000..7ea3f44f --- /dev/null +++ b/man/epi_data.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/epi_data.R +\docType{data} +\name{epi_data} +\alias{epi_data} +\title{epi_data} +\format{ +An object of class \code{tbl_df} (inherits from \code{tbl}, \code{data.frame}) with 31353 rows and 15 columns. +} +\usage{ +data(epi_data) +} +\description{ +Epidemiological data for the Tolima department for the years +2015 to 2021 +} +\details{ +Epidemiological data +} +\keyword{datasets} diff --git a/man/figures/endemic_channel_figure.png b/man/figures/endemic_channel_figure.png new file mode 100644 index 00000000..afa90afe Binary files /dev/null and b/man/figures/endemic_channel_figure.png differ diff --git a/man/figures/logo.jpg b/man/figures/logo.jpg new file mode 100644 index 00000000..c9d4f7c8 Binary files /dev/null and b/man/figures/logo.jpg differ diff --git a/man/incidence_rate.Rd b/man/incidence_rate.Rd index 823bde1c..25d52c2c 100644 --- a/man/incidence_rate.Rd +++ b/man/incidence_rate.Rd @@ -9,10 +9,10 @@ incidence_rate(incidence_object, level, scale = 1e+05) \arguments{ \item{incidence_object}{An incidence object.} -\item{level}{Administration level at which incidence counts are grouped.} +\item{level}{Administration level at which incidence counts are grouped +(0 = national, 1 = state/department, 2 = city/municipality).} -\item{scale}{Scale to consider when calculating the incidence_rate. -(0=national, 1=state/department, 2=city/municipality).} +\item{scale}{Scale to consider when calculating the incidence_rate.} } \value{ A modified incidence object where counts are normalized with the diff --git a/man/morans_index.Rd b/man/morans_index.Rd index 69e56184..d31a46b1 100644 --- a/man/morans_index.Rd +++ b/man/morans_index.Rd @@ -5,11 +5,22 @@ \title{Calculate spatial correlation of given municipalities in an incidence_rate object.} \usage{ -morans_index(incidence_rate, threshold = 2, plot = TRUE) +morans_index( + incidence_object, + level, + scale = 1e+05, + threshold = 2, + plot = TRUE +) } \arguments{ -\item{incidence_rate}{Incidence rate object with only one observation for a -group of municipalities.} +\item{incidence_object}{An object is the incidence of an observation for the +different locations.} + +\item{level}{Administration level at which incidence counts are grouped. +(0=national, 1=state/department, 2=city/municipality).} + +\item{scale}{Scale to consider when calculating the incidence_rate.} \item{threshold}{Maximum traveling time around each municipality.} @@ -26,6 +37,6 @@ from a given incidence_rate object grouped by municipality. } \examples{ \dontrun{ -morans_index(incidence_rate, 2, FALSE) +morans_index(incidence_object, 2, FALSE) } } diff --git a/man/population_projection_col.Rd b/man/population_projection_col.Rd deleted file mode 100644 index df7f9b77..00000000 --- a/man/population_projection_col.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/population_projection_col.R -\docType{data} -\name{population_projection_col_1} -\alias{population_projection_col_1} -\alias{population_projection_col_2} -\title{Department population projection} -\format{ -An object of class \code{data.frame} with 726 rows and 308 columns. - -An object of class \code{data.frame} with 24684 rows and 308 columns. -} -\usage{ -data(population_projection_col_1) - -data(population_projection_col_2) -} -\description{ -Population projection of the departments of Colombia. - -Population projection of the municipalities of Colombia. -} -\keyword{datasets} diff --git a/man/population_projection_col_0.Rd b/man/population_projection_col_0.Rd deleted file mode 100644 index 89c2f697..00000000 --- a/man/population_projection_col_0.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/population_projection_col.R -\docType{data} -\name{population_projection_col_0} -\alias{population_projection_col_0} -\title{Colombian population projection} -\format{ -An object of class \code{data.frame} with 22 rows and 308 columns. -} -\usage{ -data(population_projection_col_0) -} -\description{ -Population projection of Colombia. -} -\keyword{datasets} diff --git a/man/population_pyramid.Rd b/man/population_pyramid.Rd index 65fb816c..66ce2585 100644 --- a/man/population_pyramid.Rd +++ b/man/population_pyramid.Rd @@ -8,6 +8,7 @@ population_pyramid( divipola_code, year, gender = TRUE, + range = 5, total = TRUE, plot = FALSE ) @@ -19,6 +20,8 @@ population_pyramid( \item{gender}{A boolean to consult data disaggregated by gender} +\item{range}{A numeric value from 1 to 100 for the age range to use} + \item{total}{A boolean for returning the total number rather than the proportion of the country's population} diff --git a/man/spatial_polygons_col.Rd b/man/spatial_polygons_col.Rd deleted file mode 100644 index 33598c78..00000000 --- a/man/spatial_polygons_col.Rd +++ /dev/null @@ -1,30 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/spatial_polygons_col.R -\docType{data} -\name{spatial_polygons_col_0} -\alias{spatial_polygons_col_0} -\alias{spatial_polygons_col_1} -\alias{spatial_polygons_col_2} -\title{Colombian spatial polygons} -\format{ -An object of class \code{SpatialPolygonsDataFrame} with 1 rows and 2 columns. - -An object of class \code{SpatialPolygonsDataFrame} with 33 rows and 9 columns. - -An object of class \code{SpatialPolygonsDataFrame} with 1121 rows and 12 columns. -} -\usage{ -data(spatial_polygons_col_0) - -data(spatial_polygons_col_1) - -data(spatial_polygons_col_2) -} -\description{ -Spatial polygon of Colombia. - -Spatial polygons of the departments of Colombia. - -Spatial polygons of the municipalities of Colombia. -} -\keyword{datasets} diff --git a/tests/testthat/test-demographics.R b/tests/testthat/test-demographics.R index b2133718..d6c97c9b 100644 --- a/tests/testthat/test-demographics.R +++ b/tests/testthat/test-demographics.R @@ -19,6 +19,11 @@ test_that("Population pyramid errors are thrown", { divipola_code = c(5001, 5044), year = 2020 )) + expect_error(population_pyramid( + divipola_code = 5001, + year = 2020, + range = "5" + )) }) test_that("Population pyramid obtaines data", { @@ -28,7 +33,7 @@ test_that("Population pyramid obtaines data", { divipola_code = 5001, year = 2020 )), - c(202L, 3L) + c(40L, 3L) ) expect_identical( dim(population_pyramid( @@ -36,7 +41,7 @@ test_that("Population pyramid obtaines data", { year = 2020, gender = FALSE )), - c(101L, 2L) + c(20L, 2L) ) expect_identical( dim(population_pyramid( @@ -44,7 +49,7 @@ test_that("Population pyramid obtaines data", { year = 2020, total = FALSE )), - c(202L, 3L) + c(40L, 3L) ) expect_identical( dim(population_pyramid( @@ -53,7 +58,7 @@ test_that("Population pyramid obtaines data", { gender = FALSE, total = FALSE )), - c(101L, 2L) + c(20L, 2L) ) expect_identical( dim(population_pyramid( @@ -63,22 +68,29 @@ test_that("Population pyramid obtaines data", { total = FALSE, plot = TRUE )), - c(101L, 2L) + c(20L, 2L) ) - expect_identical( dim(population_pyramid( divipola_code = 5, year = 2020 )), - c(202L, 3L) + c(40L, 3L) ) expect_identical( dim(population_pyramid( divipola_code = 0, year = 2020 )), - c(202L, 3L) + c(40L, 3L) + ) + expect_identical( + dim(population_pyramid( + divipola_code = 5001, + year = 2020, + range = 20 + )), + c(10L, 3L) ) }) @@ -131,6 +143,11 @@ test_that("Population pyramid is not NA", { year = 2006, plot = TRUE ))) + expect_false(anyNA(population_pyramid( + divipola_code = 0, + year = 2006, + range = 10 + ))) }) # data for age_risk tests diff --git a/tests/testthat/test-endemic_channel.R b/tests/testthat/test-endemic_channel.R new file mode 100644 index 00000000..f8535f0c --- /dev/null +++ b/tests/testthat/test-endemic_channel.R @@ -0,0 +1,109 @@ +## data for incidence rate + +# dates +set.seed(3) +sample_data <- as.integer(sample(1:3285, 500, replace = TRUE)) +sample_dates <- as.Date("2013-01-01") + sample_data + +sample_dates <- sample_dates[lubridate::year(lubridate::as_date(sample_dates, + origin = "1970-01-01" +)) < 2020] +sample_dates <- c(as.Date("2013-01-01"), sample_dates, as.Date("2019-12-28")) + +sample_df <- data.frame(CASES = sample_dates) + +# incidence objects +historic_data <- incidence::incidence(sample_df$CASES, interval = "1 epiweek") +historic_data_m <- incidence::incidence(sample_df$CASES, interval = "1 month") +historic_data_d <- incidence::incidence(sample_df$CASES, interval = "1 day") +historic_data_short <- historic_data[historic_data$dates <= "2013-05-26", ] +historic_data_w_short <- historic_data[historic_data$dates <= "2017-05-01", ] +historic_data_w_short <- historic_data_w_short[historic_data_w_short$dates >= + "2013-05-01", ] +historic_data_m_short <- historic_data_m[historic_data_m$dates <= + "2017-05-01", ] +historic_data_m_short <- historic_data_m_short[historic_data_m_short$dates >= + "2013-05-01", ] + +test_that("Endemic channel throws expected erors", { + expect_error(endemic_channel(incidence_historic = c(20, 53, 90, 63))) + expect_error(endemic_channel(incidence_historic = historic_data_short)) + expect_error(endemic_channel( + incidence_historic = historic_data, + observations = c(0, 0, -1) + )) + expect_error(endemic_channel( + incidence_historic = historic_data_d, + observations = seq(1, 52) + )) + expect_error(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 54) + )) + expect_error(endemic_channel( + incidence_historic = historic_data_m, + observations = seq(1, 14) + )) + expect_error(endemic_channel( + incidence_historic = historic_data_m, + observations = seq(1, 14), method = "poisson" + )) + expect_error(endemic_channel( + incidence_historic = historic_data_m, + observations = seq(1, 14), ci = "0.95" + )) + expect_error(endemic_channel( + incidence_historic = historic_data_m, + observations = seq(1, 14), ci = 1.2 + )) + expect_error(endemic_channel( + incidence_historic = historic_data_m, + observations = seq(1, 14), plot = "TRUE" + )) +}) + +test_that("Endemic channel works as expected", { + expect_type(endemic_channel( + incidence_historic = historic_data_m_short + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data_w_short + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52) + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52), + method = "geometric" + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52), + method = "median" + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52), + method = "mean" + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52), + method = "unusual_behavior" + ), "list") + expect_type(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52), + plot = TRUE + ), "list") + expect_identical(dim(endemic_channel( + incidence_historic = historic_data, + observations = seq(1, 52) + )), c(52L, 4L)) + expect_identical(dim(endemic_channel( + incidence_historic = historic_data_m, + observations = seq(1, 12) + )), c(12L, 4L)) +}) diff --git a/tests/testthat/test-spatiotemporal.R b/tests/testthat/test-spatiotemporal.R index b8909f00..e819a56c 100644 --- a/tests/testthat/test-spatiotemporal.R +++ b/tests/testthat/test-spatiotemporal.R @@ -1,71 +1,86 @@ -#### Tests for spatiotemporal module #### - -test_that("Neighborhoods errors and warnings are thrown", { - # error on parameters - expect_error(neighborhoods(c("5001", 5148, 5206, 5266, 5088, 5440, 5615))) - expect_warning(neighborhoods(c(500010, 5148, 5206, 5266, 5088, 5440, 5615))) -}) - -test_that("Neighborhoods are built as expected", { - # class - expect_s3_class( - neighborhoods(c(5001, 5148, 5206, 5266, 5088, 5440, 5615)), - "nb" - ) - expect_length( - neighborhoods(c(5001, 5148, 5206, 5266, 5088, 5440, 5615)), - 7 - ) - expect_length( - neighborhoods(c(5001, 500148, 15206, 5266, 5088, 5440, 5615)), - 5 - ) -}) - -## Incidence rate objects for morans Index - -# Functional incidence rate -set.seed(3) -sample_groups <- c(5001, 5148, 5615, 5088, 5266, 5440, 5318, 5368, 5659) -sample_data <- sample(sample_groups, 200, replace = TRUE) - -sample_df <- data.frame(GROUP = sample_data) -sample_df$CASES <- "2010-03-01" - -incidence_object <- incidence::incidence(sample_df$CASES, - interval = "month", - group = sample_df$GROUP -) -incidence_rate <- incidence_rate(incidence_object, 2) - -# Failing incidence rate -sample_data_2 <- as.integer(sample(1:50, 200, replace = TRUE)) -sample_dates_2 <- as.Date("2018-12-31") + sample_data_2 -sample_groups_2 <- sample(c(5001, 5264, 5615, 5607), 200, replace = TRUE) -sample_df_2 <- data.frame(CASES = sample_dates_2, GROUP = sample_groups_2) -incidence_object_2 <- incidence::incidence(sample_df_2$CASES, - interval = "weeks", - group = sample_df_2$GROUP -) -incidence_rate_2 <- incidence_rate(incidence_object_2, 2) - -# Extra incidence rate -sample_df_3 <- sample_df[!sample_df$GROUP %in% c(5001, 5148), ] -incidence_object_3 <- incidence::incidence(sample_df_3$CASES, - interval = "weeks", - group = sample_df_3$GROUP -) -incidence_rate_3 <- incidence_rate(incidence_object_3, 2) - -test_that("Morans Index errors and warnings are thrown", { - expect_error(morans_index(incidence_object, 2, FALSE)) - expect_error(morans_index(incidence_rate_2, 2, FALSE)) - expect_warning(morans_index(incidence_rate, 12, TRUE), type = "list") -}) - -test_that("Morans Index works as expected", { - expect_type(morans_index(incidence_rate, 1, FALSE), type = "list") - expect_length(morans_index(incidence_rate, 1, FALSE), n = 5) - expect_type(morans_index(incidence_rate, 2, TRUE), type = "list") - expect_type(morans_index(incidence_rate, 12, TRUE), type = "list") -}) +#### Tests for spatiotemporal module #### + +test_that("Neighborhoods errors and warnings are thrown", { + # error on parameters + expect_error(neighborhoods(c("5001", 5148, 5206, 5266, 5088, 5440, 5615))) + expect_warning(neighborhoods(c(500010, 5148, 5206, 5266, 5088, 5440, 5615))) +}) + +test_that("Neighborhoods are built as expected", { + # class + expect_s3_class( + neighborhoods(c(5001, 5148, 5206, 5266, 5088, 5440, 5615)), + "nb" + ) + expect_length( + neighborhoods(c(5001, 5148, 5206, 5266, 5088, 5440, 5615)), + 7 + ) + expect_length( + neighborhoods(c(5001, 500148, 15206, 5266, 5088, 5440, 5615)), + 5 + ) +}) + +## Incidence objects for morans Index + +# Functional incidence object +set.seed(3) +sample_groups <- c(5001, 5148, 5615, 5088, 5266, 5440, 5318, 5368, 5659) +sample_data <- sample(sample_groups, 200, replace = TRUE) + +sample_df <- data.frame(GROUP = sample_data) +sample_df$CASES <- "2010-03-01" + +incidence_object <- incidence::incidence(sample_df$CASES, + interval = "month", + group = sample_df$GROUP +) + +# Failing incidence object +sample_data_2 <- as.integer(sample(1:50, 200, replace = TRUE)) +sample_dates_2 <- as.Date("2018-12-31") + sample_data_2 +sample_groups_2 <- sample(c(5001, 5264, 5615, 5607), 200, replace = TRUE) +sample_df_2 <- data.frame(CASES = sample_dates_2, GROUP = sample_groups_2) +incidence_object_2 <- incidence::incidence(sample_df_2$CASES, + interval = "weeks", + group = sample_df_2$GROUP +) + +test_that("Morans Index errors and warnings are thrown", { + expect_error(morans_index(c(20, 53, 90, 63), 2, plot = FALSE)) + expect_error(morans_index(incidence_object_2, + level = 2, + threshold = 2, plot = FALSE + )) + expect_error(morans_index(incidence_object, + level = 2, + threshold = "2", plot = FALSE + )) + expect_error(morans_index(incidence_object, + level = 2, + threshold = 2, plot = "FALSE" + )) + expect_warning(morans_index(incidence_object, + level = 2, + threshold = 12, plot = TRUE + ), type = "list") +}) + +test_that("Morans Index works as expected", { + expect_type(morans_index(incidence_object, + level = 2, threshold = 1, plot = FALSE + ), type = "list") + expect_length(morans_index(incidence_object, + level = 2, + threshold = 1, plot = FALSE + ), n = 5) + expect_type(morans_index(incidence_object, + level = 2, + threshold = 2, plot = TRUE + ), type = "list") + expect_type(morans_index(incidence_object, + level = 2, + threshold = 12, plot = TRUE + ), type = "list") +}) diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/demographic_vignette.Rmd b/vignettes/demographic_vignette.Rmd new file mode 100644 index 00000000..d4ddc063 --- /dev/null +++ b/vignettes/demographic_vignette.Rmd @@ -0,0 +1,168 @@ +--- +title: "epiCo - Demographic module" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{epiCo - Demographic module} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +**epiCo**'s demographic module is a tool for demographic descriptive +analysis and risk assessment of epidemiological events in Colombia. +Based on linelist data provided by the Colombian National Surveillance System +[SIVIGILA](https://www.ins.gov.co/Direcciones/Vigilancia/Paginas/SIVIGILA.aspx) +and demographic data from the Colombian National Administrative Department of +Statistics +[DANE](https://microdatos.dane.gov.co/index.php/catalog/central/about). + +The module allows you to: + +- Consult and visualize the population pyramid of a municipality, + department, or the country for a year of interest. +- Consult the definitions of demographic variables as ethnicities, + special populations groups, and occupational labels. +- Estimate the distribution of occupations reported in the linelist + according to the [ISCO 88 + Standard](https://www.ilo.org/public/english/bureau/stat/isco/isco88/). +- Estimate the incidence rate of a municipality, department, or the country. +- Calculate and visualize a relative age risk by normalizing the + distribution of cases by the specific age structure of the location + of interest. + +In the following vignette you will learn how to: + +1. Navigate the Codification of the Political Administrative Division of Colombia (DIVIPOLA). +2. Consult, visualize and interpret Colombian population pyramids at different +population scales. +3. Interpret the demographic variables reported by the SIVIGILA (as ethnicities, +special population groups, and occupatioanl labels). +4. Understand the epidemiological data example. +5. Estimate weekly and monthly global incidence rates for a municipality, department or the country. +6. Integrate age distributions of cases with population pyramids to obtain an age +risk assessment for a disease. + +## 1. Navigating the Codification of the Political Administrative Division of Colombia (DIVIPOLA) + +DIVIPOLA is a standardized nomenclature, designed by the DANE, for the identification of territorial entities (departments, districts and municipalities), non-municipalized areas and populated centers, by assigning a unique numerical code to each of these territorial units. + +Colombia has: + +- 32 Departments (Administrative Division Level 1) +- 1102 Municipalities (Administrative Division Level 2) +- 1 Island +- 18 Non-municipalized areas +- 6678 Populated centers + +Two digits are used for the codification of Departments, and five digits are used for the codification of municipalities (the first two being the the department where they are located). + +**epiCo** provides the complete list of departments and municipalities codes through a built-in dataset. + +```{r} +library(epiCo) +library(incidence) + +data("divipola_table") +``` +## 2. Population pyramids + +**epiCo** provides built-in dataset with the population projections of Colombia at national, department, and municipality level (provided by the DANE). + +You can perform queries to these data by using the 'population_pyramid' function, providing the DIVIPOLA code of the territory of interest and the year to consult. + +```{r} +ibague_code <- 73001 # DIVIPOLA code for the city of Ibagu +year <- 2016 # Year to consult +ibague_pyramid_2016 <- population_pyramid(ibague_code, year) # Population +# pyramid (dataframe) for the city of Ibagu in the year 2019 dissagregated +# by sex +head(ibague_pyramid_2016) +``` + +Definition of age ranges, and plotting are also provided for both: total number of individuals or proportion of individuals + +```{r} +ibague_code <- 73001 # DIVIPOLA code for the city of Ibagu +year <- 2019 # Year to consult +age_range <- 5 # Age range or window +ibague_pyramid_2019 <- population_pyramid(ibague_code, year, + range = age_range, + gender = TRUE, total = TRUE, + plot = TRUE +) +``` + + +## 3. Demographic variables + +Events of epidemiological relevance are reported to the SIVIGILA using an official notification form (see, [link](https://www.ins.gov.co/buscador-eventos/Paginas/Fichas-y-Protocolos.aspx)). + +**epiCo** provides function to consult the dictionaries to the etnicity cathegories, special population groups, and occupation codification used by the SIVIGILA. + +```{r} +demog_data <- data.frame( + id = c(0001, 002, 003, 004, 005), + ethnicity_label = c(3, 4, 1, 2, 5), + occupation_label = c(6111, 3221, 5113, 5133, 6111) +) + +describe_ethnicity(demog_data$ethnicity_label) # Description of ethnicities +# reported in the consulted dataset +describe_occupation( + isco_codes = demog_data$occupation_label, + output_level = "unit" +) # Description of the occupations +# reported in the consulted dataset +``` +## 4. Epidemiological data + +**epiCo** is a tool that produces analyses based on epidemiological data extracted from SIVIGILA or provided by the user. In the data folder is the epi_data file, which shows an example of the structure used by the package, which is the same as the one reported by SIVIGILA. This file contains the cases of all the municipalities in Tolima for the years 2015–2021. + +The following analyses use the dengue cases reported in Tolima in 2019. + +```{r} +data("epi_data") +data_event <- epi_data + +data_tolima <- data_event[lubridate::year(data_event$fec_not) == 2019, ] +head(data_tolima) +``` + +## 5. Estimation of incidence rates + +The incidence rate feature of **epiCo** requires the [incidence2](https://github.com/reconverse/incidence2) package to produce a modified incidence object where, instead of a count vector (or matrix). It transforms the object to provide a rate element accounting for the number of cases in the time period divided by the total number of inhabitants in the region and year. + +**epiCo** uses the DANE population projections as denominators; therefore, it is necessary to provide the administration level at which incidences are calculated. + +```{r} +incidence_object <- incidence( + dates = data_tolima$fec_not, + groups = data_tolima$cod_mun_o, + interval = "1 epiweek" +) +incidence_rate_object <- incidence_rate(incidence_object, level = 2) +head(incidence_rate_object$counts) +``` + +If groups in the incidence object are not within the DIVIPOLA coding for municipalities (level 2) or departments (level 1), or a national estimation is intended (level 0), the function will not be able to estimate an incidence rate. + +## 6. Estimation of risk by age group + +Normalization of data is a key aspect on epidemiology. **epiCo** allows to take an age distribution of cases and normalize the epidemiological data with the age structure of a population. This normalization allows to estimate the age risk of a disease according to the age structure of the general population in a municipality, department, or the country in a certain year. + +```{r} +data_ibague <- data_tolima[data_tolima$cod_mun_o == 73001, ] + +age_risk_data <- age_risk( + age = data_ibague$edad, + population_pyramid = ibague_pyramid_2019, + gender = data_ibague$sexo, plot = TRUE +) +``` + diff --git a/vignettes/endemic_channel.Rmd b/vignettes/endemic_channel.Rmd new file mode 100644 index 00000000..6f923acb --- /dev/null +++ b/vignettes/endemic_channel.Rmd @@ -0,0 +1,162 @@ +--- +title: "Building an Endemic Channel with epiCo" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{Building an Endemic Channel with epiCo} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +**epiCo**'s *endemic channel* module is a tool to estimate a central tendency line and upper and lower limits for the expected number of cases of a disease, based on the historical data provided by the user and a set of customized parameters. + +The `endemic_channel` function allows you to: + +- Estimate a central tendency measure (arithmetic mean, geometric mean, or median) +- Estimate a confidence interval (based on the selected central tendency measure) +- Select the epidemic (outlier) years in your data and decide how to handle them +- Plot the estimated endemic channel with the parameters/assumptions displayed +- Plot the current (observed) cases to evaluate your epidemiological situation (ie. occurrance of an outbreak) + +In the following vignette you will learn: + +1. What is an endemic channel, its benefits, drawbacks, and cautions +2. An example of the historical data needed to build an endemic channel +3. How to use the `endemic_channel` function +4. How to interpret and communicate the results + +## 1. What is an endemic channel? + +Using an endemic channel is a visual strategy to represent the historical behavior of a disease in a specific region on a epidemic curve that defines the central tendency of the cases during a year, and upper and lower limits where cases are expected to vary. This plot provides four areas known as "$\color{green}{\text{Success}}$", "$\color{Gold}{\text{Safety}}$", "$\color{orange}{\text{Warning}}$", and "$\color{Red}{\text{Epidemic}}$" bands (Figure 1) which are later used to define the epidemiological situation of the region based on the the current (observed) cases. + +The broader definition and methodology for the endemic channel was provided by [Bormant (1999)](https://iris.paho.org/handle/10665.2/8562). + +```{r, echo = FALSE, out.width = '100%'} +knitr::include_graphics( + path = file.path("man", "figures", "endemic_channel_figure.png"), + error = FALSE +) +``` + +The data needed to build an endemic channel is the weekly or monthly incidence of the disease for previous years in the region of interest. **epiCo** asks for at least one year of data but endemic channels are typically estimated from *5 to 7 years of information*. + +It is natural to presume that more years provide better estimations, since the statistical analyses will be more robust, but it is important to have contextual information to asseverate that transmission, surveillance or demographic conditions (among other factors) have not change during this period of time. Regarding the frequency of the data, weekly incidence may provide more useful information about the disease behavior and the moment in which an epidemiological warning should be raised, however it is up to the experience of the users and their context whether this resolution of data can be achieved. + +Another common decision while the building of an endemic channel is to ignore previous epidemic years to avoid misrepresentation of the traditional transmission dynamics. **epiCo** does not suggest epidemic years automatically but its `endemic_channel` function does provide the option to annotate the outlier years and decide whether to include, ignore, or adjust them. More data gathering tips and sources are provided later in this vignette. + +### The central tendency measure (CTM) + +To have a statistical summary of the disease behavior, the endemic channel provides a central tendency measure (CTM) that should describe the typical value of the historical data. + +Since the nature of the epidemiological data tends no be not normal, the typically CTM used in the endemic channels is the *geometric mean*. This CTM is known to better represent the expected value of a data set with a skewed distribution and/or susceptible to outliers (previous outbreaks). The `endemic_channel` function uses the geometric mean as default CTM but arithmetic mean or median can be chosen according to user's experience and context. + +Is important to note that as the geometric mean performs a multiplication of the data, when zero cases are reported in a moment of time the estimation leads to a zero CTM. To avoid this, **epiCo**'s `endemic_channel` function performs a shift on the data to sum up one case (default value) to all observations and then it subtracts this shift to the final calculation. To avoid random selection of the shift, users can ask `endemic_channel` function to find an optimal value based on +[de la Cruz & Kreft, 2019](https://doi.org/10.48550/arXiv.1806.06403), or they can also ask to use a weighted method as described by [Habib, 2012](https://www.arpapress.com/volumes/vol11issue3/ijrras_11_3_08.pdf). + +Finally, the `endemic_channel` function can perform a Poisson test (unusual behavior method) on the historical data if is requested by the user after taking into the account the pertinence of the test, since it is mostly used for scenarios or diseases with very low incidence records. This method uses the arithmetic mean as CTM. + +### The upper and lower limits + +The upper and lower limits of the endemic channel provide a confidence interval that are used to define the epidemiological bands previously described. + +**epiCo**'s `endemic_channel` function have the following preset of limits according to the selected CTM: + +- Arithmetic or Geometric mean as CTM: mean +/- *t* value for a 95% CI multiplied by variance of the data over the number of observations +- Median as CTM: quantiles 0.25 and 0.75 as lower and upper limits respectively +- Poisson test (Unusual behavior method): limits provided by a two sided Poisson test with 0.95% CI + +## 2. Historical data needed to build an endemic channel + +This section provides some strategies to obtain, handle and gather the historical data needed to build an endemic channel. It presumes that user have either the linelist or incidence data for the disease of interest, or the need to consult historical data from the Colombian National Surveillance System +[SIVIGILA](https://www.ins.gov.co/Direcciones/Vigilancia/Paginas/SIVIGILA.aspx). + +Regardless the user´s case, the goal of this section is to create an `incidence` object with the historical data of the disease.This object must account for the incidence of a single region (no groups are allowed in the `endemic_channel` function), it must have a weekly or monthly interval (epiweeks are also allowed), and it must represent at least one year or information. + +To have a broader understanding of the `incidence` package please refer to its [vignettes](https://www.repidemicsconsortium.org/incidence/). + +### Historic incidence from linelist or SIVIGILA data + +The endemic channel is more useful when data up to the immediate previous year is available, however this is not always possible for users out of the surveillance institutions. As an option, users can access to SIVIGILA's linelist data that is typically published until the second semester of the year (ie. data for 2022 was published in the second semester of 2023). + + + +For this example, the data provided in the data folder of the package will be used. These data present the data for dengue cases for all municipalities in Tolima for the years 2015 to 2021. In this case, we will use the cases from the municipality of Ibagué to calculate the respective endemic channel. + +```{r setup, eval = FALSE} +library(epiCo) + +data("epi_data") +data_event <- epi_data + +data_ibague <- data_event[data_event$cod_mun_o == 73001, ] + +## Building the historic incidence data + +incidence_historic <- incidence::incidence(data_ibague$fec_not, + interval = "1 epiweek" +) + +print(incidence_historic) +``` + +When users have their own linelist data the goal is the same: to clean and handle the data to build the `incidence` object (named `incidence_historic` in the previous example). The key message is that a list of dates that account for the event of interest is needed to later be used in the [`incidence`](https://www.repidemicsconsortium.org/incidence/) package. + +### Setting up the data from incidence information + +Future versions of **epiCo**'s `endemic_channel` function will allow the users to import a table accounting for periods of time (weeks or months) and count of cases, so when they do not have specific information on the dates for each case and endemic channel can still be builded. + +## 3. Using **epiCo**'s `endemic_channel` function + +After gathering the incidence data, users are ready to ask **epiCo** to build an *endemic channel*. + +The `endemic_channel` function has the following parameters: + +- `incidence_historic`: An incidence object with the cases counts of the previous years +- `observations`: A numeric vector with the current observations *(default = NULL)* +- `method`: A string with the mean calculation method of preference ("median", "mean", or "geometric") or "unusual_behavior" method (Poisson Distribution Test) *(default = "geometric")* +- `geom_method`: A string with the selected method for geometric mean calculation (see epiCo's geom_mean function) *(default = "shifted")* +- `outlier_years`: A numeric vector with the outlier (epidemic) years *(default = NULL)* +- `outliers_handling`: A string with the handling decision regarding outlier years: + + - ignored = data from outlier years will not take into account *(default)* + - included = data from outlier years will take into account + - replaced_by_median = data from outlier years will be replaced with the median and take into account + - replaced_by_mean = data from outlier years will be replaced with the mean and take into account + - replaced_by_geom_mean = data from outlier years will be replaced with the geometric mean and take into account +- `ci`: A numeric value to specify the confidence interval to use with the geometric method *(default = 0.95)* +- `plot`: A boolean for displaying a plot *(default = FALSE)* + +The output of *epiCo*'s `endemic_channel` function is a dataframe with the observation, historical mean, and confidence intervals, and a plot displaying the four epidemiological bands, the current observations (if provided), and the methods and outliers handling set by the user. + +### Example + +The following example uses the previously historic incidence accessed from the SIVIGILA data source to build the 2021 *endemic channel* for Ibagué city. + +```{r, eval = FALSE} +observations <- sample(15:100, 52, replace = FALSE) + +outlier_years <- c(2016, 2019) + +endemic_channel( + incidence_historic = incidence_historic, + observations = observations, + outlier_years = outlier_years, + plot = TRUE +) +``` + +## 4. Interpretation and communication of results + +### Tips + +### Warnings and disclosures + +### Exporting the endemic channel plot + + diff --git a/vignettes/spatiotemporal_vignette.Rmd b/vignettes/spatiotemporal_vignette.Rmd new file mode 100644 index 00000000..501411b3 --- /dev/null +++ b/vignettes/spatiotemporal_vignette.Rmd @@ -0,0 +1,93 @@ +--- +title: "Spatiotemporal analyses with epiCo" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{spatiotemporal_vignette} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>" +) +``` + +**epiCo**'s *spatiotemporal* module is a tool to analyze the spatial correlation of cases based on their coordinates of notification and the real travel times within Colombian territory. + +The *spatiotemporal* module allows users to: + +- Define the neighbors of a municipality based on a travel time threshold that accounts for real connectivity and infrastructure +- Perform a Moran’s Index autocorrelation analyses on the defined neighborhood and the provided incidence data (internally converted to incidence rate) +- Plot a heat map with the High (spatial correlation) - High (incidence rate) clusters, as well as Low - Low ones. + +In the following vignette you will learn: + +1. How real travel times were estimated from Colombian connectivity and infrastructure +2. To obtain the neighbors of a municipality from real travel times using the `neighborhoods` function +3. To use **epiCo**’s `morans_index` function +4. How to interpret and communicate the results + +## 1. Real travel times among Colombian municipalities + +Several approaches to estimate proximity and neighborhood of a point (or region) are developed in the literature. Some rely on euclidean distances among the centroid of the areas, whereas others rely in contiguity approaches between their boundaries. + +**epiCo** aims to propose a new approach on neighborhood estimation by defining the proximities among municipalities based on real available infrastructure in the territory. This is important in countries as Colombia where topology shapes the connectivity and interaction between municipalities, and where euclidean distances and boundaries may lead to an overestimation of nearness when in reality one of the three mountain ranges or on of the several rivers in the country may lead to large travel distances and times. + +To estimate real travel times among Colombian municipalities, a travel times matrix was calculated based on [Bravo-Vega C., Santos-Vega M., & Cordovez J.M. (2022)](https://doi.org/10.1371/journal.pntd.0010270) study. The travel times accounts for the fastest path to connect one municipality to all municipalities in a friction map that provides different speeds according to the presence and quality of the roads, fluvial transport, or walking possibilities. + +## 2. Defining a neighborhood with **epiCo** + +Since **epiCo** storage all travel times among municipalities in Colombia, the definition of a neighborhood has to be based on the expertise and knowledge of the user regarding what is the maximum travel time in which a municipality is considered a neighbor. + +The `neighborhoods` function receives as parameters a vector of DIVIPOLA codes accounting for the municipalities to consider as potential neighbors, and a threshold that accounts for the maximum travel time (in hours) that a municipality can be from another. + +```{r, messages = FALSE, eval = TRUE} +library(epiCo) +library(dplyr) +library(incidence) +data(divipola_table) +cundinamarca_data <- dplyr::filter( + divipola_table, + NOM_DPTO == "CUNDINAMARCA" +) %>% + select(COD_MPIO, LATITUD, LONGITUD) + +cundinamarca_neighborhood <- neighborhoods( + query_vector = cundinamarca_data$COD_MPIO, + threshold = 0.5 +) + +plot(cundinamarca_neighborhood, cbind( + cundinamarca_data$LATITUD, + cundinamarca_data$LONGITUD +)) +``` + +## 3. *epiCo*'s `morans_index` function + +*epiCo* provides a function to perform a [Local Moran's index analysis](https://r-spatial.github.io/spdep/reference/moran.plot.html) from an [`incidence`](https://www.repidemicsconsortium.org/incidence/) object with unique observations for a set of Colombian municipalities. + +Internally, the function reads the `incidence` object's groups as the DIVIPOLA codes to: + +1) Estimate incidence rates using **epiCo**'s `incidence_rate` function +2) Evaluate them on the **epiCo**'s `neighborhoods` function + +It is necessary for the user to provide the administrative level that the groups account for (0 - National level, 1 - Department level, 2 - Municipality level), the scale at which incidence rates should be estimated (cases per number of inhabitants), and the travel time threshold for the neighborhood definition. + +The following example uses the cases of the municipalities of Tolima for the year 2019. +```{r, messages = FALSE, eval = TRUE} +data("epi_data") +data_event <- epi_data + +data_tolima <- data_event[lubridate::year(data_event$fec_not) == 2019, ] +incidence_object <- incidence( + dates = data_tolima$fec_not, + groups = data_tolima$cod_mun_o, + interval = "12 months" +) + +monrans_tolima <- morans_index(incidence_object = incidence_object, level = 2) +monrans_tolima$leaflet_map +```