Skip to content

Commit

Permalink
Merge pull request nationalparkservice#110 from RobLBaker/main
Browse files Browse the repository at this point in the history
v0.3.2
  • Loading branch information
RobLBaker authored Oct 6, 2023
2 parents 3333ad7 + 80eb282 commit febe64e
Show file tree
Hide file tree
Showing 80 changed files with 1,211 additions and 124 deletions.
5 changes: 3 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: DPchecker
Title: Checks Data Packages for Congruence
Version: 0.3.1
Version: 0.3.2
Authors@R: c(
person("Rob", "Baker", email = "robert_baker@nps.gov", role = c("cre", "aut"), comment = c(ORCID = "0000-0001-7591-5035")),
person(c("Sarah", "E."), "Wright", email = "sarah_wright@nps.gov", role = "aut"),
Expand Down Expand Up @@ -31,7 +31,8 @@ Imports:
tibble,
crayon,
httr,
jsonlite
jsonlite,
stats
Remotes:
https://github.com/NCEAS/arcticdatautils
URL: https://nationalparkservice.github.io/DPchecker/
4 changes: 4 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(DPchecker_example)
export(convert_datetime_format)
export(load_data)
Expand All @@ -11,6 +12,7 @@ export(test_cui_dissemination)
export(test_datatable_urls)
export(test_datatable_urls_doi)
export(test_date_range)
export(test_dates_parse)
export(test_delimiter)
export(test_doi)
export(test_doi_format)
Expand All @@ -24,6 +26,7 @@ export(test_footer)
export(test_geographic_cov)
export(test_header_num)
export(test_int_rights)
export(test_keywords)
export(test_license)
export(test_metadata_version)
export(test_methods)
Expand All @@ -36,6 +39,7 @@ export(test_orcid_resolves)
export(test_pii_data_emails)
export(test_pii_meta_emails)
export(test_pub_date)
export(test_public_points)
export(test_publisher)
export(test_publisher_city)
export(test_publisher_state)
Expand Down
14 changes: 14 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# DPchecker 0.3.2

* Adjusted `test_date_range()` so that it can handle data columns that contain both dates and times. Times are truncated to midnight such that if the data collected on the first day indicated is considered "in range" and data collected on the last day indicated is considered "in range".
* Added `test_public_points()` to the list of functions in the DPchecker.Rmd file.
* Added `test_public_points()` to the list of functions run by `run_congruence_checks()`.
* Added `test_public_points()` function to test whether metadata contains GPS coordinates if the package is not public.
* Added `test_keywords()` to the list of function in the DPchecker.Rmd file.
* Added `test_keywords()` to the list of functions run by `run_congruence_checks()`.
* Added new function, `test_keywords()` to test for presence of keywords in metadata; something that is required for EML extraction on DataStore.
* Updated `test_valid_filenames()` to accept filenames with hyphens (in addition to filenames with alpha-numerics and underscore). Filenames still must start with a letter.
* Added the `test_dates_parse()` function to the list of functions in the DPchecker.Rmd file.
* Added the `test_dates_parse()` function to the list of functions that are run when the function `run_congruence_checks()` is called.
* Added the function `test_dates_parse()` to test whether the date formats supplied in the metadata match the values supplied in the data files.
* Added a `NEWS.md` file to track changes to the DPchecker R package.
76 changes: 76 additions & 0 deletions R/optional_eml_elements.R
Original file line number Diff line number Diff line change
Expand Up @@ -423,3 +423,79 @@ test_orcid_match <- function(metadata = load_metadata(directory)){
return(invisible(metadata))
}

#' Test for public GPS point coordinates
#'
#' @description `test_public_points()` will look for GPS point coordinates in metadata, if the data package is not public will warn users that they are potentially publishing confidential unclassified information (CUI). Specifically, if no GPS points are identified, the function passes. If the CUI dissemination code is set to "PUBLIC" and GPS points are identified, the function passes. If the CUI dissemination code is not set to "PUBLIC" and GPS points are identified, the function will fail with a warning. If no CUI dissemination code is detected, the function fails with an error.
#'
#' @details The contents of metadata are public even if the data itself is restricted. This means that if GPS coordinates (a common type of data that must be redacted or fuzzed before it can be made public) exist in metadata, these GPS coordinates will be publicly available. The function will warn people of that potentiality. The function will only flag GPS points (not bounding boxes or polygons or other shapes). The function only checks for GPS points in the geographicCoverage element.
#'
#' @inheritParams test_pub_date
#'
#' @return invisible(metadata)
#' @export
#'
#' @examples
#' \dontrun{
#' test_public_points()
#' }
test_public_points <- function(metadata = load_metadata(directory)){
#check whether metadata is a properly formated EML document
is_eml(metadata)
#get geographic coverage element from metadata:
geo_cov <- metadata[["dataset"]][["coverage"]][["geographicCoverage"]]
#if htere is no geo_coverage, tell the user there's no potential CUI GPS points:
missing_geo<-is.null(geo_cov)
if(missing_geo){
cli::cli_inform(c("v" = "No potentially confidential GPS points found in metadata"))
return(invisible(metadata))
}

#drop `@context` item from geo_cov
geo_cov$`@context` <- NULL

# If there's only geographic coverage element, geo_cov ends up with one less level of nesting. Re-nest it so that the rest of the code works consistently
if ("geographicDescription" %in% names(geo_cov)) {
geo_cov <- list(geo_cov)
}

#place to store GPS points
point_detect <- NULL

#check whether points are detected (as opposed to lines or boxes)
for(i in 1:length(seq_along(geo_cov))){
lat_point <-
geo_cov[[i]][["boundingCoordinates"]][["westBoundingCoordinate"]]== geo_cov[[i]][["boundingCoordinates"]][["eastBoundingCoordinate"]]
long_point <-
geo_cov[[i]][["boundingCoordinates"]][["southBoundingCoordinate"]] == geo_cov[[i]][["boundingCoordinates"]][["northBoundingCoordinate"]]

#if a point is found, add it to the point_detect list:
if(lat_point == TRUE & long_point == TRUE){
point_detect <- append(point_detect, geo_cov[[i]][["geographicDescription"]])
}
}

#if no GPS points in metadata, pass the test and inform user:
if(is.null(point_detect)){
cli::cli_inform(c("v" = "No GPS points detected in metadata"))
} else {
# if GPS coordinates are detected:
# get the CUI designation
cui <- arcticdatautils::eml_get_simple(metadata, "CUI")
if(is.null(cui)){
#if no CUI designation, fail the test and require designating CUI:
cli::cli_abort(c("x" = "No CUI designation found. Unable to determine whether GPS points potentially contain CUI. Please use {.fn EMLeditor::set_cui} to designate a CUI dissemination category."))
}
#if there is a CUI designation....
if(!is.null(cui)){
#if CUI is set to public, pass the test and tell the user GPS coordinates will be public
if(cui == "PUBLIC"){
cli::cli_inform(c("v" = "CUI is set to PUBLIC and all GPS coordinates will be publicly available."))
}
else {
# if CUI is not public, warn the user that GPS coordiantes will be public.
cli::cli_warn(c("!" = "CUI is not set to PUBLIC. GPS coordinates detected in metadata will be publicly available. Are you sure?"))
}
}
}
return(invisible(metadata))
}
23 changes: 23 additions & 0 deletions R/required_eml_elements.R
Original file line number Diff line number Diff line change
Expand Up @@ -697,3 +697,26 @@ test_creator <- function(metadata = load_metadata(directory)){

}

#' Test for Keywords
#'
#' @description `test_keywords()` tests to see whether metadata contains at least one "Keywords Set".
#'
#' @inheritParams test_pub_date
#'
#' @return invisilbe(meatadatda)
#' @export
#'
#' @examples
#' \dontrun{
#' test_keywords()
#' }
test_keywords <- function (metadata = load_metadata(directory)){
is_eml(metadata)
#get creators
keywords <- metadata[["dataset"]][["keywordSet"]]
if(is.null(keywords)){
cli::cli_abort(c("x" = "No keywords detected. Metadata must contain at least one keyword."))
} else{
cli::cli_inform(c("v" = "Metadata contains keyword(s)."))
}
}
30 changes: 28 additions & 2 deletions R/run_checks.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,15 @@ run_congruence_checks <- function(directory = here::here(),
warn_count <<- warn_count + 1
cli::cli_bullets(c(w$message, w$body))
})
tryCatch(test_keywords(metadata),
error = function(e) {
err_count <<- err_count + 1
cli::cli_bullets(c(e$message, e$body))
},
warning = function(w) {
warn_count <<- warn_count + 1
cli::cli_bullets(c(w$message, w$body))
})
tryCatch(test_by_for_nps(metadata),
error = function(e) {
err_count <<- err_count + 1
Expand Down Expand Up @@ -290,7 +299,6 @@ run_congruence_checks <- function(directory = here::here(),
warn_count <<- warn_count + 1
cli::cli_bullets(c(w$message, w$body))
})

tryCatch(test_methods(metadata),
error = function(e) {
err_count <<- err_count + 1
Expand Down Expand Up @@ -434,6 +442,15 @@ run_congruence_checks <- function(directory = here::here(),
warn_count <<- warn_count + 1
cli::cli_bullets(c(w$message, w$body))
})
tryCatch(test_dates_parse(directory, metadata),
error = function(e) {
err_count <<- err_count + 1
cli::cli_bullets(c(e$message, e$body))
},
warning = function(w) {
warn_count <<- warn_count + 1
cli::cli_bullets(c(w$message, w$body))
})
tryCatch(test_date_range(directory, metadata, skip_cols = skip_cols),
error = function(e) {
err_count <<- err_count + 1
Expand All @@ -444,7 +461,7 @@ run_congruence_checks <- function(directory = here::here(),
cli::cli_bullets(c(w$message, w$body))
})

cli::cli_h2("Checking data compliance")
cli::cli_h2("Checking data and metadata compliance")
tryCatch(test_pii_data_emails(directory),
error = function(e) {
err_count <<- err_count + 1
Expand All @@ -454,6 +471,15 @@ run_congruence_checks <- function(directory = here::here(),
warn_count <<- warn_count + 1
cli::cli_verbatim (c(w$message, w$body))
})
tryCatch(test_public_points(metadata),
error = function(e) {
err_count <<- err_count + 1
cli::cli_bullets(c(e$message, e$body))
},
warning = function(w) {
warn_count <<- warn_count + 1
cli::cli_bullets(c(w$message, w$body))
})

}

Expand Down
Loading

0 comments on commit febe64e

Please sign in to comment.