diff --git a/.gitignore b/.gitignore index 8deb9336..5fd60219 100644 --- a/.gitignore +++ b/.gitignore @@ -63,3 +63,6 @@ WA_raw_data.rds WV_raw_data.rds WI_raw_data.rds WY_raw_data.rds +.Rdata +.httr-oauth +.DS_Store diff --git a/DESCRIPTION b/DESCRIPTION index 25b1a58a..43c74722 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -41,7 +41,8 @@ Imports: stats, gganimate, lubridate, - maps + maps, + readr Depends: R (>= 3.5.0) Suggests: diff --git a/NAMESPACE b/NAMESPACE index f211c920..dc866bd2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -23,6 +23,7 @@ export(InvalidSpeciation) export(PotentialDuplicateRowID) export(QAPPDocAvailable) export(QAPPapproved) +export(SummarizeCharacteristics) export(TADABigdataRetrieval) export(TADAdataRetrieval) export(readWQPwebservice) diff --git a/R/DataDiscoveryRetrieval.R b/R/DataDiscoveryRetrieval.R index bed0a1ae..a752356c 100644 --- a/R/DataDiscoveryRetrieval.R +++ b/R/DataDiscoveryRetrieval.R @@ -256,11 +256,10 @@ readWQPwebservice <- function(webservice) { #' #' @examples #' \dontrun{ -#' tada2 <- TADABigdataRetrieval(startDate = "01-01-2021", -#' endDate = "01-01-2022", -#' characteristicName = "Nitrogen", -#' siteType = "Stream") +#' tada2 <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream") #' } +#' + TADABigdataRetrieval <- function(startDate = "null", endDate = "null", @@ -307,7 +306,10 @@ TADABigdataRetrieval <- function(startDate = "null", if(length(siteid_all) > 0) { l=length(siteid_all) #len(sites) - g=100 #grouping size + g=100 #max number of sites pulled per WQP query + #may want to consider using the total number of records in a given + #download group instead, e.g., records must not exceed some maximum + #threshold (e.g. 
USGS uses 250,000 records per group for their pipelines) nl=ceiling(l/g) #number of queries i=0 diff --git a/R/Utilities.R b/R/Utilities.R index e19eba90..5f1976ca 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -388,4 +388,5 @@ checkColumns <- function(.data, expected_cols) { if (all(expected_cols %in% colnames(.data)) == FALSE) { stop("The dataframe does not contain the required fields to use TADA. Use either the full physical/chemical profile downloaded from WQP or download the TADA profile template available on the EPA TADA webpage.") } -} \ No newline at end of file +} + diff --git a/R/Visualizations.R b/R/Visualizations.R index bdc25030..40199e56 100644 --- a/R/Visualizations.R +++ b/R/Visualizations.R @@ -1,5 +1,6 @@ -#' Generate Animated Map +#' @title Generate Animated Map #' +#' @description #' Animated map code adapted from USGS blog: https://waterdata.usgs.gov/blog/large_sample_pull/ #' #' @param .data TADA dataframe @@ -8,7 +9,6 @@ #' #' @export #' -#' CreateAnimatedMap <- function(.data) { @@ -66,8 +66,9 @@ CreateAnimatedMap <- function(.data) { } -#' Generate Map +#' @title Generate Map #' +#' @description #' Function will plot WQP stations on a map. Stations with an invalid or imprecise #' latitude or longitude will be colored red; all other stations will be colored #' blue. @@ -104,3 +105,32 @@ maps::map() graphics::points(.data$LongitudeMeasure, .data$LatitudeMeasure, col="red", pch=20) } + + +#' @title Summarize data downloaded for each Characteristic +#' +#' @description +#' Function to summarize the number of sites and records downloaded from the +#' WQP for each CharacteristicName. +#' +#' @param .data TADA data frame containing the data downloaded from the WQP, where +#' each row represents a unique data record. +#' +#' @return +#' A data frame, returned invisibly, with one row per CharacteristicName giving +#' n_sites (unique MonitoringLocationIdentifier values) and n_records (rows). 
+#' +#' @export +#' + +SummarizeCharacteristics <- function(.data){ + + # Summarize WQP data pull + wqp_summary <- .data %>% + dplyr::group_by(CharacteristicName) %>% + dplyr::summarize(n_sites = length(unique(MonitoringLocationIdentifier)), + n_records = length(ResultMeasureValue), + .groups = 'drop') %>% + dplyr::select(CharacteristicName, n_sites, n_records) + +} diff --git a/man/SummarizeCharacteristics.Rd b/man/SummarizeCharacteristics.Rd new file mode 100644 index 00000000..32f0835c --- /dev/null +++ b/man/SummarizeCharacteristics.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Visualizations.R +\name{SummarizeCharacteristics} +\alias{SummarizeCharacteristics} +\title{Summarize data downloaded for each Characteristic} +\usage{ +SummarizeCharacteristics(.data) +} +\arguments{ +\item{.data}{TADA data frame containing the data downloaded from the WQP, where +each row represents a unique data record.} +} +\value{ +A data frame, returned invisibly, with one row per CharacteristicName giving +n_sites (unique MonitoringLocationIdentifier values) and n_records (rows). +} +\description{ +Function to summarize the number of sites and records downloaded from the +WQP for each CharacteristicName. 
+} diff --git a/man/TADABigdataRetrieval.Rd b/man/TADABigdataRetrieval.Rd index 41db5aa3..6b2dbdfa 100644 --- a/man/TADABigdataRetrieval.Rd +++ b/man/TADABigdataRetrieval.Rd @@ -59,9 +59,7 @@ See ?MeasureValueSpecialCharacters and ?autoclean documentation for more informa } \examples{ \dontrun{ -tada2 <- TADABigdataRetrieval(startDate = "01-01-2021", - endDate = "01-01-2022", - characteristicName = "Nitrogen", - siteType = "Stream") +tada2 <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream") } + } diff --git a/vignettes/WQPDataHarmonization.Rmd b/vignettes/WQPDataHarmonization.Rmd index 3e5fac82..ff00cdb2 100644 --- a/vignettes/WQPDataHarmonization.Rmd +++ b/vignettes/WQPDataHarmonization.Rmd @@ -56,6 +56,7 @@ dataRetrieval from GitHub. ```{r, results = 'hide', message = FALSE, warning = FALSE} # remotes::install_github("USGS-R/dataRetrieval", dependencies=TRUE) + remotes::install_github("USEPA/TADA", dependencies=TRUE) ``` @@ -213,7 +214,7 @@ Option 1: Use the TADAdataRetrieval function. #dataRetrievalProfile <- dataRetrieval::readWQPdata(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "01-01-2021", ignore_attributes = TRUE) #You can edit this to define your own WQP query inputs below -TADAProfile <- TADAdataRetrieval(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "01-01-2021") +TADAProfile <- TADAdataRetrieval(statecode = "UT", characteristicName = c("Ammonia", "Nitrate", "Nitrogen"), startDate = "10-01-2020") ``` @@ -240,10 +241,17 @@ States that is available for the time period, characteristicName, and siteType requested. See ?TADABigdataRetrieval for more details. WARNING, this can take -multiple hours to run. The total run time depends on your query inputs. +multiple HOURS to run. The total run time depends on your query inputs. 
+ +```{r} +# AllWaterTempData <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream") +``` + +Review all column names in the TADA Profile and summarize the number of sites and records for each characteristic ```{r} -#AllWaterTempData <- TADABigdataRetrieval(startDate = "2019-01-01", endDate = "2021-12-31", characteristicName = "Temperature, water", siteType = "Stream") +colnames(TADAProfile) +TADAProfile_CharSummary <- SummarizeCharacteristics(TADAProfile) ``` ## Invalid coordinates @@ -300,11 +308,6 @@ map('county', 'utah') points(TADAProfileClean1$LongitudeMeasure, TADAProfileClean1$LatitudeMeasure, col="red", pch=20) ``` -Review all column names in the TADA Profile - -```{r} -colnames(TADAProfile) -``` ## Depth unit conversions @@ -338,7 +341,7 @@ entering the following code in the console: ?ConvertDepthUnits ```{r} #converts all depth profile data to meters -TADAProfileClean2 <- ConvertDepthUnits(TADAProfileClean1, unit = "ft", transform = TRUE) +TADAProfileClean2 <- ConvertDepthUnits(TADAProfileClean1, unit = "m", transform = TRUE) ``` ## Result unit conversions