Skip to content

Commit

Permalink
Merge pull request #110 from USEPA/Aug11cm
Browse files Browse the repository at this point in the history
Aug11cm
  • Loading branch information
mthawley authored Aug 25, 2022
2 parents 73c4c4a + 9f74799 commit 3e954b3
Show file tree
Hide file tree
Showing 17 changed files with 21,235 additions and 23 deletions.
1 change: 1 addition & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
^_pkgdown\.yml$
^docs$
^pkgdown$
^\.github$
1 change: 1 addition & 0 deletions .github/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.html
52 changes: 52 additions & 0 deletions .github/workflows/pkgdown.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
on:
push:
branches: [develop]
pull_request:
branches: [develop]
release:
types: [published]
workflow_dispatch:

name: pkgdown

jobs:
pkgdown:
runs-on: ubuntu-latest
# Only restrict concurrency for non-PR jobs
concurrency:
group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }}
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
steps:
- uses: actions/checkout@v2

- uses: r-lib/actions/setup-pandoc@v2

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::pkgdown, local::.
needs: website

- name: Build site
run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE)
shell: Rscript {0}

- name: Deploy to GitHub pages 🚀
if: github.event_name != 'pull_request'
uses: JamesIves/github-pages-deploy-action@4.1.4
with:
clean: false
branch: gh-pages
folder: docs

url: https://usepa.github.io/TADA/

template:
bootstrap: 5
bootswatch: cerulean
5 changes: 4 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ Imports:
stringr,
utils,
RColorBrewer,
stats
stats,
tidyverse,
lubridate
Depends:
R (>= 2.10)
Suggests:
Expand All @@ -40,3 +42,4 @@ Suggests:
VignetteBuilder: knitr, rmarkdown
Language: en-US
Config/testthat/edition: 3
URL: https://usepa.github.io/TADA/
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ export(PotentialDuplicateRowID)
export(QAPPDocAvailable)
export(QAPPapproved)
export(RemoveEmptyColumns)
export(TADABigdataRetrieval)
export(TADAdataRetrieval)
export(WQXTargetUnits)
export(readWQPwebservice)
Expand Down
199 changes: 198 additions & 1 deletion R/DataDiscoveryRetrieval.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,25 @@
#'
#' Retrieve data from Water Quality Portal (WQP) and output a TADA-compatible
#' dataset.
#'
#' Keep in mind that all the query filters for the WQP work as an AND
#' but within the fields there are ORs. So for example,
#' characteristics – if you choose pH & DO – it’s an OR. Similarly, if you
#' choose VA and IL, it’s an OR. But the combo of fields are ANDs.
#' Such as State/VA AND Characteristic/DO.
#' "Characteristic" and "Characteristic Group" also work as an AND.
#'
#' @param statecode Code that identifies a state
#' @param startDate Start Date
#' @param countycode Code that identifies a county
#' @param siteid Unique monitoring station identifier
#' @param siteType Type of waterbody
#' @param characteristicName Name of characteristic
#' @param characteristicName Name of parameter
#' @param ActivityMediaName Sampling substrate such as water, air, or sediment
#' @param endDate End Date
#'
#' @return TADA-compatible dataframe
#'
#' @export
#'

Expand All @@ -21,6 +30,7 @@ TADAdataRetrieval <- function(statecode = "null",
siteid = "null",
siteType = "null",
characteristicName = "null",
ActivityMediaName = "null",
endDate = "null"
) {

Expand Down Expand Up @@ -62,6 +72,12 @@ TADAdataRetrieval <- function(statecode = "null",
WQPquery <- c(WQPquery, characteristicName = characteristicName)
}

if (length(ActivityMediaName)>1) {
WQPquery <- c(WQPquery, ActivityMediaName = list(ActivityMediaName))
} else if (ActivityMediaName != "null") {
WQPquery <- c(WQPquery, ActivityMediaName = ActivityMediaName)
}

if (length(endDate)>1) {
WQPquery <- c(WQPquery, endDate = list(endDate))
} else if (endDate != "null") {
Expand Down Expand Up @@ -281,3 +297,184 @@ TADAprofileCheck <- function(.data) {
stop("The dataframe does not contain the required fields to use TADA. Use either the full physical/chemical profile downloaded from WQP or download the TADA profile template available on the EPA TADA webpage.")
}
}



#' Large WQP data pulls using dataRetrieval for all data from all sites in the
#' contiguous United States.
#'
#' This function uses the WQP summary service to limit the amount
#' downloaded to only relevant data. For large data sets, that can
#' save a lot of time and ultimately reduce the complexity of subsequent
#' data processing.
#'
#' This function will join data from multiple WQP profiles and output a
#' TADA-compatible dataset.
#'
#' @param startDate Start date in YYYY-MM-DD format, for example, "1995-01-01"
#' @param endDate End date in YYYY-MM-DD format, for example, "2020-12-31"
#' @param characteristicName Name of water quality parameter
#' @param siteType Name of water body type (e.g., "Stream", "Lake, Reservoir, Impoundment")
#'
#' @return TADA-compatible dataframe
#'
#' @export
#'

TADABigdataRetrieval <- function(startDate = "null",
                                 endDate = "null",
                                 characteristicName = "null",
                                 siteType = "null") {

  # Year bounds used to filter the WQP summary-service results down to the
  # requested window before any site-level data is downloaded.
  startYearLo <- lubridate::year(lubridate::ymd(startDate))
  startYearHi <- lubridate::year(lubridate::ymd(endDate))

  # dataRetrieval expects multi-value filters wrapped in a list; scalar
  # values (including the "null" sentinel) are passed through unchanged.
  if (length(characteristicName) > 1) {
    characteristicName <- list(characteristicName)
  }
  if (length(siteType) > 1) {
    siteType <- list(siteType)
  }

  state_cd_cont <- utils::read.csv(file = "inst/extdata/statecode.csv")

  for (state_row in seq_len(nrow(state_cd_cont))) {

    state_cd <- as.numeric(state_cd_cont$STATE[state_row])
    state_nm <- state_cd_cont$STUSAB[state_row]

    # Summary service: cheaply identify sites with relevant data so the
    # expensive result downloads below only hit sites that matter.
    df_summary <- dataRetrieval::readWQPsummary(
      statecode = state_cd,
      characteristicName = characteristicName,
      siteType = siteType,
      startDate = startDate
    )

    sites <- df_summary %>%
      dplyr::filter(
        YearSummarized >= startYearLo,
        YearSummarized <= startYearHi
      )

    siteid_all <- unique(sites$MonitoringLocationIdentifier)

    if (length(siteid_all) > 0) {

      group_size <- 250 # sites per WQP query, keeps each request manageable
      n_groups <- ceiling(length(siteid_all) / group_size)
      state_chunks <- vector("list", n_groups) # preallocate, bind once at end

      for (chunk in seq_len(n_groups)) {

        # 1-based chunk bounds (the original 0-based `j:k` silently dropped
        # index 0 and produced uneven chunk sizes).
        first <- (chunk - 1) * group_size + 1
        last <- min(chunk * group_size, length(siteid_all))
        chunk_sites <- siteid_all[first:last]

        results.DR <- dataRetrieval::readWQPdata(
          siteid = chunk_sites,
          characteristicName = characteristicName
        )

        narrow.DR <- dataRetrieval::readWQPdata(
          siteid = chunk_sites,
          characteristicName = characteristicName,
          dataProfile = "narrowResult"
        )

        sites.DR <- dataRetrieval::whatWQPsites(
          siteid = chunk_sites,
          characteristicName = characteristicName
        )

        # Join station data to full phys/chem (results.DR)
        join1 <- results.DR %>%
          # join stations to results
          dplyr::left_join(sites.DR, by = "MonitoringLocationIdentifier") %>%
          # remove ".x" suffix from column names
          dplyr::rename_at(
            dplyr::vars(dplyr::ends_with(".x")),
            ~ stringr::str_replace(., "\\..$", "")
          ) %>%
          # remove columns with ".y" suffix
          dplyr::select_at(dplyr::vars(-dplyr::ends_with(".y")))

        # Join Speciation column from narrow to full profile
        join2 <- join1 %>%
          dplyr::left_join(
            dplyr::select(
              narrow.DR, ActivityIdentifier, MonitoringLocationIdentifier,
              CharacteristicName, ResultMeasureValue,
              MethodSpecificationName
            ),
            by = c(
              "ActivityIdentifier", "MonitoringLocationIdentifier",
              "CharacteristicName", "ResultMeasureValue"
            )
          )

        # Character type keeps bind_rows from failing on mixed numeric/text
        # result columns across chunks.
        join2$ResultMeasureValue <- as.character(join2$ResultMeasureValue)

        state_chunks[[chunk]] <- join2
        message(state_nm, ": sites ", first, "-", last, " of ", length(siteid_all))
      }

      # BUG FIX: the original `if (i==0) df = join2 else join2 = rbind(df, join2)`
      # never grew `df`, so only the first and last chunks per state survived.
      # All chunks are now combined before caching to disk.
      state_data <- dplyr::bind_rows(state_chunks)

      if (nrow(state_data) > 0) {
        # Cache each state's raw pull so a partial run can be recovered.
        saveRDS(
          state_data,
          file = file.path("inst/tempdata", paste0(state_nm, "_raw_data.rds"))
        )
      }
    }
  }

  all_data <- data.frame()
  for (state in state_cd_cont$STUSAB) {
    # BUG FIX: the original re-read `tempfilename` (whichever file name was
    # last written) for every state; each state now reads its own cache file.
    # States with no data never wrote a file, so a failed read yields NULL
    # and the state is skipped.
    state_file <- file.path("inst/tempdata", paste0(state, "_raw_data.rds"))
    state_df <- tryCatch(
      readRDS(state_file),
      error = function(e) NULL
    )

    if (!is.null(state_df) && nrow(state_df) > 0) {
      all_data <- dplyr::bind_rows(all_data, state_df)
    }
  }

  # Summary filtering above is year-granular; trim to the exact date window.
  finalprofile <- all_data %>%
    dplyr::filter(
      ActivityStartDate <= endDate,
      ActivityStartDate >= startDate
    )

  finalprofile2 <- autoclean(finalprofile)
  # not sure if above is working correctly, thousands of "duplicated" rows are removed
  # you will still need to filter on activity media subdivision now

  return(finalprofile2)
}
2 changes: 1 addition & 1 deletion R/Transformations.R
Original file line number Diff line number Diff line change
Expand Up @@ -331,7 +331,7 @@ WQXTargetUnits <- function(.data, transform = TRUE) {
#'
#' @return When transform = FALSE and flag = TRUE, Harmonization Reference Table
#' columns are appended to the dataset only. When transform = TRUE and flag = TRUE,
#' Harmoinzation columns are appended to the dataset and transformations are
#' Harmonization columns are appended to the dataset and transformations are
#' executed. When transform = TRUE and flag = FALSE, transformations are executed
#' only. When transform = FALSE and flag = FALSE, an error is returned (function
#' would return the input dataframe unchanged if input was allowed).
Expand Down
4 changes: 2 additions & 2 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
url: https://pkgdown.r-lib.org
url: https://usepa.github.io/TADA/
template:
bootstrap: 5
bootswatch: cerulean

50 changes: 50 additions & 0 deletions inst/extdata/statecode.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
"STATE","STATE_NAME","STUSAB","STATENS"
"01","Alabama","AL","01779775"
"04","Arizona","AZ","01779777"
"05","Arkansas","AR","00068085"
"06","California","CA","01779778"
"08","Colorado","CO","01779779"
"09","Connecticut","CT","01779780"
"10","Delaware","DE","01779781"
"11","District of Columbia","DC","01702382"
"12","Florida","FL","00294478"
"13","Georgia","GA","01705317"
"16","Idaho","ID","01779783"
"17","Illinois","IL","01779784"
"18","Indiana","IN","00448508"
"19","Iowa","IA","01779785"
"20","Kansas","KS","00481813"
"21","Kentucky","KY","01779786"
"22","Louisiana","LA","01629543"
"23","Maine","ME","01779787"
"24","Maryland","MD","01714934"
"25","Massachusetts","MA","00606926"
"26","Michigan","MI","01779789"
"27","Minnesota","MN","00662849"
"28","Mississippi","MS","01779790"
"29","Missouri","MO","01779791"
"30","Montana","MT","00767982"
"31","Nebraska","NE","01779792"
"32","Nevada","NV","01779793"
"33","New Hampshire","NH","01779794"
"34","New Jersey","NJ","01779795"
"35","New Mexico","NM","00897535"
"36","New York","NY","01779796"
"37","North Carolina","NC","01027616"
"38","North Dakota","ND","01779797"
"39","Ohio","OH","01085497"
"40","Oklahoma","OK","01102857"
"41","Oregon","OR","01155107"
"42","Pennsylvania","PA","01779798"
"44","Rhode Island","RI","01219835"
"45","South Carolina","SC","01779799"
"46","South Dakota","SD","01785534"
"47","Tennessee","TN","01325873"
"48","Texas","TX","01779801"
"49","Utah","UT","01455989"
"50","Vermont","VT","01779802"
"51","Virginia","VA","01779803"
"53","Washington","WA","01779804"
"54","West Virginia","WV","01779805"
"55","Wisconsin","WI","01779806"
"56","Wyoming","WY","01779807"
2 changes: 1 addition & 1 deletion man/HarmonizeData.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 3e954b3

Please sign in to comment.