diff --git a/.Rbuildignore b/.Rbuildignore index 5fbe8d8a..361717f7 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -7,6 +7,7 @@ man-roxygen ^\.Rproj\.user$ inst/img inst/ignore +inst/user_test_cases vignettes/margins.sty CONTRIBUTING.md ^appveyor\.yml$ @@ -17,3 +18,4 @@ inst/vign/cache README.Rmd vignettes/figure ^CONDUCT\.md$ +user_test_cases/ diff --git a/.gitignore b/.gitignore index fa73e4f2..0056eb60 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,8 @@ .Rproj.user .Rhistory .RData -rnoaa.Rproj .DS_Store inst/ignore/bath.r inst/cache/ inst/vign/cache +inst/doc diff --git a/DESCRIPTION b/DESCRIPTION index 2964f587..32791df8 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -32,7 +32,9 @@ Imports: ggplot2, scales, XML, - jsonlite + jsonlite, + rappdirs, + gridExtra Suggests: testthat, roxygen2, @@ -43,5 +45,8 @@ Suggests: geojsonio, lawn, rgdal, - covr + rmarkdown, + purrr, + covr, + ggmap RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 10b6eb4f..f5b3120d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,6 +24,7 @@ export(argo_files) export(argo_plan) export(argo_qwmo) export(argo_search) +export(autoplot.meteo_coverage) export(buoy) export(buoys) export(coops_search) @@ -61,6 +62,15 @@ export(is.ncdc_stations) export(isd) export(isd_stations) export(isd_stations_search) +export(meteo_clear_cache) +export(meteo_coverage) +export(meteo_distance) +export(meteo_nearby_stations) +export(meteo_process_geographic_data) +export(meteo_pull_monitors) +export(meteo_show_cache) +export(meteo_spherical_distance) +export(meteo_tidy_ghcnd) export(ncdc) export(ncdc_combine) export(ncdc_datacats) @@ -97,6 +107,7 @@ export(swdi) export(theme_ice) export(tornadoes) export(type_summ) +export(vis_miss) importFrom(XML,htmlParse) importFrom(XML,xmlParse) importFrom(XML,xmlToList) @@ -109,6 +120,7 @@ importFrom(dplyr,bind_rows) importFrom(dplyr,contains) importFrom(dplyr,filter) importFrom(dplyr,mutate) +importFrom(dplyr,rbind_all) importFrom(dplyr,rename) 
importFrom(dplyr,select) importFrom(dplyr,tbl_df) @@ -122,8 +134,10 @@ importFrom(ggplot2,guide_legend) importFrom(ggplot2,guides) importFrom(ggplot2,labs) importFrom(ggplot2,scale_x_date) +importFrom(ggplot2,scale_x_datetime) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) +importFrom(gridExtra,grid.arrange) importFrom(httr,GET) importFrom(httr,add_headers) importFrom(httr,build_url) @@ -139,6 +153,8 @@ importFrom(lubridate,today) importFrom(lubridate,year) importFrom(lubridate,ymd) importFrom(methods,is) +importFrom(rappdirs,user_cache_dir) +importFrom(scales,comma) importFrom(scales,date_breaks) importFrom(scales,date_format) importFrom(stats,complete.cases) diff --git a/R/aaa.r b/R/aaa.r new file mode 100644 index 00000000..a0521395 --- /dev/null +++ b/R/aaa.r @@ -0,0 +1,12 @@ +# base app folder for rappdirs +rnoaa_app_name <- "rnoaa" +rnoaa_cache_dir <- rappdirs::user_cache_dir(rnoaa_app_name) + +# for the caches for the meteo verbs +rnoaa_meteo_dir <- file.path(rnoaa_cache_dir, "meteo") + +# Create cache dir on load if it doesn't exist +.onLoad <- function(libname, pkgname) { + dir.create(rnoaa_meteo_dir, showWarnings = FALSE, recursive = TRUE) + invisible() +} diff --git a/R/ghcnd.R b/R/ghcnd.R index fc03269d..77b54e11 100644 --- a/R/ghcnd.R +++ b/R/ghcnd.R @@ -1,45 +1,143 @@ -#' Get GHCND daily data from NOAA FTP server +#' Get a cleaned version of GHCND data from a single weather site #' -#' @export +#' This function uses ftp to access the Global Historical Climatology Network +#' daily weather data from NOAA's FTP server for a single weather monitor site. It +#' requires the site identification number for that site and will pull the +#' entire weather dataset for the site. It will then clean this data to convert +#' it to a tidier format and will also, if requested, filter it to a certain +#' date range and to certain weather variables. 
+#' +#' @inheritParams ghcnd +#' @param date_min A character string giving the earliest +#' date of the daily weather time series that the user would +#' like in the final output. This character string should be formatted as +#' "yyyy-mm-dd". If not specified, the default is to keep all daily data for +#' the queried weather site from the earliest available date. +#' @param date_max A character string giving the latest +#' date of the daily weather time series that the user would +#' like in the final output. This character string should be formatted as +#' "yyyy-mm-dd". If not specified, the default is to keep all daily data for +#' the queried weather site through the most current available date. +#' @param var A character vector specifying either \code{"all"} (pull all +#' available weather parameters for the site) or the weather parameters to +#' keep in the final data (e.g., \code{c("TMAX", "TMIN")} to only keep +#' maximum and minimum temperature). Example choices for this argument include: +#' \itemize{ +#' \item \code{PRCP}: Precipitation, in tenths of millimeters +#' \item \code{TAVG}: Average temperature, in tenths of degrees Celsius +#' \item \code{TMAX}: Maximum temperature, in tenths of degrees Celsius +#' \item \code{TMIN}: Minimum temperature, in tenths of degrees Celsius +#' } +#' A full list of possible weather variables is available in NOAA's README +#' file for the GHCND data (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). +#' Most weather stations will only have a small subset of all the possible +#' weather variables, so the data generated by this function may not include +#' all of the variables the user specifies through this argument. #' -#' @param stationid Stationid to get -#' @param path (character) A path to store the files, Default: \code{~/.rnoaa/isd} -#' @param ... Curl options passed on to \code{\link[httr]{GET}} -#' @param n Number of rows to print -#' @param x Input object to print methods. 
For \code{ghcnd_splitvars()}, the output of a call -#' to \code{ghcnd()}. -#' @param date_min,date_max (character) Minimum and maximum dates. Use together to get a -#' date range -#' @param var (character) Variable to get, defaults to "all", which gives back all variables -#' in a list. To see what variables are available for a dataset, look at the dataset returned -#' from \code{ghcnd()}. +#' @return A list object with slots for each of the available specified +#' weather variables. Each element in the list is a separate time series +#' dataframe with daily observations, as well as flag values, for one of +#' the weather variables. The flag values give information on the quality +#' and source of each observation; see the NOAA README file linked above +#' for more information. #' #' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}, #' Adam Erickson \email{adam.erickson@@ubc.ca} -#' -#' @details Functions: -#' \itemize{ -#' \item \code{ghcnd_version} - Get current version of GHCND data -#' \item \code{ghcnd_stations} - Get GHCND stations and their metadata -#' \item \code{ghcnd_states} - Get US/Canada state names and 2-letter codes -#' \item \code{ghcnd_countries} - Get country names and 2-letter codes -#' \item \code{ghcnd_search} - Search GHCND data -#' \item \code{ghcnd} - Get GHCND data -#' \item \code{ghcnd_splitvars} - Split variables in data returned from \code{ghcnd} -#' \item \code{ghcnd_clear_cache} - Clear cache of locally stored files -#' } +#' +#' @note This function calls \code{\link{ghcnd}}, which will download and save +#' data from all available dates and weather variables for the queried +#' weather station. The step of limiting the dataset to only certain dates +#' and / or weather variables, using the \code{date_min}, \code{date_max}, +#' and \code{var} arguments, does not occur until after the full data has +#' been pulled. 
+#' +#' @seealso \code{\link{meteo_pull_monitors}}, \code{\link{meteo_tidy_ghcnd}} #' #' @examples \dontrun{ -#' # Get metadata -#' ghcnd_states() -#' ghcnd_countries() -#' ghcnd_version() #' -#' # Get stations, ghcnd-stations and ghcnd-inventory merged -#' (stations <- ghcnd_stations()) +#' # Search based on variable and/or date +#' ghcnd_search("AGE00147704", var = "PRCP") +#' ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01") +#' ghcnd_search("AGE00147704", var = "PRCP", date_max = "1915-01-01") +#' ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01", +#' date_max = "1925-01-01") +#' ghcnd_search("AGE00147704", date_min = "1920-01-01", date_max = "1925-01-01") +#' ghcnd_search("AGE00147704", var = c("PRCP","TMIN")) +#' ghcnd_search("AGE00147704", var = c("PRCP","TMIN"), date_min = "1920-01-01") +#' ghcnd_search("AGE00147704", var = "adfdf") +#' +#' } #' +#' @export +ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, var = "all", + path = "~/.rnoaa/ghcnd", ...){ + + dat <- ghcnd_splitvars(ghcnd(stationid, path = path)) + possvars <- paste0(names(dat), collapse = ", ") + + if (any(var != "all")) { + vars_null <- sort(tolower(var))[!sort(tolower(var)) %in% sort(names(dat))] + dat <- dat[tolower(var)] + } + if (any(sapply(dat, is.null))) { + dat <- noaa_compact(dat) + warning(sprintf("%s not in the dataset\nAvailable variables: %s", paste0(vars_null, collapse = ", "), possvars), call. = FALSE) + } + if (!is.null(date_min)) { + dat <- lapply(dat, function(z) z %>% dplyr::filter(date >= date_min)) + } + if (!is.null(date_max)) { + dat <- lapply(dat, function(z) z %>% dplyr::filter(date <= date_max)) + } + dat +} + +#' Get all GHCND data from a single weather site +#' +#' This function uses ftp to access the Global Historical Climatology Network +#' daily weather data from NOAA's FTP server for a single weather site. 
It +#' requires the site identification number for that site and will pull the +#' entire weather dataset for the site. +#' +#' @param stationid A character string giving the identification of the weather +#' station for which the user would like to pull data. To get a full and +#' current list of stations, the user can use the \code{\link{ghcnd_stations}} +#' function. To identify stations within a certain radius of a location, the +#' user can use the \code{\link{meteo_nearby_stations}} function. +#' @param path A character vector giving the path to the directory to cache +#' the files locally. By default, the function uses \code{~/.rnoaa/ghcnd}. +#' @param ... Additional curl options to pass through to \code{\link[httr]{GET}}. +#' +#' @return A list object with a single slot, \code{data}, which contains the +#' dataframe pulled from NOAA's FTP for the queried weather site. A README +#' file with more information about the format of this file is available +#' from NOAA (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). +#' This file is formatted so each line of the file gives the daily weather +#' observations for a single weather variable for all days of one month of +#' one year. In addition to measurements, columns are included for certain +#' flags, which add information on observation sources and quality and are +#' further explained in NOAA's README file for the data. +#' +#' @note This function saves the full set of weather data for the queried +#' site locally in the directory specified by the \code{path} argument. 
+#' +#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}, +#' Adam Erickson \email{adam.erickson@@ubc.ca} +#' +#' @seealso To generate a weather dataset for a single weather site that has been +#' cleaned to a tidier weather format, the user should use the +#' \code{\link{ghcnd_search}} function, which calls \code{\link{ghcnd}} and then +#' processes the output, or \code{\link{meteo_tidy_ghcnd}}, which wraps the +#' \code{\link{ghcnd_search}} function to output a tidy dataframe rather than a +#' list object. To pull GHCND data from multiple monitors, see +#' \code{\link{meteo_pull_monitors}}. +#' +#' @examples +#' \dontrun{ #' # Get data #' ghcnd(stationid = "AGE00147704") +#' +#' stations <- ghcnd_stations() #' ghcnd(stations$data$id[40]) #' ghcnd(stations$data$id[4000]) #' ghcnd(stations$data$id[10000]) @@ -47,7 +145,7 @@ #' ghcnd(stations$data$id[80300]) #' #' library("dplyr") -#' ghcnd(stations$data$id[80300])$data %>% select(id, element) %>% head +#' ghcnd(stations$data$id[80300])$data %>% select(id, element) %>% slice(1:3) #' #' # manipulate data #' ## using built in fxns @@ -62,17 +160,9 @@ #' dat$data %>% #' filter(element == "PRCP", year == 1909) #' -#' # Search based on variable and/or date -#' ghcnd_search("AGE00147704", var = "PRCP") -#' ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01") -#' ghcnd_search("AGE00147704", var = "PRCP", date_max = "1915-01-01") -#' ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01", date_max = "1925-01-01") -#' ghcnd_search("AGE00147704", date_min = "1920-01-01", date_max = "1925-01-01") -#' ghcnd_search("AGE00147704", var = c("PRCP","TMIN")) -#' ghcnd_search("AGE00147704", var = c("PRCP","TMIN"), date_min = "1920-01-01") -#' ghcnd_search("AGE00147704", var="adfdf") #' } - +#' +#' @export ghcnd <- function(stationid, path = "~/.rnoaa/ghcnd", ...){ csvpath <- ghcnd_local(stationid, path) if (!is_ghcnd(x = csvpath)) { @@ -82,31 +172,6 @@ ghcnd <- function(stationid, path = 
"~/.rnoaa/ghcnd", ...){ } } -#' @export -#' @rdname ghcnd -ghcnd_search <- function(stationid, date_min = NULL, date_max = NULL, var = "all", - path = "~/.rnoaa/ghcnd", ...){ - - dat <- ghcnd_splitvars(ghcnd(stationid, path = path)) - possvars <- paste0(names(dat), collapse = ", ") - - if (any(var != "all")) { - vars_null <- sort(tolower(var))[!sort(tolower(var)) %in% sort(names(dat))] - dat <- dat[tolower(var)] - } - if (any(sapply(dat, is.null))) { - dat <- noaa_compact(dat) - warning(sprintf("%s not in the dataset\nAvailable variables: %s", paste0(vars_null, collapse = ", "), possvars), call. = FALSE) - } - if (!is.null(date_min)) { - dat <- lapply(dat, function(z) z %>% dplyr::filter(date > date_min)) - } - if (!is.null(date_max)) { - dat <- lapply(dat, function(z) z %>% dplyr::filter(date < date_max)) - } - dat -} - #' @export print.ghcnd <- function(x, ..., n = 10){ cat("", sep = "\n") @@ -119,8 +184,110 @@ fm <- function(n) { gsub("\\s", "0", n) } +#' Get information on the GHCND weather stations +#' +#' This function returns an object with a dataframe with meta-information about +#' all available GHCND weather stations. +#' +#' @inheritParams ghcnd +#' +#' @return This function returns a list with a single element-- a dataframe with +#' a weather station on each row with the following columns: +#' \itemize{ +#' \item \code{id}: The weather station's ID number. The first two letters +#' denote the country (using FIPS country codes). +#' \item \code{latitude}: The station's latitude, in decimal degrees. +#' Southern latitudes will be negative. +#' \item \code{longitude}: The station's longitude, in decimal degrees. +#' Western longitudes will be negative. +#' \item \code{elevation}: The station's elevation, in meters. +#' \item \code{name}: The station's name. +#' \item \code{gsn_flag}: "GSN" if the monitor belongs to the GCOS Surface +#' Network (GSN). Otherwise either blank or missing. 
+#' \item \code{wmo_id}: If the station has a WMO number, this column gives +#' that number. Otherwise either blank or missing. +#' \item \code{element}: A weather variable recorded at some point during +#' that station's history. See the link below in "References" for definitions +#' of the abbreviations used for this variable. +#' \item \code{first_year}: The first year of data available at that station +#' for that weather element. +#' \item \code{last_year}: The last year of data available at that station +#' for that weather element. +#' } +#' If a weather station has data on more than one weather variable, it will +#' be represented in multiple rows of this output dataframe. +#' +#' @note Since this function is pulling a large dataset by ftp, it may take +#' a while to run. +#' +#' @references +#' +#' For more documentation on the returned dataset, see +#' \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}. +#' +#' @examples +#' \dontrun{ +#' # Get stations, ghcnd-stations and ghcnd-inventory merged +#' stations <- ghcnd_stations() +#' } +#' +#' @export +ghcnd_stations <- function(...){ + sta <- get_stations(...) + inv <- get_inventory(...) + structure(list(data = merge(sta, inv[,-c(2,3)], by = "id")), class = "ghcnd_stations") +} + +get_stations <- function(...){ + res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt", ...) + df <- read.fwf(textConnection(utcf8(res)), widths = c(11, 9, 11, 7, 33, 5, 10), + header = FALSE, strip.white = TRUE, comment.char = "", stringsAsFactors = FALSE) + nms <- c("id","latitude", "longitude", "elevation", "name", "gsn_flag", "wmo_id") + setNames(df, nms) +} + +get_inventory <- function(...){ + res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt", ...) 
+ df <- read.fwf(textConnection(utcf8(res)), widths = c(11, 9, 10, 5, 5, 5), + header = FALSE, strip.white = TRUE, comment.char = "", stringsAsFactors = FALSE) + nms <- c("id","latitude", "longitude", "element", "first_year", "last_year") + setNames(df, nms) +} + +#' Print out GHCND stations +#' +#' This function prints out, in a nice format, the object returned by +#' \code{\link{ghcnd_stations}}. +#' +#' @param x The object returned by a call to \code{\link{ghcnd_stations}}. +#' @param n Number of rows of station data to print. +#' @param ... Other parameters to pass to \code{print}. +#' +#' @examples +#' \dontrun{ +#' stations <- ghcnd_stations() +#' print(stations) +#' } +#' +#' @export +print.ghcnd_stations <- function(x, ..., n = 10){ + cat("", sep = "\n") + cat(sprintf("Size: %s X %s\n", NROW(x$data), NCOL(x$data)), sep = "\n") + trunc_mat_(x$data, n = n) +} + +#' Split variables in data returned from \code{ghcnd} +#' +#' This function is a helper function for \code{\link{ghcnd_search}}. It helps +#' with cleaning up the data returned from \code{\link{ghcnd}}, to get it in a +#' format that is easier to work with using R. +#' +#' @param x An object returned from \code{\link{ghcnd}}. +#' +#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}, +#' Adam Erickson \email{adam.erickson@@ubc.ca} +#' #' @export -#' @rdname ghcnd ghcnd_splitvars <- function(x){ tmp <- x$data tmp <- tmp[!is.na(tmp$id), ] @@ -170,39 +337,33 @@ strex <- function(x) str_extract_(x, "[0-9]+") # merge(x[[2]], x[[3]] %>% select(-id), by='date') # } +#' Get meta-data on the GHCND daily data +#' +#' These functions allow you to pull the current versions of certain meta-datasets +#' for the GHCND, including lists of country and state abbreviations used in some +#' of the weather station IDs and information about the current version of the +#' data. 
+#' +#' @inheritParams ghcnd +#' +#' @author Scott Chamberlain \email{myrmecocystus@@gmail.com}, +#' Adam Erickson \email{adam.erickson@@ubc.ca} +#' +#' @details Functions: +#' \itemize{ +#' \item \code{ghcnd_version}: Get current version of GHCND data +#' \item \code{ghcnd_states}: Get US/Canada state names and 2-letter codes +#' \item \code{ghcnd_countries}: Get country names and 2-letter codes +#' } +#' +#' @examples \dontrun{ +#' # Get metadata +#' ghcnd_states() +#' ghcnd_countries() +#' ghcnd_version() +#' } +#' #' @export -#' @rdname ghcnd -ghcnd_stations <- function(..., n = 10){ - sta <- get_stations(...) - inv <- get_inventory(...) - structure(list(data = merge(sta, inv[,-c(2,3)], by = "id")), class = "ghcnd_stations") -} - -#' @export -print.ghcnd_stations <- function(x, ..., n = 10){ - cat("", sep = "\n") - cat(sprintf("Size: %s X %s\n", NROW(x$data), NCOL(x$data)), sep = "\n") - trunc_mat_(x$data, n = n) -} - -get_stations <- function(...){ - res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt", ...) - df <- read.fwf(textConnection(utcf8(res)), widths = c(11, 9, 11, 7, 33, 5, 10), - header = FALSE, strip.white = TRUE, comment.char = "", stringsAsFactors = FALSE) - nms <- c("id","latitude", "longitude", "elevation", "name", "gsn_flag", "wmo_id") - setNames(df, nms) -} - -get_inventory <- function(...){ - res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-inventory.txt", ...) - df <- read.fwf(textConnection(utcf8(res)), widths = c(11, 9, 10, 5, 5, 5), - header = FALSE, strip.white = TRUE, comment.char = "", stringsAsFactors = FALSE) - nms <- c("id","latitude", "longitude", "element", "first_year", "last_year") - setNames(df, nms) -} - -#' @export -#' @rdname ghcnd ghcnd_states <- function(...){ # res <- suppressWarnings(GET("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-states.txt", ...)) res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-states.txt", ...) 
@@ -212,6 +373,23 @@ ghcnd_states <- function(...){ df[ -NROW(df) ,] } +#' @export +#' @rdname ghcnd_states +ghcnd_countries <- function(...){ + res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-countries.txt", ...) + df <- read.fwf(textConnection(utcf8(res)), widths = c(2, 47), + header = FALSE, strip.white = TRUE, comment.char = "", + stringsAsFactors = FALSE, col.names = c("code","name")) + df[ -NROW(df) ,] +} + +#' @export +#' @rdname ghcnd_states +ghcnd_version <- function(...){ + res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-version.txt", ...) + utcf8(res) +} + GET_retry <- function(url, ..., times = 3) { res <- suppressWarnings(GET(url, ...)) if (res$status_code > 226) { @@ -228,23 +406,6 @@ GET_retry <- function(url, ..., times = 3) { return(res) } -#' @export -#' @rdname ghcnd -ghcnd_countries <- function(...){ - res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-countries.txt", ...) - df <- read.fwf(textConnection(utcf8(res)), widths = c(2, 47), - header = FALSE, strip.white = TRUE, comment.char = "", - stringsAsFactors = FALSE, col.names = c("code","name")) - df[ -NROW(df) ,] -} - -#' @export -#' @rdname ghcnd -ghcnd_version <- function(...){ - res <- GET_retry("ftp://ftp.ncdc.noaa.gov/pub/data/ghcn/daily/ghcnd-version.txt", ...) 
- utcf8(res) -} - ghcnd_zip <- function(x){ "adf" } @@ -252,10 +413,11 @@ ghcnd_zip <- function(x){ ghcnd_GET <- function(bp, stationid, ...){ dir.create(bp, showWarnings = FALSE, recursive = TRUE) fp <- ghcnd_local(stationid, bp) - res <- suppressWarnings(GET(ghcnd_remote(stationid), ...)) + res <- suppressWarnings(httr::GET(ghcnd_remote(stationid), ...)) tt <- utcf8(res) vars <- c("id","year","month","element",do.call("c", lapply(1:31, function(x) paste0(c("VALUE","MFLAG","QFLAG","SFLAG"), x)))) - df <- read.fwf(textConnection(tt), c(11,4,2,4,rep(c(5,1,1,1), 31))) + df <- read.fwf(textConnection(tt), c(11,4,2,4,rep(c(5,1,1,1), 31)), + na.strings = "-9999") dat <- setNames(df, vars) write.csv(dat, fp, row.names = FALSE) return(dat) diff --git a/R/globals.R b/R/globals.R index 66d4574e..6aded1b8 100644 --- a/R/globals.R +++ b/R/globals.R @@ -1,2 +1,4 @@ if(getRversion() >= "2.15.1") - utils::globalVariables(c('storm_columns','storm_names','value','id','element','day')) + utils::globalVariables(c('storm_columns','storm_names','value','id','element', + 'day', '.')) + diff --git a/R/helpers_ghcnd.R b/R/helpers_ghcnd.R new file mode 100644 index 00000000..99a5dba0 --- /dev/null +++ b/R/helpers_ghcnd.R @@ -0,0 +1,223 @@ +#' Pull GHCND weather data for multiple weather monitors +#' +#' This function takes a vector of one or more weather station IDs. It will pull +#' the weather data from the Global Historical Climatology Network's daily +#' data (GHCND) for each of the stations and join them together in a single tidy +#' dataframe. For any weather stations that the user calls that are not +#' available by ftp from GHCND, the function will return a warning +#' giving the station ID. +#' +#' @param monitors A character vector listing the station IDs for all +#' weather stations the user would like to pull. To get a full and +#' current list of stations, the user can use the \code{\link{ghcnd_stations}} +#' function. 
To identify stations within a certain radius of a location, the +#' user can use the \code{\link{meteo_nearby_stations}} function. +#' @inheritParams meteo_tidy_ghcnd +#' @inheritParams ghcnd_search +#' +#' @return A data frame of daily weather data for multiple weather monitors, +#' converted to a tidy format. All weather variables may not exist for all +#' weather stations. Examples of variables returned are: +#' \itemize{ +#' \item \code{id}: Character string with the weather station site id +#' \item \code{date}: Date of the observation +#' \item \code{prcp}: Precipitation, in tenths of mm +#' \item \code{tavg}: Average temperature, in tenths of degrees Celsius +#' \item \code{tmax}: Maximum temperature, in tenths of degrees Celsius +#' \item \code{tmin}: Minimum temperature, in tenths of degrees Celsius +#' \item \code{awnd}: Average daily wind speed, in meters / second +#' \item \code{wsfg}: Peak gust wind speed, in meters / second +#' } +#' There are other possible weather variables in the Global Historical +#' Climatology Network; see +#' \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt} for a full +#' list. If the \code{var} argument is something other than "all", then +#' only variables included in that argument will be included in the output +#' data frame. All variables are in the units specified in the linked file +#' (note that, in many cases, measurements are given in tenths of the units +#' more often used, e.g., tenths of degrees for temperature). All column names +#' correspond to variable names in the linked file, but with all uppercase +#' letters changed to lowercase. +#' +#' @note The weather flags, which are kept by specifying +#' \code{keep_flags = TRUE} are: +#' \itemize{ +#' \item \code{*_mflag}: Measurement flag, which gives some information on how +#' the observation was measured. +#' \item \code{*_qflag}: Quality flag, which gives quality information on the +#' measurement, like if it failed to pass certain quality checks. 
+#' \item \code{*_sflag}: Source flag. This gives some information on the +#' weather collection system (e.g., U.S. Cooperative Summary of the Day, +#' Australian Bureau of Meteorology) the weather observation comes from. +#' } +#' More information on the interpretation of these flags can be found in the +#' README file for the NCDC's Daily Global Historical Climatology Network's +#' data at \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}. +#' +#' @note This function converts any value of -9999 to a missing value for the +#' variables "prcp", "tmax", "tmin", "tavg", "snow", and "snwd". However, +#' for some weather observations, there still may be missing values coded +#' using a series of "9"s of some length. You will want to check your final +#' data to see if there are lurking missing values given with series of "9"s. +#' +#' @note This function may take a while to run. +#' +#' @author Brooke Anderson \email{brooke.anderson@@colostate.edu} +#' +#' @references +#' +#' For more information about the data pulled with this function, see: +#' +#' Menne, M.J., I. Durre, R.S. Vose, B.E. Gleason, and T.G. Houston, 2012: +#' An overview of the Global Historical Climatology Network-Daily Database. +#' Journal of Atmospheric and Oceanic Technology, 29, 897-910, +#' doi:10.1175/JTECH-D-11-00103.1. 
+#' +#' @examples +#' \dontrun{ +#' +#' monitors <- c("ASN00003003", "ASM00094299", "ASM00094995", "ASM00094998") +#' all_monitors_clean <- meteo_pull_monitors(monitors) +#' +#' } +#' +#' @export +meteo_pull_monitors <- function(monitors, keep_flags = FALSE, date_min = NULL, + date_max = NULL, var = "all"){ + monitors <- unique(monitors) + + safe_meteo_tidy_ghcnd <- purrr::safely(meteo_tidy_ghcnd) + all_monitors_clean <- lapply(monitors, safe_meteo_tidy_ghcnd, + keep_flags = keep_flags, date_min = date_min, + date_max = date_max, var = var) + + check_station <- sapply(all_monitors_clean, function(x) is.null(x$result)) + bad_stations <- monitors[check_station] + if(length(bad_stations) > 0){ + warning(paste("The following stations could not be pulled from", + "the GHCN ftp:\n", paste(bad_stations, collapse = ", "), + "\nAny other monitors were successfully pulled from GHCN.")) + } + + all_monitors_out <- lapply(all_monitors_clean[!check_station], + function(x) x$result) + all_monitors_out <- suppressWarnings(dplyr::bind_rows(all_monitors_out)) + return(all_monitors_out) +} + +#' Create a tidy GHCND dataset from a single monitor +#' +#' This function inputs an object created by \code{\link{ghcnd}} and cleans up +#' the data into a tidy form. +#' +#' @param keep_flags TRUE / FALSE for whether the user would like to keep all the flags +#' for each weather variable. The default is to not keep the flags (FALSE). +#' See the note below for more information on these flags. +#' @inheritParams ghcnd_search +#' +#' @return A data frame of daily weather data for a single weather monitor, +#' converted to a tidy format. All weather variables may not exist for all +#' weather stations. 
Examples of variables returned are: +#' \itemize{ +#' \item \code{id}: Character string with the weather station site id +#' \item \code{date}: Date of the observation +#' \item \code{prcp}: Precipitation, in mm +#' \item \code{tavg}: Average temperature, in degrees Celsius +#' \item \code{tmax}: Maximum temperature, in degrees Celsius +#' \item \code{tmin}: Minimum temperature, in degrees Celsius +#' \item \code{awnd}: Average daily wind speed, in meters / second +#' \item \code{wsfg}: Peak gust wind speed, in meters / second +#' } +#' There are other possible weather variables in the Global Historical +#' Climatology Network; see +#' \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt} for a full +#' list. The variables \code{prcp}, \code{tmax}, \code{tmin}, and \code{tavg} +#' have all been converted from tenths of their metric to the metric (e.g., +#' from tenths of degrees Celsius to degrees Celsius). All other variables +#' are in the units specified in the linked file. +#' +#' @note The weather flags, which are kept by specifying +#' \code{keep_flags = TRUE} are: +#' \itemize{ +#' \item \code{*_mflag}: Measurement flag, which gives some information on how +#' the observation was measured. +#' \item \code{*_qflag}: Quality flag, which gives quality information on the +#' measurement, like if it failed to pass certain quality checks. +#' \item \code{*_sflag}: Source flag. This gives some information on the +#' weather collection system (e.g., U.S. Cooperative Summary of the Day, +#' Australian Bureau of Meteorology) the weather observation comes from. +#' } +#' More information on the interpretation of these flags can be found in the +#' README file for the NCDC's Daily Global Historical Climatology Network's +#' data at \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}. 
+#' +#' @author Brooke Anderson \email{brooke.anderson@@colostate.edu} +#' +#' @seealso \code{\link{meteo_pull_monitors}} +#' +#' @examples +#' \dontrun{ +#' # One station in Australia is ASM00094275 +#' meteo_tidy_ghcnd(stationid = "ASN00003003") +#' meteo_tidy_ghcnd(stationid = "ASN00003003", var = "tavg") +#' meteo_tidy_ghcnd(stationid = "ASN00003003", date_min = "1950-01-01") +#' } +#' +#' @importFrom dplyr %>% +#' +#' @export +meteo_tidy_ghcnd <- function(stationid, keep_flags = FALSE, var = "all", + date_min = NULL, date_max = NULL){ + + dat <- suppressWarnings(ghcnd_search(stationid = stationid, var = var, + date_min = date_min, + date_max = date_max)) %>% + lapply(meteo_tidy_ghcnd_element, keep_flags = keep_flags) + cleaned_df <- do.call(rbind.data.frame, dat) %>% + tidyr::spread_("key", "value") + + which_vars_to_clean <- which(colnames(cleaned_df) %in% + c("prcp", "tmax", "tmin", "tavg", "snow", "snwd")) + cleaned_df <- dplyr::tbl_df(cleaned_df) + cleaned_df[, which_vars_to_clean] <- vapply(cleaned_df[ , which_vars_to_clean], + FUN.VALUE = numeric(nrow(cleaned_df)), + FUN = function(x){ + x <- ifelse(x == -9999, NA, x) + x <- as.numeric(x) + }) + return(cleaned_df) +} + +#' Restructure element of ghcnd_search list +#' +#' This function restructures a single element of the list object created +#' by \code{\link{ghcnd_search}}, to add a column giving the variable name +#' (\code{key}) and change the name of the variable column to \code{value}. +#' These changes facilitate combining all elements from the list created by +#' \code{\link{ghcnd_search}}, to create a tidy dataframe of the weather +#' observations from the station. +#' +#' @param x A dataframe with daily observations for a single monitor for a +#' single weather variable. This dataframe is one of the elements returned +#' by \code{\link{ghcnd_search}}. 
#' @inheritParams meteo_tidy_ghcnd
#'
#' @return A dataframe reformatted to allow easy aggregation of all weather
#'    variables for a single monitor.
#'
#' @author Brooke Anderson \email{brooke.anderson@@colostate.edu}
meteo_tidy_ghcnd_element <- function(x, keep_flags = FALSE){
  # The name of the second column is used as the suffix that ties each flag
  # column to its weather variable.
  var_name <- colnames(x)[2]
  if (keep_flags) {
    # Keep the flag columns, but make their names unique per variable
    # (e.g. `<flagcol>_<var_name>`).
    flag_locs <- grep("flag", colnames(x))
    colnames(x)[flag_locs] <- paste(colnames(x)[flag_locs], var_name, sep = "_")
  } else {
    # Drop every column whose name ends in "flag".
    x <- dplyr::select_(x, "-dplyr::ends_with('flag')")
  }
  # Both branches end with the same reshape: everything except `id` and `date`
  # is gathered into long key / value form.
  x <- tidyr::gather_(x, "key", "value",
                      gather_cols = dplyr::select_vars(names(x), -id, -date))
  return(x)
}

# diff --git a/R/meteo_cache.r b/R/meteo_cache.r
# new file mode 100644
# index 00000000..9dd5fd0d
# --- /dev/null
# +++ b/R/meteo_cache.r
# @@ -0,0 +1,22 @@
#' Clear \emph{meteo} cached files
#'
#' Deletes everything found under the \code{rnoaa} application cache
#' directory (the path printed by \code{\link{meteo_show_cache}}).
#'
#' @param force Passed on to the \code{force} argument of
#'    \code{\link{unlink}}. Default: \code{FALSE}.
#'
#' @return The value of \code{\link{unlink}}, invisibly.
#'
#' @note This function will clear all cached \emph{meteo} files.
#' @family meteo
#' @export
meteo_clear_cache <- function(force = FALSE) {
  # NOTE(review): this lists `rnoaa_cache_dir`, i.e. the whole rnoaa cache, not
  # only its "meteo" sub-directory -- confirm that this is intended.
  files <- list.files(rnoaa_cache_dir, full.names = TRUE)
  # `force` is now a real argument. Previously the name resolved to the base
  # function `force()`, which unlink() rejects as an invalid `force` value.
  unlink(files, recursive = TRUE, force = force)
}

#' Show the \emph{meteo} cache directory
#'
#' Displays the full path to the \code{meteo} cache directory
#'
#' @return Called for its side effect of printing the path.
#'
#' @family meteo
#' @export
meteo_show_cache <- function() {
  cat(rnoaa_cache_dir, "\n")
}

# diff --git a/R/meteo_distance.R b/R/meteo_distance.R
# new file mode 100644
# index 00000000..f12b5862
# --- /dev/null
# +++ b/R/meteo_distance.R
# @@ -0,0 +1,283 @@
#' Find weather monitors near locations
#'
#' This function inputs a dataframe with latitudes and longitudes of locations
#' and creates a dataframe with monitors within a certain radius of those
#' locations. The function can also be used, with the \code{limit} argument, to pull
#' a certain number of the closest weather monitors to each location.
+#' The weather monitor IDs in the output dataframe can be used with other +#' \code{rnoaa} functions to pull data from all available weather stations near +#' a location (e.g., \code{\link{meteo_pull_monitors}}). +#' +#' Great circle distance is used to determine whether a weather monitor is +#' within the required radius. +#' +#' @param lat_lon_df A dataframe that contains the latitude, longitude, and +#' a unique identifier for each location (\code{id}). For an example of the +#' proper format for this dataframe, see the examples below. Latitude and +#' longitude must both be in units of decimal degrees. Southern latitudes +#' and Western longitudes should be given as negative values. +#' @param lat_colname A character string giving the name of the latitude column +#' in the \code{lat_lon_df} dataframe. +#' @param lon_colname A character string giving the name of the longitude column +#' in the \code{lat_lon_df} dataframe. +#' @param station_data The output of \code{ghcnd_stations()[[1]]}, which is +#' a current list of weather stations available through NOAA for the GHCND +#' dataset. The format of this is a dataframe +#' with one row per weather station. Latitude and longitude for the station +#' locations should be in columns with the names "latitude" and "longitude", +#' consistent with the output from \code{ghcnd_stations()[[1]]}. To save time, run the +#' \code{ghcnd_stations} call and save the output to an object, rather than +#' rerunning the default every time (see the examples in +#' \code{\link{meteo_nearby_stations}}). +#' @param year_min A numeric value giving the earliest year from which you +#' ultimately want weather data (e.g., 2013, if you only are interested in +#' data from 2013 and later). +#' @param year_max A numeric value giving the latest year from which you +#' ultimately want weather data. +#' @param radius A numeric vector giving the radius (in kilometers) within which +#' to search for monitors near a location. 
+#' @param limit An integer giving the maximum number of monitors to include for +#' each location. The [x] closest monitors will be kept. Default is NULL +#' (pull everything available, within the radius if the radius is specified). +#' @inheritParams ghcnd_search +#' +#' @return A list containing dataframes with the sets of unique weather stations within +#' the search radius for each location. Site IDs for the weather stations +#' given in this dataframe can be used in conjunction with other functions in the +#' \code{rnoaa} package to pull weather data for the station. The dataframe +#' for each location includes: +#' \itemize{ +#' \item \code{id}: The weather station ID, which can be used in other +#' functions to pull weather data from the station; +#' \item \code{name}: The weather station name; +#' \item \code{latitude}: The station's latitude, in decimal degrees. Southern +#' latitudes will be negative; +#' \item \code{longitude}: The station's longitude, in decimal degrees. Western +#' longitudes will be negative; +#' \item \code{distance}: The station's distance, in kilometers, from the +#' location. +#' } +#' +#' @note By default, this function will pull the full station list from NOAA +#' to use to identify nearby locations. If you will be creating lists of +#' monitors nearby several stations, you can save some time by using the +#' \code{\link{ghcnd_stations}} function separately to create an object +#' with all stations and then use the argument \code{station_data} in +#' this function to reference that object, rather than using this function's +#' defaults (see examples). +#' +#' @seealso The weather monitor IDs generated by this function can be used in +#' other functions in the \code{rnoaa} package, like +#' \code{\link{meteo_pull_monitors}} and \code{\link{meteo_tidy_ghcnd}}, to +#' pull weather data from weather monitors near a location. 
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @examples
#' \dontrun{
#'
#' station_data <- ghcnd_stations()[[1]] # Takes a while to run
#'
#' lat_lon_df <- data.frame(id = c("sydney", "brisbane"),
#'                          latitude = c(-33.8675, -27.4710),
#'                          longitude = c(151.2070, 153.0234))
#' nearby_stations <- meteo_nearby_stations(lat_lon_df = lat_lon_df,
#'                     station_data = station_data, radius = 10)
#'
#' miami <- data.frame(id = "miami", latitude = 25.7617, longitude = -80.1918)
#'
#' # Get all stations within 50 kilometers
#' meteo_nearby_stations(lat_lon_df = miami, station_data = station_data,
#'                       radius = 50, var = c("PRCP", "TMAX"),
#'                       year_min = 1992, year_max = 1992)
#' # Get the closest 10 monitors
#' meteo_nearby_stations(lat_lon_df = miami, station_data = station_data,
#'                       limit = 10, var = c("PRCP", "TMAX"),
#'                       year_min = 1992, year_max = 1992)
#' }
#'
#' @importFrom dplyr %>%
#'
#' @export
meteo_nearby_stations <- function(lat_lon_df, lat_colname = "latitude",
                                  lon_colname = "longitude",
                                  station_data = ghcnd_stations()[[1]],
                                  var = "all", year_min = NULL,
                                  year_max = NULL, radius = NULL,
                                  limit = NULL){
  var <- tolower(var)

  # Ensure `id` in `lat_lon_df` is character, not factor
  lat_lon_df$id <- as.character(lat_lon_df$id)

  # Handle generic values for `var`, `year_min`, and `year_max` arguments:
  # missing year bounds default to the full span present in `station_data`,
  # and "all" expands to every element code present in `station_data`.
  if (is.null(year_min)) year_min <- min(station_data$first_year, na.rm = TRUE)
  if (is.null(year_max)) year_max <- max(station_data$last_year, na.rm = TRUE)
  if (length(var) == 1 && var == "all") {
    var <- unique(station_data$element)
  }

  # Keep stations whose record overlaps [year_min, year_max] for at least one
  # requested element, then reduce to one row per station.
  dots <- list(~last_year >= year_min & first_year <= year_max &
                 element %in% toupper(var) & !is.na(element))
  station_data <- dplyr::filter_(station_data, .dots = dots) %>%
    dplyr::select_(~id, ~name, ~latitude, ~longitude) %>%
    dplyr::distinct_()

  # One list element per location id, named by that id. Base lapply() is used
  # rather than purrr::map(): the result is the same named list, and the
  # function no longer needs a package that is only listed under Suggests.
  locations <- as.data.frame(lat_lon_df)
  location_stations <- lapply(
    split(locations, locations[, "id"]),
    function(x) {
      meteo_distance(station_data = station_data,
                     lat = x[ , lat_colname],
                     long = x[ , lon_colname],
                     radius = radius,
                     limit = limit)
    }
  )
  return(location_stations)
}

#' Find all monitors within a radius of a location
#'
#' This function will identify all weather stations within a specified radius of
#' a location. If no radius is given, the function will return a dataframe
#' of all available monitors, sorted by distance to the location. The
#' \code{limit} argument can be used to limit the output dataframe to the [x]
#' closest monitors to the location.
#'
#' @param lat Latitude of the location. Southern latitudes should be given
#'    as negative values.
#' @param long Longitude of the location. Western longitudes should be given as
#'    negative values.
#' @param units Units of the latitude and longitude values. Possible values
#'    are:
#'    \itemize{
#'    \item \code{deg}: Degrees (default);
#'    \item \code{rad}: Radians.
#'    }
#' @inheritParams meteo_nearby_stations
#'
#' @return A dataframe of weather stations near the location. This is the
#'    single-location version of the return value for
#'    \code{\link{meteo_nearby_stations}}.
#'
#' @details The \code{units} value is forwarded to
#'    \code{\link{meteo_process_geographic_data}}. Stations whose distance is
#'    \code{NA} are dropped when \code{radius} is given. An empty station set
#'    yields an empty dataframe, also when \code{limit} is given.
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @export
meteo_distance <- function(station_data, lat, long,
                           units = 'deg', radius = NULL, limit = NULL) {

  # Adds a `distance` column and sorts stations from closest to furthest.
  data <- meteo_process_geographic_data(
    station_data = station_data,
    lat = lat,
    long = long,
    units = units
  )

  if (!is.null(radius)) {
    # which() drops NA comparisons instead of turning them into all-NA rows.
    data <- data[which(data$distance < radius), ]
  }

  if (!is.null(limit)) {
    # seq_len() gives an empty index when no rows are left; `1:0` would have
    # selected a spurious NA row.
    data <- data[seq_len(min(limit, nrow(data))), ]
  }
  return(data)
}

#' Calculate the distances between a location and all available stations
#'
#' This function takes a single location and a dataset of available weather stations
#' and calculates the distance between the location and each of the stations,
#' using the great circle method. A new column is added to the dataset of
#' available weather stations giving the distance between each station and
#' the input location. The station dataset is then sorted from closest to
#' furthest distance to the location and returned as the function output.
#'
#' @inheritParams meteo_distance
#'
#' @return The \code{station_data} dataframe that is input, but with a
#'    \code{distance} column added that gives the distance to the location
#'    (in kilometers), and re-ordered by distance between each station and
#'    the location (closest weather stations first).
#'
#' @details \code{units} is passed on to
#'    \code{\link{meteo_spherical_distance}} and therefore describes both the
#'    location and the station coordinates.
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @export
meteo_process_geographic_data <- function(station_data,
                                          lat,
                                          long,
                                          units = 'deg') {

  # Convert headers to lowercase for consistency across code
  names(station_data) <- tolower(names(station_data))

  # Calculate distance between the location and every station. `units` is
  # forwarded rather than hard-coded to "deg", so `units = "rad"` is honoured.
  station_data$distance <- meteo_spherical_distance(lat1 = lat, long1 = long,
                                                    lat2 = station_data$latitude,
                                                    long2 = station_data$longitude,
                                                    units = units)

  # Sort data into ascending order by distance column (NA distances last).
  # Row names are reset so the result is numbered 1..n in sorted order.
  station_data <- station_data[order(station_data$distance), , drop = FALSE]
  rownames(station_data) <- NULL

  return(station_data)
} # End meteo_process_geographic_data

#' Calculate the distance between two locations
#'
#' This function uses the haversine formula to calculate the great circle
#' distance between two locations, identified by their latitudes and longitudes.
#'
#' @param lat1 Latitude of the first location.
#' @param long1 Longitude of the first location.
#' @param lat2 Latitude of the second location.
#' @param long2 Longitude of the second location.
#' @inheritParams meteo_distance
#'
#' @return A numeric value giving the distance (in kilometers) between the
#'    pair of locations.
#'
#' @note This function assumes an earth radius of 6,371 km.
#'
#' @author Alex Simmons \email{a2.simmons@@qut.edu.au},
#'    Brooke Anderson \email{brooke.anderson@@colostate.edu}
#'
#' @examples
#'
#' meteo_spherical_distance(lat1 = -27.4667, long1 = 153.0217,
#'                          lat2 = -27.4710, long2 = 153.0234)
#'
#' @export
meteo_spherical_distance <- function(lat1, long1, lat2, long2, units = 'deg') {

  radius_earth <- 6371

  # Convert angle values into radians
  if (units == 'deg') {
    lat1 <- deg2rad(lat1)
    long1 <- deg2rad(long1)
    lat2 <- deg2rad(lat2)
    long2 <- deg2rad(long2)
  } else if (units != 'rad') {
    stop("The `units` argument must be `deg` or `rad`.")
  }

  # Determine distance using the haversine formula, assuming a spherical earth
  a <- sin((lat2 - lat1) / 2) ^ 2 + cos(lat1) * cos(lat2) *
    sin((long2 - long1) / 2) ^ 2

  # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
  # points, which would make sqrt(1 - a) NaN; clamp before taking roots.
  a <- pmin(1, pmax(0, a))

  d <- 2 * atan2(sqrt(a), sqrt(1 - a)) * radius_earth
  return(d)

} # End meteo_spherical_distance

#' Convert from degrees to radians
#'
#' @param deg A numeric vector in units of degrees.
#'
#' @return The input numeric vector, converted to units of radians.
deg2rad <- function(deg) {
  return(deg*pi/180)
} # End deg2rad

# diff --git a/R/meteo_utils.r b/R/meteo_utils.r
# new file mode 100644
# index 00000000..924a2518
# --- /dev/null
# +++ b/R/meteo_utils.r
# @@ -0,0 +1,139 @@
#' Determine the "coverage" for a station data frame
#'
#' Call this function after pulling down observations for a set of stations
#' to retrieve the "coverage" (i.e. how complete each field is). If either
#' or both \code{obs_start_date} or \code{obs_end_date} are specified,
#' the coverage test will be limited to that date range.
#'
#' There is an \code{autoplot} method for the output of this function.
#' @importFrom scales comma
#' @param meteo_df a \emph{meteo} \code{data.frame}
#' @param obs_start_date specify either or both (obs_start_date, obs_end_date) to constrain
#'    coverage tests. These should be \code{Date} objects.
#' @param obs_end_date specify either or both (obs_start_date, obs_end_date) to constrain
#'    coverage tests. These should be \code{Date} objects.
#' @param verbose if \code{TRUE} will display the coverage summary along
#'    with returning the coverage data.frame
#' @param df The dataframe resulting from a call to \code{meteo_coverage},
#'    used as an input to \code{autoplot.meteo_coverage}
#' @return a \code{data.frame} with the coverage for each station, minimally
#' containing: \preformatted{
#' $ id (chr)
#' $ start_date (time)
#' $ end_date (time)
#' $ total_obs (int)
#' }
#' with additional fields (and their coverage percent) depending on which
#' weather variables were queried and available for the weather station.
#' @export
#' @examples \dontrun{
#'
#' monitors <- c("ASN00095063", "ASN00024025", "ASN00040112", "ASN00041023",
#'               "ASN00009998", "ASN00066078", "ASN00003069", "ASN00090162",
#'               "ASN00040126", "ASN00058161")
#' obs <- meteo_pull_monitors(monitors)
#' obs_covr <- meteo_coverage(obs)
#' autoplot.meteo_coverage(obs_covr)
#'
#' }
meteo_coverage <- function(meteo_df,
                           obs_start_date=NULL,
                           obs_end_date=NULL,
                           verbose=FALSE) {

  # Optional lower bound: keep rows dated on or after `obs_start_date`.
  if (!is.null(obs_start_date)) {
    dots <- list(~as.Date(date) >= obs_start_date)
    meteo_df <- dplyr::filter_(meteo_df, .dots = dots)
  }

  # Optional upper bound: keep rows dated on or before `obs_end_date`.
  if (!is.null(obs_end_date)) {
    dots <- list(~as.Date(date) <= obs_end_date)
    meteo_df <- dplyr::filter_(meteo_df, .dots = dots)
  }

  # One summary row per station `id`. Inside do(), `.` is that station's rows.
  dplyr::group_by_(meteo_df, ~id) %>%
    dplyr::do({
      # First / last date and number of rows for this station.
      rng <- range(.$date)
      dat <- data.frame(start_date = rng[1],
                        end_date = rng[2],
                        total_obs = nrow(.), stringsAsFactors=FALSE)
      if (verbose) cat(sprintf("Station Id: %s\n", .$id[1]))
      if (verbose) cat(sprintf("\n Date range for observations: %s\n\n",
                               paste0(as.character(rng), sep="", collapse=" to ")))
      if (verbose) cat(sprintf(" Total number of observations: %s\n\n",
                               scales::comma(nrow(.))))
      # Every column other than `id` and `date` gets a completeness score.
      meteo_cols <- dplyr::setdiff(colnames(.), c("id", "date"))
      # For each such column: share of non-NA values. `n` (the length of the
      # longest column name) is only used to pad the verbose output.
      col_cov <- lapply(meteo_cols, function(x, n) {
        if (verbose) cat(sprintf(" Column %s completeness: %5s\n",
                                 formatC(sprintf("'%s'", x), width = (n+2)),
                                 scales::percent(sum(!is.na(.[,x])) / nrow(.))))
        sum(!is.na(.[,x])) / nrow(.)
      }, max(vapply(colnames(.), nchar, numeric(1), USE.NAMES=FALSE)))
      if (verbose) cat("\n")
      # One-row data.frame of shares, named after the source columns, appended
      # to the date-range / count columns.
      col_cov <- setNames(cbind.data.frame(col_cov, stringsAsFactors=FALSE), meteo_cols)
      dplyr::bind_cols(dat, col_cov)
    }) -> out
  # Tag the result so it can be recognised as meteo_coverage output.
  class(out) <- c("meteo_coverage", class(out))
  # The summary was already printed when verbose, so return invisibly then.
  if (verbose) return(invisible(out))
  out
}

#' @export
#' @rdname meteo_coverage
autoplot.meteo_coverage <- function(df) {

  # Panel 1 ("ggtime"): one segment per station running from start_date to
  # end_date; stations are ordered via reorder(id, start_date) and the axes
  # are flipped so the segments are drawn horizontally.
  gg <- ggplot2::ggplot(df)
  gg <- gg + ggplot2::geom_segment(data = df,
                                   ggplot2::aes_(x = ~ stats::reorder(id, start_date),
                                                 xend = ~ stats::reorder(id, start_date),
                                                 y = ~ start_date, yend = ~ end_date))
  gg <- gg + ggplot2::scale_x_discrete(expand = c(0, 0.25))
  # gg <- gg + ggplot2::scale_y_datetime(expand = c(0, 0))
  gg <- gg + ggplot2::coord_flip()
  gg <- gg + ggplot2::labs(x = NULL, y = NULL, title = "Time coverage by station")
  gg <- gg + ggplot2::theme_bw(base_family = "Arial Narrow")
  gg <- gg + ggplot2::theme(panel.grid = ggplot2::element_line(color="#b2b2b2", size=0.1))
  gg <- gg + ggplot2::theme(panel.grid.major.x = ggplot2::element_line(color = "#b2b2b2", size = 0.1))
  gg <- gg + ggplot2::theme(panel.grid.major.y = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(panel.grid.minor = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(panel.border = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(axis.ticks = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(plot.title = ggplot2::element_text(margin = ggplot2::margin(b = 12)))
  ggtime <- gg

  # Drop the date-range / count columns and reshape the remaining coverage
  # columns to long form (one row per station x variable).
  df_reduced <- dplyr::select_(df, .dots = c('-start_date', '-end_date',
                                             '-total_obs'))
  # Gathers every column except the first -- presumably `id`; TODO confirm
  # the column order produced by meteo_coverage().
  df_long <- tidyr::gather_(df_reduced,
                            key_col = "observation", value_col = "value",
                            gather_cols = colnames(df_reduced[-1]))

  # Panel 2 ("ggobs"): one facet per station, one segment per variable running
  # from 0 to that variable's coverage value.
  gg <- ggplot2::ggplot(df_long)
  gg <- gg + ggplot2::geom_segment(ggplot2::aes_(x = 0, xend = ~ value,
                                                 y = ~ observation, yend = ~ observation,
                                                 group = ~ id))
  #gg <- gg + ggplot2::scale_x_continuous(labels = percent, limits = c(0, 1))
  gg <- gg + ggplot2::facet_wrap(~id, scales = "free_x")
  gg <- gg + ggplot2::labs(x = NULL, y = NULL, title = "Observation coverage by station")
  gg <- gg + ggplot2::theme_bw(base_family = "Arial Narrow")
  gg <- gg + ggplot2::theme(panel.grid = ggplot2::element_line(color = "#b2b2b2",
                                                               size = 0.1))
  gg <- gg + ggplot2::theme(panel.grid.major.x = ggplot2::element_line(color = "#b2b2b2",
                                                                       size = 0.1))
  gg <- gg + ggplot2::theme(panel.grid.major.y = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(panel.grid.minor = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(panel.border = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(axis.ticks = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(plot.title = ggplot2::element_text(margin =
                                                                 ggplot2::margin(b = 12)))
  gg <- gg + ggplot2::theme(strip.background = ggplot2::element_blank())
  gg <- gg + ggplot2::theme(strip.text = ggplot2::element_text(hjust = 0))
  gg <- gg + ggplot2::theme(panel.margin.x = grid::unit(12, "pt"))
  gg <- gg + ggplot2::theme(panel.margin.y = grid::unit(8, "pt"))
  gg <- gg + ggplot2::theme(plot.margin = ggplot2::margin(t = 30, b = 5, l = 20, r = 20))
  ggobs <- gg

  # Stack the two panels in a single column with a 40% / 60% height split.
  gridExtra::grid.arrange(ggtime, ggobs, ncol=1, heights=c(0.4, 0.6))

}

# diff --git a/R/rnoaa-package.r b/R/rnoaa-package.r
# index 27e90c35..097cb697 100644
# --- a/R/rnoaa-package.r
# +++ b/R/rnoaa-package.r
# @@ -36,6 +36,18 @@
# #' functions. You'll get an informative error telling you to install \code{ncdf4}
# #' if you don't have it and you try to use the buoy functions.
#' + +#' @section The \code{meteo} family of functions: +#' +#' The \code{meteo} family of functions are prefixed with \code{meteo_} and provide +#' a set of helper functions to: +#' +#' \itemize{ +#' \item Identify candidate stations from a latitude/longitude pair +#' \item Retrieve complete data for one or more stations (\code{meteo_coverage()}) +#' } +#' + #' @importFrom methods is #' @importFrom stats var setNames complete.cases #' @importFrom utils head download.file read.csv read.delim read.fwf read.table @@ -43,13 +55,15 @@ #' @importFrom lubridate ymd year today month #' @importFrom scales date_breaks date_format #' @importFrom ggplot2 ggplot aes facet_wrap theme theme_bw geom_line labs -#' guides guide_legend fortify scale_x_date element_blank +#' guides guide_legend fortify scale_x_date scale_x_datetime element_blank #' @importFrom httr GET add_headers content warn_for_status stop_for_status #' write_disk parse_url build_url http_status #' @importFrom XML xpathSApply xpathApply xmlValue xmlParse xmlToList htmlParse #' @importFrom jsonlite fromJSON #' @importFrom tidyr gather -#' @importFrom dplyr %>% select mutate rename tbl_df filter bind_rows as_data_frame contains +#' @importFrom rappdirs user_cache_dir +#' @importFrom gridExtra grid.arrange +#' @importFrom dplyr %>% rbind_all select mutate rename tbl_df filter bind_rows as_data_frame contains #' @name rnoaa-package #' @aliases rnoaa #' @docType package diff --git a/R/swdi.r b/R/swdi.r index 92eff62e..6eb4d2db 100644 --- a/R/swdi.r +++ b/R/swdi.r @@ -6,7 +6,7 @@ #' @param format File format to download. One of xml, csv, shp, or kmz. #' @param startdate Start date. See details. #' @param enddate End date. See details. -#' @param limit Number of results to return. Defaults to 25. Any number from 1 to 10000000. +#' @param limit Number of results to return. Defaults to 25. Any number from 1 to 10000000. Time out issues likely to occur at higher limits. #' @param offset Any number from 1 to 10000000. 
Default is NULL, no offset, start from 1. #' @param radius Search radius in miles (current limit is 15 miles) #' @param center Center coordinate in lon,lat decimal degree format, e.g.: c(-95.45,36.88) diff --git a/R/utils.r b/R/utils.r index be09f86e..dfbadfe4 100644 --- a/R/utils.r +++ b/R/utils.r @@ -131,3 +131,4 @@ contains <- function(vars, match, ignore.case = TRUE){ is.string <- function(x){ is.character(x) && length(x) == 1 } + diff --git a/R/vis_miss.R b/R/vis_miss.R new file mode 100644 index 00000000..8a6bac92 --- /dev/null +++ b/R/vis_miss.R @@ -0,0 +1,106 @@ +#' Visualize missingness in a dataframe +#' +#' Gives you an at-a-glance ggplot of the missingness inside a dataframe, +#' colouring cells according to missingness, where black indicates a present +#' cell and grey indicates a missing cell. As it returns a \code{ggplot} object, +#' it is very easy to customize and change labels, and so on. +#' +#' @details \code{vis_miss} visualises a data.frame to display missingness. This is +#' taken from the visdat package, currently only available on github: +#' \url{https://github.com/tierneyn/visdat} +#' +#' @param x a data.frame +#' +#' @param cluster logical TRUE/FALSE. TRUE specifies that you want to use +#' hierarchical clustering (mcquitty method) to arrange rows according to +#' missingness. FALSE specifies that you want to leave it as is. +#' @param sort_miss logical TRUE/FALSE. TRUE arranges the columns in order of +#' missingness. +#' +#' @examples \dontrun{ +#' monitors <- c("ASN00003003", "ASM00094299") +#' weather_df <- meteo_pull_monitors(monitors) +#' vis_miss(weather_df) +#' } +#' +#' @export +vis_miss <- function(x, + cluster = FALSE, + sort_miss = FALSE){ + # make a TRUE/FALSE matrix of the data. 
+ # This tells us whether it is missing (true) or not (false) + x.na <- is.na(x) + + # switch for creating the missing clustering + if (cluster == TRUE){ + + # this retrieves a row order of the clustered missingness + row_order_index <- + stats::dist(x.na*1) %>% + stats::hclust(method = "mcquitty") %>% + stats::as.dendrogram %>% + stats::order.dendrogram + } else { + row_order_index <- 1:nrow(x) + } # end else + + if (sort_miss == TRUE) { + + # arrange by the columns with the highest missingness + # code inspired from https://r-forge.r-project.org/scm/viewvc.php/pkg/R/missing.pattern.plot.R?view=markup&root=mi-dev + # get the order of columns with highest missingness + na_sort <- order(colSums(is.na(x)), decreasing = TRUE) + + # get the names of those columns + col_order_index <- names(x)[na_sort] + + # original code was a bit slower: + # + # col_order_index <- + # x.na %>% + # as.data.frame %>% + # dplyr::summarise_each(funs(mean)) %>% + # names + + } else { + + col_order_index <- names(x) + + } + + # Arranged data by dendrogram order index + + d <- x.na[row_order_index , ] %>% + as.data.frame %>% + dplyr::mutate_(rows = ~ row_number()) %>% + # gather the variables together for plotting + # here we now have a column of the row number (row), + # then the variable(variables), + # then the contents of that variable (value) + tidyr::gather_(key_col = "variables", + value_col = "valueType", + gather_cols = names(.)[-length(.)]) + + d$value <- suppressWarnings(tidyr::gather_(x, "variables", "value", + names(x))$value) + + # then we plot it + ggplot(data = d, + ggplot2::aes_string(x = "variables", + y = "rows", + # text assists with plotly mouseover + text = "value")) + + ggplot2::geom_raster(ggplot2::aes_string(fill = "valueType")) + + # change the colour, so that missing is grey, present is black + ggplot2::scale_fill_grey(name = "", labels = c("Present", "Missing")) + + ggplot2::theme_minimal() + + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, + vjust 
= 1, + hjust = 1)) + + ggplot2::labs(x = "Variables in Data", + y = "Observations") + + ggplot2::scale_x_discrete(limits = col_order_index) + # Thanks to http://www.markhneedham.com/blog/2015/02/27/rggplot-controlling-x-axis-order/ + # For the tip on using scale_x_discrete + +} diff --git a/man/deg2rad.Rd b/man/deg2rad.Rd new file mode 100644 index 00000000..1f4e19ca --- /dev/null +++ b/man/deg2rad.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_distance.R +\name{deg2rad} +\alias{deg2rad} +\title{Convert from degrees to radians} +\usage{ +deg2rad(deg) +} +\arguments{ +\item{deg}{A numeric vector in units of degrees.} +} +\value{ +The input numeric vector, converted to units of radians. +} +\description{ +Convert from degrees to radians +} + diff --git a/man/ghcnd.Rd b/man/ghcnd.Rd index 1041d2ea..3f3074d3 100644 --- a/man/ghcnd.Rd +++ b/man/ghcnd.Rd @@ -2,76 +2,49 @@ % Please edit documentation in R/ghcnd.R \name{ghcnd} \alias{ghcnd} -\alias{ghcnd_countries} -\alias{ghcnd_search} -\alias{ghcnd_splitvars} -\alias{ghcnd_states} -\alias{ghcnd_stations} -\alias{ghcnd_version} -\title{Get GHCND daily data from NOAA FTP server} +\title{Get all GHCND data from a single weather site} \usage{ ghcnd(stationid, path = "~/.rnoaa/ghcnd", ...) - -ghcnd_search(stationid, date_min = NULL, date_max = NULL, var = "all", - path = "~/.rnoaa/ghcnd", ...) - -ghcnd_splitvars(x) - -ghcnd_stations(..., n = 10) - -ghcnd_states(...) - -ghcnd_countries(...) - -ghcnd_version(...) } \arguments{ -\item{stationid}{Stationid to get} - -\item{path}{(character) A path to store the files, Default: \code{~/.rnoaa/isd}} - -\item{...}{Curl options passed on to \code{\link[httr]{GET}}} - -\item{date_min, date_max}{(character) Minimum and maximum dates. Use together to get a -date range} +\item{stationid}{A character string giving the identification of the weather +station for which the user would like to pull data. 
To get a full and +current list of stations, the user can use the \code{\link{ghcnd_stations}} +function. To identify stations within a certain radius of a location, the +user can use the \code{\link{meteo_nearby_stations}} function.} -\item{var}{(character) Variable to get, defaults to "all", which gives back all variables -in a list. To see what variables are available for a dataset, look at the dataset returned -from \code{ghcnd()}.} +\item{path}{A character vector giving the path to the directory to cache +the files locally. By default, the function uses \code{~/.rnoaa/isd}.} -\item{x}{Input object to print methods. For \code{ghcnd_splitvars()}, the output of a call -to \code{ghcnd()}.} - -\item{n}{Number of rows to print} +\item{...}{Additional curl options to pass through to \code{\link[httr]{GET}}.} } -\description{ -Get GHCND daily data from NOAA FTP server +\value{ +A list object with a single slot, \code{data}, which contains the + dataframe pulled from NOAA's FTP for the queried weather site. A README + file with more information about the format of this file is available + from NOAA (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). + This file is formatted so each line of the file gives the daily weather + observations for a single weather variable for all days of one month of + one year. In addition to measurements, columns are included for certain + flags, which add information on observation sources and quality and are + further explained in NOAA's README file for the data. 
} -\details{ -Functions: -\itemize{ - \item \code{ghcnd_version} - Get current version of GHCND data - \item \code{ghcnd_stations} - Get GHCND stations and their metadata - \item \code{ghcnd_states} - Get US/Canada state names and 2-letter codes - \item \code{ghcnd_countries} - Get country names and 2-letter codes - \item \code{ghcnd_search} - Search GHCND data - \item \code{ghcnd} - Get GHCND data - \item \code{ghcnd_splitvars} - Split variables in data returned from \code{ghcnd} - \item \code{ghcnd_clear_cache} - Clear cache of locally stored files +\description{ +This function uses ftp to access the Global Historical Climatology Network +daily weather data from NOAA's FTP server for a single weather site. It +requires the site identification number for that site and will pull the +entire weather dataset for the site. } +\note{ +This function saves the full set of weather data for the queried +site locally in the directory specified by the \code{path} argument. } \examples{ \dontrun{ -# Get metadata -ghcnd_states() -ghcnd_countries() -ghcnd_version() - -# Get stations, ghcnd-stations and ghcnd-inventory merged -(stations <- ghcnd_stations()) - # Get data ghcnd(stationid = "AGE00147704") + +stations <- ghcnd_stations() ghcnd(stations$data$id[40]) ghcnd(stations$data$id[4000]) ghcnd(stations$data$id[10000]) @@ -79,7 +52,7 @@ ghcnd(stations$data$id[80000]) ghcnd(stations$data$id[80300]) library("dplyr") -ghcnd(stations$data$id[80300])$data \%>\% select(id, element) \%>\% head +ghcnd(stations$data$id[80300])$data \%>\% select(id, element) \%>\% slice(1:3) # manipulate data ## using built in fxns @@ -94,19 +67,20 @@ dat <- ghcnd(stationid="AGE00147704") dat$data \%>\% filter(element == "PRCP", year == 1909) -# Search based on variable and/or date -ghcnd_search("AGE00147704", var = "PRCP") -ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01") -ghcnd_search("AGE00147704", var = "PRCP", date_max = "1915-01-01") -ghcnd_search("AGE00147704", var = "PRCP", 
date_min = "1920-01-01", date_max = "1925-01-01") -ghcnd_search("AGE00147704", date_min = "1920-01-01", date_max = "1925-01-01") -ghcnd_search("AGE00147704", var = c("PRCP","TMIN")) -ghcnd_search("AGE00147704", var = c("PRCP","TMIN"), date_min = "1920-01-01") -ghcnd_search("AGE00147704", var="adfdf") } + } \author{ Scott Chamberlain \email{myrmecocystus@gmail.com}, Adam Erickson \email{adam.erickson@ubc.ca} } +\seealso{ +To generate a weather dataset for a single weather site that has been +cleaned to a tidier weather format, the user should use the +\code{\link{ghcnd_search}} function, which calls \code{\link{ghcnd}} and then +processes the output, or \code{\link{meteo_tidy_ghcnd}}, which wraps the +\code{\link{ghcnd_search}} function to output a tidy dataframe rather than a +list object. To pull GHCND data from multiple monitors, see +\code{\link{meteo_pull_monitors}}. +} diff --git a/man/ghcnd_search.Rd b/man/ghcnd_search.Rd new file mode 100644 index 00000000..80293f7c --- /dev/null +++ b/man/ghcnd_search.Rd @@ -0,0 +1,98 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ghcnd.R +\name{ghcnd_search} +\alias{ghcnd_search} +\title{Get a cleaned version of GHCND data from a single weather site} +\usage{ +ghcnd_search(stationid, date_min = NULL, date_max = NULL, var = "all", + path = "~/.rnoaa/ghcnd", ...) +} +\arguments{ +\item{stationid}{A character string giving the identification of the weather +station for which the user would like to pull data. To get a full and +current list of stations, the user can use the \code{\link{ghcnd_stations}} +function. To identify stations within a certain radius of a location, the +user can use the \code{\link{meteo_nearby_stations}} function.} + +\item{date_min}{A character string giving the earliest +date of the daily weather time series that the user would +like in the final output. This character string should be formatted as +"yyyy-mm-dd". 
If not specified, the default is to keep all daily data for +the queried weather site from the earliest available date.} + +\item{date_max}{A character string giving the latest +date of the daily weather time series that the user would +like in the final output. This character string should be formatted as +"yyyy-mm-dd". If not specified, the default is to keep all daily data for +the queried weather site through the most current available date.} + +\item{var}{A character vector specifying either \code{"all"} (pull all +available weather parameters for the site) or the weather parameters to +keep in the final data (e.g., \code{c("TMAX", "TMIN")} to only keep +maximum and minimum temperature). Example choices for this argument include: +\itemize{ +\item \code{PRCP}: Precipitation, in tenths of millimeters +\item \code{TAVG}: Average temperature, in tenths of degrees Celsius +\item \code{TMAX}: Maximum temperature, in tenths of degrees Celsius +\item \code{TMIN}: Minimum temperature, in tenths of degrees Celsius +} +A full list of possible weather variables is available in NOAA's README +file for the GHCND data (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). +Most weather stations will only have a small subset of all the possible +weather variables, so the data generated by this function may not include +all of the variables the user specifies through this argument.} + +\item{path}{A character vector giving the path to the directory to cache +the files locally. By default, the function uses \code{~/.rnoaa/ghcnd}.} + +\item{...}{Additional curl options to pass through to \code{\link[httr]{GET}}.} +} +\value{ +A list object with slots for each of the available specified + weather variables. Each element in the list is a separate time series + dataframe with daily observations, as well as flag values, for one of + the weather variables.
The flag values give information on the quality + and source of each observation; see the NOAA README file linked above + for more information. +} +\description{ +This function uses ftp to access the Global Historical Climatology Network +daily weather data from NOAA's FTP server for a single weather monitor site. It +requires the site identification number for that site and will pull the +entire weather dataset for the site. It will then clean this data to convert +it to a tidier format and will also, if requested, filter it to a certain +date range and to certain weather variables. +} +\note{ +This function calls \code{\link{ghcnd}}, which will download and save + data from all available dates and weather variables for the queried + weather station. The step of limiting the dataset to only certain dates + and / or weather variables, using the \code{date_min}, \code{date_max}, + and \code{var} arguments, does not occur until after the full data has + been pulled. +} +\examples{ +\dontrun{ + +# Search based on variable and/or date +ghcnd_search("AGE00147704", var = "PRCP") +ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01") +ghcnd_search("AGE00147704", var = "PRCP", date_max = "1915-01-01") +ghcnd_search("AGE00147704", var = "PRCP", date_min = "1920-01-01", + date_max = "1925-01-01") +ghcnd_search("AGE00147704", date_min = "1920-01-01", date_max = "1925-01-01") +ghcnd_search("AGE00147704", var = c("PRCP","TMIN")) +ghcnd_search("AGE00147704", var = c("PRCP","TMIN"), date_min = "1920-01-01") +ghcnd_search("AGE00147704", var = "adfdf") + +} + +} +\author{ +Scott Chamberlain \email{myrmecocystus@gmail.com}, +Adam Erickson \email{adam.erickson@ubc.ca} +} +\seealso{ +\code{\link{meteo_pull_monitors}}, \code{\link{meteo_tidy_ghcnd}} +} + diff --git a/man/ghcnd_splitvars.Rd b/man/ghcnd_splitvars.Rd new file mode 100644 index 00000000..8ec45c20 --- /dev/null +++ b/man/ghcnd_splitvars.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please 
edit documentation in R/ghcnd.R +\name{ghcnd_splitvars} +\alias{ghcnd_splitvars} +\title{Split variables in data returned from \code{ghcnd}} +\usage{ +ghcnd_splitvars(x) +} +\arguments{ +\item{x}{An object returned from \code{\link{ghcnd}}.} +} +\description{ +This function is a helper function for \code{\link{ghcnd_search}}. It helps +with cleaning up the data returned from \code{\link{ghcnd}}, to get it in a +format that is easier to work with using R. +} +\author{ +Scott Chamberlain \email{myrmecocystus@gmail.com}, +Adam Erickson \email{adam.erickson@ubc.ca} +} + diff --git a/man/ghcnd_states.Rd b/man/ghcnd_states.Rd new file mode 100644 index 00000000..3661d027 --- /dev/null +++ b/man/ghcnd_states.Rd @@ -0,0 +1,45 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ghcnd.R +\name{ghcnd_states} +\alias{ghcnd_countries} +\alias{ghcnd_states} +\alias{ghcnd_version} +\title{Get meta-data on the GHCND daily data} +\usage{ +ghcnd_states(...) + +ghcnd_countries(...) + +ghcnd_version(...) +} +\arguments{ +\item{...}{Additional curl options to pass through to \code{\link[httr]{GET}}.} +} +\description{ +These functions allow you to pull the current versions of certain meta-datasets +for the GHCND, including lists of country and state abbreviations used in some +of the weather station IDs and information about the current version of the +data.
+} +\details{ +Functions: +\itemize{ + \item \code{ghcnd_version}: Get current version of GHCND data + \item \code{ghcnd_states}: Get US/Canada state names and 2-letter codes + \item \code{ghcnd_countries}: Get country names and 2-letter codes +} +} +\examples{ +\dontrun{ +# Get metadata +ghcnd_states() +ghcnd_countries() +ghcnd_version() +} + +} +\author{ +Scott Chamberlain \email{myrmecocystus@gmail.com}, +Adam Erickson \email{adam.erickson@ubc.ca} +} + diff --git a/man/ghcnd_stations.Rd b/man/ghcnd_stations.Rd new file mode 100644 index 00000000..d3b2d122 --- /dev/null +++ b/man/ghcnd_stations.Rd @@ -0,0 +1,58 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ghcnd.R +\name{ghcnd_stations} +\alias{ghcnd_stations} +\title{Get information on the GHCND weather stations} +\usage{ +ghcnd_stations(...) +} +\arguments{ +\item{...}{Additional curl options to pass through to \code{\link[httr]{GET}}.} +} +\value{ +This function returns a list with a single element-- a dataframe with + a weather station on each row with the following columns: + \itemize{ + \item \code{id}: The weather station's ID number. The first two letters + denote the country (using FIPS country codes). + \item \code{latitude}: The station's latitude, in decimal degrees. + Southern latitudes will be negative. + \item \code{longitude}: The station's longitude, in decimal degrees. + Western longitudes will be negative. + \item \code{elevation}: The station's elevation, in meters. + \item \code{name}: The station's name. + \item \code{gsn_flag}: "GSN" if the monitor belongs to the GCOS Surface + Network (GSN). Otherwise either blank or missing. + \item \code{wmo_id}: If the station has a WMO number, this column gives + that number. Otherwise either blank or missing. + \item \code{element}: A weather variable recorded at some point during + that station's history. See the link below in "References" for definitions + of the abbreviations used for this variable. 
+ \item \code{first_year}: The first year of data available at that station + for that weather element. + \item \code{last_year}: The last year of data available at that station + for that weather element. + } + If a weather station has data on more than one weather variable, it will + be represented in multiple rows of this output dataframe. +} +\description{ +This function returns an object with a dataframe with meta-information about +all available GHCND weather stations. +} +\note{ +Since this function is pulling a large dataset by ftp, it may take +a while to run. +} +\examples{ +\dontrun{ +# Get stations, ghcnd-stations and ghcnd-inventory merged +stations <- ghcnd_stations() +} + +} +\references{ +For more documentation on the returned dataset, see +\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}. +} + diff --git a/man/meteo_clear_cache.Rd b/man/meteo_clear_cache.Rd new file mode 100644 index 00000000..db8b51e0 --- /dev/null +++ b/man/meteo_clear_cache.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_cache.r +\name{meteo_clear_cache} +\alias{meteo_clear_cache} +\title{Clear \emph{meteo} cached files} +\usage{ +meteo_clear_cache() +} +\description{ +The \emph{meteo} functions use an application cache directory for their cached files. +} +\note{ +This function will clear all cached \emph{meteo} files.
+} +\seealso{ +Other meteo: \code{\link{meteo_show_cache}} +} + diff --git a/man/meteo_coverage.Rd b/man/meteo_coverage.Rd new file mode 100644 index 00000000..cde95689 --- /dev/null +++ b/man/meteo_coverage.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_utils.r +\name{meteo_coverage} +\alias{autoplot.meteo_coverage} +\alias{meteo_coverage} +\title{Determine the "coverage" for a station data frame} +\usage{ +meteo_coverage(meteo_df, obs_start_date = NULL, obs_end_date = NULL, + verbose = FALSE) + +autoplot.meteo_coverage(df) +} +\arguments{ +\item{meteo_df}{a \emph{meteo} \code{data.frame}} + +\item{obs_start_date}{specify either or both (obs_start_date, obs_end_date) to constrain +coverage tests. These should be \code{Date} objects.} + +\item{obs_end_date}{specify either or both (obs_start_date, obs_end_date) to constrain +coverage tests. These should be \code{Date} objects.} + +\item{verbose}{if \code{TRUE} will display the coverage summary along +with returning the coverage data.frame} + +\item{df}{The dataframe resulting from a call to \code{meteo_coverage}, +used as an input to \code{autoplot.meteo_coverage}} +} +\value{ +a \code{data.frame} with the coverage for each station, minimally +containing: \preformatted{ +$ id (chr) +$ start_date (time) +$ end_date (time) +$ total_obs (int) +} +with additional fields (and their coverage percent) depending on which +weather variables were queried and available for the weather station. +} +\description{ +Call this function after pulling down observations for a set of stations +to retrieve the "coverage" (i.e. how complete each field is). If either +or both \code{obs_start_date} or \code{obs_end_date} are specified, +the coverage test will be limited to that date range. +} +\details{ +There is an \code{autoplot} method for the output of this function.
+} +\examples{ +\dontrun{ + +monitors <- c("ASN00095063", "ASN00024025", "ASN00040112", "ASN00041023", + "ASN00009998", "ASN00066078", "ASN00003069", "ASN00090162", + "ASN00040126", "ASN00058161") +obs <- meteo_pull_monitors(monitors) +obs_covr <- meteo_coverage(obs) +autoplot.meteo_coverage(obs_covr) + +} +} + diff --git a/man/meteo_distance.Rd b/man/meteo_distance.Rd new file mode 100644 index 00000000..9ed5df0c --- /dev/null +++ b/man/meteo_distance.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_distance.R +\name{meteo_distance} +\alias{meteo_distance} +\title{Find all monitors within a radius of a location} +\usage{ +meteo_distance(station_data, lat, long, units = "deg", radius = NULL, + limit = NULL) +} +\arguments{ +\item{station_data}{The output of \code{ghcnd_stations()[[1]]}, which is +a current list of weather stations available through NOAA for the GHCND +dataset. The format of this is a dataframe +with one row per weather station. Latitude and longitude for the station +locations should be in columns with the names "latitude" and "longitude", +consistent with the output from \code{ghcnd_stations()[[1]]}. To save time, run the +\code{ghcnd_stations} call and save the output to an object, rather than +rerunning the default every time (see the examples in +\code{\link{meteo_nearby_stations}}).} + +\item{lat}{Latitude of the location. Southern latitudes should be given +as negative values.} + +\item{long}{Longitude of the location. Western longitudes should be given as +negative values.} + +\item{units}{Units of the latitude and longitude values. Possible values +are: +\itemize{ +\item \code{deg}: Degrees (default); +\item \code{rad}: Radians. +}} + +\item{radius}{A numeric vector giving the radius (in kilometers) within which +to search for monitors near a location.} + +\item{limit}{An integer giving the maximum number of monitors to include for +each location. 
The [x] closest monitors will be kept. Default is NULL +(pull everything available, within the radius if the radius is specified).} +} +\value{ +A dataframe of weather stations near the location. This is the + single-location version of the return value for + \code{\link{meteo_nearby_stations}}. +} +\description{ +This function will identify all weather stations within a specified radius of +a location. If no radius is given, the function will return a dataframe +of all available monitors, sorted by distance to the location. The +\code{limit} argument can be used to limit the output dataframe to the [x] +closest monitors to the location. +} +\author{ +Alex Simmons \email{a2.simmons@qut.edu.au}, + Brooke Anderson \email{brooke.anderson@colostate.edu} +} + diff --git a/man/meteo_nearby_stations.Rd b/man/meteo_nearby_stations.Rd new file mode 100644 index 00000000..7102238a --- /dev/null +++ b/man/meteo_nearby_stations.Rd @@ -0,0 +1,139 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_distance.R +\name{meteo_nearby_stations} +\alias{meteo_nearby_stations} +\title{Find weather monitors near locations} +\usage{ +meteo_nearby_stations(lat_lon_df, lat_colname = "latitude", + lon_colname = "longitude", station_data = ghcnd_stations()[[1]], + var = "all", year_min = NULL, year_max = NULL, radius = NULL, + limit = NULL) +} +\arguments{ +\item{lat_lon_df}{A dataframe that contains the latitude, longitude, and +a unique identifier for each location (\code{id}). For an example of the +proper format for this dataframe, see the examples below. Latitude and +longitude must both be in units of decimal degrees.
Southern latitudes +and Western longitudes should be given as negative values.} + +\item{lat_colname}{A character string giving the name of the latitude column +in the \code{lat_lon_df} dataframe.} + +\item{lon_colname}{A character string giving the name of the longitude column +in the \code{lat_lon_df} dataframe.} + +\item{station_data}{The output of \code{ghcnd_stations()[[1]]}, which is +a current list of weather stations available through NOAA for the GHCND +dataset. The format of this is a dataframe +with one row per weather station. Latitude and longitude for the station +locations should be in columns with the names "latitude" and "longitude", +consistent with the output from \code{ghcnd_stations()[[1]]}. To save time, run the +\code{ghcnd_stations} call and save the output to an object, rather than +rerunning the default every time (see the examples in +\code{\link{meteo_nearby_stations}}).} + +\item{var}{A character vector specifying either \code{"all"} (pull all +available weather parameters for the site) or the weather parameters to +keep in the final data (e.g., \code{c("TMAX", "TMIN")} to only keep +maximum and minimum temperature). Example choices for this argument include: +\itemize{ +\item \code{PRCP}: Precipitation, in tenths of millimeters +\item \code{TAVG}: Average temperature, in tenths of degrees Celsius +\item \code{TMAX}: Maximum temperature, in tenths of degrees Celsius +\item \code{TMIN}: Minimum temperature, in tenths of degrees Celsius +} +A full list of possible weather variables is available in NOAA's README +file for the GHCND data (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). 
+Most weather stations will only have a small subset of all the possible +weather variables, so the data generated by this function may not include +all of the variables the user specifies through this argument.} + +\item{year_min}{A numeric value giving the earliest year from which you +ultimately want weather data (e.g., 2013, if you only are interested in +data from 2013 and later).} + +\item{year_max}{A numeric value giving the latest year from which you +ultimately want weather data.} + +\item{radius}{A numeric vector giving the radius (in kilometers) within which +to search for monitors near a location.} + +\item{limit}{An integer giving the maximum number of monitors to include for +each location. The [x] closest monitors will be kept. Default is NULL +(pull everything available, within the radius if the radius is specified).} +} +\value{ +A list containing dataframes with the sets of unique weather stations within + the search radius for each location. Site IDs for the weather stations + given in this dataframe can be used in conjunction with other functions in the + \code{rnoaa} package to pull weather data for the station. The dataframe + for each location includes: + \itemize{ + \item \code{id}: The weather station ID, which can be used in other + functions to pull weather data from the station; + \item \code{name}: The weather station name; + \item \code{latitude}: The station's latitude, in decimal degrees. Southern + latitudes will be negative; + \item \code{longitude}: The station's longitude, in decimal degrees. Western + longitudes will be negative; + \item \code{distance}: The station's distance, in kilometers, from the + location. + } +} +\description{ +This function inputs a dataframe with latitudes and longitudes of locations +and creates a dataframe with monitors within a certain radius of those +locations. The function can also be used, with the \code{limit} argument, to pull +a certain number of the closest weather monitors to each location. 
+The weather monitor IDs in the output dataframe can be used with other +\code{rnoaa} functions to pull data from all available weather stations near +a location (e.g., \code{\link{meteo_pull_monitors}}). +} +\details{ +Great circle distance is used to determine whether a weather monitor is +within the required radius. +} +\note{ +By default, this function will pull the full station list from NOAA + to use to identify nearby locations. If you will be creating lists of + monitors nearby several stations, you can save some time by using the + \code{\link{ghcnd_stations}} function separately to create an object + with all stations and then use the argument \code{station_data} in + this function to reference that object, rather than using this function's + defaults (see examples). +} +\examples{ +\dontrun{ + +station_data <- ghcnd_stations()[[1]] # Takes a while to run + +lat_lon_df <- data.frame(id = c("sydney", "brisbane"), + latitude = c(-33.8675, -27.4710), + longitude = c(151.2070, 153.0234)) +nearby_stations <- meteo_nearby_stations(lat_lon_df = lat_lon_df, + station_data = station_data, radius = 10) + +miami <- data.frame(id = "miami", latitude = 25.7617, longitude = -80.1918) + +# Get all stations within 50 kilometers +meteo_nearby_stations(lat_lon_df = miami, station_data = station_data, + radius = 50, var = c("PRCP", "TMAX"), + year_min = 1992, year_max = 1992) +# Get the closest 10 monitors +meteo_nearby_stations(lat_lon_df = miami, station_data = station_data, + limit = 10, var = c("PRCP", "TMAX"), + year_min = 1992, year_max = 1992) +} + +} +\author{ +Alex Simmons \email{a2.simmons@qut.edu.au}, + Brooke Anderson \email{brooke.anderson@colostate.edu} +} +\seealso{ +The weather monitor IDs generated by this function can be used in + other functions in the \code{rnoaa} package, like + \code{\link{meteo_pull_monitors}} and \code{\link{meteo_tidy_ghcnd}}, to + pull weather data from weather monitors near a location. 
+} + diff --git a/man/meteo_process_geographic_data.Rd b/man/meteo_process_geographic_data.Rd new file mode 100644 index 00000000..a5078ebd --- /dev/null +++ b/man/meteo_process_geographic_data.Rd @@ -0,0 +1,51 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_distance.R +\name{meteo_process_geographic_data} +\alias{meteo_process_geographic_data} +\title{Calculate the distances between a location and all available stations} +\usage{ +meteo_process_geographic_data(station_data, lat, long, units = "deg") +} +\arguments{ +\item{station_data}{The output of \code{ghcnd_stations()[[1]]}, which is +a current list of weather stations available through NOAA for the GHCND +dataset. The format of this is a dataframe +with one row per weather station. Latitude and longitude for the station +locations should be in columns with the names "latitude" and "longitude", +consistent with the output from \code{ghcnd_stations()[[1]]}. To save time, run the +\code{ghcnd_stations} call and save the output to an object, rather than +rerunning the default every time (see the examples in +\code{\link{meteo_nearby_stations}}).} + +\item{lat}{Latitude of the location. Southern latitudes should be given +as negative values.} + +\item{long}{Longitude of the location. Western longitudes should be given as +negative values.} + +\item{units}{Units of the latitude and longitude values. Possible values +are: +\itemize{ +\item \code{deg}: Degrees (default); +\item \code{rad}: Radians. +}} +} +\value{ +The \code{station_data} dataframe that is input, but with a + \code{distance} column added that gives the distance to the location + (in kilometers), and re-ordered by distance between each station and + the location (closest weather stations first). +} +\description{ +This function takes a single location and a dataset of available weather stations +and calculates the distance between the location and each of the stations, +using the great circle method. 
A new column is added to the dataset of +available weather stations giving the distance between each station and +the input location. The station dataset is then sorted from closest to +furthest distance to the location and returned as the function output. +} +\author{ +Alex Simmons \email{a2.simmons@qut.edu.au}, + Brooke Anderson \email{brooke.anderson@colostate.edu} +} + diff --git a/man/meteo_pull_monitors.Rd b/man/meteo_pull_monitors.Rd new file mode 100644 index 00000000..b579ba6f --- /dev/null +++ b/man/meteo_pull_monitors.Rd @@ -0,0 +1,126 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers_ghcnd.R +\name{meteo_pull_monitors} +\alias{meteo_pull_monitors} +\title{Pull GHCND weather data for multiple weather monitors} +\usage{ +meteo_pull_monitors(monitors, keep_flags = FALSE, date_min = NULL, + date_max = NULL, var = "all") +} +\arguments{ +\item{monitors}{A character vector listing the station IDs for all +weather stations the user would like to pull. To get a full and +current list of stations, the user can use the \code{\link{ghcnd_stations}} +function. To identify stations within a certain radius of a location, the +user can use the \code{\link{meteo_nearby_stations}} function.} + +\item{keep_flags}{TRUE / FALSE for whether the user would like to keep all the flags +for each weather variable. The default is to not keep the flags (FALSE). +See the note below for more information on these flags.} + +\item{date_min}{A character string giving the earliest +date of the daily weather time series that the user would +like in the final output. This character string should be formatted as +"yyyy-mm-dd". If not specified, the default is to keep all daily data for +the queried weather site from the earliest available date.} + +\item{date_max}{A character string giving the latest +date of the daily weather time series that the user would +like in the final output. This character string should be formatted as +"yyyy-mm-dd". 
If not specified, the default is to keep all daily data for +the queried weather site through the most current available date.} + +\item{var}{A character vector specifying either \code{"all"} (pull all +available weather parameters for the site) or the weather parameters to +keep in the final data (e.g., \code{c("TMAX", "TMIN")} to only keep +maximum and minimum temperature). Example choices for this argument include: +\itemize{ +\item \code{PRCP}: Precipitation, in tenths of millimeters +\item \code{TAVG}: Average temperature, in tenths of degrees Celsius +\item \code{TMAX}: Maximum temperature, in tenths of degrees Celsius +\item \code{TMIN}: Minimum temperature, in tenths of degrees Celsius +} +A full list of possible weather variables is available in NOAA's README +file for the GHCND data (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). +Most weather stations will only have a small subset of all the possible +weather variables, so the data generated by this function may not include +all of the variables the user specifies through this argument.} +} +\value{ +A data frame of daily weather data for multiple weather monitors, + converted to a tidy format. All weather variables may not exist for all + weather stations. Examples of variables returned are: + \itemize{ + \item \code{id}: Character string with the weather station site id + \item \code{date}: Date of the observation + \item \code{prcp}: Precipitation, in tenths of mm + \item \code{tavg}: Average temperature, in tenths of degrees Celsius + \item \code{tmax}: Maximum temperature, in tenths of degrees Celsius + \item \code{tmin}: Minimum temperature, in tenths of degrees Celsius + \item \code{awnd}: Average daily wind speed, in meters / second + \item \code{wsfg}: Peak gust wind speed, in meters / second + } + There are other possible weather variables in the Global Historical + Climatology Network; see + \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt} for a full + list. 
If the \code{var} argument is something other than "all", then + only variables included in that argument will be included in the output + data frame. All variables are in the units specified in the linked file + (note that, in many cases, measurements are given in tenths of the units + more often used, e.g., tenths of degrees for temperature). All column names + correspond to variable names in the linked file, but with all uppercase + letters changed to lowercase. +} +\description{ +This function takes a vector of one or more weather station IDs. It will pull +the weather data from the Global Historical Climatology Network's daily +data (GHCND) for each of the stations and join them together in a single tidy +dataframe. For any weather stations that the user calls that are not +available by ftp from GHCND, the function will return a warning +giving the station ID. +} +\note{ +The weather flags, which are kept by specifying +\code{keep_flags = TRUE} are: +\itemize{ +\item \code{*_mflag}: Measurement flag, which gives some information on how + the observation was measured. +\item \code{*_qflag}: Quality flag, which gives quality information on the + measurement, like if it failed to pass certain quality checks. +\item \code{*_sflag}: Source flag. This gives some information on the + weather collection system (e.g., U.S. Cooperative Summary of the Day, + Australian Bureau of Meteorology) the weather observation comes from. +} +More information on the interpretation of these flags can be found in the +README file for the NCDC's Daily Global Historical Climatology Network's +data at \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}. + +This function converts any value of -9999 to a missing value for the + variables "prcp", "tmax", "tmin", "tavg", "snow", and "snwd". However, + for some weather observations, there still may be missing values coded + using a series of "9"s of some length. 
You will want to check your final + data to see if there are lurking missing values given with series of "9"s. + +This function may take a while to run. +} +\examples{ +\dontrun{ + +monitors <- c("ASN00003003", "ASM00094299", "ASM00094995", "ASM00094998") +all_monitors_clean <- meteo_pull_monitors(monitors) + +} + +} +\author{ +Brooke Anderson \email{brooke.anderson@colostate.edu} +} +\references{ +For more information about the data pulled with this function, see: + +Menne, M.J., I. Durre, R.S. Vose, B.E. Gleason, and T.G. Houston, 2012: +An overview of the Global Historical Climatology Network-Daily Database. +Journal of Atmospheric and Oceanic Technology, 29, 897-910, +doi:10.1175/JTECH-D-11-00103.1. +} + diff --git a/man/meteo_show_cache.Rd b/man/meteo_show_cache.Rd new file mode 100644 index 00000000..0ec761ec --- /dev/null +++ b/man/meteo_show_cache.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_cache.r +\name{meteo_show_cache} +\alias{meteo_show_cache} +\title{Show the \emph{meteo} cache directory} +\usage{ +meteo_show_cache() +} +\description{ +Displays the full path to the \code{meteo} cache directory +} +\seealso{ +Other meteo: \code{\link{meteo_clear_cache}} +} + diff --git a/man/meteo_spherical_distance.Rd b/man/meteo_spherical_distance.Rd new file mode 100644 index 00000000..ff9b92a4 --- /dev/null +++ b/man/meteo_spherical_distance.Rd @@ -0,0 +1,46 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/meteo_distance.R +\name{meteo_spherical_distance} +\alias{meteo_spherical_distance} +\title{Calculate the distance between two locations} +\usage{ +meteo_spherical_distance(lat1, long1, lat2, long2, units = "deg") +} +\arguments{ +\item{lat1}{Latitude of the first location.} + +\item{long1}{Longitude of the first location.} + +\item{lat2}{Latitude of the second location.} + +\item{long2}{Longitude of the second location.} + +\item{units}{Units of the latitude and 
longitude values. Possible values +are: +\itemize{ +\item \code{deg}: Degrees (default); +\item \code{rad}: Radians. +}} +} +\value{ +A numeric value giving the distance (in kilometers) between the + pair of locations. +} +\description{ +This function uses the haversine formula to calculate the great circle +distance between two locations, identified by their latitudes and longitudes. +} +\note{ +This function assumes an earth radius of 6,371 km. +} +\examples{ + +meteo_spherical_distance(lat1 = -27.4667, long1 = 153.0217, + lat2 = -27.4710, long2 = 153.0234) + +} +\author{ +Alex Simmons \email{a2.simmons@qut.edu.au}, + Brooke Anderson \email{brooke.anderson@colostate.edu} +} + diff --git a/man/meteo_tidy_ghcnd.Rd b/man/meteo_tidy_ghcnd.Rd new file mode 100644 index 00000000..dcaadf40 --- /dev/null +++ b/man/meteo_tidy_ghcnd.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers_ghcnd.R +\name{meteo_tidy_ghcnd} +\alias{meteo_tidy_ghcnd} +\title{Create a tidy GHCND dataset from a single monitor} +\usage{ +meteo_tidy_ghcnd(stationid, keep_flags = FALSE, var = "all", + date_min = NULL, date_max = NULL) +} +\arguments{ +\item{stationid}{A character string giving the identification of the weather +station for which the user would like to pull data. To get a full and +current list of stations, the user can use the \code{\link{ghcnd_stations}} +function. To identify stations within a certain radius of a location, the +user can use the \code{\link{meteo_nearby_stations}} function.} + +\item{keep_flags}{TRUE / FALSE for whether the user would like to keep all the flags +for each weather variable. The default is to not keep the flags (FALSE). 
+See the note below for more information on these flags.} + +\item{var}{A character vector specifying either \code{"all"} (pull all +available weather parameters for the site) or the weather parameters to +keep in the final data (e.g., \code{c("TMAX", "TMIN")} to only keep +maximum and minimum temperature). Example choices for this argument include: +\itemize{ +\item \code{PRCP}: Precipitation, in tenths of millimeters +\item \code{TAVG}: Average temperature, in tenths of degrees Celsius +\item \code{TMAX}: Maximum temperature, in tenths of degrees Celsius +\item \code{TMIN}: Minimum temperature, in tenths of degrees Celsius +} +A full list of possible weather variables is available in NOAA's README +file for the GHCND data (\url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}). +Most weather stations will only have a small subset of all the possible +weather variables, so the data generated by this function may not include +all of the variables the user specifies through this argument.} + +\item{date_min}{A character string giving the earliest +date of the daily weather time series that the user would +like in the final output. This character string should be formatted as +"yyyy-mm-dd". If not specified, the default is to keep all daily data for +the queried weather site from the earliest available date.} + +\item{date_max}{A character string giving the latest +date of the daily weather time series that the user would +like in the final output. This character string should be formatted as +"yyyy-mm-dd". If not specified, the default is to keep all daily data for +the queried weather site through the most current available date.} +} +\value{ +A data frame of daily weather data for a single weather monitor, + converted to a tidy format. All weather variables may not exist for all + weather stations. 
Examples of variables returned are: + \itemize{ + \item \code{id}: Character string with the weather station site id + \item \code{date}: Date of the observation + \item \code{prcp}: Precipitation, in mm + \item \code{tavg}: Average temperature, in degrees Celsius + \item \code{tmax}: Maximum temperature, in degrees Celsius + \item \code{tmin}: Minimum temperature, in degrees Celsius + \item \code{awnd}: Average daily wind speed, in meters / second + \item \code{wsfg}: Peak gust wind speed, in meters / second + } + There are other possible weather variables in the Global Historical + Climatology Network; see + \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt} for a full + list. The variables \code{prcp}, \code{tmax}, \code{tmin}, and \code{tavg} + have all been converted from tenths of their metric to the metric (e.g., + from tenths of degrees Celsius to degrees Celsius). All other variables + are in the units specified in the linked file. +} +\description{ +This function inputs an object created by \code{\link{ghcnd}} and cleans up +the data into a tidy form. +} +\note{ +The weather flags, which are kept by specifying +\code{keep_flags = TRUE} are: +\itemize{ +\item \code{*_mflag}: Measurement flag, which gives some information on how + the observation was measured. +\item \code{*_qflag}: Quality flag, which gives quality information on the + measurement, like if it failed to pass certain quality checks. +\item \code{*_sflag}: Source flag. This gives some information on the + weather collection system (e.g., U.S. Cooperative Summary of the Day, + Australian Bureau of Meteorology) the weather observation comes from. +} +More information on the interpretation of these flags can be found in the +README file for the NCDC's Daily Global Historical Climatology Network's +data at \url{http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt}. 
+} +\examples{ +\dontrun{ +# One station in Australia is ASN00003003 +meteo_tidy_ghcnd(stationid = "ASN00003003") +meteo_tidy_ghcnd(stationid = "ASN00003003", var = "tavg") +meteo_tidy_ghcnd(stationid = "ASN00003003", date_min = "1950-01-01") +} + +} +\author{ +Brooke Anderson \email{brooke.anderson@colostate.edu} +} +\seealso{ +\code{\link{meteo_pull_monitors}} +} + diff --git a/man/meteo_tidy_ghcnd_element.Rd b/man/meteo_tidy_ghcnd_element.Rd new file mode 100644 index 00000000..af3b425f --- /dev/null +++ b/man/meteo_tidy_ghcnd_element.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/helpers_ghcnd.R +\name{meteo_tidy_ghcnd_element} +\alias{meteo_tidy_ghcnd_element} +\title{Restructure element of ghcnd_search list} +\usage{ +meteo_tidy_ghcnd_element(x, keep_flags = FALSE) +} +\arguments{ +\item{x}{A dataframe with daily observations for a single monitor for a +single weather variable. This dataframe is one of the elements returned +by \code{\link{ghcnd_search}}.} + +\item{keep_flags}{TRUE / FALSE for whether the user would like to keep all the flags +for each weather variable. The default is to not keep the flags (FALSE). +See the note below for more information on these flags.} +} +\value{ +A dataframe reformatted to allow easy aggregation of all weather + variables for a single monitor. +} +\description{ +This function restructures a single element of the list object created +by \code{\link{ghcnd_search}}, to add a column giving the variable name +(\code{key}) and change the name of the variable column to \code{value}. +These changes facilitate combining all elements from the list created by +\code{\link{ghcnd_search}}, to create a tidy dataframe of the weather +observations from the station. 
+} +\author{ +Brooke Anderson \email{brooke.anderson@colostate.edu} +} + diff --git a/man/print.ghcnd_stations.Rd b/man/print.ghcnd_stations.Rd new file mode 100644 index 00000000..8027f159 --- /dev/null +++ b/man/print.ghcnd_stations.Rd @@ -0,0 +1,27 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ghcnd.R +\name{print.ghcnd_stations} +\alias{print.ghcnd_stations} +\title{Print out GHCND stations} +\usage{ +\method{print}{ghcnd_stations}(x, ..., n = 10) +} +\arguments{ +\item{x}{The object returned by a call to \code{\link{ghcnd_stations}}.} + +\item{...}{Other parameters to pass to \code{print}.} + +\item{n}{Number of rows of station data to print.} +} +\description{ +This function prints out, in a nice format, the object returned by +\code{\link{ghcnd_stations}}. +} +\examples{ +\dontrun{ +stations <- ghcnd_stations() +print(stations) +} + +} + diff --git a/man/rnoaa-package.Rd b/man/rnoaa-package.Rd index d975ffae..329cc008 100644 --- a/man/rnoaa-package.Rd +++ b/man/rnoaa-package.Rd @@ -45,5 +45,17 @@ this package, meaning you only need \code{ncdf4} if you are using the buoy functions. You'll get an informative error telling you to install \code{ncdf4} if you don't have it and you try to use the buoy functions. } + +\section{The \code{meteo} family of functions}{ + + +The \code{meteo} family of functions are prefixed with \code{meteo_} and provide +a set of helper functions to: + +\itemize{ + \item Identify candidate stations from a latitude/longitude pair + \item Retrieve complete data for one or more stations (\code{meteo_coverage()}) +} +} \keyword{package} diff --git a/man/swdi.Rd b/man/swdi.Rd index 0a73745e..ed1b7913 100644 --- a/man/swdi.Rd +++ b/man/swdi.Rd @@ -18,7 +18,7 @@ swdi(dataset = NULL, format = "xml", startdate = NULL, enddate = NULL, \item{enddate}{End date. See details.} -\item{limit}{Number of results to return. Defaults to 25. Any number from 1 to 10000000.} +\item{limit}{Number of results to return. 
Defaults to 25. Any number from 1 to 10000000. Time out issues likely to occur at higher limits.} \item{offset}{Any number from 1 to 10000000. Default is NULL, no offset, start from 1.} diff --git a/man/vis_miss.Rd b/man/vis_miss.Rd new file mode 100644 index 00000000..29b5cb7c --- /dev/null +++ b/man/vis_miss.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/vis_miss.R +\name{vis_miss} +\alias{vis_miss} +\title{Visualize missingness in a dataframe} +\usage{ +vis_miss(x, cluster = FALSE, sort_miss = FALSE) +} +\arguments{ +\item{x}{a data.frame} + +\item{cluster}{logical TRUE/FALSE. TRUE specifies that you want to use +hierarchical clustering (mcquitty method) to arrange rows according to +missingness. FALSE specifies that you want to leave it as is.} + +\item{sort_miss}{logical TRUE/FALSE. TRUE arranges the columns in order of +missingness.} +} +\description{ +Gives you an at-a-glance ggplot of the missingness inside a dataframe, +colouring cells according to missingness, where black indicates a present +cell and grey indicates a missing cell. As it returns a \code{ggplot} object, +it is very easy to customize and change labels, and so on. +} +\details{ +\code{vis_miss} visualises a data.frame to display missingness. 
This is +taken from the visdat package, currently only available on github: +\url{https://github.com/tierneyn/visdat} +} +\examples{ +\dontrun{ + monitors <- c("ASN00003003", "ASM00094299") + weather_df <- meteo_pull_monitors(monitors) + vis_miss(weather_df) +} + +} + diff --git a/rnoaa.Rproj b/rnoaa.Rproj new file mode 100644 index 00000000..88ff2b5d --- /dev/null +++ b/rnoaa.Rproj @@ -0,0 +1,21 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: knitr +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/tests/testthat/test-meteo.R b/tests/testthat/test-meteo.R new file mode 100644 index 00000000..f78f5c42 --- /dev/null +++ b/tests/testthat/test-meteo.R @@ -0,0 +1,26 @@ +context("meteo") + +test_that("search for multi-monitor data", { + skip_on_cran() + + monitors <- c("ASN00003003", "ASM00094299") + search_a <- meteo_pull_monitors(monitors) + search_b <- meteo_pull_monitors(monitors, var = "PRCP") + + expect_is(search_a, "data.frame") + expect_is(search_a$prcp, "numeric") +}) + +test_that("determine monitors' data coverage", { + skip_on_cran() + monitors <- c("ASN00003003", "ASM00094299") + search_a <- meteo_pull_monitors(monitors) + obs_covr <- meteo_coverage(search_a) + + expect_is(obs_covr, "data.frame") + expect_is(obs_covr$start_date, "Date") + expect_is(obs_covr$total_obs, "integer") + expect_is(obs_covr$prcp, "numeric") + + expect_equal(NROW(obs_covr), length(monitors)) +}) diff --git a/vignettes/.build.timestamp b/vignettes/.build.timestamp new file mode 100644 index 00000000..e69de29b diff --git a/vignettes/data/measurementsDelhi.RData b/vignettes/data/measurementsDelhi.RData new file mode 100644 index 00000000..ff581120 Binary files 
/dev/null and b/vignettes/data/measurementsDelhi.RData differ diff --git a/vignettes/data/stationsDelhi.RData b/vignettes/data/stationsDelhi.RData new file mode 100644 index 00000000..faad7d30 Binary files /dev/null and b/vignettes/data/stationsDelhi.RData differ diff --git a/vignettes/rnoaa_openaq.Rmd b/vignettes/rnoaa_openaq.Rmd new file mode 100644 index 00000000..e3c0574b --- /dev/null +++ b/vignettes/rnoaa_openaq.Rmd @@ -0,0 +1,156 @@ +--- +title: "Complementing air quality data with weather data using rnoaa" +author: "Maëlle Salmon" +date: "`r Sys.Date()`" +output: rmarkdown::html_vignette +vignette: > + %\VignetteIndexEntry{air_quality_and_weather_data} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +# Introduction: getting air quality data + +This vignette aims at explaining how you can complement a data.frame with weather data using rnoaa. In this vignette we shall use air quality data from the OpenAQ platform queried with the ropenaq package, for India. Using [ropenaq](https://github.com/masalmon/ropenaq) one can get e.g. PM2.5 values over time in Delhi in March 2016. For getting all data for March we'll loop over several pages. + +First, we need to know how many measures are available for Delhi for March 2016. + +```{r, message = FALSE, warning = FALSE, eval = FALSE, echo = TRUE} +library("ropenaq") + +countMeasures <- aq_measurements(city = "Delhi", + parameter = "pm25", + date_from = "2016-03-01", + date_to = "2016-03-31", + limit = 1)$meta$found +measurementsDelhi <- NULL +for (page in 1:ceiling(countMeasures/1000)){ + measurementsDelhi <- rbind(measurementsDelhi, + aq_measurements(city = "Delhi", + parameter = "pm25", + date_from = "2016-03-01", + date_to = "2016-03-31", + limit = 1000, + page = page)$results) +} +save(measurementsDelhi, file = "data/measurementsDelhi.RData") +``` + +We filter out non-positive values. 
+ +```{r, message = FALSE, warning = FALSE, eval = TRUE, echo = TRUE} +library("dplyr") +load("data/measurementsDelhi.RData") +measurementsDelhi %>% head() %>% knitr::kable() +measurementsDelhi <- filter(measurementsDelhi, value > 0) +``` + +We now transform these data into daily data. + +```{r, message = FALSE, warning = FALSE} +# only keep stations with geographical information +measurementsDelhi <- filter(measurementsDelhi, !is.na(latitude)) +# now transform to daily data +measurementsDelhi <- measurementsDelhi %>% + mutate(day = as.Date(dateLocal)) %>% + group_by(location, day) %>% + summarize(value = mean(value), + longitude = longitude[1], + latitude = latitude[1]) %>% + ungroup() +measurementsDelhi %>% head() %>% knitr::kable() + +``` + +Air quality and weather are correlated, so one could be interested in getting a time series of, say, temperature for the same location. The OpenAQ platform itself does not provide weather data but nearly all stations have geographical coordinates. Our goal here will be to use rnoaa to complement this table with precipitation and temperature. + +# Find weather stations + +For finding the right station(s) we shall use the `meteo_nearby_stations` function. It returns a list with the weather stations near each latitude/longitude pair given as arguments, respecting the other arguments such as maximal radius, first year with data, etc. For finding stations one might have to play a bit with the parameters until there is at least one station for each location. + +Here we query stations less than 15 km from the air quality stations, with precipitation and temperature data, and with data starting from 2016. Note that the query takes a while. 
+ +```{r, eval = FALSE, echo = TRUE} +library("rnoaa") +station_data <- ghcnd_stations()[[1]] +lat_lon_df <- select(measurementsDelhi, + location, + latitude, + longitude) %>% unique() %>% + ungroup() %>% + rename(id = location) %>% + mutate(id = factor(id)) + +stationsDelhi <- meteo_nearby_stations(lat_lon_df = as.data.frame(lat_lon_df), + station_data = station_data, + radius = 15, + year_min = 2016, + var = c("TAVG", "PRCP")) +stationsDelhi <- unique(bind_rows(stationsDelhi) %>% select(- distance)) + +save(stationsDelhi, file = "data/stationsDelhi.RData") + +``` + +```{r} +load("data/stationsDelhi.RData") +stationsDelhi %>% knitr::kable() +``` + +Now let us plot the AQ and weather stations on a quick and dirty map with no legend, red for AQ stations, blue for weather stations. + +```{r, message = FALSE, warning = FALSE, fig.width = 10, fig.height = 10} +library("ggmap") +map <- get_map(location = "Delhi", zoom = 11) +ggmap(map) + + geom_point(aes(x = longitude, y = latitude), + data = stationsDelhi, col = "blue", size = 4)+ + geom_point(aes(x = longitude, y = latitude), + data = measurementsDelhi, col = "red", size = 4) +``` + + +# Query weather data for these stations + +For pulling weather data from these weather monitors, we shall use the `meteo_pull_monitors` function. + +```{r} +library("rnoaa") +monitors <- stationsDelhi$id +all_monitors_clean <- meteo_pull_monitors(monitors, + date_min = "2016-03-01", + date_max = "2016-03-31") %>% + rename(day = date, + location = id) +all_monitors_clean %>% head() %>% knitr::kable() + +``` + +Here we notice some values are not available. Therefore, we might need to go back to weather stations searching with, for instance, a larger radius. In this case let's say we're ok with the result of the search. + +# Join the two tables, thus complementing the original table + +Therefore, in this case we will bind the rows of the air quality table with the weather table. 
+ +```{r} +measurementsDelhi <- bind_rows(measurementsDelhi, all_monitors_clean) +measurementsDelhi %>% head() %>% knitr::kable() +``` + + + Now some locations are air quality locations and have only missing values in the weather columns, and some locations are weather locations and have only missing values in the air quality columns. + +We can plot the data we got. + +```{r, fig.width = 10, fig.height = 5, warning = FALSE} +data_plot <- measurementsDelhi %>% + rename(pm25 = value) %>% + select(- longitude, - latitude, - tmax, - tmin) %>% + tidyr::gather(parameter, value, pm25:prcp) + +library("ggplot2") +ggplot(data_plot) + + geom_line(aes(x = day, y = value, col = location)) + + facet_grid(parameter ~ ., scales = "free_y") + +```