Skip to content

Commit

Permalink
fix #283 further parsing in lcd fxn
Browse files Browse the repository at this point in the history
  • Loading branch information
sckott committed May 15, 2019
1 parent 0d193c4 commit 3f14352
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 8 deletions.
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export(isd_read)
export(isd_stations)
export(isd_stations_search)
export(lcd)
export(lcd_cleanup)
export(meteo_clear_cache)
export(meteo_coverage)
export(meteo_distance)
Expand Down Expand Up @@ -162,6 +163,7 @@ importFrom(scales,date_format)
importFrom(tibble,as_data_frame)
importFrom(tibble,data_frame)
importFrom(tidyr,gather)
importFrom(tidyr,separate)
importFrom(utils,download.file)
importFrom(utils,head)
importFrom(utils,read.csv)
Expand Down
104 changes: 101 additions & 3 deletions R/lcd.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
#' @export
#' @param station (character) station code, e.g., "02413099999". we will
#' allow integer/numeric passed here, but station ids can have leading
#' zeros, so it's a good idea to keep stations as character class
#' @param year (integer) year, e.g., 2017
#' zeros, so it's a good idea to keep stations as character class. required
#' @param year (integer) year, e.g., 2017. required
#' @param ... curl options passed on to [crul::verb-GET]
#' @param x result of a call to `lcd()`. required
#' @return a data.frame, with many columns, and variable rows
#' depending on how frequently data was collected in the given year
#'
Expand All @@ -16,6 +17,10 @@
#' joined together. In the next version of this package we'll try to have the
#' data cleaning done for you.
#'
#' @details `lcd_cleanup()` takes the output of `lcd()` and parses additional
#' columns that have comma separated strings into separate columns with
#' headings; so `lcd_cleanup()` adds many additional columns
#'
#' @return a data.frame with many columns. the first 10 are metadata:
#'
#' - station
Expand All @@ -31,6 +36,27 @@
#'
#' And the rest should be all data columns. See Note about data joined
#' together.
#'
#' Other groups of fields under the following:
#'
#' - wnd: wind
#' - tmp: air temperature
#' - aa: LIQUID-PRECIPITATION
#' - aj: snow depth
#' - ay: PAST-WEATHER-OBSERVATION
#' - oc: WIND-GUST-OBSERVATION
#' - dew: dew point
#' - slp: sea level pressure
#' - cig: sky condition
#' - ka: extreme air temperature
#' - ma: atmospheric pressure
#' - md: atmospheric pressure change
#' - ga: SKY-COVER-LAYER
#' - ge: SKY-CONDITION-OBSERVATION
#' - gf: SKY-CONDITION-OBSERVATION
#' - ia: GROUND-SURFACE-OBSERVATION
#' - mw: PRESENT-WEATHER-OBSERVATION
#' - rem: remarks
#'
#' @examples \dontrun{
#' lcd(station = "01338099999", year = "2017")
Expand All @@ -41,6 +67,11 @@
#'
#' # pass curl options
#' lcd(station = "02413099999", year = "2002", verbose = TRUE)
#'
#' # clean up columns
#' w <- lcd(station = "01338099999", year = "2017")
#' w
#' lcd_cleanup(w)
#' }
lcd <- function(station, year, ...) {
assert(station, c("character", "numeric", "integer"))
Expand All @@ -50,7 +81,74 @@ lcd <- function(station, year, ...) {
path <- lcd_get(station = station, year = year, ...)
tmp <- read.csv(path, header = TRUE, sep = ",", stringsAsFactors = FALSE)
names(tmp) <- tolower(names(tmp))
tibble::as_data_frame(tmp)
df <- tibble::as_tibble(tmp)
structure(df, class = c(class(df), "lcd"))
}

lcd_cols_to_clean <- c("wnd", "cig", "vis", "tmp", "dew", "slp", "aa1",
"aa2", "aj1", "ay1", "ay2", "ga1", "ga2", "ga3", "ge1", "gf1", "ia1",
"ka1", "ka2", "ma1", "md1", "mw1", "oc1", "od1", "rem", "eqd")

aa_cols <- c("aa%s_hrs", "aa%s_mm", "aa%s_code", "aa%s_qa")
aj_cols <- c("aj%s_depth_cm", "aj%s_code", "aj%s_code_qa", "aj%s_eq_water_mm",
"aj%s_eq_water_code", "aj%s_eq_water_code_qa")
ay_cols <- c("ay%s_code", "ay%s_code_qa", "ay%s_period_hrs",
"ay%s_period_hrs_qa")
ka_cols <- c("ka%s_hrs", "ka%s_min", "ka%s_cel", "ka%s_cel_qa")
ga_cols <- c("ga%s_cov", "ga%s_cov_qa", "ga%s_height_m", "ga%s_height_qa",
"ga%s_cloud_type", "ga%s_cloud_type_qa")
ge_cols <- c("ge%s_conn_cloud", "ge%s_vert", "ge%s_hgt_upper_m",
"ge%s_hgt_lower_m")
gf_cols <- c("gf%s_code", "gf%s_opaque_code", "gf%s_code_qa",
"gf%s_lowest_code", "gf%s_lowest_code_qa", "gf%s_low_code",
"gf%s_low_code_qa", "gf%s_low_hgt_m", "gf%s_low_hgt_qa",
"gf%s_mid_code", "gf%s_mide_code_qa", "gf%s_high_code",
"gf%s_high_code_qa")
ia_cols <- c("ia%s_code", "ia%s_code_qa")
mw_cols <- c("mw%s_code", "mw%s_code_qa")

lcd_cols <- list(
wnd = c("w_angle", "w_a_qa", "w_type", "w_speed_mps", "w_s_qa"),
tmp = c("t_degree_c", "t_qa"),
aa1 = sprintf(aa_cols, 1),
aa2 = sprintf(aa_cols, 2),
aj1 = sprintf(aj_cols, 1),
ay1 = sprintf(ay_cols, 1),
ay2 = sprintf(ay_cols, 2),
oc1 = c("wg_speed_mps", "wg_qa"),
dew = c("dew_temp", "dew_qa"),
slp = c("slp_hg", "slp_qa"),
# cig = c("cig_", ""),
vis = c("vis_m", "vis_m_qa", "vis_var", "vis_var_qa"),
ka1 = sprintf(ka_cols, 1),
ka2 = sprintf(ka_cols, 2),
ma1 = c("ma1_hg", "ma1_hPa", "ma1_hPa_aq", "ma1_hPa_qa"),
md1 = c("md1_tendency", "md1_tendency_qa", "md1_hPa", "md1_hPa_qa",
"md1_hPa_24hr", "md1_hPa_24hr_qa"),
od1 = c("od1_type", "od1_period_hrs", "od1_speed_mps",
"od1_speed_qa", "od1_anular_deg"),
ga1 = sprintf(ga_cols, 1),
ga2 = sprintf(ga_cols, 2),
ga3 = sprintf(ga_cols, 3),
ge1 = sprintf(ge_cols, 1),
gf1 = sprintf(gf_cols, 1),
ia1 = sprintf(ia_cols, 1),
mw1 = sprintf(mw_cols, 1)
)

separ_ate <- function(x, col, new_cols) {
tidyr::separate(x, col, new_cols, sep = ",",
remove = TRUE, convert = TRUE, extra = "drop")
}

#' @export
#' @rdname lcd
lcd_cleanup <- function(x) {
assert(x, "lcd")
for (i in seq_along(lcd_cols)) {
x <- separ_ate(x, names(lcd_cols)[i], lcd_cols[[names(lcd_cols)[i]]])
}
return(x)
}

lcd_get <- function(station, year, overwrite = FALSE, ...) {
Expand Down
2 changes: 1 addition & 1 deletion R/rnoaa-package.r
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
#' @importFrom xml2 read_html read_xml xml_find_all xml_attr as_list
#' xml_text xml_find_first
#' @importFrom jsonlite fromJSON
#' @importFrom tidyr gather
#' @importFrom tidyr gather separate
#' @importFrom rappdirs user_cache_dir
#' @importFrom gridExtra grid.arrange
#' @importFrom dplyr %>% select mutate rename tbl_df filter bind_rows
Expand Down
30 changes: 30 additions & 0 deletions man/lcd.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 22 additions & 4 deletions tests/testthat/test-lcd.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
context("lcd")

test_that("lcd works", {
test_that("lcd", {
skip_on_cran()
skip_if_government_down()

Expand All @@ -26,7 +24,7 @@ test_that("lcd fails well", {

# a station/year combination that doesn't exist
expect_error(lcd(station = "02413099999", year = "1945"),
"Not Found")
"Not Found", class = "error")

# class
expect_error(lcd(5),
Expand All @@ -38,3 +36,23 @@ test_that("lcd fails well", {
expect_error(lcd(5, list(1)),
"year must be of class")
})

test_that("lcd_cleanup", {
skip_on_cran()
skip_if_government_down()

# clean up first
lcd_cache$delete_all()

# get data
aa <- lcd(station = "01338099999", year = "2017")
expect_is(aa, "tbl_df")
bb <- suppressWarnings(lcd_cleanup(aa))
expect_gt(NCOL(bb), NCOL(aa))
})

test_that("lcd_cleanup fails well", {
skip_on_cran()
expect_error(lcd_cleanup(5), "must be of class lcd", class = "error")
})

0 comments on commit 3f14352

Please sign in to comment.