From dca9adfdc69b914ce6268f677ef0199cbddc2bb7 Mon Sep 17 00:00:00 2001 From: Nicholas Potter Date: Mon, 16 Sep 2019 20:26:18 -0700 Subject: [PATCH 1/4] fix issues #316, #317. --- R/gefs.R | 93 ++++++++++++++++---------------------- tests/testthat/test-gefs.R | 15 ++++++ 2 files changed, 55 insertions(+), 53 deletions(-) diff --git a/R/gefs.R b/R/gefs.R index 6ee8717b..a52d82c4 100644 --- a/R/gefs.R +++ b/R/gefs.R @@ -8,11 +8,10 @@ #' #' @param var the variable to get. Must be one of the variables listed in #' `gefs_variables()` -#' @param lat,lon the longitude. Will be converted to the nearest GEFS available -#' longitude. If lon is a list of vlaues, it must be a sequential list, and -#' data are returned for the number of longitudes in the list starting with -#' the maximum value and incrementing through the indexed values for the -#' length of the list. +#' @param lat the latitude. Values must be sequential and are rounded to the nearest GEFS available +#' latitude. +#' @param lon the longitude. Values must be sequential and are rounded to the nearest GEFS available +#' longitude. #' @param date A date/string formatted as YYYYMMDD. #' @param forecast_time a string indicating which time of day UTC the #' forecast is from. Options are "0000", "0600", "1200", "1800". @@ -32,7 +31,7 @@ #' unprocessed matrix. #' #' @references -#' +#' #' - Data description - \url{http://bit.ly/noaagefs}. #' - Adapted from Python code written by Von P. Walden, Washington State #' University @@ -123,66 +122,54 @@ gefs_GET <- function(var, lat, lon, #get a connection con <- gefs_CONNECT(date, forecast_time) + # Rename extra dimensions, to be changed later + if (!is.null(dims)) { warning("Can't select additional dimensions yet.", + .call = FALSE) + } else { + additional_dims <- dims + } + + #Get a subset of data to speed up access v <- con$var[[var]] # lon, lat, height_above_ground, ens (ensemble), time1 varsize <- v$varsize ndims <- v$ndims n_time <- varsize[ndims] #time is always the last dimension - dims <- c() - for (i in 1:length(v$dim)) { dims[i] <- v$dim[[i]]$name } - - #get lat/lon indices - lon_start_idx <- if(!missing(lon)) which(v$dim[[1]]$vals==round(max(lon) %% 360, 0)) else 1 - lat_start_idx <- if(!missing(lat)) which(v$dim[[2]]$vals==round(max(lat), 0)) else 1 - - #indices of dimensions to read from data - start <- rep(1,ndims) #number of dims - start[1] <- lon_start_idx #first is always longitude - start[2] <- lat_start_idx #second is always latitude - start[ndims - 1] <- ens_idx[1] #first ensemble index - start[ndims] <- time_idx[1] #first time index - - count_n <- rep(1,ndims) - #if not specified, get all locations. - count_n[1] <- if(missing(lon)) -1 else length(lon) - count_n[2] <- if(missing(lat)) -1 else length(lat) - - #ensemble is always the 2nd to last dimension. Take either the variable - #max or the last value indicated by the parameter ens - count_n[ndims-1] <- min(varsize[ndims-1], length(ens_idx)) - - #time is always the last dimension - #take the minimum of the variable max or the value indicated by the parameter time - count_n[ndims] <- min(varsize[ndims], length(time_idx)) + # Set the indices for each dimension + dim_idxs <- list() + for (i in 1:length(v$dim)) { + dn <- v$dim[[i]]$name + if(dn == "lon") { + dim_idxs[[i]] <- if(!missing(lon)) which(v$dim[[i]]$vals %in% (round(lon,0) %% 360)) else 1:v$dim[[i]]$len + } else if (dn == "lat") { + dim_idxs[[i]] <- if(!missing(lat)) which(v$dim[[2]]$vals %in% round(lat, 0)) else 1:v$dim[[2]]$len + } else if (dn == "ens") { + dim_idxs[[i]] <- ens_idx + } else if (dn %in% c("time1", "time2")) { + dim_idxs[[i]] <- time_idx + } else if (dn %in% names(additional_dims)) { + dim_idxs[[i]] <- which(v$dim[[j]]$vals %in% additional_dims[[dn]]) + } else { + dim_idxs[[i]] <- 1:v$dim[[i]]$len + } + } + names(dim_idxs) <- lapply(1:length(v$dim), function(i) { v$dim[[i]]$name }) - # actual data - # Do not modify the data, so don't convert (- 273.15) * 1.8 + 32. - #d <- ncvar_get(con, v, start = start, count = count_n, ...) + #start indices of dimensions to read from data + start <- sapply(dim_idxs, function(d) { min(d) }) + count_n <- sapply(dim_idxs, function(d) { length(d) }) ##ncdf4 version - d <- ncdf4::ncvar_get(con, v, start = start, count = count_n, ...) + d_raw <- ncdf4::ncvar_get(con, v, start = start, count = count_n, ...) #create the data frame #For now, if lat/lon are not specified, just return a matrix. if (raw==FALSE) { - d = as.data.frame(as.vector(d)) - - #get the dimension values for all but the time variable - for (i in 1:(length(count_n)-1)) { - dim_vals <- v$dim[[i]]$vals - if(count_n[i]==1) { - d[dims[i]] <- dim_vals[start[i]] - } - else { - d[dims[i]] <- dim_vals[0:(count_n[i]-1) + start[i]] - } - } - #get the dimension values for the time variable - time_vals <- v$dim[[ndims]]$vals[0:(count_n[ndims] - 1) + start[ndims]] - d[dims[ndims]] <- as.vector(sapply(time_vals, function(t) { rep(t, count_n[ndims-1])})) - - names(d) <- c(var, dims) + dim_vals <- lapply(1:length(dim_idxs), function(i) { v$dim[[i]]$vals[dim_idxs[[i]]] }) + names(dim_vals) <- names(dim_idxs) + d = cbind(as.data.frame(as.vector(d_raw)), expand.grid(dim_vals)) + names(d)[[1]] <- var } fname <- strsplit(con$filename, "_")[[1]] diff --git a/tests/testthat/test-gefs.R b/tests/testthat/test-gefs.R index e6c7c54b..6bc964ec 100644 --- a/tests/testthat/test-gefs.R +++ b/tests/testthat/test-gefs.R @@ -15,6 +15,21 @@ test_that("gefs errors", { expect_error(gefs(lat=lat, lon=lon), "Need to specify the variable to get. A list of variables is available from gefs_variables().") }) +test_that("gefs latitude and longitude selection returns correct values", { + skip_on_cran() + skip_on_travis() + skip_on_appveyor() + + a <- gefs("u-component_of_wind_height_above_ground_ens", + ens_idx = 1, + time_idx = 1, + forecast_time = "0000", + lon = c(-1:2), lat = c(50:54)) + + expect_true(all(unique(a$data$lon) == c(0,1,2,359))) + expect_true(all(unique(a$data$lat) == c(54,54,54,54))) +}) + test_that("gefs time and ensemble selection returns correct indices.", { skip_on_cran() skip_on_travis() From 8101b28001dc74f1ca647bc7d2dac8f62c508364 Mon Sep 17 00:00:00 2001 From: Nicholas Potter Date: Wed, 18 Sep 2019 18:46:48 -0700 Subject: [PATCH 2/4] fix broken test and add some others. --- R/gefs.R | 14 ++++++- tests/testthat/test-gefs.R | 79 +++++++++++++++++++++++--------------- 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/R/gefs.R b/R/gefs.R index a52d82c4..fef411fc 100644 --- a/R/gefs.R +++ b/R/gefs.R @@ -116,9 +116,19 @@ gefs_GET <- function(var, lat, lon, raw = FALSE, ...) { - #Sanity Checks + ###Sanity Checks if (missing(var)) stop("Need to specify the variable to get. A list of variables is available from gefs_variables().") + # lats and lons must be sequential and within ranges + lats <- sort(round(lat, 0)) + if (!all(lats == seq(lats[1], length.out = length(lats)))) stop("Latitudes must be sequential.") + if (any(lats < -90 | lats > 90)) stop("Latitudes must be in c(-90,90).") + + lons <- sort(round(lon, 0)) + if (!all(lons == seq(lons[1], length.out = length(lons)))) stop("Longitudes must be sequential.") + if (any(lons < -180 | lons > 360)) stop("Longitudes must be in c(-180,180) or c(0,360).") + + #get a connection con <- gefs_CONNECT(date, forecast_time) @@ -170,6 +180,8 @@ gefs_GET <- function(var, lat, lon, names(dim_vals) <- names(dim_idxs) d = cbind(as.data.frame(as.vector(d_raw)), expand.grid(dim_vals)) names(d)[[1]] <- var + } else { + d <- d_raw } fname <- strsplit(con$filename, "_")[[1]] diff --git a/tests/testthat/test-gefs.R b/tests/testthat/test-gefs.R index 6bc964ec..6d26f030 100644 --- a/tests/testthat/test-gefs.R +++ b/tests/testthat/test-gefs.R @@ -1,11 +1,25 @@ context("gefs") #set a location -lat = 46.28125 -lon = -116.2188 +lons <- c(-1.1, 0, 0.8, 2) +lats <- c(50.1, 51, 51.9, 53, 54) #variable -var = "Temperature_height_above_ground_ens" +temp = "Temperature_height_above_ground_ens" + +# Get raw and processed data +d_raw <- gefs(var = temp, + ens_idx = 1:2, + time_idx = 1:2, + forecast_time = "0000", + lon = lons, lat = lats, raw = TRUE) + +d <- gefs(var = temp, + ens_idx = 1:2, + time_idx = 1:2, + forecast_time = "0000", + lon = lons, lat = lats) + test_that("gefs errors", { skip_on_cran() @@ -13,49 +27,54 @@ test_that("gefs errors", { skip_on_travis() expect_error(gefs(lat=lat, lon=lon), "Need to specify the variable to get. A list of variables is available from gefs_variables().") + expect_error(gefs(var = temp, lat = c(-43, -41), lon = lons, ens_idx = 1, time_idx = 1), "Latitudes must be sequential.", fixed = TRUE) + expect_error(gefs(var = temp, lat = lats, lon = c(213, 211), ens_idx = 1, time_idx = 1), "Longitudes must be sequential.", fixed = TRUE) + expect_error(gefs(var = temp, lat = -91, lon = lons, ens_idx = 1, time_idx = 1), "Latitudes must be in c(-90,90).", fixed = TRUE) + expect_error(gefs(var = temp, lat = 91, lon = lons, ens_idx = 1, time_idx = 1), "Latitudes must be in c(-90,90).", fixed = TRUE) + expect_error(gefs(var = temp, lat = lats, lon = 361, ens_idx = 1, time_idx = 1), "Longitudes must be in c(-180,180) or c(0,360).", fixed = TRUE) + expect_error(gefs(var = temp, lat = lats, lon = -181, ens_idx = 1, time_idx = 1), "Longitudes must be in c(-180,180) or c(0,360).", fixed = TRUE) +}) + +test_that("gefs returns a correct object", { + expect_type(d, "list") + expect_equal(names(d), + c("forecast_date", "forecast_time", "dimensions", "data")) + expect_equal(d$forecast_date, format(Sys.time(), "%Y%m%d")) + expect_equal(d$forecast_time, "0000") + expect_s3_class(d$data, "data.frame") }) -test_that("gefs latitude and longitude selection returns correct values", { +test_that("gefs correctly selects the dimension values", { skip_on_cran() skip_on_travis() skip_on_appveyor() - a <- gefs("u-component_of_wind_height_above_ground_ens", - ens_idx = 1, - time_idx = 1, - forecast_time = "0000", - lon = c(-1:2), lat = c(50:54)) - - expect_true(all(unique(a$data$lon) == c(0,1,2,359))) - expect_true(all(unique(a$data$lat) == c(54,54,54,54))) + expect_true(all(sort(unique(d$data$lon)) == sort(round(lons %% 360, 0)))) + expect_true(all(sort(unique(d$data$lat)) == sort(round(lats, 0)))) }) -test_that("gefs time and ensemble selection returns correct indices.", { +test_that("gefs data arrays are correctly transformed", { skip_on_cran() skip_on_travis() skip_on_appveyor() - ens_idx = 2:4 - time_idx = 5:10 - d = gefs(var, lat, lon, ens_idx = ens_idx, time_idx = time_idx) + grid <- expand.grid(lon = round(lons %% 360, 0), lat = round(lats, 0), + ens = 1:2, time = 1:2) - time_var <- names(d$data)[6] - expect_equal(dim(d$data), c(length(ens_idx) * length(time_idx), 6)) - expect_equal(unique(d$data$ens), ens_idx - 1) -}) + expect_equal(nrow(d$data), nrow(grid)) -test_that("gefs metadata", { - skip_on_cran() - skip_on_travis() - skip_on_appveyor() + # NOTE: Remember that GEFS returns latitude in reverse order, i.e. -90:90 - today = format(as.Date(Sys.time()) - 2, "%Y%m%d") - forecast_time = "0600" - d = gefs(var, lat, lon, ens=1, date=today, forecast_time=forecast_time) + # 2nd lon, all lats, 1st ens, 2nd time + expect_true(all(d_raw$data[2,,1,2] == d$data[d$data$lon == lons[[2]] & + d$data$ens == 0 & + d$data$time2 == 1,][[temp]])) - expect_equal(d$forecast_date, today) - expect_equal(d$forecast_time, forecast_time) - expect_equal(d$dimensions[1:4], c("lon", "lat", "height_above_ground", "ens")) + # All lons, last lat, 1st ens, 1st time + expect_true(all(d_raw$data[,length(lats),1,2] == d$data[ + d$data$lat == round(sort(lats, decreasing = TRUE)[[length(lats)]]) & + d$data$ens == 0 & + d$data$time2 == 1,][[temp]])) }) test_that("gefs_variables returns characters.", { From 8420be7fb361b12ad7f201511bb1751fdc0d24d9 Mon Sep 17 00:00:00 2001 From: Nicholas Potter Date: Wed, 18 Sep 2019 18:54:54 -0700 Subject: [PATCH 3/4] regenerate gefs documentation. --- man/gefs.Rd | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/man/gefs.Rd b/man/gefs.Rd index de749556..e093e5c2 100644 --- a/man/gefs.Rd +++ b/man/gefs.Rd @@ -34,11 +34,11 @@ gefs_dimension_values(dim, con = NULL, ...) \item{var}{the variable to get. Must be one of the variables listed in \code{gefs_variables()}} -\item{lat}{the latitude. Values must be sequential and are rounded to the nearest GEFS available -latitude.} +\item{lat}{the latitude. Values must be sequential and are rounded to the +nearest GEFS available latitude.} -\item{lon}{the longitude. Values must be sequential and are rounded to the nearest GEFS available -longitude.} +\item{lon}{the longitude. Values must be sequential and are rounded to the +nearest GEFS available longitude.} \item{...}{additional parameters passed to \code{ncvar_get}} From ea7b5e27274db1d342c58c95928fc5ee6e4de852 Mon Sep 17 00:00:00 2001 From: Nicholas Potter Date: Thu, 19 Sep 2019 12:50:48 -0700 Subject: [PATCH 4/4] don't make http requests outside of tests that aren't skipped on cran. --- R/gefs.R | 2 +- tests/testthat/test-gefs.R | 50 +++++++++++++++++--------------------- 2 files changed, 23 insertions(+), 29 deletions(-) diff --git a/R/gefs.R b/R/gefs.R index 2f8c9bf0..73e265af 100644 --- a/R/gefs.R +++ b/R/gefs.R @@ -175,7 +175,7 @@ gefs_GET <- function(var, lat, lon, #create the data frame #For now, if lat/lon are not specified, just return a matrix. - if (raw==FALSE) { + if (!raw) { dim_vals <- lapply(1:length(dim_idxs), function(i) { v$dim[[i]]$vals[dim_idxs[[i]]] }) names(dim_vals) <- names(dim_idxs) d = cbind(as.data.frame(as.vector(d_raw)), expand.grid(dim_vals)) diff --git a/tests/testthat/test-gefs.R b/tests/testthat/test-gefs.R index 0e547b1a..9f248064 100644 --- a/tests/testthat/test-gefs.R +++ b/tests/testthat/test-gefs.R @@ -7,20 +7,6 @@ lats <- c(50.1, 51, 51.9, 53, 54) #variable temp = "Temperature_height_above_ground_ens" -# Get raw and processed data -d_raw <- gefs(var = temp, - ens_idx = 1:2, - time_idx = 1:2, - forecast_time = "0000", - lon = lons, lat = lats, raw = TRUE) - -d <- gefs(var = temp, - ens_idx = 1:2, - time_idx = 1:2, - forecast_time = "0000", - lon = lons, lat = lats) - - test_that("gefs errors", { skip_on_cran() skip_on_appveyor() @@ -35,29 +21,36 @@ test_that("gefs errors", { expect_error(gefs(var = temp, lat = lats, lon = -181, ens_idx = 1, time_idx = 1), "Longitudes must be in c(-180,180) or c(0,360).", fixed = TRUE) }) -test_that("gefs returns a correct object", { +test_that("gefs HTTP requests", { + skip_on_cran() + skip_on_travis() + skip_on_appveyor() + + ### Get raw and processed data + d_raw <- gefs(var = temp, + ens_idx = 1:2, + time_idx = 1:2, + forecast_time = "0000", + lon = lons, lat = lats, raw = TRUE) + d <- gefs(var = temp, + ens_idx = 1:2, + time_idx = 1:2, + forecast_time = "0000", + lon = lons, lat = lats) + + ### Tests of object type expect_type(d, "list") expect_equal(names(d), c("forecast_date", "forecast_time", "dimensions", "data")) expect_equal(d$forecast_date, format(Sys.time(), "%Y%m%d")) expect_equal(d$forecast_time, "0000") expect_s3_class(d$data, "data.frame") -}) - -test_that("gefs time and ensemble selection returns correct indices.", { - skip_on_cran() - skip_on_travis() - skip_on_appveyor() + ### Tests of indices expect_true(all(sort(unique(d$data$lon)) == sort(round(lons %% 360, 0)))) expect_true(all(sort(unique(d$data$lat)) == sort(round(lats, 0)))) -}) - -test_that("gefs data arrays are correctly transformed", { - skip_on_cran() - skip_on_travis() - skip_on_appveyor() + ### Tests of data transformation from multidimensional array to data frame grid <- expand.grid(lon = round(lons %% 360, 0), lat = round(lats, 0), ens = 1:2, time = 1:2) @@ -75,8 +68,9 @@ test_that("gefs data arrays are correctly transformed", { d$data$lat == round(sort(lats, decreasing = TRUE)[[length(lats)]]) & d$data$ens == 0 & d$data$time2 == 1,][[temp]])) -}) +}) + test_that("gefs_variables returns characters.", { skip_on_cran() skip_on_travis()