From dca9adfdc69b914ce6268f677ef0199cbddc2bb7 Mon Sep 17 00:00:00 2001 From: Nicholas Potter Date: Mon, 16 Sep 2019 20:26:18 -0700 Subject: [PATCH] fix issues #316, #317. --- R/gefs.R | 93 ++++++++++++++++---------------------- tests/testthat/test-gefs.R | 15 ++++++ 2 files changed, 55 insertions(+), 53 deletions(-) diff --git a/R/gefs.R b/R/gefs.R index 6ee8717b..a52d82c4 100644 --- a/R/gefs.R +++ b/R/gefs.R @@ -8,11 +8,10 @@ #' #' @param var the variable to get. Must be one of the variables listed in #' `gefs_variables()` -#' @param lat,lon the longitude. Will be converted to the nearest GEFS available -#' longitude. If lon is a list of vlaues, it must be a sequential list, and -#' data are returned for the number of longitudes in the list starting with -#' the maximum value and incrementing through the indexed values for the -#' length of the list. +#' @param lat the latitude. Values must be sequential and are rounded to the nearest GEFS available +#' latitude. +#' @param lon the longitude. Values must be sequential and are rounded to the nearest GEFS available +#' longitude. #' @param date A date/string formatted as YYYYMMDD. #' @param forecast_time a string indicating which time of day UTC the #' forecast is from. Options are "0000", "0600", "1200", "1800". @@ -32,7 +31,7 @@ #' unprocessed matrix. #' #' @references -#' +#' #' - Data description - \url{http://bit.ly/noaagefs}. #' - Adapted from Python code written by Von P. Walden, Washington State #' University @@ -123,66 +122,54 @@ gefs_GET <- function(var, lat, lon, #get a connection con <- gefs_CONNECT(date, forecast_time) + # Rename extra dimensions, to be changed later + if (!is.null(dims)) { warning("Can't select additional dimensions yet.", + .call = FALSE) + } else { + additional_dims <- dims + } + + #Get a subset of data to speed up access v <- con$var[[var]] # lon, lat, height_above_ground, ens (ensemble), time1 varsize <- v$varsize ndims <- v$ndims n_time <- varsize[ndims] #time is always the last dimension - dims <- c() - for (i in 1:length(v$dim)) { dims[i] <- v$dim[[i]]$name } - - #get lat/lon indices - lon_start_idx <- if(!missing(lon)) which(v$dim[[1]]$vals==round(max(lon) %% 360, 0)) else 1 - lat_start_idx <- if(!missing(lat)) which(v$dim[[2]]$vals==round(max(lat), 0)) else 1 - - #indices of dimensions to read from data - start <- rep(1,ndims) #number of dims - start[1] <- lon_start_idx #first is always longitude - start[2] <- lat_start_idx #second is always latitude - start[ndims - 1] <- ens_idx[1] #first ensemble index - start[ndims] <- time_idx[1] #first time index - - count_n <- rep(1,ndims) - #if not specified, get all locations. - count_n[1] <- if(missing(lon)) -1 else length(lon) - count_n[2] <- if(missing(lat)) -1 else length(lat) - - #ensemble is always the 2nd to last dimension. Take either the variable - #max or the last value indicated by the parameter ens - count_n[ndims-1] <- min(varsize[ndims-1], length(ens_idx)) - - #time is always the last dimension - #take the minimum of the variable max or the value indicated by the parameter time - count_n[ndims] <- min(varsize[ndims], length(time_idx)) + # Set the indices for each dimension + dim_idxs <- list() + for (i in 1:length(v$dim)) { + dn <- v$dim[[i]]$name + if(dn == "lon") { + dim_idxs[[i]] <- if(!missing(lon)) which(v$dim[[i]]$vals %in% (round(lon,0) %% 360)) else 1:v$dim[[i]]$len + } else if (dn == "lat") { + dim_idxs[[i]] <- if(!missing(lat)) which(v$dim[[2]]$vals %in% round(lat, 0)) else 1:v$dim[[2]]$len + } else if (dn == "ens") { + dim_idxs[[i]] <- ens_idx + } else if (dn %in% c("time1", "time2")) { + dim_idxs[[i]] <- time_idx + } else if (dn %in% names(additional_dims)) { + dim_idxs[[i]] <- which(v$dim[[j]]$vals %in% additional_dims[[dn]]) + } else { + dim_idxs[[i]] <- 1:v$dim[[i]]$len + } + } + names(dim_idxs) <- lapply(1:length(v$dim), function(i) { v$dim[[i]]$name }) - # actual data - # Do not modify the data, so don't convert (- 273.15) * 1.8 + 32. - #d <- ncvar_get(con, v, start = start, count = count_n, ...) + #start indices of dimensions to read from data + start <- sapply(dim_idxs, function(d) { min(d) }) + count_n <- sapply(dim_idxs, function(d) { length(d) }) ##ncdf4 version - d <- ncdf4::ncvar_get(con, v, start = start, count = count_n, ...) + d_raw <- ncdf4::ncvar_get(con, v, start = start, count = count_n, ...) #create the data frame #For now, if lat/lon are not specified, just return a matrix. if (raw==FALSE) { - d = as.data.frame(as.vector(d)) - - #get the dimension values for all but the time variable - for (i in 1:(length(count_n)-1)) { - dim_vals <- v$dim[[i]]$vals - if(count_n[i]==1) { - d[dims[i]] <- dim_vals[start[i]] - } - else { - d[dims[i]] <- dim_vals[0:(count_n[i]-1) + start[i]] - } - } - #get the dimension values for the time variable - time_vals <- v$dim[[ndims]]$vals[0:(count_n[ndims] - 1) + start[ndims]] - d[dims[ndims]] <- as.vector(sapply(time_vals, function(t) { rep(t, count_n[ndims-1])})) - - names(d) <- c(var, dims) + dim_vals <- lapply(1:length(dim_idxs), function(i) { v$dim[[i]]$vals[dim_idxs[[i]]] }) + names(dim_vals) <- names(dim_idxs) + d = cbind(as.data.frame(as.vector(d_raw)), expand.grid(dim_vals)) + names(d)[[1]] <- var } fname <- strsplit(con$filename, "_")[[1]] diff --git a/tests/testthat/test-gefs.R b/tests/testthat/test-gefs.R index e6c7c54b..6bc964ec 100644 --- a/tests/testthat/test-gefs.R +++ b/tests/testthat/test-gefs.R @@ -15,6 +15,21 @@ test_that("gefs errors", { expect_error(gefs(lat=lat, lon=lon), "Need to specify the variable to get. A list of variables is available from gefs_variables().") }) +test_that("gefs latitude and longitude selection returns correct values", { + skip_on_cran() + skip_on_travis() + skip_on_appveyor() + + a <- gefs("u-component_of_wind_height_above_ground_ens", + ens_idx = 1, + time_idx = 1, + forecast_time = "0000", + lon = c(-1:2), lat = c(50:54)) + + expect_true(all(unique(a$data$lon) == c(0,1,2,359))) + expect_true(all(unique(a$data$lat) == c(54,54,54,54))) +}) + test_that("gefs time and ensemble selection returns correct indices.", { skip_on_cran() skip_on_travis()