Skip to content

Commit

Permalink
Merge pull request #106 from potterzot/gefs
Browse files Browse the repository at this point in the history
Add functions to access GEFS ensemble forecasts.
  • Loading branch information
sckott committed Sep 10, 2015
2 parents d69658d + 243e76a commit d5aba66
Show file tree
Hide file tree
Showing 6 changed files with 434 additions and 0 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ export(erddap_grid)
export(erddap_info)
export(erddap_search)
export(erddap_table)
export(gefs)
export(gefs_dimension_values)
export(gefs_dimensions)
export(gefs_latitudes)
export(gefs_longitudes)
export(gefs_variables)
export(ghcnd)
export(ghcnd_clear_cache)
export(ghcnd_countries)
Expand Down
225 changes: 225 additions & 0 deletions R/gefs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,225 @@
#' Get GEFS ensemble forecast data for a specific lat/lon.
#'
#' Fetches GEFS forecast data for every 6 hours out to 384 hours past selected date. GEFS
#' is an ensemble of 21 models that can be summarized to estimate likelihoods of forecasts.
#'
#' @importFrom tidyr gather
#' @export
#'
#' @param var the variable to get. Must be one of the variables listed in \code{gefs_variables()}.
#' @param lat,lon the longitude. Will be converted to the nearest GEFS available
#' longitude. If lon is a list of vlaues, it must be a sequential list, and
#' data are returned for the number of longitudes in the list starting with
#' the maximum value and incrementing through the indexed values for the
#' length of the list.
#' @param date A date/string formatted as YYYYMMDD.
#' @param forecast_time a string indicating which time of day UTC the forecast is from. Options are "0000", "0600", "1200", "1800".
#' @param ens_idx sequential list of ensembles to fetch. Default is all 21. Note that the ensembles are labelled 0-20, so ens_idx=1:3 will return ensembles 0, 1, and 2.
#' @param time_idx sequential list of time increments to return. List is the index
#' of times, which are in 6 hour increments. (e.g. c(1,2) fetches the 6 and 12 hour forecast.)
#' @param dims (not implemented) indices for additional dimensions to be included between lat, lon, ens, and time.
#' @param raw logical to indicate whether to return raw data matrix or reshaped data frame.
#' @param ... additional parameters passed to \code{ncvar_get}.
#' @return a list containing metadata and accompanying data frame of forecast
#' values. If lat/lon are not specified, the $data is an unprocessed matrix.
#'
#' @references \url{https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-ensemble-forecast-system-gefs}
#'
#' @author Nicholas Potter \email{potterzot@@gmail.com}
#' @examples \dontrun{
#'
#' #avialable latitudes and longitudes
#' gefs_latitudes()
#' gefs_longitudes()
#'
#' #get a list of all gefs variables
#' gefs_variables()
#'
#' #All GEFS dimensions
#' gefs_dimensions()
#'
#' #values for a specific dimension
#' gefs_dimension_values("height_above_ground")
#'
#' #example location.
#' lat = 46.28125
#' lon = -116.2188
#'
#' #Get forecast for a certain variable.
#' forecast = gefs("Total_precipitation_surface_6_Hour_Accumulation_ens", lat, lon)
#'
#' #Fetch a different date (available up to 10 days prior to today)
#' forecast_yesterday_prec = gefs("Total_precipitation_surface_6_Hour_Accumulation_ens",
#' lat,
#' lon,
#' date=format(as.Date(Sys.time()) - 1, "%Y%m%d"))
#'
#' #specific ensemble and times, for the 1800 forecast.
#' # here ensembles 1-3 (ensembles are numbered starting with 0)
#' # and time for 2 days from today at 1800
#' date=format(as.Date(Sys.time()) - 1, "%Y%m%d")
#' var = "Temperature_height_above_ground_ens"
#' gefs(var, lat, lon, date = date, forecast_time = "1800", ens_idx=2:4, time_idx=1:8)
#'
#' #One ensemble, all latitudes and longitudes (this is a big file) for the
#' # next 3 days.
#' gefs(var, ens=1, time=1:12)
#' }
#'
gefs <- function(var, lat, lon, ...) {
check4ncdf()
gefs_GET(var, lat, lon, ...)
}

#' @rdname gefs
gefs_CONNECT <- function(date = format(Sys.time(), "%Y%m%d"),
forecast_time = c("0000", "0600", "1200", "1800")) {

#forecast time
forecast_time = match.arg(forecast_time)

#url parts
gefs_url_pre = 'http://motherlode.ucar.edu/thredds/dodsC/grib/NCEP/GEFS/Global_1p0deg_Ensemble/members/GEFS_Global_1p0deg_Ensemble_'
gefs_url_suf = ".grib2"

#final url
gefs_url = paste0(gefs_url_pre, date, "_", forecast_time, gefs_url_suf)

#open the connection
#nc_open(gefs_url) #ncdf4 version
ncdf::open.ncdf(gefs_url)
}

#' @rdname gefs
gefs_GET <- function(var, lat, lon,
date = format(Sys.time(), "%Y%m%d"),
forecast_time = c("0000", "0600", "1200", "1800"),
ens_idx = 1:21,
time_idx = 1:65,
dims = NULL,
raw = FALSE,
...) {

#Sanity Checks
if (missing(var)) stop("Need to specify the variable to get. A list of variables is available from gefs_variables().")

#get a connection
con = gefs_CONNECT(date, forecast_time)

#Get a subset of data to speed up access
v = con$var[[var]] # lon, lat, height_above_ground, ens (ensemble), time1
varsize = v$varsize
ndims = v$ndims
n_time = varsize[ndims] #time is always the last dimension
dims = c()
for (i in 1:length(v$dim)) { dims[i] = v$dim[[i]]$name }

#get lat/lon indices
lon_start_idx = if(!missing(lon)) which(v$dim[[1]]$vals==round(max(lon) %% 360, 0)) else 1
lat_start_idx = if(!missing(lat)) which(v$dim[[2]]$vals==round(max(lat), 0)) else 1

#indices of dimensions to read from data
start = rep(1,ndims) #number of dims
start[1] = lon_start_idx #first is always longitude
start[2] = lat_start_idx #first is always latitude
start[ndims - 1] = ens_idx[1] #first ensemble
start[ndims] = time_idx[1] #first time

count_n = rep(1,ndims)

#if not specified, get all locations.
count_n[1] = if(missing(lon)) -1 else length(lon)
count_n[2] = if(missing(lat)) -1 else length(lat)

#ensemble is always the 2nd to last dimension. Take either the variable
#max or the last value indicated by the parameter ens
count_n[ndims-1] = min(varsize[ndims-1], length(ens_idx))

#time is always the last dimension
#take the minimum of the variable max or the value indicated by the parameter time
count_n[ndims] = min(varsize[ndims], length(time_idx))

# actual data
# Do not modify the data, so don't convert (- 273.15) * 1.8 + 32. #convert from K to F
#d = ncvar_get(con, v, start = start, count = count_n, ...) #ncdf4 version
d = ncdf::get.var.ncdf(con, v, start = start, count = count_n, ...)

#create the data frame
#For now, if lat/lon are not specified, just return a matrix.
if (raw==FALSE) {
d = gather(as.data.frame(d))

for (i in 1:length(count_n)) {
dim_vals = v$dim[[i]]$vals
if(count_n[i]==1) {
d[dims[i]] = dim_vals[start[i]]
}
else {
d[dims[i]] = dim_vals[0:(count_n[i]-1) + start[i]]
}
}
names(d) = c("key", var, dims)
d = d[,c(dims, var)]
}

fname = strsplit(con$filename, "_")[[1]]
date = fname[7]
forecast_time = strsplit(fname, ".grib2")[[8]]
list(forecast_date = date,
forecast_time = forecast_time,
dimensions = dims,
data = d)
}

########################
# helper functions

#' @export
#'
#' @param con an ncdf4 connection.
#' @rdname gefs
gefs_latitudes <- function(con = NULL, ...) {
gefs_dimension_values("lat", con)
}

#' @export
#' @rdname gefs
gefs_longitudes <- function(con = NULL, ...) {
gefs_dimension_values("lon", con)
}

#' @export
#' @rdname gefs
gefs_variables <- function(con = NULL, ...) {
if (is.null(con)) con = gefs_CONNECT(...)
names(con$var)
}

#' @export
#' @rdname gefs
gefs_dimensions <- function(con = NULL, ...) {
if (is.null(con)) con = gefs_CONNECT(...)
names(con$dim)
}

#' @export
#'
#' @param dim (character) the dimension.
#' @rdname gefs
gefs_dimension_values <- function(dim, con = NULL, ...) {
if (is.null(dim) || missing(dim)) stop("dim cannot be NULL or missing.")
if (is.null(con)) con = gefs_CONNECT(...)
con$dim[[dim]]$vals
}

#Check that ncdf is installed
check4ncdf <- function() {
if (!requireNamespace("ncdf", quietly = TRUE)) {
stop("Please install ncdf", call. = FALSE)
} else {
invisible(TRUE)
}
}



1 change: 1 addition & 0 deletions R/rnoaa-package.r
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#'
#' \itemize{
#' \item \code{buoy_*} - NOAA Buoy data from the National Buoy Data Center
#' \item \code{gefs} - GEFS forecast ensemble data
#' \item \code{ghcnd_*} - GHCND daily data from NOAA
#' \item \code{isd_*} - ISD/ISH data from NOAA
#' \item \code{homr_*} - Historical Observing Metadata Repository (HOMR) vignette
Expand Down
116 changes: 116 additions & 0 deletions man/gefs.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/gefs.R
\name{gefs}
\alias{gefs}
\alias{gefs_CONNECT}
\alias{gefs_GET}
\alias{gefs_dimension_values}
\alias{gefs_dimensions}
\alias{gefs_latitudes}
\alias{gefs_longitudes}
\alias{gefs_variables}
\title{Get GEFS ensemble forecast data for a specific lat/lon.}
\usage{
gefs(var, lat, lon, ...)

gefs_CONNECT(date = format(Sys.time(), "\%Y\%m\%d"),
forecast_time = c("0000", "0600", "1200", "1800"))

gefs_GET(var, lat, lon, date = format(Sys.time(), "\%Y\%m\%d"),
forecast_time = c("0000", "0600", "1200", "1800"), ens_idx = 1:21,
time_idx = 1:65, dims = NULL, raw = FALSE, ...)

gefs_latitudes(con = NULL, ...)

gefs_longitudes(con = NULL, ...)

gefs_variables(con = NULL, ...)

gefs_dimensions(con = NULL, ...)

gefs_dimension_values(dim, con = NULL, ...)
}
\arguments{
\item{var}{the variable to get. Must be one of the variables listed in \code{gefs_variables()}.}

\item{lat,lon}{the longitude. Will be converted to the nearest GEFS available
longitude. If lon is a list of vlaues, it must be a sequential list, and
data are returned for the number of longitudes in the list starting with
the maximum value and incrementing through the indexed values for the
length of the list.}

\item{...}{additional parameters passed to \code{ncvar_get}.}

\item{date}{A date/string formatted as YYYYMMDD.}

\item{forecast_time}{a string indicating which time of day UTC the forecast is from. Options are "0000", "0600", "1200", "1800".}

\item{ens_idx}{sequential list of ensembles to fetch. Default is all 21. Note that the ensembles are labelled 0-20, so ens_idx=1:3 will return ensembles 0, 1, and 2.}

\item{time_idx}{sequential list of time increments to return. List is the index
of times, which are in 6 hour increments. (e.g. c(1,2) fetches the 6 and 12 hour forecast.)}

\item{dims}{(not implemented) indices for additional dimensions to be included between lat, lon, ens, and time.}

\item{raw}{logical to indicate whether to return raw data matrix or reshaped data frame.}

\item{con}{an ncdf4 connection.}

\item{dim}{(character) the dimension.}
}
\value{
a list containing metadata and accompanying data frame of forecast
values. If lat/lon are not specified, the $data is an unprocessed matrix.
}
\description{
Fetches GEFS forecast data for every 6 hours out to 384 hours past selected date. GEFS
is an ensemble of 21 models that can be summarized to estimate likelihoods of forecasts.
}
\examples{
\dontrun{

#avialable latitudes and longitudes
gefs_latitudes()
gefs_longitudes()

#get a list of all gefs variables
gefs_variables()

#All GEFS dimensions
gefs_dimensions()

#values for a specific dimension
gefs_dimension_values("height_above_ground")

#example location.
lat = 46.28125
lon = -116.2188

#Get forecast for a certain variable.
forecast = gefs("Total_precipitation_surface_6_Hour_Accumulation_ens", lat, lon)

#Fetch a different date (available up to 10 days prior to today)
forecast_yesterday_prec = gefs("Total_precipitation_surface_6_Hour_Accumulation_ens",
lat,
lon,
date=format(as.Date(Sys.time()) - 1, "\%Y\%m\%d"))

#specific ensemble and times, for the 1800 forecast.
# here ensembles 1-3 (ensembles are numbered starting with 0)
# and time for 2 days from today at 1800
date=format(as.Date(Sys.time()) - 1, "\%Y\%m\%d")
var = "Temperature_height_above_ground_ens"
gefs(var, lat, lon, date = date, forecast_time = "1800", ens_idx=2:4, time_idx=1:8)

#One ensemble, all latitudes and longitudes (this is a big file) for the
# next 3 days.
gefs(var, ens=1, time=1:12)
}
}
\author{
Nicholas Potter \email{potterzot@gmail.com}
}
\references{
\url{https://www.ncdc.noaa.gov/data-access/model-data/model-datasets/global-ensemble-forecast-system-gefs}
}

1 change: 1 addition & 0 deletions man/rnoaa-package.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ are:

\itemize{
\item \code{buoy_*} - NOAA Buoy data from the National Buoy Data Center
\item \code{gefs} - GEFS forecast ensemble data
\item \code{ghcnd_*} - GHCND daily data from NOAA
\item \code{isd_*} - ISD/ISH data from NOAA
\item \code{homr_*} - Historical Observing Metadata Repository (HOMR) vignette
Expand Down
Loading

0 comments on commit d5aba66

Please sign in to comment.