From 2148879b8f66e1f53a3faafb342a3d7e5cd3d8b8 Mon Sep 17 00:00:00 2001 From: James Lamb Date: Wed, 9 May 2018 21:04:21 -0500 Subject: [PATCH] Added assertions on input types --- DESCRIPTION | 1 + NAMESPACE | 5 +++++ R/elasticsearch_eda_funs.R | 24 ++++++++++++++++++++++++ R/elasticsearch_parsers.R | 28 ++++++++++++++++++++++++++++ R/utils.R | 21 +++++++++++++++++++++ 5 files changed, 79 insertions(+) create mode 100644 R/utils.R diff --git a/DESCRIPTION b/DESCRIPTION index 464ec5d..10267ae 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -22,6 +22,7 @@ Description: Depends: R (>= 3.3.0) Imports: + assertthat (>= 0.2.0), data.table, futile.logger, httr, diff --git a/NAMESPACE b/NAMESPACE index 6902192..15996d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,11 @@ export(get_counts) export(get_fields) export(parse_date_time) export(unpack_nested_data) +importFrom(assertthat,is.flag) +importFrom(assertthat,is.number) +importFrom(assertthat,is.string) +importFrom(assertthat,is.writeable) +importFrom(assertthat,see_if) importFrom(data.table,":=") importFrom(data.table,as.data.table) importFrom(data.table,copy) diff --git a/R/elasticsearch_eda_funs.R b/R/elasticsearch_eda_funs.R index a59c053..8c4aa1f 100644 --- a/R/elasticsearch_eda_funs.R +++ b/R/elasticsearch_eda_funs.R @@ -2,6 +2,7 @@ #' @title Examine the distribution of distinct values for a field in Elasticsearch #' @name get_counts #' @description For a given field, return a data.table with its unique values in a time range. +#' @importFrom assertthat is.string is.number #' @importFrom data.table := data.table setnames setkeyv #' @importFrom httr content RETRY #' @importFrom purrr transpose @@ -44,6 +45,24 @@ get_counts <- function(field # Input checking es_host <- .ValidateAndFormatHost(es_host) + # Other input checks we don't have explicit error messages for + .assert( + assertthat::is.string(field) + , field != "" + , assertthat::is.string(es_index) + , es_index != "" + , assertthat::is.string(start_date) + , start_date != "" + , assertthat::is.string(end_date) + , end_date != "" + , assertthat::is.string(time_field) + , time_field != "" + , assertthat::is.string(use_na) + , use_na != "" + , assertthat::is.number(max_terms) + , max_terms > 0 + ) + #===== Format and execute query =====# # Support un-dated queries @@ -137,6 +156,11 @@ get_fields <- function(es_host # Input checking url <- .ValidateAndFormatHost(es_host) + .assert( + is.character("es_indices") + , length(es_indices) > 0 + ) + # collapse character vectors into comma separated strings. If any arguments # are NULL, create an empty string indices <- paste(es_indices, collapse = ',') diff --git a/R/elasticsearch_parsers.R b/R/elasticsearch_parsers.R index f8a0b01..f58d908 100644 --- a/R/elasticsearch_parsers.R +++ b/R/elasticsearch_parsers.R @@ -8,6 +8,7 @@ #' #' This is a side-effect-free function: it returns a new data.table and the #' input data.table is unmodified. +#' @importFrom assertthat is.string #' @importFrom data.table copy #' @importFrom purrr map2 simplify #' @importFrom stringr str_extract @@ -65,6 +66,12 @@ parse_date_time <- function(input_df log_fatal(msg) } + # Other input checks we don't have explicit error messages for + .assert( + assertthat::is.string(assume_tz) + , assume_tz != "" + ) + # Work on a copy of the DT to avoid side effects outDT <- data.table::copy(input_df) @@ -576,6 +583,7 @@ chomp_hits <- function(hits_json = NULL, keep_nested_data_cols = TRUE) { #' want to change this behavior, provide a path here. `es_search` will create #' and write to a temporary directory under whatever path you provide. #' @inheritParams doc_shared +#' @importFrom assertthat is.flag is.number is.string is.writeable #' @importFrom parallel detectCores #' @export #' @examples @@ -628,6 +636,26 @@ es_search <- function(es_host log_fatal(msg) } + # Other input checks we don't have explicit error messages for + .assert( + assertthat::is.string(es_host) + , es_host != "" + , assertthat::is.string(es_index) + , es_index != "" + , assertthat::is.string(query) + , query != "" + , assertthat::is.string(scroll) + , scroll != "" + , assertthat::is.number(max_hits) + , max_hits >= 0 + , assertthat::is.number(n_cores) + , n_cores >= 1 + , assertthat::is.flag(break_on_duplicates) + , assertthat::is.flag(ignore_scroll_restriction) + , assertthat::is.string(intermediates_dir) + , assertthat::is.writeable(intermediates_dir) + ) + # Aggregation Request if (grepl('aggs', query_body)){ diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..7a49ca9 --- /dev/null +++ b/R/utils.R @@ -0,0 +1,21 @@ + +# [title] assert_that wrapper +# [name] assert +# [description] When making an assertion you might call: +# +# \code{assertthat::assert_that(assertthat::is.date(x))} +# +# or something like that. This is an alias to \code{\link[assertthat]{assert_that}} to be used +# for two benefits: \enumerate{ +# \item{This uses \code{\link{log_fatal}} instead of \code{\link{stop}} on failure} +# \item{Much less clutter in the source code} +# } +#' @importFrom assertthat see_if +.assert <- function(..., msg = NULL) { + res <- assertthat::see_if(..., env = parent.frame(), msg = msg) + if (res) { + return(invisible(TRUE)) + } else { + log_fatal(attr(res, "msg")) + } +}