uptake · geraudster · Oct 30, 2019 · Oct 30, 2019 · Nov 5, 2019 · Nov 19, 2019
diff --git a/.ci/lint.R b/.ci/lint.R
@@ -0,0 +1,65 @@
+library(lintr)
+
+args <- commandArgs(
+  trailingOnly = TRUE
+)
+SOURCE_DIR <- args[[1L]]
+
+excluded_files <- list.files(
+  paste(SOURCE_DIR, "uptasticsearch.Rcheck", sep = "/"),
+  pattern = rex::rex(".", one_of("Rr"), end),
+  all.files = TRUE,
+  recursive = TRUE,
+  full.names = TRUE
+)
+
+LINTERS_TO_USE <- list(
+  "assignment" = lintr::assignment_linter,
+  "closed_curly" = lintr::closed_curly_linter,
+  "equals_na" = lintr::equals_na_linter,
+  "function_left" = lintr::function_left_parentheses_linter,
+  "commas" = lintr::commas_linter,
+  "concatenation" = lintr::unneeded_concatenation_linter,
+  "implicit_integers" = lintr::implicit_integer_linter,
+  "infix_spaces" = lintr::infix_spaces_linter,
+  "long_lines" = lintr::line_length_linter(length = 120L),
+  "tabs" = lintr::no_tab_linter,
+  "open_curly" = lintr::open_curly_linter,
+  "paren_brace_linter" = lintr::paren_brace_linter,
+  "semicolon" = lintr::semicolon_terminator_linter,
+  "seq" = lintr::seq_linter,
+  "single_quotes" = lintr::single_quotes_linter,
+  "spaces_inside" = lintr::spaces_inside_linter,
+  "spaces_left_parens" = lintr::spaces_left_parentheses_linter,
+  "todo_comments" = lintr::todo_comment_linter,
+  "trailing_blank" = lintr::trailing_blank_lines_linter,
+  "trailing_white" = lintr::trailing_whitespace_linter,
+  "true_false" = lintr::T_and_F_symbol_linter
+)
+
+results <- lintr::lint_dir(
+  path = SOURCE_DIR,
+  linters = LINTERS_TO_USE,
+  cache = FALSE,
+  exclusions = excluded_files
+)
+
+results_df <- as.data.frame(results)
+
+cat(sprintf(
+  "Found %i linting errors in project\n",
+  length(results)
+))
+
+issues_count <- length(results)
+issues_found <- issues_count > 0L
+
+if (issues_found) {
+  print(addmargins(table(results_df[c("filename", "linter")])))
+  cat("\n")
+
+  print(results)
+}
+
+message(paste(issues_count, "issue(s) found.", sep = " "))
+quit(save = "no", status = if (issues_found) 1L else 0L)
diff --git a/.ci/setup.sh b/.ci/setup.sh
@@ -11,7 +11,7 @@ export JAVA_APT_PKG="oracle-java8-set-default"
 # install these testing packages we need
 if [[ "$TASK" == "rpkg" ]];
 then
-  Rscript -e "install.packages(c('data.table', 'devtools', 'futile.logger', 'knitr', 'testthat', 'rmarkdown', 'uuid'), repos = 'http://cran.rstudio.com')"
+  Rscript -e "install.packages(c('data.table', 'devtools', 'futile.logger', 'knitr', 'testthat', 'rmarkdown', 'uuid', 'lintr'), repos = 'http://cran.rstudio.com')"
   cp test-data/* r-pkg/inst/testdata/
 fi
 

diff --git a/.ci/styler.R b/.ci/styler.R
@@ -0,0 +1,65 @@
+args <- commandArgs(
+  trailingOnly = TRUE
+)
+SOURCE_DIR <- args[[1L]]
+
+excluded_files <- withr::with_dir(SOURCE_DIR
+                                  , dir(path = "./uptasticsearch.Rcheck"
+                                        , rex::rex(".", one_of("Rr"), end)
+                                        , ignore.case = TRUE
+                                        , recursive = TRUE
+                                        , full.names = TRUE
+                                        , all.files = TRUE))
+
+guide <- styler::tidyverse_style()
+
+fix_implicit_integer <- function(pd_flat) {
+  op <- pd_flat$token %in% "NUM_CONST"
+  pd_flat$text[op] <- lapply(
+    pd_flat$text[op],
+    function(x) {
+      gsub("^(\\d+)$", "\\1L", x)
+    }
+  )
+  pd_flat
+}
+
+fix_trailing_whitespace_in_comment <- function(pd_flat) {
+  comments <- pd_flat$token == "COMMENT"
+  pd_flat$text[comments] <- lapply(
+    pd_flat$text[comments],
+    function(x) sub("\\s+$", "", x)
+  )
+  pd_flat
+}
+
+fix_space_between_paren_and_brace <- function(pd_flat) {
+  brace_after <- pd_flat$token == "')'" & pd_flat$token_after == "'{'"
+  if (!any(brace_after)) {
+    return(pd_flat)
+  }
+  pd_flat$spaces[brace_after] <- pmax(pd_flat$spaces[brace_after], 1L)
+  pd_flat
+}
+
+uptasticsearch_guide <- function() {
+  styler::create_style_guide(
+    token = list(
+      fix_quotes = guide$token$fix_quotes,
+      fix_implicit_integer = fix_implicit_integer,
+      fix_trailing_whitespace_in_comment = fix_trailing_whitespace_in_comment
+    ),
+    space = list(
+      style_space_around_math_token = guide$space$style_space_around_math_token,
+      fix_space_between_paren_and_brace = fix_space_between_paren_and_brace,
+      style_space_around_tilde = guide$space$style_space_around_tilde,
+      add_space_before_brace = styler:::add_space_before_brace,
+      set_space_after_comma = styler:::set_space_after_comma,
+      set_space_around_op = function(...) styler:::set_space_around_op(strict = FALSE, ...),
+      add_space_after_for_if_while = guide$space$add_space_after_for_if_while
+    ),
+    use_raw_indention = TRUE
+  )
+}
+
+styler::style_dir(SOURCE_DIR, style = uptasticsearch_guide, exclude_files = excluded_files)
diff --git a/.ci/test.sh b/.ci/test.sh
@@ -5,6 +5,8 @@ set -e
 
 if [[ "$TASK" == "rpkg" ]]; then
   R_PACKAGE_DIR=$(pwd)/r-pkg
+  Rscript .ci/lint.R ${R_PACKAGE_DIR}
+
   R CMD build ${R_PACKAGE_DIR}
   R CMD check \
     --as-cran \

diff --git a/Makefile b/Makefile
@@ -18,6 +18,16 @@ coverage_r: build_r
 	echo "Done calculating coverage"
 	open coverage.html
 
+lint_r:
+	echo "Linting R source..."
+	Rscript .ci/lint.R $$(pwd)
+	echo "Done linting"
+
+style_r:
+	echo "Styling R source..."
+	Rscript .ci/styler.R $$(pwd)
+	echo "Done styling"
+
 docs_r: build_r
 	Rscript -e "devtools::document('r-pkg/')"
 	Rscript -e "pkgdown::build_site('r-pkg/')"

diff --git a/r-pkg/.Rbuildignore b/r-pkg/.Rbuildignore
@@ -58,3 +58,4 @@ vignettes/*\.pdf
 # Stuff
 .Rbuildignore
 .*\.gitkeep
+
diff --git a/r-pkg/DESCRIPTION b/r-pkg/DESCRIPTION
@@ -34,6 +34,7 @@ Imports:
 Suggests:
     covr,
     knitr,
+    lintr,
     rmarkdown,
     testthat
 License: BSD_3_clause + file LICENSE

diff --git a/r-pkg/R/assertions.R b/r-pkg/R/assertions.R
@@ -1,21 +1,21 @@
 
-# [title] assert_that wrapper
-# [name] assert
-# [description] When making an assertion you might call:
-#   
-#   \code{assertthat::assert_that(assertthat::is.date(x))}
-#   
-#   or something like that. This is an alias to \code{\link[assertthat]{assert_that}} to be used
-#   for two benefits: \enumerate{
-#     \item{This uses \code{\link{log_fatal}} instead of \code{\link{stop}} on failure}
-#     \item{Much less clutter in the source code}
-#   }
+#' [title] assert_that wrapper
+#' [name] assert
+#' [description] When making an assertion you might call:
+#'
+#'   \code{assertthat::assert_that(assertthat::is.date(x))}
+#'
+#'   or something like that. This is an alias to \code{\link[assertthat]{assert_that}} to be used
+#'   for two benefits: \enumerate{
+#'     \item{This uses \code{\link{log_fatal}} instead of \code{\link{stop}} on failure}
+#'     \item{Much less clutter in the source code}
+#'   }
 #' @importFrom assertthat see_if
 .assert <- function(..., msg = NULL) {
-    res <- assertthat::see_if(..., env = parent.frame(), msg = msg)
-    if (res) {
-        return(invisible(TRUE))
-    } else {
-        log_fatal(attr(res, "msg"))
-    }
+  res <- assertthat::see_if(..., env = parent.frame(), msg = msg)
+  if (res) {
+    return(invisible(TRUE))
+  } else {
+    log_fatal(attr(res, "msg"))
+  }
 }
diff --git a/r-pkg/R/chomp_aggs.R b/r-pkg/R/chomp_aggs.R
@@ -6,7 +6,7 @@
 #' @importFrom data.table as.data.table setnames setcolorder
 #' @export
 #' @param aggs_json A character vector. If its length is greater than 1, its elements will be pasted
-#'                  together. This can contain a JSON returned from an \code{aggs} query in 
+#'                  together. This can contain a JSON returned from an \code{aggs} query in
 #'                  Elasticsearch, or a filepath or URL pointing at one.
 #' @examples
 #' # A sample raw result from an aggs query combining date_histogram and extended_stats:
@@ -48,15 +48,15 @@ chomp_aggs <- function(aggs_json = NULL) {
     )
 
     # Gross special-case handler for one-level extended_stats aggregation
-    if (.IsExtendedStatsAgg(jsonList[["aggregations"]][[aggNames]])){
+    if (.IsExtendedStatsAgg(jsonList[["aggregations"]][[aggNames]])) {
         log_info("es_search is assuming that this result is a one-level 'extended_stats' result.")
-        jsonList[["aggregations"]][[1]][["std_deviation_bounds.upper"]] <- jsonList[["aggregations"]][[1]][["std_deviation_bounds"]][["upper"]]
-        jsonList[["aggregations"]][[1]][["std_deviation_bounds.lower"]] <- jsonList[["aggregations"]][[1]][["std_deviation_bounds"]][["lower"]]
-        jsonList[["aggregations"]][[1]][["std_deviation_bounds"]] <- NULL
+        jsonList[["aggregations"]][[1L]][["std_deviation_bounds.upper"]] <- jsonList[["aggregations"]][[1L]][["std_deviation_bounds"]][["upper"]]
+        jsonList[["aggregations"]][[1L]][["std_deviation_bounds.lower"]] <- jsonList[["aggregations"]][[1L]][["std_deviation_bounds"]][["lower"]]
+        jsonList[["aggregations"]][[1L]][["std_deviation_bounds"]] <- NULL
     }
 
     # Gross special-case handler for one-level percentiles aggregation
-    if (.IsPercentilesAgg(jsonList[["aggregations"]][[aggNames]])){
+    if (.IsPercentilesAgg(jsonList[["aggregations"]][[aggNames]])) {
         log_info("es_search is assuming that this result is a one-level 'percentiles' result.")
 
         # Replace names like `25.0` with something that will be easier for users to understand
@@ -66,17 +66,17 @@ chomp_aggs <- function(aggs_json = NULL) {
         jsonList[["aggregations"]][[aggNames]] <- percValues
     }
 
-    if (.IsSigTermsAgg(jsonList[["aggregations"]][[aggNames]])){
+    if (.IsSigTermsAgg(jsonList[["aggregations"]][[aggNames]])) {
         log_info("es_search is assuming that this result is a one-level 'significant terms' result.")
 
         # We can grab that nested data.frame and break out right now
         outDT <- data.table::as.data.table(jsonList[["aggregations"]][[aggNames]][["buckets"]])
-        data.table::setnames(outDT, 'key', aggNames)
+        data.table::setnames(outDT, "key", aggNames)
         return(outDT)
     }
 
     # check for an empty result
-    if (identical(jsonList[["aggregations"]][[aggNames]][["buckets"]], list())){
+    if (identical(jsonList[["aggregations"]][[aggNames]][["buckets"]], list())) {
         log_info("this aggregation result was empty. Returning NULL")
         return(invisible(NULL))
     }
@@ -85,7 +85,7 @@ chomp_aggs <- function(aggs_json = NULL) {
     outDT <- data.table::as.data.table(jsonList[["aggregations"]][[aggNames]])
 
     # Keep unpacking the nested arrays until you hit 'break'
-    while(TRUE) {
+    while (TRUE) {
         # Clean up the column names
         .clean_aggs_colnames(outDT)
 
@@ -96,7 +96,7 @@ chomp_aggs <- function(aggs_json = NULL) {
         } else {
 
             # Other bucketed aggregations (not date_histogram) will have "key"
-            if ("key" %in% names(outDT)){
+            if ("key" %in% names(outDT)) {
                 data.table::setnames(outDT, "key", aggNames[length(aggNames)])
             } else {
                 # If we get down here, we know it's not a bucketed aggregation
@@ -111,11 +111,11 @@ chomp_aggs <- function(aggs_json = NULL) {
         if (any(colTypes == "list")) {
 
             # Store the new agg name
-            aggNames[length(aggNames) + 1] <- names(colTypes[colTypes == "list"])
+            aggNames[length(aggNames) + 1L] <- names(colTypes[colTypes == "list"])
 
             # Remove unwanted columns
             badCols <- grep("doc_count", names(outDT))
-            if (length(badCols) > 0){
+            if (length(badCols) > 0L) {
                 outDT <- outDT[, !badCols, with = FALSE]
             }
 
@@ -125,7 +125,7 @@ chomp_aggs <- function(aggs_json = NULL) {
         } else {
             # Remove unwanted columns, but keep doc_count
             badCols <- base::setdiff(grep("doc_count", names(outDT), value = TRUE), "doc_count")
-            if (length(badCols) > 0) {
+            if (length(badCols) > 0L) {
                 outDT <- outDT[, !badCols, with = FALSE]
             }
             break
@@ -135,7 +135,7 @@ chomp_aggs <- function(aggs_json = NULL) {
     # Re-set the column order to mirror the way the user specified their aggs query
     # NOTE: If there's no "doc_count" in the names, we know that this was not a bucketed
     # / nested query and reordering is unnecessary
-    if ("doc_count" %in% names(outDT)){
+    if ("doc_count" %in% names(outDT)) {
         data.table::setcolorder(
             outDT,
             c(aggNames, base::setdiff(names(outDT), c(aggNames, "doc_count")), "doc_count")
@@ -160,7 +160,7 @@ chomp_aggs <- function(aggs_json = NULL) {
 #               "extended_stats" aggregation. data.table doesn't handle those
 #               in a way that's consistent with the way this package handles all other aggregations
 # [param] aggsList R list-object representation of an "aggs" result from Elasticsearch
-.IsExtendedStatsAgg <- function(aggsList){
+.IsExtendedStatsAgg <- function(aggsList) {
     statsNames <- c("count", "min", "max", "avg", "sum", "sum_of_squares"
                     , "variance", "std_deviation", "std_deviation_bounds")
 
@@ -172,16 +172,16 @@ chomp_aggs <- function(aggs_json = NULL) {
 #               "Percentiles" aggregation. data.table doesn't handle those
 #               in a way that's consistent with the way this package handles all other aggregations
 # [param] aggsList R list-object representation of an "aggs" result from Elasticsearch
-.IsPercentilesAgg <- function(aggsList){
+.IsPercentilesAgg <- function(aggsList) {
 
     # check 1 - has a single element called "values"
-    if (! identical("values", names(aggsList))){
+    if (! identical("values", names(aggsList))) {
         return(FALSE)
     }
 
     # check 2 - all names of "values" are convertible to numbers
     numNames <- as.numeric(names(aggsList[["values"]]))
-    if (all(vapply(numNames, function(val){!is.na(val)}, FUN.VALUE = TRUE))){
+    if (all(vapply(numNames, function(val) {!is.na(val)}, FUN.VALUE = TRUE))) {
         return(TRUE)
     } else {
         return(FALSE)
@@ -194,20 +194,20 @@ chomp_aggs <- function(aggs_json = NULL) {
 #               "significant terms" aggregation. data.table doesn't handle those
 #               in a way that's consistent with the way this package handles all other aggregations
 # [param] aggsList R list-object representation of an "aggs" result from Elasticsearch
-.IsSigTermsAgg <- function(aggsList){
+.IsSigTermsAgg <- function(aggsList) {
 
     # check 1 - has exactly two keys - "doc_count", "buckets"
-    if (! identical(sort(names(aggsList)), c('buckets', 'doc_count'))){
+    if (! identical(sort(names(aggsList)), c("buckets", "doc_count"))) {
         return(FALSE)
     }
 
     # check 2 - "buckets" is a data.frame
-    if (!is.data.frame(aggsList[['buckets']])){
+    if (!is.data.frame(aggsList[["buckets"]])) {
         return(FALSE)
     }
 
     # check 3 - "buckets" has at least the columns "key", "doc_count", and "bg_count"
-    if (!all(c('key', 'doc_count', 'bg_count') %in% names(aggsList[['buckets']]))){
+    if (!all(c("key", "doc_count", "bg_count") %in% names(aggsList[["buckets"]]))) {
         return(FALSE)
     }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -58,3 +58,4 @@ vignettes/*\.pdf
		# Stuff
		.Rbuildignore
		.*\.gitkeep